# 4.0 QA Crosswalk Results - X310 Abdominal Ultrasound

Quality assurance for abdominal ultrasound billing code crosswalk outputs.

**Workflow**
1. Upload crosswalk results
2. Clean and standardize data
3. Run QA classification (HIGH/MEDIUM/LOW)
4. Export branded results

---
## Step 1: Setup

In [None]:
!pip install openai pandas openpyxl tqdm xlsxwriter -q

import pandas as pd
import re
import json
from tqdm.notebook import tqdm
from google.colab import files
from IPython.display import HTML, display

# ============================================================================
# HELPSEEKER BRAND COLOURS
# ============================================================================
# HelpSeeker palette: colour name -> hex code, shared by the Excel export and
# the HTML results table.
BRAND = dict(
    deep_navy='#0B1F33',
    midnight_blue='#102A43',
    teal_core='#0FB9B1',
    aqua_light='#4FD1C5',
    slate_blue='#1E3A5F',
    white='#FFFFFF',
    near_black='#0A0A0A',
)

# Brand colour assigned to each QA confidence tier.
QA_COLORS = dict(zip(
    ('HIGH', 'MEDIUM', 'LOW'),
    (BRAND['teal_core'], BRAND['aqua_light'], BRAND['slate_blue']),
))

print("Dependencies loaded.\nHelpSeeker brand palette configured.")

---
## Step 2: Alberta Code Reference

Definition of the Alberta source billing code (X310) that each target-province code is compared against during QA.

In [None]:
# ============================================================================
# ALBERTA CODE REFERENCE (X310)
# ============================================================================
# This defines the source code for QA comparison.
# Update this if QA'ing a different Alberta code.
# ============================================================================

# Reference record for the Alberta source code that crosswalk rows are QA'd
# against. build_qa_prompt interpolates 'code', 'description', 'fee' and
# 'clinical_definition' into the model prompt; 'category' and
# 'key_attributes' are echoed by the summary printout that follows this dict.
ALBERTA_CODE_REF = {
    'code': 'X310',
    'description': 'Ultrasound, abdominal, complete or at least two abdominal organs',
    'fee': 171.68,  # shown with a "$" prefix in both the printout and the prompt
    'category': 'T Test (Diagnostic)',
    # Free-text billing rules; embedded verbatim in the QA prompt, so any edit
    # here changes what the model is told about the source code.
    'clinical_definition': """Complete abdominal ultrasound examination OR ultrasound of at least two abdominal organs.

DIAGNOSTIC ULTRASOUND RULES:
- Includes Doppler colour mapping
- Quantitative spectral analysis with directional flow and/or Doppler measurements (HSC X337) may be claimed IN ADDITION to this code
- May NOT be claimed same day as X311 (limited abdominal) or X312 by same or different physician in same location

DIAGNOSTIC RADIOLOGY RULES:
- Physician must be CPSA-approved to provide diagnostic radiology services (G.R. 11.1.1)

FEE MODIFIERS:
- AGE modifier L13: 30% increase for patients 12 years and younger (base increases to 130% = $223.18)

SAME-DAY EXCLUSIONS:
- X311 (limited abdominal ultrasound)
- X312
- Cannot be claimed by same or different physician in same location on same day

SCOPE OF EXAMINATION:
- Complete abdominal scan typically includes: liver, gallbladder, bile ducts, pancreas, spleen, kidneys, aorta, IVC
- OR at minimum two abdominal organs""",
    # NOTE(review): 'service_type' is not read by any other cell visible in
    # this notebook; presumably kept for human reference only.
    'service_type': 'Diagnostic imaging procedure (ultrasound)',
    # Short human-readable checklist, printed one bullet per entry.
    'key_attributes': [
        'Diagnostic imaging procedure (NOT a visit/consultation)',
        'Abdominal ultrasound (complete or multi-organ)',
        'Includes Doppler colour mapping',
        'Pediatric age premium (12 years and younger)',
        'Requires CPSA approval for diagnostic radiology',
    ]
}

# Echo the reference record so the operator can confirm which Alberta code
# this QA run compares against.
print("="*70)
print("ALBERTA CODE REFERENCE")
print("="*70)
print(f"\nCode: {ALBERTA_CODE_REF['code']}")
print(f"Description: {ALBERTA_CODE_REF['description']}")
# Two decimals so a fee such as 171.5 is shown as a dollar amount ($171.50).
print(f"Fee: ${ALBERTA_CODE_REF['fee']:.2f}")
print(f"Category: {ALBERTA_CODE_REF['category']}")
print("\nKey Attributes:")
for attr in ALBERTA_CODE_REF['key_attributes']:
    # The bullet is written as an escape: the literal character had been
    # corrupted into mojibake by an encoding round-trip.
    print(f"  \u2022 {attr}")

---
## Step 3: Upload Crosswalk File

In [None]:
print("="*70)
print("UPLOAD CROSSWALK RESULTS")
print("="*70)
print("\nUpload your crosswalk Excel file")
print()

# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Cancelling the dialog leaves the mapping empty; fail with a clear
    # message instead of an IndexError on the first-key lookup.
    raise ValueError("No file was uploaded. Re-run this cell and select the crosswalk Excel file.")

# Only the first uploaded file is processed.
INPUT_FILE = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"\nNote: {len(uploaded)} files uploaded; only '{INPUT_FILE}' will be used.")

df = pd.read_excel(INPUT_FILE)

print(f"\nLoaded: {INPUT_FILE}")
print(f"Rows: {len(df)}")
print(f"Columns: {len(df.columns)}")

---
## Step 4: Clean Data

In [None]:
print("="*70)
print("CLEANING DATA")
print("="*70)

# Leading list numbering such as "7.", "10." or "11a.". The negative lookahead
# stops the pattern from eating the integer part of a decimal or a dotted
# section number ("2.5 cm", "7.1 Overview"), which would corrupt the text.
_NUMBERING_PREFIX = re.compile(r'^\d+[a-z]?\.(?!\d)\s*')


def _strip_numbering(text):
    """Return *text* trimmed, with any leading list-numbering prefix removed."""
    return _NUMBERING_PREFIX.sub('', text.strip()).strip()


def clean_text(text):
    """Remove a leading numbering prefix and capitalize the first letter.

    Args:
        text: A cell value. Anything that is not a ``str`` (None, NaN,
            numbers) is returned unchanged.

    Returns:
        The trimmed text without its numbering prefix and with its first
        character upper-cased; the remainder of the text keeps its case.
    """
    # The isinstance check alone covers None/NaN, so no pandas null test is
    # needed (and array-like values can no longer raise on truthiness).
    if not isinstance(text, str):
        return text

    text = _strip_numbering(text)
    # Slicing keeps this safe for empty and single-character strings.
    return text[:1].upper() + text[1:]


def clean_section(text):
    """Remove a leading numbering prefix from a section name.

    Args:
        text: A cell value. Anything that is not a ``str`` is returned
            unchanged.

    Returns:
        The trimmed section name without its numbering prefix; case is left
        untouched.
    """
    if not isinstance(text, str):
        return text
    return _strip_numbering(text)

import os

# Clean columns
text_cols = ['Description', 'AB_Description', 'Reasoning', 'additional_notes']
section_cols = ['Level_1_Section', 'Level_2_Subsection', 'Level_3_Heading', 'Specialty']

# Free-text columns get prefix stripping plus capitalization; section columns
# get prefix stripping only. Columns absent from the upload are skipped.
for cols, cleaner in ((text_cols, clean_text), (section_cols, clean_section)):
    for col in cols:
        if col in df.columns:
            df[col] = df[col].apply(cleaner)
            print(f"  Cleaned: {col}")

# Standardize case. Only string cells are converted: the .str accessor would
# replace non-string cells with NaN and raises on a non-object column
# (for example one that is entirely empty).
case_rules = {
    'Type': str.upper,
    'Modality': str.lower,
    'Target_Province': str.upper,
}
for col, convert in case_rules.items():
    if col in df.columns:
        df[col] = df[col].apply(
            lambda value, convert=convert: convert(value) if isinstance(value, str) else value
        )

print(f"\nCleaning complete. Rows: {len(df)}")

# Save cleaned file. The name is derived from the stem rather than a
# '.xlsx' substring replace, which is a no-op for '.xls' / '.XLSX' uploads and
# would make the cleaned file overwrite the original upload.
stem, _ext = os.path.splitext(INPUT_FILE)
cleaned_file = f"{stem}_CLEANED.xlsx"
df.to_excel(cleaned_file, index=False)
print(f"Saved: {cleaned_file}")
files.download(cleaned_file)

---
## Step 5: API Key

In [None]:
# Prefer leaving this blank and using the prompt: a key pasted here is saved
# with the notebook and travels with every shared copy.
OPENAI_API_KEY = ""  # Paste key here or leave blank for prompt

if not OPENAI_API_KEY.strip():
    from getpass import getpass
    OPENAI_API_KEY = getpass("Enter OpenAI API Key: ")

# Pasted keys often carry stray whitespace or a trailing newline.
OPENAI_API_KEY = OPENAI_API_KEY.strip()
if not OPENAI_API_KEY:
    # Stop here: run_qa_batch catches and prints per-call errors, so a missing
    # key would otherwise surface only as a run of failed batches.
    raise ValueError("No OpenAI API key provided. Re-run this cell and enter a key.")

from openai import OpenAI
client = OpenAI(api_key=OPENAI_API_KEY)

print("API client initialized.")

---
## Step 6: QA Classification

In [None]:
print("="*70)
print("QA CLASSIFICATION")
print("="*70)

# Running totals for the whole QA pass, updated by track_cost().
total_cost = 0.0
total_calls = 0


def track_cost(inp, out, *, input_rate=3.0, output_rate=15.0):
    """Add one API call's estimated cost to the running totals.

    Args:
        inp: Number of prompt (input) tokens used by the call.
        out: Number of completion (output) tokens used by the call.
        input_rate: Price per one million input tokens.
        output_rate: Price per one million output tokens.

    Side effects:
        Increases the module-level ``total_cost`` and increments
        ``total_calls`` by one.
    """
    # NOTE(review): the default rates are the values previously hard-coded
    # here; confirm they match current pricing for the model named in
    # run_qa_batch before relying on the reported cost.
    global total_cost, total_calls
    total_cost += (inp / 1e6) * input_rate + (out / 1e6) * output_rate
    total_calls += 1

def build_qa_prompt(rows_batch):
    """Build the abdominal-ultrasound QA prompt for one batch of rows.

    Args:
        rows_batch: DataFrame slice of crosswalk rows. Each row is listed
            under its index label ("ROW <label>"), which the model is asked
            to echo back as ``row_index``. Missing columns are shown as
            "N/A".

    Returns:
        The prompt string: the Alberta reference definition from
        ``ALBERTA_CODE_REF``, the rows to evaluate, the HIGH/MEDIUM/LOW
        criteria and the required JSON output shape.

    Note:
        The source code, description, fee and clinical definition come from
        ``ALBERTA_CODE_REF``; the matching and classification criteria are
        written specifically for abdominal ultrasound.
    """
    ref = ALBERTA_CODE_REF

    # (label shown to the model, DataFrame column it is read from)
    row_fields = (
        ('Province', 'Target_Province'),
        ('Code', 'Code'),
        ('Description', 'Description'),
        ('Fee', 'Fee'),
        ('Type', 'Type'),
        ('Modality', 'Modality'),
        ('Reasoning', 'Reasoning'),
    )

    def format_row(idx, row):
        lines = [f"ROW {idx}:"]
        lines.extend(f"  {label}: {row.get(column, 'N/A')}" for label, column in row_fields)
        return "\n" + "\n".join(lines) + "\n"

    # Join once instead of growing a string inside the loop.
    rows_text = "".join(format_row(idx, row) for idx, row in rows_batch.iterrows())

    return f"""You are a senior physician billing specialist performing quality assurance on a diagnostic imaging billing code crosswalk.

ALBERTA SOURCE CODE: {ref['code']}
Description: {ref['description']}
Fee: ${ref['fee']}

CLINICAL DEFINITION:
{ref['clinical_definition']}

KEY MATCHING CRITERIA:
1. Must be ABDOMINAL ULTRASOUND (not other body regions)
2. Must be COMPLETE or MULTI-ORGAN examination (at least 2 abdominal organs)
3. Should be DIAGNOSTIC imaging (not interventional/guided procedures)
4. Doppler/colour flow inclusion is a bonus match indicator
5. Pediatric age modifiers should be noted if present

TARGET CODES TO EVALUATE:
{rows_text}

CLASSIFICATION CRITERIA:

**HIGH** - Strong crosswalk match:
- Complete abdominal ultrasound or multi-organ abdominal scan
- Covers similar anatomical scope (liver, gallbladder, pancreas, spleen, kidneys, etc.)
- Diagnostic ultrasound (not procedure/guidance)
- Similar clinical scope to {ref['code']}
- Includes or compatible with Doppler imaging

**MEDIUM** - Partial match with caveats:
- Abdominal ultrasound but limited scope (fewer organs than complete)
- Regional abdominal ultrasound (e.g., upper abdomen only, RUQ only)
- Similar service but significantly different fee structure
- Add-on code that supplements a primary abdominal ultrasound match
- Doppler-specific codes that may apply to abdominal imaging

**LOW** - Weak or incorrect match:
- Single organ ultrasound only (e.g., gallbladder only, kidney only)
- Non-abdominal ultrasound (pelvic, obstetric, cardiac, vascular, MSK)
- Ultrasound-guided procedures (biopsy, drainage, injection)
- CT or MRI codes (wrong imaging modality)
- X-ray or fluoroscopy codes
- Unrelated diagnostic service

Return JSON array:
[
  {{
    "row_index": <integer>,
    "qa_confidence": "HIGH|MEDIUM|LOW",
    "target_code": "the code being evaluated",
    "qa_rationale": "1-2 sentence explanation"
  }}
]

JSON only:"""

def run_qa_batch(rows_batch):
    """Send one batch of rows to the model and return its parsed verdicts.

    Args:
        rows_batch: DataFrame slice passed to ``build_qa_prompt``.

    Returns:
        A list of dicts parsed from the JSON array in the model's reply.
        Items that are not JSON objects are dropped. An empty list is
        returned when the call fails, the reply contains no JSON array, or
        the array cannot be parsed.
    """
    prompt = build_qa_prompt(rows_batch)

    try:
        resp = client.chat.completions.create(
            model="gpt-4.1-2025-04-14",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_completion_tokens=2000
        )
        track_cost(resp.usage.prompt_tokens, resp.usage.completion_tokens)

        # The message content can be None; treat that as an empty reply.
        content = resp.choices[0].message.content or ""
        # Greedy match from the first '[' to the last ']' so prose or code
        # fences around the array are ignored.
        match = re.search(r'\[.*\]', content, re.DOTALL)
        if not match:
            print("  WARNING: no JSON array found in model response; batch skipped.")
            return []
        parsed = json.loads(match.group())
    except Exception as e:
        # Deliberately broad: one failed batch (API error, malformed JSON)
        # must not abort the whole run. Affected rows stay unclassified.
        print(f"  ERROR: {e}")
        return []

    # Callers use .get() on every item, so keep JSON objects only.
    return [item for item in parsed if isinstance(item, dict)]

# Initialize QA columns
df['QA_Confidence'] = ''
df['QA_Rationale'] = ''

# Process in batches of 5
batch_size = 5
total_batches = (len(df) + batch_size - 1) // batch_size

print(f"\nRows to QA: {len(df)}")
print(f"Batch size: {batch_size} rows per API call")
print(f"Estimated API calls: {total_batches}")

valid_levels = {'HIGH', 'MEDIUM', 'LOW'}

for batch_num in tqdm(range(total_batches), desc="QA Progress"):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, len(df))

    batch_df = df.iloc[start_idx:end_idx]
    results = run_qa_batch(batch_df)

    # The prompt labels rows by index label, so only labels that belong to
    # this batch are accepted; a stray index cannot overwrite another row.
    batch_labels = set(batch_df.index)

    for result in results:
        if not isinstance(result, dict):
            continue

        # The model may return the index as a string or omit it entirely.
        try:
            row_idx = int(result.get('row_index'))
        except (TypeError, ValueError):
            continue
        if row_idx not in batch_labels:
            continue

        # Normalize so "High" / " high " count as HIGH; anything outside the
        # three levels is left blank rather than stored as an unknown label.
        confidence = str(result.get('qa_confidence') or '').strip().upper()
        if confidence not in valid_levels:
            confidence = ''

        df.at[row_idx, 'QA_Confidence'] = confidence
        df.at[row_idx, 'QA_Rationale'] = result.get('qa_rationale', '')

print("\nQA Complete")
print(f"API calls: {total_calls}")
print(f"Cost: ${total_cost:.2f}")

unclassified = int((df['QA_Confidence'] == '').sum())
if unclassified:
    print(f"Rows without a QA result: {unclassified}")

print("\n--- QA Summary ---")
print(df['QA_Confidence'].value_counts())

---
## Step 7: Export Branded Results

In [None]:
print("="*70)
print("EXPORT BRANDED RESULTS")
print("="*70)

# ============================================================================
# COLUMN ORDERING
# ============================================================================
# 1. Alberta source columns first
# 2. Target province key info
# 3. QA results
# 4. Other columns (Setting, Premiums, Fee_Type before additional_notes)
# 5. additional_notes LAST
# ============================================================================

ab_cols = ['AB_Code', 'AB_Description', 'AB_Fee']
key_cols = ['Target_Province', 'Code', 'Description', 'Fee', 'Type', 'Modality']
qa_cols = ['QA_Confidence', 'QA_Rationale']
late_cols = ['Setting', 'Fee_Type', 'premium_extended_hours', 'premium_location', 'premium_age', 'premium_other']
last_col = ['additional_notes']

# Build column order: leading groups first, keeping only columns that exist.
col_order = [c for c in ab_cols + key_cols + qa_cols if c in df.columns]

# Add remaining columns except late_cols and last_col
used_cols = set(col_order + late_cols + last_col)
col_order += [c for c in df.columns if c not in used_cols]

# Add late columns (Setting, Fee_Type, Premiums), then additional_notes last
col_order += [c for c in late_cols + last_col if c in df.columns]

df_final = df[col_order].copy()

# Sort by province, then confidence, then code. Each key is used only when
# its column exists, matching the "if present" handling of these columns in
# the rest of the notebook instead of raising KeyError.
conf_order = {'HIGH': 0, 'MEDIUM': 1, 'LOW': 2, '': 3}
sort_keys = []
if 'Target_Province' in df_final.columns:
    sort_keys.append('Target_Province')
if 'QA_Confidence' in df_final.columns:
    # Temporary rank column so confidence sorts HIGH -> MEDIUM -> LOW -> blank.
    df_final['_sort'] = df_final['QA_Confidence'].map(conf_order)
    sort_keys.append('_sort')
if 'Code' in df_final.columns:
    sort_keys.append('Code')

if sort_keys:
    df_final = df_final.sort_values(sort_keys)
df_final = df_final.drop(columns=['_sort'], errors='ignore')
df_final = df_final.reset_index(drop=True)

import os

# Save with brand formatting. The name is derived from the stem rather than a
# '.xlsx' substring replace, which is a no-op for '.xls' / '.XLSX' uploads and
# would make the export overwrite the uploaded file.
stem, _ext = os.path.splitext(INPUT_FILE)
final_file = f"{stem}_QA_FINAL.xlsx"

with pd.ExcelWriter(final_file, engine='xlsxwriter') as writer:
    df_final.to_excel(writer, sheet_name='QA Results', index=False)

    workbook = writer.book
    worksheet = writer.sheets['QA Results']

    # HelpSeeker brand formats
    header_fmt = workbook.add_format({
        'bold': True,
        'font_name': 'Lato',
        'font_size': 11,
        'bg_color': BRAND['deep_navy'],
        'font_color': BRAND['white'],
        'border': 1,
        'align': 'center',
        'valign': 'vcenter',
    })

    high_fmt = workbook.add_format({
        'font_name': 'Lato',
        'bg_color': '#D4F5F3',  # Light teal
        'font_color': BRAND['near_black'],
    })

    medium_fmt = workbook.add_format({
        'font_name': 'Lato',
        'bg_color': '#E8F8F7',  # Lighter aqua
        'font_color': BRAND['near_black'],
    })

    low_fmt = workbook.add_format({
        'font_name': 'Lato',
        'bg_color': '#F0F4F8',  # Light slate
        'font_color': BRAND['slate_blue'],
    })

    # Re-write the header row so it carries the brand format.
    for col_num, value in enumerate(df_final.columns.values):
        worksheet.write(0, col_num, value, header_fmt)

    # Set column widths (columns not listed fall back to 15)
    col_widths = {
        'AB_Code': 10,
        'AB_Description': 25,
        'AB_Fee': 10,
        'Target_Province': 12,
        'Code': 12,
        'Description': 50,
        'Fee': 10,
        'Type': 10,
        'Modality': 12,
        'QA_Confidence': 14,
        'QA_Rationale': 60,
        'Setting': 12,
        'Fee_Type': 12,
        'additional_notes': 50,
    }

    for col_idx, col_name in enumerate(df_final.columns):
        worksheet.set_column(col_idx, col_idx, col_widths.get(col_name, 15))

    # Conditional formatting for QA_Confidence column. Zero-based row/column
    # coordinates are used instead of an "A1" letter so the right column is
    # formatted even when it sits beyond column Z. Skipped for an empty frame,
    # where the data range would be inverted.
    if 'QA_Confidence' in df_final.columns and len(df_final) > 0:
        qa_col_idx = list(df_final.columns).index('QA_Confidence')
        first_row, last_row = 1, len(df_final)  # row 0 is the header

        for level, fmt in (('HIGH', high_fmt), ('MEDIUM', medium_fmt), ('LOW', low_fmt)):
            worksheet.conditional_format(first_row, qa_col_idx, last_row, qa_col_idx, {
                'type': 'text',
                'criteria': 'containing',
                'value': level,
                'format': fmt
            })

    # Freeze header row
    worksheet.freeze_panes(1, 0)

print(f"\nSaved: {final_file}")
print(f"Rows: {len(df_final)}")

# Show column order (first 15 columns, then a count of the rest)
print("\n--- Column Order ---")
for i, col in enumerate(df_final.columns[:15], 1):
    print(f"  {i}. {col}")
if len(df_final.columns) > 15:
    print(f"  ... and {len(df_final.columns) - 15} more")

# Summary stats
print("\n--- By Confidence ---")
total_rows = len(df_final)
for conf in ['HIGH', 'MEDIUM', 'LOW']:
    count = int((df_final['QA_Confidence'] == conf).sum())
    pct = count / total_rows * 100 if total_rows > 0 else 0
    print(f"  {conf}: {count} ({pct:.1f}%)")

print("\n--- By Province ---")
if 'Target_Province' in df_final.columns:
    # Blank provinces are dropped before sorting (NaN cannot be ordered
    # against strings) and reported on their own line instead.
    provinces = df_final['Target_Province'].dropna().unique()
    for prov in sorted(provinces, key=str):
        prov_df = df_final[df_final['Target_Province'] == prov]
        levels = prov_df['QA_Confidence']
        high_n = int((levels == 'HIGH').sum())
        medium_n = int((levels == 'MEDIUM').sum())
        low_n = int((levels == 'LOW').sum())
        print(f"  {prov}: {len(prov_df)} total (H:{high_n} M:{medium_n} L:{low_n})")

    missing_prov = int(df_final['Target_Province'].isna().sum())
    if missing_prov:
        print(f"  (no province): {missing_prov} total")
else:
    print("  Target_Province column not present")

files.download(final_file)

---
## Step 8: Display Results Table

In [None]:
# Display branded HTML table
def style_qa_table(df_display):
    """Apply HelpSeeker brand styling to a results table.

    Args:
        df_display: DataFrame to render. If it has a ``QA_Confidence``
            column, its cells are shaded by confidence level.

    Returns:
        A pandas ``Styler`` with brand fonts, header colours and, when the
        column is present, per-cell confidence highlighting.
    """
    confidence_css = {
        'HIGH': f'background-color: #D4F5F3; color: {BRAND["near_black"]}; font-weight: bold;',
        'MEDIUM': f'background-color: #E8F8F7; color: {BRAND["near_black"]};',
        'LOW': f'background-color: #F0F4F8; color: {BRAND["slate_blue"]};',
    }

    def highlight_confidence(val):
        # Any value other than the three levels gets no extra styling.
        if isinstance(val, str):
            return confidence_css.get(val, '')
        return ''

    styled = df_display.style

    # Highlight only when the column exists, so a filtered column list does
    # not raise KeyError.
    if 'QA_Confidence' in df_display.columns:
        # Styler.applymap was renamed to Styler.map in newer pandas; prefer
        # the new name and fall back on versions that lack it.
        elementwise = getattr(styled, 'map', None) or styled.applymap
        styled = elementwise(highlight_confidence, subset=['QA_Confidence'])

    styled = styled.set_properties(**{
        'font-family': 'Lato, sans-serif',
        'font-size': '12px',
        'text-align': 'left',
        'padding': '8px',
    }).set_table_styles([
        {'selector': 'th', 'props': [
            ('background-color', BRAND['deep_navy']),
            ('color', BRAND['white']),
            ('font-family', 'Lato, sans-serif'),
            ('font-weight', 'bold'),
            ('font-size', '12px'),
            ('padding', '10px'),
            ('text-align', 'left'),
        ]},
        {'selector': 'td', 'props': [
            # The trailing "20" is appended to the hex colour as alpha digits.
            ('border-bottom', f'1px solid {BRAND["slate_blue"]}20'),
        ]},
    ])

    return styled

# Columns for the on-screen summary, limited to those present in df_final.
display_cols = [
    name
    for name in (
        'Target_Province', 'Code', 'Description', 'Fee',
        'Modality', 'QA_Confidence', 'QA_Rationale',
    )
    if name in df_final.columns
]

# Banner followed by one blank line.
print("=" * 70, "FINAL QA RESULTS", "=" * 70, "", sep="\n")

# Left as the cell's final expression so the notebook renders the styled table.
style_qa_table(df_final[display_cols])