# 4.0 QA Crosswalk Results

**Quality Assurance notebook for crosswalk outputs.**

## Workflow
1. Upload crosswalk results file
2. Clean data (capitalization, remove numbering)
3. Save cleaned file
4. Run QA classification (HIGH/MEDIUM/LOW confidence)
5. Export final QA'd results

---
## Step 1: Setup

In [None]:
!pip install openai pandas openpyxl tqdm -q

import pandas as pd
import re
import json
from tqdm.notebook import tqdm
from google.colab import files

print("Dependencies loaded.")
print("Ready to proceed.")

---
## Step 2: Upload Crosswalk File

In [None]:
# Step 2 cell: prompt for an upload, load the workbook into `df`, and print
# a short overview (row/column counts, column names, first rows).
print("="*70)
print("UPLOAD CROSSWALK RESULTS FILE")
print("="*70)
print("\nUpload your crosswalk Excel file (e.g., 3.03_All_Province_*.xlsx)")
print()

uploaded = files.upload()

# Nothing selected -> fail with an actionable message instead of the bare
# IndexError the first-key lookup would otherwise raise.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and select a crosswalk Excel file."
    )

# Only one workbook is processed per run: the first uploaded name is used.
INPUT_FILE = next(iter(uploaded))
if len(uploaded) > 1:
    print(f"NOTE: {len(uploaded)} files uploaded; using only: {INPUT_FILE}")

# Load the file
df = pd.read_excel(INPUT_FILE)

print(f"\n{'='*70}")
print(f"Loaded: {INPUT_FILE}")
print(f"{'='*70}")
print(f"\nRows: {len(df)}")
print(f"Columns: {len(df.columns)}")
print(f"\nColumn names:")
for col in df.columns:
    print(f"  - {col}")

# Show first few rows (last expression of the cell, so the notebook renders it)
print(f"\n{'='*70}")
print("SAMPLE DATA (first 3 rows):")
print("="*70)
df.head(3)

---
## Step 3: Clean Data

In [None]:
# Banner announcing the start of the cleaning step.
print("="*70)
print("CLEANING DATA")
print("="*70)

def clean_text(text):
    """Strip a leading numbering prefix and capitalize the first character.

    Args:
        text: A cell value. Anything that is not a ``str`` (NaN, None,
            numbers, ...) is returned unchanged.

    Returns:
        The cleaned string with surrounding whitespace removed, or the
        original value when it is not a string.
    """
    # Non-strings (including NaN/None) pass through untouched.
    if not isinstance(text, str):
        return text

    text = text.strip()

    # Remove ONE list-style prefix such as "7.", "10." or "10b.".
    # The negative lookahead keeps leading decimals intact, so
    # "2.5 cm lesion" is not truncated to "5 cm lesion".
    text = re.sub(r'^\d+[a-z]?\.(?!\d)\s*', '', text)

    # Capitalize only the first character; the rest of the text is kept as-is.
    return text[:1].upper() + text[1:]

def clean_section_name(text):
    """Strip a leading numbering prefix from a section name.

    Capitalization is left exactly as found; only the prefix and the
    surrounding whitespace are removed.

    Args:
        text: A cell value. Anything that is not a ``str`` (NaN, None,
            numbers, ...) is returned unchanged.

    Returns:
        The section name without its numbering prefix, or the original
        value when it is not a string.
    """
    # Non-strings (including NaN/None) pass through untouched.
    if not isinstance(text, str):
        return text

    # Remove ONE list-style prefix such as "7.", "10." or "10b.".
    # The negative lookahead keeps leading decimals (e.g. "2.5 ...") intact.
    return re.sub(r'^\d+[a-z]?\.(?!\d)\s*', '', text.strip())

# Store original for comparison
df_original = df.copy()


def _apply_to_strings(series, func):
    """Apply *func* to string cells only; NaN and other values are kept as-is."""
    return series.apply(lambda value: func(value) if isinstance(value, str) else value)


# Free-text columns handled by clean_text (prefix removal + first-letter capital).
for col in ['Description', 'AB_Description']:
    if col in df.columns:
        df[col] = df[col].apply(clean_text)
        print(f"  - Cleaned: {col}")

# Section/heading columns: prefix removal only.
section_cols = ['Level_1_Section', 'Level_2_Subsection', 'Level_3_Heading', 'Specialty']
for col in section_cols:
    if col in df.columns:
        df[col] = df[col].apply(clean_section_name)
        print(f"  - Cleaned: {col}")

# Remaining free-text columns.
for col in ['Reasoning', 'additional_notes']:
    if col in df.columns:
        df[col] = df[col].apply(clean_text)
        print(f"  - Cleaned: {col}")

# Case standardization. Done per cell rather than through the `.str` accessor
# so a column without any strings (e.g. entirely empty, read as float) does
# not raise, and non-string cells are preserved instead of becoming NaN.
case_rules = [
    ('Type', str.upper, 'uppercase'),
    ('Modality', str.lower, 'lowercase'),
    ('Target_Province', str.upper, 'uppercase'),
]
for col, change_case, label in case_rules:
    if col in df.columns:
        df[col] = _apply_to_strings(df[col], change_case)
        print(f"  - Standardized: {col} ({label})")

print(f"\n{'='*70}")
print("CLEANING COMPLETE")
print("="*70)
print(f"\nRows: {len(df)}")

# Show sample of cleaned data, limited to the columns that actually exist
# (every other column access in this cell is guarded the same way).
print(f"\nSample cleaned data:")
sample_cols = [c for c in ['Code', 'Description', 'Fee', 'Type'] if c in df.columns]
df[sample_cols].head(3)

---
## Step 4: Save Cleaned File

In [None]:
# Step 4 cell: write the cleaned DataFrame to a new workbook and download it.
import os

# Build the output name from the input's stem rather than str.replace():
# replace() leaves names such as "data.xls" or "DATA.XLSX" unchanged, which
# would make the "cleaned" file overwrite the uploaded one.
input_stem, _input_ext = os.path.splitext(INPUT_FILE)
cleaned_filename = f"{input_stem}_CLEANED.xlsx"

# Save cleaned file
df.to_excel(cleaned_filename, index=False)

print("="*70)
print("CLEANED FILE SAVED")
print("="*70)
print(f"\nSaved: {cleaned_filename}")
print(f"Rows: {len(df)}")

# Download cleaned file
files.download(cleaned_filename)

---
## Step 5: API Key

In [None]:
# Step 5 cell: obtain the OpenAI API key and create the client used by the
# QA step.
print("="*70)
print("API KEY")
print("="*70)

# NOTE: a key pasted here is stored in the notebook file; clear it before
# sharing or committing the notebook. Leaving it blank prompts via getpass.
OPENAI_API_KEY = ""  # <-- Paste your key here, or leave blank to use getpass

if not OPENAI_API_KEY:
    from getpass import getpass
    OPENAI_API_KEY = getpass("Enter OpenAI API Key: ")

# Pasted keys often carry stray whitespace or a trailing newline.
OPENAI_API_KEY = OPENAI_API_KEY.strip()
if not OPENAI_API_KEY:
    raise ValueError("No OpenAI API key provided; cannot initialize the client.")

from openai import OpenAI
client = OpenAI(api_key=OPENAI_API_KEY)

# The original message contained a mis-encoded check mark ("âœ“").
print("\n✓ API client initialized.")

---
## Step 6: QA Classification

Classify each code match as a **HIGH**-, **MEDIUM**-, or **LOW**-confidence crosswalk match.

In [None]:
print("="*70)
print("QA CLASSIFICATION")
print("="*70)

# Cost tracking: running totals updated by track_cost() after each API call.
total_cost = 0.0
total_calls = 0

def track_cost(inp_tokens, out_tokens, inp_rate=3.0, out_rate=15.0):
    """Add one API call's estimated cost to the running totals.

    Args:
        inp_tokens: Number of prompt tokens billed for the call.
        out_tokens: Number of completion tokens billed for the call.
        inp_rate: Price in dollars per one million prompt tokens.
        out_rate: Price in dollars per one million completion tokens.

    Side effects:
        Increments the module-level ``total_cost`` and ``total_calls``.
    """
    # NOTE(review): the default rates are the values previously hard-coded
    # here; confirm they match the pricing of the model actually used.
    global total_cost, total_calls
    total_cost += (inp_tokens / 1e6) * inp_rate + (out_tokens / 1e6) * out_rate
    total_calls += 1

def build_qa_prompt(rows_batch, ab_code, ab_description):
    """Assemble the QA prompt for one batch of candidate target codes.

    Args:
        rows_batch: DataFrame slice; each row is rendered as a "ROW <index>"
            section using its index label. Missing columns show as "N/A".
        ab_code: Alberta source code shown at the top of the prompt.
        ab_description: Description of the Alberta source code.

    Returns:
        The full prompt text, ending with the JSON-only instruction.
    """

    def render_row(label, record):
        # One bullet list per candidate row, keyed by its DataFrame index label.
        return f"""
ROW {label}:
- Target Province: {record.get('Target_Province', 'N/A')}
- Code: {record.get('Code', 'N/A')}
- Description: {record.get('Description', 'N/A')}
- Fee: {record.get('Fee', 'N/A')}
- Type: {record.get('Type', 'N/A')}
- Modality: {record.get('Modality', 'N/A')}
- Reasoning: {record.get('Reasoning', 'N/A')}
"""

    rows_text = "".join(
        render_row(label, record) for label, record in rows_batch.iterrows()
    )

    return f"""You are a senior physician billing specialist performing quality assurance on a billing code crosswalk.

ALBERTA SOURCE CODE:
- Code: {ab_code}
- Description: {ab_description}

TARGET CODES TO EVALUATE:
{rows_text}

TASK:
For each row, evaluate how well the target code matches the Alberta source code and assign a confidence level:

- **HIGH**: Direct equivalent - same clinical service, similar fee structure, clear match
- **MEDIUM**: Partial match - related service but different scope, conditions, or fee structure
- **LOW**: Weak match - tangentially related, likely not a true crosswalk equivalent

IMPORTANT: Consider:
1. Clinical equivalence (same procedure/service?)
2. Fee reasonableness (similar value?)
3. Modality match (if applicable)
4. Any conditions or restrictions that affect equivalence

Return JSON array with one object per row:
[
  {{
    "row_index": <integer>,
    "qa_confidence": "HIGH|MEDIUM|LOW",
    "qa_rationale": "Brief explanation (1-2 sentences)"
  }}
]

JSON only:"""

def run_qa_batch(rows_batch, ab_code, ab_description):
    """Send one batch of rows to the model and return its parsed verdicts.

    Args:
        rows_batch: DataFrame slice of candidate rows to evaluate.
        ab_code: Alberta source code the candidates are compared against.
        ab_description: Description of the Alberta source code.

    Returns:
        A list of dicts parsed from the JSON array in the model's reply.
        Returns an empty list when the call fails, the reply is empty, no
        JSON array is found, or the JSON cannot be parsed; each of these
        cases prints an "ERROR" line so missing results are visible.
    """
    prompt = build_qa_prompt(rows_batch, ab_code, ab_description)

    try:
        resp = client.chat.completions.create(
            model="gpt-5.1-2025-11-13",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_completion_tokens=2000
        )
        track_cost(resp.usage.prompt_tokens, resp.usage.completion_tokens)

        content = resp.choices[0].message.content
        if not content:
            # Previously surfaced only as an opaque TypeError from re.search.
            print("  ERROR: model returned an empty response")
            return []

        # Extract JSON array (greedy: first "[" through last "]")
        match = re.search(r'\[.*\]', content, re.DOTALL)
        if not match:
            print("  ERROR: no JSON array found in model response")
            return []

        parsed = json.loads(match.group())
    except Exception as e:
        # Best-effort boundary: a failed batch must not abort the whole run.
        print(f"  ERROR: {e}")
        return []

    # Callers use .get() on every item, so drop anything that is not an object.
    return [item for item in parsed if isinstance(item, dict)]

# Get Alberta code info.
# NOTE(review): only the FIRST row's AB_Code/AB_Description is used as the
# source for every row; confirm each input file holds a single Alberta code.
has_rows = len(df) > 0
ab_code = df['AB_Code'].iloc[0] if 'AB_Code' in df.columns and has_rows else 'Unknown'
ab_description = df['AB_Description'].iloc[0] if 'AB_Description' in df.columns and has_rows else 'Unknown'

# Batch plan (defined once so the printed estimate and the loop cannot drift)
batch_size = 5
total_batches = (len(df) + batch_size - 1) // batch_size

print(f"\nAlberta Source Code: {ab_code}")
print(f"Description: {ab_description}")
print(f"\nTotal rows to QA: {len(df)}")
print(f"Batch size: {batch_size} rows per API call")
print(f"Estimated API calls: {total_batches}")

# Initialize QA columns
df['QA_Confidence'] = ''
df['QA_Rationale'] = ''

print(f"\n{'='*70}")
print("RUNNING QA...")
print("="*70)

for batch_num in tqdm(range(total_batches), desc="QA Batches"):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, len(df))

    batch_df = df.iloc[start_idx:end_idx]

    results = run_qa_batch(batch_df, ab_code, ab_description)

    # Map results back to dataframe
    for result in results:
        if not isinstance(result, dict):
            continue

        # The model may return the index as a string or float; a raw
        # comparison against ints would raise and abort the whole run.
        try:
            row_idx = int(result.get('row_index'))
        except (TypeError, ValueError):
            continue

        # The prompt shows index LABELS, so validate against the labels of
        # this batch rather than its positional range.
        # Assumes integer index labels, as produced by pd.read_excel.
        if row_idx not in batch_df.index:
            continue

        # Normalize case so "High"/"high" are counted with "HIGH" later on.
        confidence = result.get('qa_confidence') or ''
        df.at[row_idx, 'QA_Confidence'] = str(confidence).strip().upper()
        df.at[row_idx, 'QA_Rationale'] = result.get('qa_rationale') or ''

print(f"\n{'='*70}")
print("QA COMPLETE")
print("="*70)
print(f"\nAPI calls: {total_calls}")
print(f"Estimated cost: ${total_cost:.2f}")

# Summary
print(f"\n--- QA SUMMARY ---")
print(df['QA_Confidence'].value_counts())

---
## Step 7: Final Results

In [None]:
# Step 7 cell: reorder and sort the QA'd rows, save/download the final
# workbook, and print per-confidence and per-province summaries.
import os

print("="*70)
print("FINAL QA RESULTS")
print("="*70)

# Reorder columns to put QA columns prominently
qa_cols = ['QA_Confidence', 'QA_Rationale']
key_cols = ['AB_Code', 'Target_Province', 'Code', 'Description', 'Fee', 'Type', 'Modality']
other_cols = [c for c in df.columns if c not in qa_cols + key_cols]

# New column order: key info first, then QA columns, then the rest
new_order = [c for c in key_cols if c in df.columns] + qa_cols + other_cols
# .copy() so later changes never write into a slice of `df`
df_final = df[new_order].copy()

# Sort by province, then QA_Confidence (HIGH first, then MEDIUM, then LOW),
# then code. Province/Code are used only if present, matching the guarded
# column handling everywhere else in the notebook.
confidence_order = {'HIGH': 0, 'MEDIUM': 1, 'LOW': 2, '': 3}
sort_cols = [c for c in ['Target_Province', '_sort', 'Code']
             if c == '_sort' or c in df_final.columns]
df_final = (
    df_final
    .assign(_sort=df_final['QA_Confidence'].map(confidence_order))
    .sort_values(sort_cols)
    .drop(columns=['_sort'])
)

# Save final file. The name is built from the input's stem: str.replace()
# leaves ".xls"/".XLSX" names unchanged and would overwrite the input.
input_stem, _input_ext = os.path.splitext(INPUT_FILE)
final_filename = f"{input_stem}_QA_COMPLETE.xlsx"
df_final.to_excel(final_filename, index=False)

print(f"\nSaved: {final_filename}")
print(f"Total rows: {len(df_final)}")

print(f"\n--- BY CONFIDENCE ---")
for conf in ['HIGH', 'MEDIUM', 'LOW']:
    count = int((df_final['QA_Confidence'] == conf).sum())
    pct = count / len(df_final) * 100 if len(df_final) > 0 else 0
    print(f"  {conf}: {count} ({pct:.1f}%)")

print(f"\n--- BY PROVINCE ---")
if 'Target_Province' in df_final.columns:
    # dropna=False keeps rows with a missing province in their own group; an
    # equality filter against NaN would always report 0 rows for them.
    # sort=False keeps the order of first appearance in the sorted frame.
    for prov, prov_df in df_final.groupby('Target_Province', sort=False, dropna=False):
        high = int((prov_df['QA_Confidence'] == 'HIGH').sum())
        med = int((prov_df['QA_Confidence'] == 'MEDIUM').sum())
        low = int((prov_df['QA_Confidence'] == 'LOW').sum())
        print(f"  {prov}: {len(prov_df)} total (H:{high} M:{med} L:{low})")

# Download final file
files.download(final_filename)

print(f"\n{'='*70}")
print("QA WORKFLOW COMPLETE")
print("="*70)

---
## Step 8: Display Results Table

In [None]:
# Render the reviewer-facing view of the final table, restricted to the
# preferred columns that are actually present in df_final.
preferred_cols = ['Target_Province', 'Code', 'Description', 'Fee', 'QA_Confidence', 'QA_Rationale']
available_cols = df_final.columns
display_cols = [name for name in preferred_cols if name in available_cols]

print("FINAL QA TABLE:")
print()
# Last expression of the cell, so the notebook renders the table.
df_final[display_cols]