# Crosswalk Telehealth - Final

Targeted search of the Ontario schedule for codes equivalent to Alberta code **03.03CV - Telehealth consultation**

Fixes:
- Level_2 detection improved (no sentence fragments)
- Skips appendix pages such as Appendix J (lookup table, not service definitions); every page from the first detected appendix header onward is excluded

## 1. Setup

In [None]:
!pip install openai pandas pdfplumber openpyxl tqdm -q
print('Ready')

## 2. Upload Files

In [None]:
from google.colab import files

# Ask the user for the three input files; the keys of the returned mapping are
# used as the uploaded file names.
print("Select ALL 3 files (Ctrl+click):")
uploaded = files.upload()

# Classify each upload by name: the .xlsx workbook, the .pdf schedule, and the
# fee file (any other name containing "001").
ALBERTA_FILE = ONTARIO_PDF = ONTARIO_FEES = None
for f in uploaded:
    lowered = f.lower()  # extension match must not depend on case (.XLSX, .PDF)
    if lowered.endswith('.xlsx'):
        ALBERTA_FILE = f
    elif lowered.endswith('.pdf'):
        ONTARIO_PDF = f
    elif '001' in f:
        ONTARIO_FEES = f

print(f"\nAlberta: {ALBERTA_FILE}")
print(f"PDF: {ONTARIO_PDF}")
print(f"Fees: {ONTARIO_FEES}")

# Report anything that was not recognised now, rather than letting a later
# cell fail when it tries to open a None path.
missing = [
    label
    for label, value in (
        ("Alberta (.xlsx)", ALBERTA_FILE),
        ("PDF (.pdf)", ONTARIO_PDF),
        ("Fees (name containing '001')", ONTARIO_FEES),
    )
    if value is None
]
if missing:
    print(f"WARNING: no file detected for: {', '.join(missing)}")

## 3. API Key

In [None]:
from getpass import getpass

from openai import OpenAI

# Optionally hard-code the key below; when it is left blank the notebook asks
# for it interactively via getpass instead.
OPENAI_API_KEY = ""  # <-- Paste your key
OPENAI_API_KEY = OPENAI_API_KEY or getpass("API Key: ")

# Shared client used by the search cell.
client = OpenAI(api_key=OPENAI_API_KEY)
print("API ready")

## 4. Load Data with Improved Section Detection

In [None]:
import pandas as pd
import pdfplumber
import json
import re
from tqdm.notebook import tqdm

# Ontario fees: parse the fixed-width fee file into {code: amount}.
on_fees = {}
with open(ONTARIO_FEES, 'r') as f:
    for line in f:
        # Only records at least 30 characters wide whose columns 12-19 hold
        # '99999999' are used.
        # NOTE(review): presumably that marker means "no end date" - confirm
        # against the fee-file layout.
        if len(line) >= 30 and line[12:20] == '99999999':
            code = line[0:4].strip()
            try:
                # Columns 20-29 are read as an integer and scaled by 1/1000.
                on_fees[code] = int(line[20:30]) / 1000
            except ValueError:
                # Non-numeric amount field: skip this record. Only ValueError
                # is caught so interrupts and real bugs still surface.
                continue
print(f"Ontario fee codes: {len(on_fees)}")

# Load PDF with improved section detection
print("\nLoading PDF and detecting sections...")
pdf_pages = {}           # page_num -> extracted page text
page_sections = {}       # page_num -> {"level1": "...", "level2": "..."}
appendix_pages = set()   # Track appendix pages to skip

# Running state carried from page to page while scanning the PDF.
current_level1 = "UNKNOWN SECTION"
current_level2 = ""
in_appendix = False

def is_valid_level2_header(line):
    """Return True when *line* looks like a title-case Level 2 subsection header.

    The line is stripped first, then rejected if it fails any of these checks:
    it must open with a capital followed by a lowercase letter, be 5-60
    characters long, contain no sentence punctuation (at the end or before a
    new capitalised word), contain no brackets, not begin with a common
    sentence opener, have no digit in its first four characters, and contain
    no table-of-contents dot leaders.
    """
    candidate = line.strip()

    # Title-case opening: one capital immediately followed by a lowercase letter.
    if re.match(r'^[A-Z][a-z]', candidate) is None:
        return False

    # Headers are short; anything outside 5-60 characters is not one.
    if not 5 <= len(candidate) <= 60:
        return False

    rejection_patterns = (
        r'[.!?]\s*$',      # ends with sentence punctuation
        r'[.!?]\s+[A-Z]',  # sentence break in the middle of the line
        r'[\[\]\(\)]',     # brackets suggest a note or reference
    )
    if any(re.search(pattern, candidate) for pattern in rejection_patterns):
        return False

    # Openers that indicate running prose rather than a heading.
    non_header_starts = (
        'The ', 'This ', 'A ', 'An ', 'If ', 'For ', 'When ', 'Where ',
        'Note', 'See ', 'Refer', 'Include', 'Exclude', 'Payment',
    )
    if candidate.startswith(non_header_starts):
        return False

    # A digit near the start most likely belongs to a code.
    if any(ch.isdigit() for ch in candidate[:4]):
        return False

    # Dot leaders mark table-of-contents entries.
    return '....' not in candidate and '. . .' not in candidate

# Walk the schedule PDF page by page, storing each page's text and the section
# headings (level 1 / level 2) in effect on that page.
with pdfplumber.open(ONTARIO_PDF) as pdf:
    for page_num, page in enumerate(tqdm(pdf.pages), start=1):
        try:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page; leave it out entirely.
                continue

            # An "APPENDIX <letter>" marker within the first 500 characters
            # switches appendix mode on.
            # NOTE(review): the flag is never cleared, so every later page is
            # also recorded as an appendix page - confirm that is intended.
            if re.search(r'APPENDIX\s+[A-Z]', text.upper()[:500]):
                in_appendix = True
            if in_appendix:
                appendix_pages.add(page_num)

            pdf_pages[page_num] = text

            # Headings are only looked for in the first 10 lines of the page.
            for raw_line in text.split('\n')[:10]:
                line = raw_line.strip()

                if len(line) > 10 and line.isupper():
                    # Level 1: ALL CAPS headers (like "CONSULTATIONS AND VISITS"),
                    # excluding code-like lines, page markers, dot leaders and
                    # appendix titles.
                    has_leading_digit = any(c.isdigit() for c in line[:4])
                    is_excluded = line.startswith(('PAGE', 'APPENDIX')) or '....' in line
                    if not has_leading_digit and not is_excluded:
                        current_level1 = line
                        current_level2 = ""  # a new level 1 resets level 2
                elif is_valid_level2_header(line):
                    # Level 2: title-case subsection header.
                    current_level2 = line

            page_sections[page_num] = {
                "level1": current_level1,
                "level2": current_level2,
            }

        except Exception as e:
            # Best effort: report the failing page and carry on with the rest.
            print(f"Page {page_num} error: {e}")

# Summarise what was loaded, then spot-check the detected sections on a few
# fixed page numbers (pages that are missing or in an appendix are skipped).
print(f"PDF pages: {len(pdf_pages)}")
print(f"Appendix pages (will skip): {len(appendix_pages)}")
print(f"\nSample sections detected:")
sample_pages = (30, 50, 75, 100, 150, 200)
for pg in sample_pages:
    if pg not in page_sections or pg in appendix_pages:
        continue
    s = page_sections[pg]
    l2 = s['level2'][:30] or '(none)'
    print(f"  Page {pg}: {s['level1'][:35]} > {l2}")

## 5. Alberta Code Definition

In [None]:
# Alberta code being crosswalked: identifier, short description and base fee.
AB_CODE = "03.03CV"
AB_DESC = "Telehealth consultation"
AB_FEE = 25.09

# Full clinical definition from Alberta Schedule; inserted verbatim into the
# search prompt.
AB_CLINICAL_DEFINITION = """Assessment of a patient's condition via telephone or secure videoconference.

NOTE:
- At minimum: limited assessment requiring history related to presenting problems, appropriate records review, and advice to the patient
- Total physician time spent providing patient care must be MINIMUM 10 MINUTES
- If less than 10 minutes same day, must use HSC 03.01AD instead
- May only be claimed if service was initiated by the patient or their agent
- May only be claimed if service is personally rendered by the physician
- Benefit includes ordering appropriate diagnostic tests and discussion with patient
- Patient record must include detailed summary of all services including start/stop times
- Time spent on administrative tasks cannot be claimed
- May NOT be claimed same day as: 03.01AD, 03.01S, 03.01T, 03.03FV, 03.05JR, 03.08CV, 08.19CV, 08.19CW, or 08.19CX by same physician for same patient
- May NOT be claimed same day as in-person visit or consultation by same physician for same patient

Category: V Visit (Virtual)
Base rate: $25.09"""

# Echo the target code so the cell output shows what will be searched for.
print(
    f"Alberta Code: {AB_CODE}\n"
    f"Description: {AB_DESC}\n"
    f"Fee: ${AB_FEE}"
)

## 6. Targeted Search Function

In [None]:
# Number of PDF pages sent to the model per request.
PAGES_PER_CALL = 10
# Running estimate of API spend, updated by track_cost().
total_cost = 0.0


def track_cost(inp, out):
    """Add the estimated cost of one API call to the global ``total_cost``.

    ``inp`` and ``out`` are the prompt and completion token counts. They are
    charged at 3.0 and 15.0 per million tokens respectively.
    NOTE(review): these rates are hard-coded - confirm they match the model in use.
    """
    global total_cost
    input_cost = (inp / 1e6) * 3.0
    output_cost = (out / 1e6) * 15.0
    total_cost = total_cost + (input_cost + output_cost)

def get_section_for_pages(batch_pages):
    """Return the section record of the first page in the batch that has one.

    Looks each page number up in the global ``page_sections`` mapping, in
    batch order. If none of the pages has a record, the result is a
    placeholder with level1 "Unknown" and an empty level2.
    """
    fallback = {"level1": "Unknown", "level2": ""}
    known = (page_sections[pg] for pg in batch_pages if pg in page_sections)
    return next(known, fallback)

def build_prompt(ab_code, ab_desc, ab_fee, clinical_def, batch_pages, context, section_info):
    """Assemble the model prompt for one batch of schedule pages.

    The prompt states the Alberta code (code, description, fee and clinical
    definition), embeds ``context`` (the page text) under a heading built from
    the first and last entries of ``batch_pages`` and from ``section_info``
    ("level1", plus " > level2" when level2 is non-empty), and then asks for a
    JSON-only answer listing primary and add-on codes.

    Returns the prompt as a single string.
    """
    level2_suffix = f" > {section_info['level2']}" if section_info['level2'] else ""
    section_text = section_info['level1'] + level2_suffix
    first_page, last_page = batch_pages[0], batch_pages[-1]

    return f"""You are a senior physician billing specialist mapping Alberta fee codes to Ontario equivalents.

ALBERTA CODE TO MATCH:
- Code: {ab_code}
- Description: {ab_desc}
- Fee: ${ab_fee}

CLINICAL SERVICE DEFINITION:
{clinical_def}

This is a BASIC PATIENT-FACING virtual visit by any physician (not specialist-specific, not physician-to-physician).

ONTARIO SCHEDULE EXCERPT (pages {first_page}-{last_page}):
Current Section: {section_text}

{context}

TASK:
Find Ontario codes that bill for THIS SAME CLINICAL ENCOUNTER — a basic virtual care assessment between a physician and patient.

STEP 1 — FIND PRIMARY CODE(S):
What Ontario code(s) would a physician bill for this same 10+ minute patient-facing virtual assessment?
- Look for: Limited/basic virtual care visits, telephone assessments, video assessments
- Separate codes if Ontario splits by modality (phone vs video)

STEP 2 — FIND ADD-ON CODES:
What Ontario codes can be billed IN ADDITION TO the primary code for this type of visit?
- Each add-on must link to specific primary code(s)
- Only include add-ons specifically eligible for virtual care visits
- IMPORTANT: If an add-on has DIFFERENT FEES by modality (telephone vs video), return it as SEPARATE entries

DO NOT INCLUDE:
- Physician-to-physician consultations (K730-K737) — wrong service type
- E-assessments (K738-K741) — these are specialist-to-PCP, not patient-facing
- Specialist consultations (A010, A913, A914) — wrong provider scope
- Ambulance/transport/detention codes — completely different services
- Diagnostic procedure codes (ECG, Holter, imaging) — not consultations
- Psychiatry/psychology specific codes — different specialty
- In-person visit codes (unless Ontario has no virtual equivalent)
- Appendix reference codes (codes ending in A that are just claim submission references)

JSON only:
{{
  "found": true/false,
  "primary_codes": [
    {{
      "code": "...",
      "description": "full description from schedule",
      "fee": 00.00,
      "modality": "telephone|video|both",
      "reasoning": "why this matches"
    }}
  ],
  "add_on_codes": [
    {{
      "code": "...",
      "description": "...",
      "fee": 00.00,
      "modality": "telephone|video|both",
      "links_to": ["primary_code1", "primary_code2"],
      "condition": "when this add-on applies"
    }}
  ]
}}

IMPORTANT: If a code has different fees for telephone vs video, create SEPARATE entries for each modality with the specific fee.

If no relevant codes on these pages: {{"found": false, "primary_codes": [], "add_on_codes": []}}"""

def _tag_with_location(entries, page_label, section_info):
    """Return the dict items of *entries*, annotated with page range and section."""
    tagged = []
    for entry in entries or []:
        if not isinstance(entry, dict):
            # Malformed item in the model's JSON; skip it rather than lose the batch.
            continue
        entry['pages'] = page_label
        entry['level1'] = section_info['level1']
        entry['level2'] = section_info['level2']
        tagged.append(entry)
    return tagged


def _dedupe_by_code_modality(entries):
    """Keep the first entry per (code, modality) pair; drop entries with no code."""
    unique = {}
    for entry in entries:
        code = entry.get('code')
        if not code:
            # Test the code itself: a combined "code_modality" string is never
            # empty, so it cannot be used to filter out code-less entries.
            continue
        key = (str(code), str(entry.get('modality', '')))
        unique.setdefault(key, entry)
    return list(unique.values())


def search_for_matches():
    """Search all non-appendix pages for Ontario codes matching the Alberta code.

    Pages from the global ``pdf_pages`` that are not in ``appendix_pages`` are
    sent to the model in batches of ``PAGES_PER_CALL``. Each response is parsed
    as JSON, and every returned code is tagged with the batch's page range and
    the section record returned by ``get_section_for_pages`` for that batch.
    A failure on one batch is printed and the search continues.

    Returns:
        A ``(primary_codes, addon_codes)`` tuple of lists of dicts, each
        de-duplicated by code + modality so modality-specific entries survive.
    """
    all_primary = []
    all_addons = []

    # Filter out appendix pages
    page_nums = sorted(p for p in pdf_pages if p not in appendix_pages)
    batches = [page_nums[i:i + PAGES_PER_CALL] for i in range(0, len(page_nums), PAGES_PER_CALL)]

    print(f"Searching {len(page_nums)} pages in {len(batches)} batches (skipped {len(appendix_pages)} appendix pages)...\n")

    for batch_pages in tqdm(batches, desc="Searching"):
        page_label = f"{batch_pages[0]}-{batch_pages[-1]}"
        context = "\n".join(f"=== PAGE {p} ===\n{pdf_pages[p]}" for p in batch_pages)
        section_info = get_section_for_pages(batch_pages)

        prompt = build_prompt(AB_CODE, AB_DESC, AB_FEE, AB_CLINICAL_DEFINITION, batch_pages, context, section_info)

        try:
            resp = client.chat.completions.create(
                model="gpt-5.1-2025-11-13",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_completion_tokens=1500
            )
            track_cost(resp.usage.prompt_tokens, resp.usage.completion_tokens)

            # The message content can be None; treat that as "no JSON found".
            content = resp.choices[0].message.content or ""
            match = re.search(r'\{[\s\S]*\}', content)
            if not match:
                continue

            result = json.loads(match.group())
            if not isinstance(result, dict) or not result.get('found'):
                continue

            primary = _tag_with_location(result.get('primary_codes'), page_label, section_info)
            addons = _tag_with_location(result.get('add_on_codes'), page_label, section_info)
            print(f"  Pages {page_label}: {len(primary)} primary, {len(addons)} add-ons")

            all_primary.extend(primary)
            all_addons.extend(addons)

        except Exception as e:
            # Per-batch boundary: report the failure and keep searching.
            print(f"Error on pages {page_label}: {e}")

    # Deduplicate by code+modality (to keep separate modality entries)
    return _dedupe_by_code_modality(all_primary), _dedupe_by_code_modality(all_addons)

print("Search function ready")

## 7. RUN SEARCH

In [None]:
def _as_text(value, default=''):
    """Render a model-supplied value as text, using *default* when it is None."""
    return default if value is None else str(value)


# Run the search and print a readable report. Fields come from model output,
# so every value is read with .get() and coerced to text before formatting;
# a missing code or a None field must not abort the report.
print("="*70)
print(f"SEARCHING FOR: {AB_CODE} - {AB_DESC}")
print("="*70)

primary_codes, addon_codes = search_for_matches()

print("\n" + "="*70)
print("RESULTS")
print("="*70)

print(f"\n--- PRIMARY CODES ({len(primary_codes)}) ---")
print("These are the Ontario codes for the same clinical service:\n")
for p in primary_codes:
    code = _as_text(p.get('code'))
    # Prefer the fee reported by the model; fall back to the fee-file lookup.
    fee = p.get('fee') or on_fees.get(code, '?')
    modality = _as_text(p.get('modality', '?'), '?')
    print(f"{code:6} | ${str(fee):>7} | {modality:10}")
    print(f"         Section: {_as_text(p.get('level1'))[:30]} > {_as_text(p.get('level2'))[:25]}")
    print(f"         {_as_text(p.get('description'))[:55]}")
    print()

print(f"\n--- ADD-ON CODES ({len(addon_codes)}) ---")
print("These can be billed IN ADDITION to primary codes:\n")
for a in addon_codes:
    code = _as_text(a.get('code'))
    fee = a.get('fee') or on_fees.get(code, '?')
    # links_to should be a list of codes, but tolerate a bare value or
    # non-string items.
    raw_links = a.get('links_to') or []
    if not isinstance(raw_links, (list, tuple)):
        raw_links = [raw_links]
    links = ', '.join(str(link) for link in raw_links) if raw_links else 'unspecified'
    modality = _as_text(a.get('modality', 'both'), 'both')
    print(f"{code:6} | ${str(fee):>7} | {modality:10} | Links to: {links}")
    print(f"         Section: {_as_text(a.get('level1'))[:30]} > {_as_text(a.get('level2'))[:25]}")
    print(f"         {_as_text(a.get('description'))[:55]}")
    print(f"         Condition: {_as_text(a.get('condition'))[:45]}")
    print()

print("="*70)
print(f"Total cost: ${total_cost:.2f}")
print("="*70)

## 8. Save Results

In [None]:
from google.colab import files

# Column order of the exported sheet. Passing it to the DataFrame keeps the
# header row even when the search found nothing.
RESULT_COLUMNS = [
    'AB_Code', 'AB_Description', 'AB_Fee',
    'ON_Code', 'ON_Description', 'ON_Fee',
    'Type', 'Modality', 'Links_To', 'Condition', 'Reasoning',
    'Level_1_Section', 'Level_2_Subsection', 'Pages',
]


def _crosswalk_row(entry, row_type):
    """Build one output row for an Ontario code matched to the Alberta code.

    ``row_type`` is 'PRIMARY' or 'ADD-ON'. Links_To and Condition are filled
    only for add-ons, Reasoning only for primaries.
    """
    is_addon = row_type == 'ADD-ON'
    code = entry.get('code', '')
    # links_to should be a list of codes, but tolerate a bare value or
    # non-string items.
    links = entry.get('links_to') or []
    if not isinstance(links, (list, tuple)):
        links = [links]
    return {
        'AB_Code': AB_CODE,
        'AB_Description': AB_DESC,
        'AB_Fee': AB_FEE,
        'ON_Code': code,
        'ON_Description': entry.get('description', ''),
        # Use fee from GPT if provided (handles modality-specific fees), else lookup
        'ON_Fee': entry.get('fee') or on_fees.get(code, ''),
        'Type': row_type,
        'Modality': entry.get('modality', ''),
        'Links_To': ', '.join(str(link) for link in links) if is_addon else '',
        'Condition': entry.get('condition', '') if is_addon else '',
        'Reasoning': '' if is_addon else entry.get('reasoning', ''),
        'Level_1_Section': entry.get('level1', ''),
        'Level_2_Subsection': entry.get('level2', ''),
        'Pages': entry.get('pages', ''),
    }


# Build output with Level_1 and Level_2 columns: primaries first, then add-ons.
results = [_crosswalk_row(p, 'PRIMARY') for p in primary_codes]
results += [_crosswalk_row(a, 'ADD-ON') for a in addon_codes]

df = pd.DataFrame(results, columns=RESULT_COLUMNS)
df.to_excel('crosswalk_telehealth_final.xlsx', index=False)
print(f"Saved {len(results)} results to crosswalk_telehealth_final.xlsx")

files.download('crosswalk_telehealth_final.xlsx')

## 9. Preview

In [None]:
# Leave the results DataFrame as the cell's last expression so the notebook renders it.
df