In [73]:
import os, json, time
import re
from typing import List, Dict, Optional

import numpy as np
import pandas as pd
import simple_icd_10_cm as cm
from openai import OpenAI
from tqdm import tqdm

In [74]:
# load, filter, and sample (n=10,000) the data
def load_sampled_mimic(path="processed_data/mimiciv_icd10.parquet", n=10_000, min_codes=5, max_codes=11):
    """Read the notes table, keep rows with an acceptable code count, and sample them.

    Args:
        path: Parquet file providing the ``text`` and ``diagnosis_codes`` columns.
        n: Upper bound on the number of rows returned.
        min_codes: Smallest accepted number of diagnosis codes (inclusive).
        max_codes: Upper bound on the number of diagnosis codes (exclusive).

    Returns:
        A DataFrame of at most ``n`` rows drawn with ``random_state=42``, so the
        same input file always yields the same sample.
    """
    def _code_count_ok(codes):
        # Anything that is not a list / ndarray (e.g. a missing value) is rejected.
        if not isinstance(codes, (list, np.ndarray)):
            return False
        return min_codes <= len(codes) < max_codes

    notes = pd.read_parquet(path, columns=["text", "diagnosis_codes"])
    keep_mask = notes["diagnosis_codes"].apply(_code_count_ok)
    eligible = notes[keep_mask]
    sample_size = min(n, len(eligible))
    return eligible.sample(n=sample_size, random_state=42)

# Get the title of an ICD code
# Per-path cache of {icd_code: long_title}; avoids re-reading the parquet file on every lookup.
_ICD_TITLE_CACHE = {}


def get_icd_title(code, path="processed_data/icd10cm_dict.parquet"):
    """Look up the long title of an ICD code in a parquet dictionary.

    The dictionary file is read once per ``path`` and kept in memory, because
    this function is called repeatedly while formatting rows. Changes made to
    the file on disk after the first lookup are therefore not picked up until
    the cache is cleared (e.g. by re-running this cell).

    Args:
        code: ICD code to look up; compared against the ``icd_code`` column.
        path: Parquet file with ``icd_code`` and ``long_title`` columns.

    Returns:
        The ``long_title`` of the first row whose ``icd_code`` equals ``code``,
        or ``"Unknown"`` when no row matches.
    """
    titles = _ICD_TITLE_CACHE.get(path)
    if titles is None:
        df = pd.read_parquet(path)
        titles = {}
        for icd_code, long_title in zip(df["icd_code"], df["long_title"]):
            # setdefault keeps the first occurrence, matching a first-row lookup.
            titles.setdefault(icd_code, long_title)
        _ICD_TITLE_CACHE[path] = titles
    return titles.get(code, "Unknown")

# Create structured input format
def format_row(row, title_dict_path="processed_data/icd10cm_dict.parquet"):
    """Turn one sampled row into the structured record fed to the LLM prompt.

    For every gold-standard code the record carries the code, its title, and
    the other codes of the same three-character ICD-10-CM category
    ("differential codes") that are not themselves gold-standard codes.

    Args:
        row: Mapping / Series with ``text`` (discharge summary) and
            ``diagnosis_codes`` (iterable of ICD-10-CM code strings).
        title_dict_path: Parquet dictionary used by ``get_icd_title`` for
            codes that ``simple_icd_10_cm`` does not know.

    Returns:
        ``{"discharge_summary": str, "icd_gold_standard": [{"code", "title",
        "differential_codes": [{"code", "title"}, ...]}, ...]}``
    """
    def _title(icd_code):
        if cm.is_valid_item(icd_code):
            return cm.get_description(icd_code)
        return get_icd_title(icd_code, title_dict_path)

    def _undotted(icd_code):
        # Gold codes may be stored without the dot while the library returns
        # dotted codes; compare both in a dot-free form.
        return icd_code.replace(".", "")

    summary = row['text']
    gold = row['diagnosis_codes']
    gold_keys = {_undotted(code) for code in gold}

    out = []
    for code in gold:
        # The category is the first three characters. For dotted codes this is
        # exactly the part before the dot; for undotted codes ("E119") taking
        # the whole string would yield the leaf itself and no differentials.
        cat = code.split('.')[0][:3]
        # get_descendants raises on unknown codes, so guard it the same way the
        # title lookup is guarded.
        descendants = cm.get_descendants(cat) if cm.is_valid_item(cat) else []
        diff_list = [
            {"code": d, "title": _title(d)}
            for d in descendants
            if _undotted(d) not in gold_keys
        ]
        out.append({"code": code, "title": _title(code), "differential_codes": diff_list})

    return {"discharge_summary": summary, "icd_gold_standard": out}

# Loads the input file or the df and then processes and saves the input file
def process_and_save(path="processed_data/processed_inputs.jsonl",
                     mimic_path="processed_data/mimiciv_icd10.parquet", n=10_000, min_codes=5, max_codes=11):
    """Return the formatted inputs, building and caching them on first use.

    If ``path`` already exists it is treated as a JSONL cache and loaded as-is;
    in that case ``mimic_path``, ``n``, ``min_codes`` and ``max_codes`` are not
    consulted. Otherwise the data is sampled, every row is formatted with
    ``format_row``, and the result is written to ``path`` (one JSON object per
    line).

    Args:
        path: JSONL cache file to load from / write to.
        mimic_path: Parquet file passed to ``load_sampled_mimic``.
        n, min_codes, max_codes: Sampling parameters passed to
            ``load_sampled_mimic``.

    Returns:
        List of formatted input dicts.
    """
    if os.path.exists(path):
        print(f"Loading saved data from {path}")
        with open(path, encoding="utf-8") as f:
            # Tolerate blank lines (e.g. a trailing newline added by an editor).
            return [json.loads(line) for line in f if line.strip()]
    df = load_sampled_mimic(mimic_path, n, min_codes, max_codes)
    inputs = [format_row(row) for _, row in tqdm(df.iterrows(), total=len(df))]
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Write to a temp file and rename so an interrupted write never leaves a
    # truncated cache that a later run would load as if it were complete.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(item) + "\n" for item in inputs)
    os.replace(tmp_path, path)
    print(f"Saved {len(inputs)} entries to {path}")
    return inputs

In [75]:
# Build the formatted inputs with the default paths and sampling settings,
# or reload them from the JSONL cache when that file already exists.
inputs = process_and_save()

Loading saved data from processed_data/processed_inputs.jsonl


In [76]:
# ==== Init LLM Client ====
# Prefer an API key from the environment so the real secret never has to be
# committed with the notebook; the placeholder is kept as the fallback.
DS_api = os.environ.get("DS_API_KEY", 'your_api')
client = OpenAI(base_url="your_url", api_key=DS_api)
# NOTE(review): run_icd_verification_pipeline does not read this variable.
model = 'your_model_name'

In [77]:
# ==== Clean LLM Output ====
def clean_llm_output(output: str) -> Optional[str]:
    """Extract the JSON array text from a raw LLM reply.

    Handles replies wrapped in Markdown code fences and replies that prefix the
    payload with a ``json`` marker. The payload's original casing is preserved,
    so ICD codes, quoted evidence and sentinel strings such as
    ``"No evidence"`` survive unchanged.

    Args:
        output: Raw message content returned by the model.

    Returns:
        The substring from the first ``[`` to the last ``]`` (inclusive), or
        ``None`` when ``output`` is not a string or holds no bracketed span.
    """
    if not isinstance(output, str):
        return None
    text = output.strip()
    if "```" in text:
        # Pick the fenced segment that looks like the JSON payload.
        parts = [p.strip() for p in text.split("```") if p.strip()]
        text = next((p for p in parts if p.lower().startswith("json") or p.startswith("[")), text)
    # Drop everything up to and including the first "json" marker. The match is
    # case-insensitive, but the remainder is taken from the original text
    # rather than a lowercased copy.
    pieces = re.split("json", text, maxsplit=1, flags=re.IGNORECASE | re.ASCII)
    if len(pieces) == 2:
        text = pieces[1].strip()
    start, end = text.find("["), text.rfind("]")
    # rfind returns -1 when "]" is missing; also reject a "]" that precedes "[".
    if start == -1 or end < start:
        return None
    return text[start:end + 1].strip()

# === Generate Prompt ===
def generate_icd_verification_prompt(discharge_summary: str, icd_gold_standard: list) -> str:
    """Build the ICD verification prompt for one discharge summary.

    Args:
        discharge_summary: Free-text discharge summary inserted verbatim.
        icd_gold_standard: JSON-serializable list of assigned codes (with their
            differential codes); embedded as pretty-printed JSON.

    Returns:
        The full prompt string. The example output is enclosed in a properly
        closed ```json fence so the INPUT section is not part of the example.
    """
    icd_gold_json = json.dumps(icd_gold_standard, indent=2)
    return f"""You are a clinical coding expert.

You are given:
1. A patient's **discharge summary**.
2. A list of **ICD-10-CM codes** assigned to that patient.
3. For each code, a list of **differential ICD-10 codes** within the same category to help assess coding accuracy.

Your task is to evaluate each assigned ICD code **in the context of the discharge summary**.

---

### STEP 1: Evidence Check
- Determine whether the discharge summary supports assigning this ICD code.
- If supported, provide **direct quote(s)** from the text as evidence.
- If **no evidence**, respond with `"No evidence"` and skip to the next ICD code.

---

### STEP 2: Accuracy Check (only if evidence is found)
- Compare the **assigned ICD code** to its list of **differential codes**.
- Determine if the assigned code is the **most accurate** or if another code is more appropriate.
- Justify your decision based on the evidence.

---

### Expected JSON Output Format:
Return a list of results, one per ICD code, like below:

```json
[
  {{
    "code": "ICD_CODE",
    "title": "ICD Title",
    "evidence": ["Quoted sentence from discharge summary", "..."],
    "accuracy": {{
      "is_accurate": true,
      "better_alternative": null,
      "justification": "Why the gold standard is correct."
    }}
  }},
  {{
    "code": "ICD_CODE",
    "title": "ICD Title",
    "evidence": "No evidence",
    "accuracy": {{
      "is_accurate": null,
      "better_alternative": null,
      "justification": "Not applicable (no evidence)."
    }}
  }}
]
```

### INPUT

#### Discharge Summary:
{discharge_summary}

#### Assigned ICD Codes List (with differential options per code)
{icd_gold_json}

Use only the differential codes related to a specific ICD code when checking for accuracy.
If there is no supporting evidence in the summary, skip the accuracy check.
Return only the JSON output. Do not include explanations or commentary outside the JSON.
Output:
"""

### **Run Pipeline with Checkpointing & Logging**
def _write_json_atomic(path: str, payload) -> None:
    """Dump ``payload`` as JSON to ``path`` via a temp file and an atomic rename."""
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(payload, f)
    # A crash mid-write leaves the previous file intact instead of a corrupt one.
    os.replace(tmp_path, path)


def _log_verification(idx: int, parsed: list) -> None:
    """Print a per-code status summary for one parsed model reply."""
    print(f"\n[🔍 Index {idx}] ICD Codes:")
    for entry in parsed:
        print(f"- Code: {entry['code']} | Title: {entry['title']}")
        ev = entry['evidence']
        if ev == "No evidence":
            print("  ⛔ No supporting evidence.")
        else:
            print("  ✅ Evidence found.")
            acc = entry['accuracy']
            if acc["is_accurate"]:
                print("  🎯 Accurate code.")
            else:
                print(f"  ❗ Better alternative: {acc['better_alternative']}")


def run_icd_verification_pipeline(inputs: List[Dict],
                                   checkpoint_path: str = "processed_data/icd_verification_ckpt.json",
                                   output_path: str = "processed_data/icd_verification_outputs.json",
                                   model_name: str = "deepseek/deepseek-chat-v3-0324:free"):
    """Verify the ICD codes of every input with the LLM, resuming from a checkpoint.

    Each item is turned into a prompt, sent to the module-level ``client``, and
    the reply is cleaned and parsed as JSON. Successfully parsed replies are
    stored in the checkpoint under the item's position (``str(idx)``), so the
    checkpoint is only valid for the same ``inputs`` in the same order. Items
    that raise are reported and skipped; they are absent from the returned list
    and are retried on the next run.

    Args:
        inputs: Items with ``discharge_summary`` and ``icd_gold_standard`` keys.
        checkpoint_path: JSON file mapping processed indices to parsed replies.
        output_path: JSON file that receives the final results list.
        model_name: Model identifier passed to the chat-completions call.

    Returns:
        List of parsed replies for the items that were processed successfully
        (from the checkpoint or from a fresh call), in input order.
    """
    # Load checkpoint
    if os.path.exists(checkpoint_path):
        with open(checkpoint_path, encoding="utf-8") as f:
            checkpoint = json.load(f)
    else:
        checkpoint = {}

    results = []
    for idx, item in enumerate(tqdm(inputs)):
        if str(idx) in checkpoint:
            print(f"[✔] Skipping index {idx} (already processed)")
            results.append(checkpoint[str(idx)])
            continue

        try:
            prompt = generate_icd_verification_prompt(item["discharge_summary"], item["icd_gold_standard"])

            response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}]
            )

            raw_output = response.choices[0].message.content
            cleaned_output = clean_llm_output(raw_output)
            if cleaned_output is None:
                # Fail with a readable message instead of json.loads(None)'s TypeError.
                raise ValueError("no JSON array found in model output")

            parsed = json.loads(cleaned_output)

            # === Logging status ===
            # Doubles as a schema check: a reply missing the expected keys
            # raises here and is therefore not checkpointed.
            _log_verification(idx, parsed)

            # === Save checkpoint ===
            checkpoint[str(idx)] = parsed
            results.append(parsed)
            _write_json_atomic(checkpoint_path, checkpoint)

        except Exception as e:
            # Deliberate best-effort: report, back off briefly, move on.
            print(f"[❌ Error] Index {idx}: {e}")
            time.sleep(2)
            continue

    if output_path:
        try:
            _write_json_atomic(output_path, results)
        except OSError as e:
            # Results are still returned (and checkpointed); only the export failed.
            print(f"[❌ Error] Could not write results to {output_path}: {e}")
    return results

In [None]:
# Run the verification over all formatted inputs with the default checkpoint
# path; indices already present in the checkpoint are reused, not re-queried.
results = run_icd_verification_pipeline(inputs)