In [1]:
import pandas as pd
import os

from pipeline_functions import (
    preprocess_transcript,
    add_confidence_scores,
    get_icd10_codes_with_chat_history,
    extract_confidence_summary
)



In [2]:
# --- Run configuration -------------------------------------------------------
MAX_TURNS = 50
# os.environ.get returns None when OPENAI_API_KEY is not set in the environment.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# --- Load the dataset --------------------------------------------------------
df = pd.read_csv("~/Downloads/Test_Project_ICD10_Dataset.csv")

# Sanity check: list the available columns, then show the reference answer of
# a single row (selected by position).
index = 3
print(df.columns)
print(df["reference_answer"].iloc[index])

Index(['encounter_id', 'age', 'age_unit', 'sex', 'visit_reason',
       'reference_answer', 'transcript'],
      dtype='object')
J98.8 Wheezing-associated respiratory infection (WARI)
Z23 Encounter for immunization
J45.990 Exercise-induced RAD (reactive airway disease)


In [3]:
df_name = "test_run_with_confidence_flash2-5_v2.csv"


def _checkpoint_column(saved_df, column, expected_len):
    """Return one checkpoint column as a list, with missing cells as None.

    Empty CSV cells are read back by pandas as NaN rather than None, so the
    values are normalised here; otherwise an ``is None`` test never matches a
    row that was saved without a result.

    Args:
        saved_df: DataFrame loaded from the checkpoint file, or None when no
            checkpoint could be loaded.
        column: Name of the column to extract.
        expected_len: Number of rows in the dataset being processed.

    Returns:
        A list of ``len(saved_df)`` values taken from the column, or a list of
        ``expected_len`` Nones when the checkpoint or the column is absent.
    """
    if saved_df is None or column not in saved_df.columns:
        return [None] * expected_len
    return [None if pd.isna(value) else value for value in saved_df[column].tolist()]


# Resume from the checkpoint written by a previous run, if there is one.
# Only "no checkpoint yet" conditions are treated as a fresh start; any other
# read error propagates so that an unreadable checkpoint is not overwritten.
try:
    existing_df = pd.read_csv(df_name)
except (FileNotFoundError, pd.errors.EmptyDataError):
    existing_df = None

# Rows are matched to the dataset purely by position, so a checkpoint with a
# different row count cannot be aligned. Stop before any API call is made.
if existing_df is not None and len(existing_df) != len(df):
    raise ValueError(
        f"Checkpoint {df_name} has {len(existing_df)} rows but the dataset has "
        f"{len(df)}; move or delete the checkpoint before re-running."
    )

entries = _checkpoint_column(existing_df, "entries", len(df))
confidences = _checkpoint_column(existing_df, "confidence_summary", len(df))
# Load previously cleaned transcripts as well: the whole column is rewritten
# on every save, so starting from all-None would erase earlier results.
cleaned_transcripts = _checkpoint_column(existing_df, "cleaned_transcript", len(df))

# Find indices that need processing (either empty entries or missing confidence)
indices_to_process = [
    i
    for i in range(len(df))
    if entries[i] is None
    or entries[i] == "[]"
    or confidences[i] is None
    or confidences[i] == "no_entries"
]

print(f"Found {len(indices_to_process)} entries to process: {indices_to_process}")

# Process entries with full pipeline
for position, index in enumerate(indices_to_process, start=1):
    try:
        print(f"Processing index {index} with full pipeline...")

        # Step 1: Get the cleaned transcript from preprocessing
        cleaned_transcript = preprocess_transcript(df.transcript.iloc[index], api_key=openai_api_key)
        print("Completed step 1")

        # Step 2: Get ICD-10 codes from the cleaned transcript
        entries_result, chat_history = get_icd10_codes_with_chat_history(cleaned_transcript, api_key=openai_api_key)
        print("Completed step 2")

        # Step 3: Add confidence scores
        scored_entries = add_confidence_scores(chat_history, entries_result, api_key=openai_api_key)
        print(scored_entries)
        print("Completed step 3")

        # Store all outputs for this row
        entries[index] = scored_entries
        confidences[index] = extract_confidence_summary(scored_entries)
        print(confidences[index])
        cleaned_transcripts[index] = cleaned_transcript

        print(f"Successfully processed index {index}")
        print(f"Confidence summary: {confidences[index]}")

    except Exception as e:
        # Best effort: record the failure for this row and carry on with the
        # next one. The empty list is written to the CSV as "[]", which the
        # selection above picks up again on the next run.
        print(f"Error processing index {index}: {e}")
        entries[index] = []
        confidences[index] = "processing_failed"
        cleaned_transcripts[index] = "preprocessing_failed"

    # Save after each iteration so an interrupted run can be resumed
    temp_df = df.copy()
    temp_df["entries"] = entries
    temp_df["confidence_summary"] = confidences
    temp_df["cleaned_transcript"] = cleaned_transcripts
    temp_df.to_csv(df_name, index=False)
    print(f"Saved progress: {position}/{len(indices_to_process)} entries processed")

print("Processing complete!")

Found 5 entries to process: [3, 7, 18, 21, 25]
Processing index 3 with full pipeline...
Response: ChatCompletion(id='dqRYaNPZFZC2qtsPgsK3oAs', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='**Chief Complaint:**\nPersistent cough and exertional dyspnea (difficulty breathing) during swimming, worsening over the past two weeks.\n\n**Current Symptoms:**\n*   Chronic cough, especially over the last two weeks.\n*   Congestion, improved with humidifier use and air filter change.\n*   Dyspnea on exertion: Specifically during swim team (freestyle and breaststroke), leading to difficulty breathing and inability to "get air in" after 25 yards; this condition has worsened over the summer.\n*   Wheezing noted on physical examination.\n*   No fever, vomiting, or diarrhea.\n*   Eating well.\n*   Cough intermittently wakes patient at night (once last week), but generally sleeps through the night.\n\n**Medical History:**\n*   Family history of "Reac

In [4]:

# NOTE(review): chat_history and entries_result are not assigned in this cell;
# they hold whatever the processing loop in the previous cell last bound them
# to, so this call only works after that loop has run in the same kernel.
scored_entries = add_confidence_scores(
    chat_history,
    entries_result,
    api_key=openai_api_key,
)