In [1]:
# import necessary libraries
import pandas as pd
import os
import textstat
from openai import OpenAI
import json
import re
import requests
from dotenv import load_dotenv
import math
from IPython.display import FileLink

In [2]:
# import prompts 
from jh_pfx_prompts import example, icd10_example, baseline_zeroshot_prompt, single_fewshot_prompt, single_fewshot_icd10_labeling_prompt

In [3]:
# Load OPENAI_API_KEY from a local .env file (if one exists) instead of
# hard-coding it in the notebook. With its default settings load_dotenv()
# leaves already-set environment variables untouched, so a key exported in
# the shell is kept; the previous version of this cell replaced any existing
# key with an empty string.
load_dotenv()

In [4]:
# OpenAI configuration: model name, API key read from the environment,
# and the shared client used by every completion call in this notebook
OPENAI_MODEL = "gpt-4o"
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
CLIENT = OpenAI(api_key=OPENAI_API_KEY)

In [5]:
# Reading-level labels returned by map_reading_level, listed from the
# highest Flesch reading-ease band (easiest text) down to the lowest.
FIFTH_GRADE = "5th grade"
SIXTH_GRADE = "6th grade"
SEVENTH_GRADE = "7th grade"
EIGTH_TO_NINTH_GRADE = "8th to 9th grade"
TENTH_TO_TWELTH_GRADE = "10th to 12th grade"
COLLEGE = "College"
COLLEGE_GRADUATE = "College Graduate"
PROFESSIONAL = "Professional"
# Label used when a score does not fall into any band
N_A = "N/A"

In [6]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    """
    Map a Flesch reading-ease score to a reading-level label.

    Args:
        flesch_reading_ease: Numeric Flesch reading-ease score. Higher
            scores mean easier text.

    Returns:
        One of the reading-level label constants. Scores below 10
        (including negative scores) map to PROFESSIONAL and scores of 90
        or more map to FIFTH_GRADE. N_A is returned only when the score
        compares false against every band (e.g. NaN).
    """
    if flesch_reading_ease < 10.0:
        return PROFESSIONAL
    if flesch_reading_ease < 30.0:
        return COLLEGE_GRADUATE
    if flesch_reading_ease < 50.0:
        return COLLEGE
    if flesch_reading_ease < 60.0:
        return TENTH_TO_TWELTH_GRADE
    if flesch_reading_ease < 70.0:
        return EIGTH_TO_NINTH_GRADE
    if flesch_reading_ease < 80.0:
        return SEVENTH_GRADE
    if flesch_reading_ease < 90.0:
        return SIXTH_GRADE
    # The top band is closed at 100 in the reference table, and very simple
    # text can score above 100, so the top band has no upper bound here
    # (previously a score of exactly 100 or more fell through to N_A).
    if flesch_reading_ease >= 90.0:
        return FIFTH_GRADE
    # Only reached when every comparison above is false, i.e. NaN
    return N_A

In [7]:
# Target Flesch reading-ease score for each reading level; every value sits
# at the midpoint of the matching band used by map_reading_level
# (listed from the hardest text to the easiest).
professional = 5
college_graduate = 20
college = 40
tenth_to_twelfth_grade = 55
eigth_and_ninth_grade = 65
seventh_grade = 75
sixth_grade = 85
fifth_grade = 95

In [8]:
def adjust_difference(diff, threshold):
    """Return how far `diff` exceeds `threshold`, or 0 when it does not exceed it."""
    return diff - threshold if diff > threshold else 0

In [9]:
# Load the few-shot examples. Each row is later expanded into the `example`
# and `icd10_example` prompt templates via `.format(**row)`, so the CSV
# columns must match the placeholders used by those templates.
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [19]:
# Load the evaluation data.
# EVAL_NROWS caps how many findings are read, which keeps the number of
# model calls small while iterating; set it to None to read the whole file.
EVAL_NROWS = 2
df_eval = pd.read_csv('pfx_evaluation_data.csv', nrows=EVAL_NROWS)

In [11]:
def extract_json(openai_response):
    """
    Extract the first JSON object contained in a model response.

    The object is looked for inside the first ``` fenced block when that
    block contains a '{'; otherwise the whole response is searched, so
    replies that omit the code fence are still parsed. Nested objects are
    supported because the text is handed to a real JSON decoder instead of
    being cut at the first closing brace.

    Args:
        openai_response: The response text, or None / empty string.

    Returns:
        The parsed JSON object (dict) on success, {} when no JSON object
        is found or decoding fails (a message is printed in both cases),
        or None when the response itself is empty/None.
    """
    if not openai_response:  # Ensure the response is not None or empty
        return None

    # Prefer the content of the first fenced block, if it holds an object
    fenced = re.search(r'```(.*?)```', openai_response, re.DOTALL)
    if fenced and '{' in fenced.group(1):
        candidate = fenced.group(1)
    else:
        candidate = openai_response

    start = candidate.find('{')
    if start == -1:
        print("No JSON object found in the response.")
        return {}

    # Newlines are stripped before decoding (as before), which also lets
    # string values that contain raw line breaks parse.
    json_str = candidate[start:].replace('\n', '')
    try:
        # raw_decode parses one complete value starting at the opening brace
        # and ignores whatever follows it (closing fence, commentary, ...)
        json_object, _ = json.JSONDecoder().raw_decode(json_str)
        return json_object
    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        print("JSON decoding failed: ", e)
        return {}

In [23]:
def label_icd10s(pfx_output):
    """
    Ask the model to assign an ICD-10 code to a single PFx explanation.

    Args:
        pfx_output: The PFx text (or any object whose string form should be
            substituted into the `{PFx}` slot of the labeling prompt).

    Returns:
        Whatever `extract_json` produces from the model reply: the parsed
        JSON object as a dict, {} when no JSON could be parsed, or None
        when the reply was empty.
    """
    # Render every few-shot row through the ICD-10 example template
    pfx_icd10_fewshot_examples = "".join(
        icd10_example.format(**row) for _, row in df_fewshot.iterrows()
    )

    # Generate the prompt for ICD-10 labeling
    prompt = single_fewshot_icd10_labeling_prompt.format(
        examples=pfx_icd10_fewshot_examples,
        PFx=pfx_output,
    )

    # Call the model to get ICD-10 codes
    pfx_icd10_response = CLIENT.chat.completions.create(
        model=OPENAI_MODEL,
        temperature=0.0,
        messages=[
            {
                "role": "system",
                "content": "You are an ICD10 medical coder for incidental findings. Always respond with a valid JSON object containing the ICD-10 code and its explanation."
            },
            {
                # The task prompt is the request itself, so it is sent as
                # the user turn, matching the PFx generation call (it used
                # to be sent as a second system message).
                "role": "user",
                "content": prompt
            }
        ],
        stream=False,
    )

    # Extract the JSON structure (or dictionary) from the LLM response
    return extract_json(pfx_icd10_response.choices[0].message.content)


In [24]:
# Number of PFx generations requested for every finding
N_RUNS = 5
RESULT_COLUMNS = ["finding", "ICD10_code", "PFx", "PFx_ICD10_code"]

# Generate few-shot examples
pfx_fewshot_examples = "".join(
    example.format(**row) for _, row in df_fewshot.iterrows()
)

# Records are collected in a list and turned into a DataFrame once at the
# end; concatenating a one-row frame per iteration copies the whole frame
# every time.
result_records = []

# Generate PFx for each row in df_eval, N_RUNS times per row
for i, row in df_eval.iterrows():
    # The prompt only depends on the finding, so build it once per row
    prompt = single_fewshot_prompt.format(
        Examples=pfx_fewshot_examples,
        Incidental_Finding=row["Incidental_Finding"],
        Reading_Level=SIXTH_GRADE
    )

    for run in range(N_RUNS):
        # Generate response from the client
        pfx_response = CLIENT.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.0,
            messages=[
                {"role": "system", "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient in an understandable manner."},
                {"role": "user", "content": prompt}
            ],
            stream=False,
        )

        # Extract JSON from the response
        extracted_response = extract_json(pfx_response.choices[0].message.content)

        # extract_json yields None or {} when the reply has no parsable
        # JSON. Skip that run instead of raising, so one malformed reply
        # does not discard the results already collected.
        if not extracted_response or "PFx" not in extracted_response:
            print(f"Skipping row {i}, run {run}: no PFx found in the model response.")
            continue

        result_records.append({
            "finding": row["Incidental_Finding"],
            "ICD10_code": row.get("ICD10_code", None),  # Handle missing 'ICD10_code'
            "PFx": extracted_response["PFx"],  # Extracted explanation
            "PFx_ICD10_code": extracted_response.get("PFx_ICD10_code", None),  # Optional field
        })

# Build the results DataFrame in one step (keeps the column order even when
# no record was collected)
results_df = pd.DataFrame(result_records, columns=RESULT_COLUMNS)


In [26]:
results_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code
0,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89
1,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89
2,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.82
3,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89
4,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89
5,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6
6,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6
7,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6
8,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6
9,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6


In [27]:
# Label every generated PFx with an ICD-10 code, one label_icd10s call
# (i.e. one model request) per row of results_df, in row order
labeled_icd10_responses = [
    label_icd10s(pfx_text) for pfx_text in results_df['PFx']
]

In [28]:
labeled_icd10_responses

[{'ICD10_code': 'R90.89'},
 {'ICD10_code': 'R90.89'},
 {'ICD10_code': 'R90.89'},
 {'ICD10_code': 'R90.89'},
 {'ICD10_code': 'R90.89'},
 {'ICD10_code': 'G93.0'},
 {'ICD10_code': 'Q04.6'},
 {'ICD10_code': 'Q04.6'},
 {'ICD10_code': 'Q04.6'},
 {'ICD10_code': 'Q04.6'}]

In [29]:
# Create lists to store the results
icd10_matches = []
pfx_icd10_matches = []
flesch_scores = []

# Code assigned by the labeling agent for each PFx. The "ICD10_code" key is
# preferred; the first value is only a fallback for replies that use another
# key, and "" stands in for an empty / missing reply.
agent_icd10_codes = [
    (x.get("ICD10_code", next(iter(x.values()))) if x else "")
    for x in labeled_icd10_responses
]

if len(agent_icd10_codes) != len(results_df):
    raise ValueError(
        "labeled_icd10_responses and results_df have different lengths; "
        "re-run the labeling cell after regenerating results_df."
    )

# Pair codes with rows by position, so the loop does not depend on the
# DataFrame index being 0..n-1
for agent_icd10_code, (_, row) in zip(agent_icd10_codes, results_df.iterrows()):
    # Does the agent's code match the reference code for the finding?
    icd10_matches.append(row["ICD10_code"] == agent_icd10_code)

    # Does the code produced together with the PFx match the reference code?
    pfx_icd10_matches.append(row["PFx_ICD10_code"] == row["ICD10_code"])

    # Calculate the Flesch Reading Ease score
    flesch_scores.append(textstat.flesch_reading_ease(row['PFx']))

# Add the results to the DataFrame
results_df['_0_agent_icd10_codes'] = agent_icd10_codes
results_df['_0_icd10_matches'] = icd10_matches
results_df['_0_pfx_icd10_matches'] = pfx_icd10_matches
results_df['_0_flesch'] = flesch_scores

In [31]:
# Target Flesch reading-ease score the PFx text is graded against
desired_reading_ease = sixth_grade

# Create lists to store the results
accuracy_icd10_matches_list = []
accuracy_pfx_matches_list = []
readability_difference_list = []
overall_score_list = []

# Iterate over the three input columns in parallel
for icd10_hit, pfx_hit, flesch_score in zip(
    results_df["_0_icd10_matches"],
    results_df["_0_pfx_icd10_matches"],
    results_df["_0_flesch"],
):
    # Total number of ICD-10 matches (0, 1 or 2). int() keeps this a count
    # even if the values are numpy booleans, whose `+` is a logical OR.
    total_icd10_matches = int(icd10_hit) + int(pfx_hit)

    # Distance between the measured and the desired reading ease
    readability_difference = abs(flesch_score - desired_reading_ease)

    # Compute the overall score: 0.8 per ICD-10 match plus up to 0.2 for
    # readability (the readability term is 0.2 at zero distance and shrinks
    # as the distance grows).
    # NOTE(review): with both codes matching this can reach 1.8 - confirm
    # whether the match term was meant to be averaged.
    overall_score = total_icd10_matches * 0.8 + 0.2 * (1 / (readability_difference + 1))

    # Append results to lists
    accuracy_icd10_matches_list.append(float(icd10_hit))
    accuracy_pfx_matches_list.append(float(pfx_hit))
    readability_difference_list.append(float(readability_difference))
    overall_score_list.append(float(overall_score))

# Create a DataFrame with the results
grades_data = {
    "accuracy_agent_icd10": accuracy_icd10_matches_list,
    "accuracy_pfx_icd10": accuracy_pfx_matches_list,
    "readability_difference": readability_difference_list,
    "overall_score": overall_score_list
}
grades = pd.DataFrame(grades_data, index=results_df.index)

# Drop grade columns left over from an earlier run before attaching the new
# ones, so re-running this cell replaces them instead of duplicating them
results_df = results_df.drop(columns=grades.columns, errors="ignore").join(grades)

In [32]:
results_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,_0_agent_icd10_codes,_0_icd10_matches,_0_pfx_icd10_matches,_0_flesch,accuracy_agent_icd10,accuracy_pfx_icd10,readability_difference,overall_score
0,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89,R90.89,False,False,69.31,0.0,0.0,15.69,0.011983
1,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89,R90.89,False,False,58.62,0.0,0.0,26.38,0.007305
2,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.82,R90.89,False,True,71.44,0.0,1.0,13.56,0.813736
3,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89,R90.89,False,False,70.53,0.0,0.0,14.47,0.012928
4,White matter lesions,R90.82,White matter lesions are small areas in the br...,R90.89,R90.89,False,False,80.62,0.0,0.0,4.38,0.037175
5,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,G93.0,False,False,79.9,0.0,0.0,5.1,0.032787
6,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,Q04.6,False,False,72.46,0.0,0.0,12.54,0.014771
7,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,Q04.6,False,False,72.66,0.0,0.0,12.34,0.014993
8,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,Q04.6,False,False,72.97,0.0,0.0,12.03,0.015349
9,Arachnoid cyst,Q04.3,An arachnoid cyst is a fluid-filled sac that i...,Q04.6,Q04.6,False,False,72.46,0.0,0.0,12.54,0.014771


In [None]:
# Write the graded results to disk; index=False leaves the DataFrame index
# out of the file. (`false` is not a Python name and raised NameError.)
results_df.to_csv('PFx_multiple_few.csv', index=False)

In [None]:
# Display a link to the exported CSV as this cell's output
FileLink('PFx_multiple_few.csv')