In [78]:
# import necessary libraries
import pandas as pd
import os
import textstat
from openai import OpenAI
import json
import re
import requests
from dotenv import load_dotenv

In [79]:
# import prompts 
from jh_pfx_prompts import example, icd10_example, baseline_zeroshot_prompt, single_fewshot_icd10_labeling_prompt

In [80]:
os.environ['OPENAI_API_KEY'] = ''

In [81]:
# api key
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
CLIENT = OpenAI(api_key = OPENAI_API_KEY)
OPENAI_MODEL = "gpt-4o"

In [82]:
#reading levels
PROFESSIONAL = "Professional"
COLLEGE_GRADUATE = "College Graduate"
COLLEGE = "College"
TENTH_TO_TWELTH_GRADE = "10th to 12th grade"
EIGTH_TO_NINTH_GRADE = "8th to 9th grade"
SEVENTH_GRADE = "7th grade"
SIXTH_GRADE = "6th grade"
FIFTH_GRADE = "5th grade"
N_A = "N/A"

In [83]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    if flesch_reading_ease < 10:
        return PROFESSIONAL
    elif 10.0 <= flesch_reading_ease < 30.0:
        return COLLEGE_GRADUATE
    elif 30.0 <= flesch_reading_ease < 50.0:
        return COLLEGE
    elif 50.0 <= flesch_reading_ease < 60.0:
        return TENTH_TO_TWELTH_GRADE
    elif 60.0 <= flesch_reading_ease < 70.0:
        return EIGTH_TO_NINTH_GRADE
    elif 70.0 <= flesch_reading_ease < 80.0:
        return SEVENTH_GRADE
    elif 80.0 <= flesch_reading_ease < 90.0:
        return SIXTH_GRADE
    elif 90.0 <= flesch_reading_ease < 100.0:
        return FIFTH_GRADE 
    else:
        return N_A

In [84]:
# reading ease variables
fifth_grade = 95
sixth_grade = 85
seventh_grade = 75
eigth_and_ninth_grade = 65
tenth_to_twelfth_grade = 55
college = 40
college_graduate = 20
professional = 5

In [85]:
def adjust_difference(diff, threshold):
    """Adjust the readability difference based on the threshold."""
    if diff > threshold:
        return diff - threshold
    return 0

In [86]:
# import fewshot examples
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [87]:
# import evaluation data 
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [88]:
# extract the json from openai
def extract_json(openai_response):
    if openai_response:  # Ensure the response is not None
        try:
            # Extract content from response object
            content = openai_response.message.content
            
            # Search for JSON within the content
            json_match = re.search(r'```json\n(.*?)\n```', content, re.DOTALL)
            if json_match:
                json_str = json_match.group(1)
                return json.loads(json_str)  # Parse JSON string to Python dict
            else:
                print("No JSON found in response content.")
                return None
        except AttributeError as e:
            print(f"Attribute error: {e}. Ensure the input is a valid response object.")
            return None
    else:
        return None

In [25]:
"""
# takes json object and generated icd10 labels for incidental findings 
def label_icd10s(pfx_outputs_json):
    pfx_icd10_fewshot_examples = ""
    for i, row in df_fewshot.iterrows():
        pfx_icd10_fewshot_examples += icd10_example.format(**row)
    pfx_icd10_codes = []
    pfx_icd10_response = None 
    for pfx_output in pfx_outputs_json :
        try:
            prompt = single_fewshot_icd10_labeling_prompt.format(examples = pfx_icd10_fewshot_examples, PFx = pfx_output['PFx'])
        except:
            print("ERROR: %S" % pfx_output)
            return
        pfx_icd10_response = CLIENT.chat.completions.create(
            model = OPENAI_MODEL,
            temperature = 0.0,
            messages = [
                {"role": "system", "content": "You are an ICD10 medical coder for incidental findings. "},
                {"role": "system", "content": prompt}
            ],
            stream = False,
        )
    pfx_icd10_codes.append(pfx_icd10_response.choices[0])
    return list(map(extract_json, pfx_icd10_codes))

    """

In [89]:
def label_icd10s(pfx_output):
    """
    Takes a single PFx response (string or JSON) and returns
    a labeled ICD-10 result as a Python dictionary (or object).
    """

    # Build up the few-shot examples for ICD-10 labeling
    pfx_icd10_fewshot_examples = ""
    for i, row in df_fewshot.iterrows():
        pfx_icd10_fewshot_examples += icd10_example.format(**row)

    # Generate the prompt for ICD-10 labeling
    # (Adjust the '{PFx}' if pfx_output is a dictionary with a specific key you need)
    prompt = single_fewshot_icd10_labeling_prompt.format(
        examples=pfx_icd10_fewshot_examples,
        PFx=pfx_output  # or PFx=pfx_output['key'] if needed
    )

    # Call the model to get ICD-10 codes
    pfx_icd10_response = CLIENT.chat.completions.create(
        model=OPENAI_MODEL,
        temperature=0.0,
        messages=[
            {
                "role": "system",
                "content": "You are an ICD10 medical coder for incidental findings."
            },
            {
                "role": "system",
                "content": prompt
            }
        ],
        stream=False,
    )

    # Extract the JSON structure (or dictionary) from the LLM response
    labeled_result = extract_json(pfx_icd10_response.choices[0])

    return labeled_result


USE THIS WHEN GENERATING PFx FOR A SINGLE INCIDENTAL FINDING

In [72]:
# gets zeroshot response 
pfx_zeroshot_outputs = []
for i, row in df_eval.iloc[24:25].iterrows():
        prompt = baseline_zeroshot_prompt.format(Incidental_Finding = row['Incidental_Finding'], Reading_Level = SIXTH_GRADE)
pfx_response = None 
pfx_response = CLIENT.chat.completions.create(
    model = OPENAI_MODEL,
    temperature = 0.0,
    messages = [
        {"role": "system", "content": "You are a md doctor rephrasing and explaining medical terminology to a patient in an understandable manor."},
        {"role": "system", "content": prompt}
    ],
    stream = False,
)

pfx_zeroshot_outputs.append(pfx_response.choices[0])

In [73]:
# converts to a readable format 
pfx_zeroshot_outputs_json = list(map(extract_json, pfx_zeroshot_outputs))

In [74]:
pfx_zeroshot_outputs_json

[{'finding': 'Pineal cyst',
  'ICD10_code': 'Q04.6',
  'PFx': "A pineal cyst is a small, fluid-filled sac in the pineal gland, which is a tiny part of your brain. Most of the time, these cysts are harmless and don't cause any symptoms. They are often found by accident when you have a brain scan for another reason. Think of it like a small bubble that doesn't usually cause any trouble. Doctors usually just keep an eye on it to make sure it doesn't grow or cause problems. It's important to know that having a pineal cyst is quite common and usually nothing to worry about.",
  'PFx_ICD10_code': 'R93.0'}]

In [75]:
pfx_zeroshot_output_df = pd.DataFrame(pfx_zeroshot_outputs_json)

In [76]:
pfx_zeroshot_output_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code
0,Pineal cyst,Q04.6,"A pineal cyst is a small, fluid-filled sac in ...",R93.0


In [None]:
pfx_zeroshot_outputs_icd10_labels = label_icd10s(pfx_zeroshot_outputs_json)

In [None]:
pfx_zeroshot_output_df['_0_agent_icd10_codes'] = list(map(lambda x: list(x.values())[0] if x else "", pfx_zeroshot_outputs_icd10_labels))
pfx_zeroshot_output_df["_0_icd10_matches"]= pfx_zeroshot_output_df.ICD10_code == pfx_zeroshot_output_df._0_agent_icd10_codes
pfx_zeroshot_output_df["_0_pfx_icd10_matches"] = pfx_zeroshot_output_df.ICD10_code == pfx_zeroshot_output_df["PFx_ICD10_code"] 
pfx_zeroshot_output_df["_0_flesch"] = pfx_zeroshot_output_df['PFx'].apply(textstat.flesch_reading_ease)
pfx_zeroshot_output_df["_0_reading_level"] = pfx_zeroshot_output_df['_0_flesch'].apply(map_reading_level)
pfx_zeroshot_output_df["_0_flesch_kincaid"] = pfx_zeroshot_output_df['PFx'].apply(textstat.flesch_kincaid_grade)
pfx_zeroshot_output_df["_0_reading_level_matches"] = pfx_zeroshot_output_df['_0_reading_level'] == TENTH_TO_TWELTH_GRADE

In [None]:
desired_reading_ease = sixth_grade

# Calculate accuracy score
accuracy_icd10_matches = sum(pfx_zeroshot_output_df["_0_icd10_matches"]) / len(pfx_zeroshot_output_df.index)
accuracy_pfx_matches = sum(pfx_zeroshot_output_df["_0_pfx_icd10_matches"]) / len(pfx_zeroshot_output_df.index)
flesch_score = pfx_zeroshot_output_df["_0_flesch"]

# Adjust weights for overall score
# Calculate readability score 
readability_score = pfx_zeroshot_output_df["_0_flesch"]
readability_difference = abs(readability_score - desired_reading_ease)
if desired_reading_ease >= 55:
    # Adjust element-wise if readability_difference is a Series
    readability_difference = readability_difference.apply(lambda x: x - 5 if x > 5 else 0)
elif desired_reading_ease >= 20:
    readability_difference = readability_difference.apply(lambda x: x - 10 if x > 10 else 0)

readability_difference_p = readability_difference/10

# Compute the overall score
overall_score = (accuracy_icd10_matches * 0.35) + (accuracy_pfx_matches * 0.35) - (readability_difference_p * 0.3)

grades_data = []
grades_data.append({
    "accuracy_agent_icd10": float(accuracy_icd10_matches),  # Assuming accuracy is already a scalar
    "accuracy_pfx_icd10": float(accuracy_pfx_matches),
    "readability_difference": float(readability_difference.iloc[0]),  # Use .iloc[0] to extract scalar
    "overall_score": float(overall_score.iloc[0])  # Use .iloc[0] to extract scalar
}) 
grades = pd.DataFrame(grades_data)


In [None]:
flesch_score

In [None]:
grades

USE THIS FOR GENERATING PFx FOR ALL INCIDENTAL FINDINGS

In [114]:
# empty list to store results
results = pd.DataFrame(columns=["finding", "ICD10_code", "PFx", "PFx_ICD10_code"])

# generate each PFx for each row in df_eval
for i, row in df_eval.iterrows():
    prompt = baseline_zeroshot_prompt.format(Incidental_Finding=row['Incidental_Finding'], Reading_Level=SIXTH_GRADE)
    
    pfx_response = CLIENT.chat.completions.create(
        model=OPENAI_MODEL,
        temperature=0.0,
        messages=[
            {"role": "system", "content": "You are a md doctor rephrasing and explaining medical terminology to a patient in an understandable manner."},
            {"role": "system", "content": prompt}
        ],
        stream=False,
    )
    
    # Extract the JSON from the response
    extracted_response = extract_json(pfx_response.choices[0])
    
    # Append the result to the list
    results.loc[i] = extracted_response

pfx_zeroshot_output_all_df = pd.DataFrame(results)

In [115]:
pfx_zeroshot_output_all_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code
0,White matter lesions,R90.89,White matter lesions are small changes in the ...,Z71.89
1,Arachnoid cyst,G93.0,An arachnoid cyst is like a small bubble fille...,Z03.89
2,Pituitary microadenoma,D35.2,"A pituitary microadenoma is a small, non-cance...",R68.89
3,Pineal cyst,Q04.6,"A pineal cyst is a small, fluid-filled sac in ...",R93.0


In [116]:
# Create a new list to store the labeled ICD10 responses
labeled_icd10_responses = []

# Iterate over each response in pfx_zeroshot_output_all_df and apply the label_icd10s function
for response in pfx_zeroshot_output_all_df['PFx']:
    labeled_icd10_responses.append(label_icd10s(response))

In [129]:
labeled_icd10_responses

[{'ICD10_code': 'R90.89'},
 {'ICD10_code': 'Q04.6'},
 {'ICD10_code': 'D35.2'},
 {'ICD10_code': 'Q04.8'}]

In [118]:
pfx_zeroshot_output_all_df.columns

Index(['finding', 'ICD10_code', 'PFx', 'PFx_ICD10_code'], dtype='object')

In [146]:
for i, item in enumerate(labeled_icd10_responses):
    print(f"Index {i}: {item}, Type: {type(item)}")

Index 0: {'ICD10_code': 'R90.89'}, Type: <class 'dict'>
Index 1: {'ICD10_code': 'Q04.6'}, Type: <class 'dict'>
Index 2: {'ICD10_code': 'D35.2'}, Type: <class 'dict'>
Index 3: {'ICD10_code': 'Q04.8'}, Type: <class 'dict'>


In [147]:
print(f"Last element: {labeled_icd10_responses[-1]}, Type: {type(labeled_icd10_responses[-1])}")

Last element: {'ICD10_code': 'Q04.8'}, Type: <class 'dict'>


In [154]:
# Create lists to store the results
agent_icd10_codes = []
icd10_matches = []
pfx_icd10_matches = []
flesch_scores = []

agent_icd10_codes.extend([list(x.values())[0] if x else "" for x in labeled_icd10_responses])

for index, row in pfx_zeroshot_output_all_df.iterrows():
    # Compare to the "ICD10_code" in your DataFrame (if it exists)
    icd10_match = (row["ICD10_code"] == agent_icd10_code)
    icd10_matches.append(icd10_match)

    # compare 
    pfx_icd10_match = (row["PFx_ICD10_code"] == row["ICD10_code"])
    pfx_icd10_matches.append(pfx_icd10_match)

    # Calculate the Flesch Reading Ease score
    flesch_score = textstat.flesch_reading_ease(row['PFx'])
    flesch_scores.append(flesch_score)

# Add the results to the DataFrame
pfx_zeroshot_output_all_df['_0_agent_icd10_codes'] = agent_icd10_codes
pfx_zeroshot_output_all_df['_0_icd10_matches'] = icd10_matches
pfx_zeroshot_output_all_df['_0_pfx_icd10_matches'] = pfx_icd10_matches
pfx_zeroshot_output_all_df['_0_flesch'] = flesch_scores

Number of rows in DataFrame:4
Length of agent_icd10_codes: 4
Length of pfx_icd10_matches: 4
Length of flesch scores: 4
['R90.89', 'Q04.6', 'D35.2', 'Q04.8']


In [155]:
desired_reading_ease = sixth_grade

# Create lists to store the results
accuracy_icd10_matches_list = []
accuracy_pfx_matches_list = []
readability_difference_list = []
overall_score_list = []

# Iterate over each row in the DataFrame
for index, row in pfx_zeroshot_output_all_df.iterrows():
    # Calculate accuracy score
    accuracy_icd10_matches = row["_0_icd10_matches"]
    accuracy_pfx_matches = row["_0_pfx_icd10_matches"]
    flesch_score = row["_0_flesch"]

    # Adjust weights for overall score
    # Calculate readability score 
    readability_score = flesch_score
    readability_difference = abs(readability_score - desired_reading_ease)
    if desired_reading_ease >= 55:
        readability_difference = readability_difference - 5 if readability_difference > 5 else 0
    elif desired_reading_ease >= 20:
        readability_difference = readability_difference - 10 if readability_difference > 10 else 0

    readability_difference_p = readability_difference / 10

    # Compute the overall score
    overall_score = (accuracy_icd10_matches * 0.35) + (accuracy_pfx_matches * 0.35) - (readability_difference_p * 0.3)

    # Append results to lists
    accuracy_icd10_matches_list.append(float(accuracy_icd10_matches))
    accuracy_pfx_matches_list.append(float(accuracy_pfx_matches))
    readability_difference_list.append(float(readability_difference))
    overall_score_list.append(float(overall_score))

# Create a DataFrame with the results
grades_data = {
    "accuracy_agent_icd10": accuracy_icd10_matches_list,
    "accuracy_pfx_icd10": accuracy_pfx_matches_list,
    "readability_difference": readability_difference_list,
    "overall_score": overall_score_list
}
grades = pd.DataFrame(grades_data)
pfx_zeroshot_output_all_df = pd.concat([pfx_zeroshot_output_all_df, grades], axis=1)


In [156]:
pfx_zeroshot_output_all_df.head()

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code,_0_agent_icd10_codes,_0_icd10_matches,_0_pfx_icd10_matches,_0_flesch,accuracy_agent_icd10,accuracy_pfx_icd10,readability_difference,overall_score
0,White matter lesions,R90.89,White matter lesions are small changes in the ...,Z71.89,R90.89,True,False,77.37,1.0,0.0,2.63,0.2711
1,Arachnoid cyst,G93.0,An arachnoid cyst is like a small bubble fille...,Z03.89,Q04.6,False,False,81.33,0.0,0.0,0.0,0.0
2,Pituitary microadenoma,D35.2,"A pituitary microadenoma is a small, non-cance...",R68.89,D35.2,False,False,78.38,0.0,0.0,1.62,-0.0486
3,Pineal cyst,Q04.6,"A pineal cyst is a small, fluid-filled sac in ...",R93.0,Q04.8,False,False,79.19,0.0,0.0,0.81,-0.0243


In [None]:
# pfx_zeroshot_output_all_df.to_csv('PFx.csv', index = false)