In [1]:
# import necessary libraries
import pandas as pd
import os
import textstat
from openai import OpenAI
import json
import re
import requests
from dotenv import load_dotenv

In [2]:
# import prompts 
from jh_pfx_prompts import example, icd10_example, baseline_zeroshot_prompt, single_fewshot_prompt, single_fewshot_icd10_labeling_prompt

In [3]:
# Do not hardcode the API key in the notebook. Assigning a literal here (even an
# empty one) overwrites any key already exported in the shell and stops a later
# load_dotenv() call from filling it in, because the variable then already exists.
# Instead, read OPENAI_API_KEY from the process environment or a local .env file
# before the client is created.
load_dotenv()

In [4]:
# OpenAI client setup: the key is read from the environment, the model name is fixed.
OPENAI_MODEL = "gpt-4o"
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
CLIENT = OpenAI(api_key=OPENAI_API_KEY)

In [5]:
# Reading-level labels, listed from the easiest text to the hardest.
# The identifiers keep their existing spelling because other cells refer to them.
FIFTH_GRADE = '5th grade'
SIXTH_GRADE = '6th grade'
SEVENTH_GRADE = '7th grade'
EIGTH_TO_NINTH_GRADE = '8th to 9th grade'
TENTH_TO_TWELTH_GRADE = '10th to 12th grade'
COLLEGE = 'College'
COLLEGE_GRADUATE = 'College Graduate'
PROFESSIONAL = 'Professional'
# Label used by map_reading_level when the score falls in none of the bands.
N_A = 'N/A'

In [6]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    """Map a Flesch reading-ease score to its reading-level label.

    Higher scores mean easier text. Each band is half-open: the lower bound is
    included and the upper bound is excluded, except for the easiest band,
    which has no upper bound.

    Args:
        flesch_reading_ease: Numeric Flesch reading-ease score, or None.

    Returns:
        One of the reading-level label constants. Scores below 10 (including
        negative ones) map to PROFESSIONAL; scores of 90 and above (including
        exactly 100 and anything higher) map to FIFTH_GRADE. N_A is returned
        when the score is None or NaN.
    """
    # `x != x` is only true for NaN; this avoids needing the math module here.
    if flesch_reading_ease is None or flesch_reading_ease != flesch_reading_ease:
        return N_A

    # (exclusive upper bound, label), ordered from hardest to easiest text.
    bands = (
        (10.0, PROFESSIONAL),
        (30.0, COLLEGE_GRADUATE),
        (50.0, COLLEGE),
        (60.0, TENTH_TO_TWELTH_GRADE),
        (70.0, EIGTH_TO_NINTH_GRADE),
        (80.0, SEVENTH_GRADE),
        (90.0, SIXTH_GRADE),
    )
    for upper_bound, label in bands:
        if flesch_reading_ease < upper_bound:
            return label

    # Everything from 90 upwards is the easiest band. Very simple text can score
    # 100 or more, which previously fell through to N_A.
    return FIFTH_GRADE

In [7]:
# Target Flesch reading-ease score for each reading level, hardest text first.
# Each value lies inside the matching band of map_reading_level.
professional = 5              # band: below 10
college_graduate = 20         # band: 10 to 30
college = 40                  # band: 30 to 50
tenth_to_twelfth_grade = 55   # band: 50 to 60
eigth_and_ninth_grade = 65    # band: 60 to 70
seventh_grade = 75            # band: 70 to 80
sixth_grade = 85              # band: 80 to 90
fifth_grade = 95              # band: 90 to 100

In [8]:
def adjust_difference(diff, threshold):
    """Return how far `diff` exceeds `threshold`, or 0 when it does not exceed it.

    Args:
        diff: The readability difference to adjust.
        threshold: The amount of difference that is tolerated for free.

    Returns:
        `diff - threshold` when `diff` is strictly greater than `threshold`,
        otherwise 0.
    """
    return diff - threshold if diff > threshold else 0

In [9]:
# Load the few-shot examples; each row is later formatted into the `example`
# prompt template to build the few-shot section of the prompt.
df_fewshot = pd.read_csv('pfx_fewshot_examples_college.csv')

In [10]:
# Load the evaluation data; the generation loop reads its "Incidental_Finding" column.
df_eval = pd.read_csv('pfx_evaluation_data.csv')

In [11]:
def extract_json(openai_response):
    """Extract the first JSON object found in a model response.

    The content of the first triple-backtick block is searched first. If there
    is no such block, or it contains no `{`, the whole response is searched
    instead. The object is decoded with a real JSON parser, so nested objects
    and `}` characters inside string values are handled.

    Args:
        openai_response: The response text, or None / empty string.

    Returns:
        The decoded JSON object; `{}` when no object is found or decoding
        fails (a message is printed in both cases); None when the response
        itself is empty or None.
    """
    if not openai_response:  # Nothing to parse
        return None

    fenced = re.search(r'```(.*?)```', openai_response, re.DOTALL)
    candidates = [fenced.group(1), openai_response] if fenced else [openai_response]

    for candidate in candidates:
        # Newlines are dropped before decoding (as before) so that raw line
        # breaks inside string values do not make the JSON invalid.
        flattened = candidate.replace('\n', '')
        start = flattened.find('{')
        if start != -1:
            break
    else:
        print("No JSON object found in the response.")
        return {}

    try:
        # raw_decode parses one complete value starting at `start` and ignores
        # whatever text follows it (closing fence, commentary, ...).
        json_object, _end = json.JSONDecoder().raw_decode(flattened, start)
        return json_object
    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        print("JSON decoding failed: ", e)
        return {}

In [12]:
def label_icd10s(pfx_outputs_json):
    """Collect the "ICD10_code" value of every parsed response.

    Args:
        pfx_outputs_json: Iterable of parsed responses (dict-like objects).

    Returns:
        A list with one entry per response: the value stored under
        "ICD10_code", "Unknown" when that key is absent, or "Error" when
        reading the entry raised an exception (the error is printed).
    """
    def _code_of(item):
        # Best-effort lookup: entries that cannot be read (e.g. None) become "Error".
        try:
            return item.get("ICD10_code", "Unknown")
        except Exception as e:
            print(f"Error processing response: {e}")
            return "Error"

    return [_code_of(item) for item in pfx_outputs_json]

In [13]:
# Load variables from a local .env file into the process environment.
# NOTE(review): CLIENT was already created in an earlier cell, so anything
# loaded here does not affect that client.
load_dotenv()

True

In [14]:
# Few-shot section of the prompt: every row of df_fewshot rendered through the
# `example` template and concatenated in row order.
pfx_fewshot_examples = "".join(
    example.format(**row) for i, row in df_fewshot.iterrows()
)

# Fixed system instruction sent with every request.
system_message = {
    "role": "system",
    "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient in an understandable manner.",
}

# Only the first evaluation row is used; it is queried once per run.
eval_rows = df_eval.iloc[:1]

pfx_fewshot_outputs = []
for run in range(5):
    for i, row in eval_rows.iterrows():
        prompt = single_fewshot_prompt.format(
            Examples=pfx_fewshot_examples,
            Incidental_Finding=row["Incidental_Finding"],
            Reading_Level=TENTH_TO_TWELTH_GRADE,
        )
        messages = [system_message, {"role": "user", "content": prompt}]
        pfx_response = CLIENT.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.0,
            messages=messages,
            stream=False,
        )
        # Keep only the text of the first choice.
        pfx_fewshot_outputs.append(pfx_response.choices[0].message.content)



In [15]:
# Parse each raw response text into a JSON object, preserving order.
pfx_fewshot_outputs_json = [extract_json(raw_text) for raw_text in pfx_fewshot_outputs]

In [16]:
# Display the parsed responses (one entry per API response) to check the extraction.
pfx_fewshot_outputs_json

[{'finding': 'White matter lesions',
  'ICD10_code': 'R90.89',
  'PFx': 'White matter lesions are areas in the brain that appear different on an MRI scan. These lesions are often found incidentally and can be associated with aging, migraines, or other conditions. Patients should know that while these lesions can be common, they are not always a cause for concern. In many cases, they do not cause symptoms and do not require treatment. However, if there are symptoms like memory problems or difficulty with balance, further evaluation may be needed to understand their significance and to discuss potential management strategies.',
  'PFx_ICD10_code': 'R90.89'},
 {'finding': 'White matter lesions',
  'ICD10_code': 'R90.89',
  'PFx': 'White matter lesions are areas in the brain that appear different on an MRI scan. These lesions are often found incidentally and can be associated with aging, migraines, or other conditions. Patients should know that while these lesions can be common, they are n

In [17]:
# Put the parsed responses into a DataFrame for tabular inspection.
pfx_fewshot_output_df = pd.DataFrame(pfx_fewshot_outputs_json)

In [18]:
# Display the parsed responses as a table.
pfx_fewshot_output_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code
0,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89
1,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89
2,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89
3,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89
4,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89


In [19]:
# Collect the "ICD10_code" field of every parsed response.
# NOTE(review): these labels are read back from the responses themselves by
# label_icd10s; they are not produced by a separate labeling call.
pfx_fewshot_outputs_icd10_labels = label_icd10s(pfx_fewshot_outputs_json)

In [20]:
# Display the ICD-10 code collected from each response.
pfx_fewshot_outputs_icd10_labels

['R90.89', 'R90.89', 'R90.89', 'R90.89', 'R90.89']

In [None]:
# Grade every parsed response and remember the best one.
# overall_score = weighted ICD-10 agreement minus a weighted readability penalty.
highest_grade = float('-inf')
best_response = None
grades_data = []  # One dict per graded response; becomes the `grades` DataFrame
desired_reading_ease = tenth_to_twelfth_grade

# Weights of the three score components.
ICD10_AGENT_WEIGHT = 0.35
ICD10_PFX_WEIGHT = 0.35
READABILITY_WEIGHT = 0.3

# Fields every response must provide to be graded.
REQUIRED_KEYS = ("ICD10_code", "PFx_ICD10_code", "PFx")

# Number of Flesch points the readability may miss the target by without penalty.
# It depends only on the target, so it is computed once, outside the loop.
# NOTE(review): 5 and 10 look like half the width of the reading-level bands
# (10-wide bands from 50 upwards, 20-wide bands from 10 to 50) - confirm.
# Targets below 20 get no tolerance, as before.
if desired_reading_ease >= 55:
    readability_tolerance = 5
elif desired_reading_ease >= 20:
    readability_tolerance = 10
else:
    readability_tolerance = 0

for index, response in enumerate(pfx_fewshot_outputs_json):
    # extract_json returns None or {} when a response could not be parsed;
    # skip such entries explicitly instead of failing on a missing field.
    if not isinstance(response, dict) or any(key not in response for key in REQUIRED_KEYS):
        print(f"Skipping response {index}: not a parsed object with the required fields.")
        continue

    # NOTE(review): this label was read from the same response's "ICD10_code"
    # field by label_icd10s, so the first comparison is true whenever the key
    # is present.
    icd10_label = pfx_fewshot_outputs_icd10_labels[index]
    accuracy_icd10_matches = float(response["ICD10_code"] == icd10_label)
    accuracy_pfx_matches = float(response["ICD10_code"] == response["PFx_ICD10_code"])

    # Readability: distance from the target score, reduced by the tolerance.
    readability_score = textstat.flesch_reading_ease(response["PFx"])
    readability_difference = adjust_difference(
        abs(readability_score - desired_reading_ease), readability_tolerance
    )

    # Compute the overall score using the weights defined above
    overall_score = (
        (accuracy_icd10_matches * ICD10_AGENT_WEIGHT)
        + (accuracy_pfx_matches * ICD10_PFX_WEIGHT)
        - (readability_difference * READABILITY_WEIGHT)
    )

    # Store the grades for later DataFrame creation
    grades_data.append({
        "response_index": index,
        "accuracy_agent_icd10": accuracy_icd10_matches,
        "accuracy_pfx_icd10": accuracy_pfx_matches,
        "readability_score": readability_score,
        "readability_difference": readability_difference,
        "overall_score": overall_score
    })

    # Strict comparison: on ties the earliest response stays the best one.
    if overall_score > highest_grade:
        highest_grade = overall_score
        best_response = response

# Create the grades DataFrame from the collected data
grades = pd.DataFrame(grades_data)


In [45]:
# Display the per-response grades.
grades

Unnamed: 0,response_index,accuracy_agent_icd10,accuracy_pfx_icd10,readability_difference,overall_score
0,0,1.0,1.0,0,0.7
1,1,1.0,1.0,0,0.7
2,2,1.0,1.0,0,0.7
3,3,1.0,1.0,0,0.7
4,4,1.0,1.0,0,0.7


In [47]:
# Display the response with the highest overall score.
best_response

{'finding': 'White matter lesions',
 'ICD10_code': 'R90.89',
 'PFx': 'White matter lesions are areas in the brain that appear different on an MRI scan. These lesions are often found incidentally and can be associated with aging, migraines, or other conditions. Patients should know that while these lesions can be common, they are not always a cause for concern. In many cases, they do not cause symptoms and do not require treatment. However, if there are symptoms like memory problems or difficulty with balance, further evaluation may be needed to understand their significance and to discuss potential management strategies.',
 'PFx_ICD10_code': 'R90.89'}