In [1]:
# import necessary libraries
import pandas as pd
import os
import textstat
from openai import OpenAI
import json
import re
import requests
from dotenv import load_dotenv

In [2]:
# import prompts 
from jh_pfx_prompts import example, icd10_example, baseline_zeroshot_prompt, single_fewshot_prompt, single_fewshot_icd10_labeling_prompt

In [None]:
# Do not paste a real key into this cell: it would be saved with the notebook.
# Put OPENAI_API_KEY in a local .env file (or export it in the shell) instead.
# An unconditional assignment of '' here would wipe a key that is already set
# and, because load_dotenv() does not override existing variables by default,
# would also stop a .env file from supplying one.
load_dotenv()
# Keep the variable defined (possibly empty) so later cells can always read it.
os.environ.setdefault('OPENAI_API_KEY', '')

In [4]:
# OpenAI client setup: model name first, then the key read from the environment
OPENAI_MODEL = "gpt-4o"
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
CLIENT = OpenAI(api_key=OPENAI_API_KEY)

In [5]:
# Reading levels: labels for the Flesch reading-ease bands used in this notebook,
# listed from the hardest text to the easiest.
PROFESSIONAL = "Professional"
COLLEGE_GRADUATE = "College Graduate"
COLLEGE = "College"
TENTH_TO_TWELFTH_GRADE = "10th to 12th grade"
EIGHTH_TO_NINTH_GRADE = "8th to 9th grade"
SEVENTH_GRADE = "7th grade"
SIXTH_GRADE = "6th grade"
FIFTH_GRADE = "5th grade"
N_A = "N/A"  # label for a score that falls outside every band

# Misspelled legacy names, kept as aliases so existing references keep working.
TENTH_TO_TWELTH_GRADE = TENTH_TO_TWELFTH_GRADE
EIGTH_TO_NINTH_GRADE = EIGHTH_TO_NINTH_GRADE

In [6]:
# https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch_reading_ease
def map_reading_level(flesch_reading_ease):
    """Map a Flesch reading-ease score to one of the reading-level labels.

    Args:
        flesch_reading_ease: Numeric Flesch reading-ease score.

    Returns:
        PROFESSIONAL for any score below 10 (negative scores included), the
        matching band label for scores from 10 up to and including 100, and
        N_A for anything else (scores above 100, or NaN).
    """
    # Each branch is reached only after the smaller upper bounds have failed,
    # so one upper-bound comparison per band is sufficient.
    if flesch_reading_ease < 10.0:
        return PROFESSIONAL
    elif flesch_reading_ease < 30.0:
        return COLLEGE_GRADUATE
    elif flesch_reading_ease < 50.0:
        return COLLEGE
    elif flesch_reading_ease < 60.0:
        return TENTH_TO_TWELTH_GRADE
    elif flesch_reading_ease < 70.0:
        return EIGTH_TO_NINTH_GRADE
    elif flesch_reading_ease < 80.0:
        return SEVENTH_GRADE
    elif flesch_reading_ease < 90.0:
        return SIXTH_GRADE
    elif flesch_reading_ease <= 100.0:
        # The easiest band is closed at the top: a score of exactly 100.0 is
        # "5th grade" in the referenced table, not N/A.
        return FIFTH_GRADE
    else:
        return N_A

In [7]:
# Load the few-shot examples from CSV
df_fewshot = pd.read_csv(filepath_or_buffer='pfx_fewshot_examples_college.csv')

In [8]:
# Load the evaluation data from CSV
df_eval = pd.read_csv(filepath_or_buffer='pfx_evaluation_data.csv')

In [9]:
def extract_json(openai_response):
    """Parse the first JSON object that appears inside a Markdown code fence.

    Args:
        openai_response: Text returned by the model, or None.

    Returns:
        The decoded object as a dict. Returns {} (after printing a message)
        when no fenced object is found or when it cannot be decoded. Returns
        None when openai_response is None or empty.
    """
    if not openai_response:  # None or empty string: nothing to parse
        return None

    fence = "```"
    fence_start = openai_response.find(fence)
    brace_start = -1
    if fence_start != -1:
        brace_start = openai_response.find("{", fence_start + len(fence))

    # Require an opening fence, a "{" after it, and a closing fence after that.
    if brace_start == -1 or fence not in openai_response[brace_start:]:
        print("No JSON object found in the response.")
        return {}

    # Newlines are stripped before decoding so that raw line breaks inside the
    # payload do not make it invalid JSON.
    candidate = openai_response[brace_start:].replace('\n', '')
    try:
        # raw_decode reads exactly one complete JSON value and ignores whatever
        # follows it, so nested objects and "}" characters inside strings are
        # parsed correctly instead of being cut at the first "}".
        json_object, _ = json.JSONDecoder().raw_decode(candidate)
        return json_object
    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        print("JSON decoding failed: ", e)
        return {}

In [10]:
def label_icd10s(pfx_outputs_json):
    """Collect the "ICD10_code" value from each parsed response.

    Args:
        pfx_outputs_json: Iterable of parsed responses (dict-like objects).

    Returns:
        A list with one label per response: the response's "ICD10_code",
        "Unknown" when that key is absent, or "Error" when reading the
        response raised (the exception is printed).
    """
    def _code_of(item):
        # Best effort: one unreadable response must not abort the whole batch.
        try:
            return item.get("ICD10_code", "Unknown")
        except Exception as err:
            print(f"Error processing response: {err}")
            return "Error"

    return [_code_of(item) for item in pfx_outputs_json]

In [11]:
# Load variables from a local .env file into the process environment.
# NOTE(review): this runs after the cell that reads OPENAI_API_KEY and builds
# CLIENT, so it cannot affect that client — consider moving it above that cell.
load_dotenv()

True

In [12]:
# Render every few-shot row through the `example` template and concatenate them
pfx_fewshot_examples = "".join(
    example.format(**fewshot_row) for _, fewshot_row in df_fewshot.iterrows()
)

# The system message is identical for every request
pfx_system_message = {
    "role": "system",
    "content": "You are a medical doctor rephrasing and explaining medical terminology to a patient in an understandable manner.",
}

pfx_fewshot_outputs = []

# Five passes over the first evaluation row only
for run in range(5):
    for i, row in df_eval.head(1).iterrows():
        prompt = single_fewshot_prompt.format(
            Examples=pfx_fewshot_examples,
            Incidental_Finding=row["Incidental_Finding"],
            Reading_Level=TENTH_TO_TWELTH_GRADE,
        )
        pfx_response = CLIENT.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.0,
            stream=False,
            messages=[pfx_system_message, {"role": "user", "content": prompt}],
        )
        # Keep only the text of the first choice
        pfx_fewshot_outputs.append(pfx_response.choices[0].message.content)



In [13]:
# Parse each raw model reply with extract_json (one entry per reply)
pfx_fewshot_outputs_json = [extract_json(raw_reply) for raw_reply in pfx_fewshot_outputs]

In [15]:
# Inspect the parsed replies; an entry is {} or None when extract_json could not parse it.
pfx_fewshot_outputs_json

[{'finding': 'White matter lesions',
  'ICD10_code': 'R90.82',
  'PFx': "White matter lesions are areas in the brain that appear different on an MRI scan. These changes are often found incidentally and can be related to a variety of factors, including aging, high blood pressure, or other medical conditions. It's important to know that these lesions are common and often do not cause any symptoms. However, in some cases, they might be associated with issues like memory problems or balance difficulties. If you have concerns or notice any new symptoms, it's a good idea to discuss them with your healthcare provider to understand their significance and any potential next steps.",
  'PFx_ICD10_code': 'R90.82'},
 {'finding': 'White matter lesions',
  'ICD10_code': 'R90.82',
  'PFx': "White matter lesions are areas in the brain that appear different on an MRI scan. These changes are often found incidentally and can be related to various factors, including aging, high blood pressure, or other me

In [16]:
# Tabulate the parsed replies: one row per reply
pfx_fewshot_output_df = pd.DataFrame(data=pfx_fewshot_outputs_json)

In [17]:
# Display the table built from the parsed replies.
pfx_fewshot_output_df

Unnamed: 0,finding,ICD10_code,PFx,PFx_ICD10_code
0,White matter lesions,R90.82,White matter lesions are areas in the brain th...,R90.82
1,White matter lesions,R90.82,White matter lesions are areas in the brain th...,R90.82
2,White matter lesions,R90.82,White matter lesions are areas in the brain th...,R90.82
3,White matter lesions,R90.89,White matter lesions are areas in the brain th...,R90.89
4,White matter lesions,R90.82,White matter lesions are areas in the brain th...,R90.82


In [18]:
# One ICD-10 label per parsed reply, read from the reply itself by label_icd10s
pfx_fewshot_outputs_icd10_labels = label_icd10s(pfx_outputs_json=pfx_fewshot_outputs_json)

In [19]:
# Display the ICD-10 labels, in the same order as the parsed replies.
pfx_fewshot_outputs_icd10_labels

['R90.82', 'R90.82', 'R90.82', 'R90.89', 'R90.82']

In [59]:
# Grade every parsed response and remember the best-scoring one.
highest_grade = float('-inf')
best_response = None
grades_data = []  # One dict per response; turned into the `grades` DataFrame below

# Scoring settings (loop-invariant, so defined once up front).
# NOTE(review): the generation prompt requests TENTH_TO_TWELTH_GRADE text while
# readability is scored against grade 6 — confirm which target is intended.
target_grade_level = 6.0  # Desired Flesch-Kincaid grade level
readability_weight = 0.3
accuracy_weight = 0.7

for index, response in enumerate(pfx_fewshot_outputs_json):
    # extract_json returns {} or None when a reply could not be parsed. Such a
    # response has nothing to grade, so record zeros and never select it as
    # best, rather than raising on the missing columns below.
    gradable = (
        isinstance(response, dict)
        and "ICD10_code" in response
        and isinstance(response.get("PFx"), str)
    )
    if not gradable:
        print(f"Response {index} has no usable ICD10_code/PFx; scored as 0.")
        grades_data.append({
            "response_index": index,
            "accuracy": 0.0,
            "readability": 0.0,
            "overall_score": 0.0
        })
        continue

    # One-row DataFrame holding the current response and its grading columns
    temp_df = pd.DataFrame([response])

    # Label for the current response (a single string, broadcast to the one row)
    icd10_codes = pfx_fewshot_outputs_icd10_labels[index]
    temp_df["_0_icd10_codes"] = icd10_codes

    # NOTE(review): the labels come from label_icd10s(pfx_fewshot_outputs_json),
    # i.e. from the ICD10_code field of the same responses, so this comparison
    # is true whenever the field is present — confirm the intended reference labels.
    temp_df["_0_icd10_matches"] = temp_df.ICD10_code == temp_df["_0_icd10_codes"]
    temp_df["_0_flesch_kincaid"] = temp_df["PFx"].apply(textstat.flesch_kincaid_grade)

    # Readability is 1.0 at the target grade and falls off linearly with the
    # distance from it, floored at 0.
    temp_df["_0_readability"] = temp_df["_0_flesch_kincaid"].apply(
        lambda x: max(0, 1 - abs(x - target_grade_level) / target_grade_level)
    )

    # Per-response scores (the frame has one row, so these are that row's values)
    accuracy = sum(temp_df["_0_icd10_matches"]) / len(temp_df.index)
    readability = temp_df["_0_readability"].mean()

    # Weighted overall score
    overall_score = (readability * readability_weight) + (accuracy * accuracy_weight)

    # Store the grades for later DataFrame creation
    grades_data.append({
        "response_index": index,
        "accuracy": accuracy,
        "readability": readability,
        "overall_score": overall_score
    })

    # Strict ">" keeps the earliest response when scores tie
    if overall_score > highest_grade:
        highest_grade = overall_score
        best_response = response

# Create the grades DataFrame from the collected data
grades = pd.DataFrame(grades_data)


In [60]:
# Show both sides of the ICD-10 comparison for `temp_df`, which still holds the
# frame left behind by the grading loop's final graded response.
for _caption, _column in (
    ("Response ICD10_code:", "ICD10_code"),
    ("_0_icd10_codes:", "_0_icd10_codes"),
):
    print(_caption, temp_df[_column].values)

Response ICD10_code: ['R90.82']
_0_icd10_codes: ['R90.82']


In [61]:
# `index` still holds the last value assigned by the grading loop.
_last_label = pfx_fewshot_outputs_icd10_labels[index]
print("pfx_fewshot_outputs_icd10_labels[index]:", _last_label)

pfx_fewshot_outputs_icd10_labels[index]: R90.82


In [62]:
# Report the winning response and its overall score
for _caption, _value in (
    ("Best Response:", best_response),
    ("Highest Grade:", highest_grade),
):
    print(_caption, _value)

Best Response: {'finding': 'White matter lesions', 'ICD10_code': 'R90.89', 'PFx': "White matter lesions are areas in the brain that appear different on an MRI scan. These changes are often found incidentally, meaning they were not the reason for the scan. They can be associated with aging, high blood pressure, or other conditions. Most of the time, these lesions do not cause symptoms and do not require treatment. However, it's important to maintain a healthy lifestyle, including managing blood pressure and cholesterol, to support brain health. If you experience symptoms like memory problems or balance issues, further evaluation may be needed.", 'PFx_ICD10_code': 'R90.89'}
Highest Grade: 0.845


In [63]:
# Display the per-response scores collected by the grading loop.
grades

Unnamed: 0,response_index,accuracy,readability,overall_score
0,0,1.0,0.15,0.745
1,1,1.0,0.05,0.715
2,2,1.0,0.05,0.715
3,3,1.0,0.483333,0.845
4,4,1.0,0.066667,0.72
