In [447]:
import pickle
from openai import OpenAI
import pandas as pd
import random

client = OpenAI(api_key="***")

In [448]:
"""Load data from results folder"""
# Define the path to the results folder
results_folder = 'results'

# Open the pkl file and load its content
with open("./results/tst_dict.pkl", 'rb') as file:
    tst_dict = pickle.load(file)

In [458]:
def get_fid_dict(fid, tr_tst_dict):
    fid_dict = {key: {fid: value[fid]} for key, value in tr_tst_dict.items() if fid in value.keys()}
    return fid_dict

def generate_patient_report(patient_data):
    report_sections = []    
    for category, instances in patient_data.items():
        if category in ['cmr']: #, 'cmr', 'pt_status_event_update'
            section = f" {category.replace('_', ' ').title()} \n"
            for patient_id, events in instances.items():
                for event_id, event_data in events.items():
                    if event_id != 1:
                        continue
                    # section += f"\n  Instance {event_id}:\n"
                    for data_type, details in event_data.items():
                        if data_type != 'metadata':
                            for key, value in details.items():                                
                                if 'Patient ID' in key:
                                    section += f" {key}: {value}"#\n"
                                if 'bsa' in key.lower():
                                    continue
                                if str(value) == 'nan':
                                    continue
                                if 'patient height' in key.lower():
                                    section += f" {key} {value}\n"
                                    height = value
                                if 'age at cmr' in key.lower(): 
                                    section += f" {key} {round(value/365, 2)}\n"
                                    age_at_cmr = value
                                if 'patient weight' in key.lower():
                                    weight = value
                                section += f" {key} {value}\n"
            report_sections.append(section)
    return "\n".join(report_sections), weight, height, age_at_cmr

def fetch_weight_ranges(weight, weight_range_perc=0.4):  
    """Generate weight ranges based on a given weight."""
    range_step = weight_range_perc * weight
    min_range = weight - range_step
    max_range = weight + range_step
    num_ranges = 3
    step_size = (max_range - min_range) / num_ranges
    ranges = [
        str(i + 1) + ". " + str((round(min_range + i * step_size, 1),
                                 round(min_range + (i + 1) * step_size, 1)))
        for i in range(num_ranges)
    ]
    return ranges

# Dynamically select a "correct" range for each example
def assign_correct_range(weight_ranges):
    """Randomly assign a correct range."""
    correct_range = random.randint(1, len(weight_ranges))  # Random range between 1 and 5
    correct_range_text = f"Range {correct_range}"
    return correct_range, correct_range_text

In [459]:
# Get all patient keys
patient_keys = list(tst_dict['cmr'].keys())

# Ensure there are enough patients to form 50 unique triplets
if len(patient_keys) < 60:
    raise ValueError("Not enough patients to form 10 unique triplets.")

# Generate 10 unique sets of triplets
triplets = [random.sample(patient_keys, 3) for _ in range(20)]

output_report = {'prompt_num': [], 'fids': [], 'age_at_cmr': [], 'heights': [],
                 'weights': [], 'weight_bands': [], 'response': [], 'real_answer': []}

In [430]:
"""Creating report for triplets of pts"""
for triple in triplets:
    fid_dicts = []
    for fid in triple:
        fid_dicts.append(get_fid_dict(fid, tst_dict))
    
    fid_report1, weight1, height1, age1 = generate_patient_report(fid_dicts[0])
    fid_report2, weight2, _, _ = generate_patient_report(fid_dicts[1])
    fid_report3, weight3, _, _ = generate_patient_report(fid_dicts[2])
    
    output_report['prompt_num'].extend(["1", "2", "3"])
    output_report['fids'].extend([triple, triple, triple])
    output_report['age_at_cmr'].extend([age1, age1, age1])
    output_report['heights'].extend([height1, height1, height1]) 
    output_report['weights'].extend([weight1, weight1, weight1])    
    
    fid1_ranges = fetch_weight_ranges(weight1)
    fid2_ranges = fetch_weight_ranges(weight2)
    fid3_ranges = fetch_weight_ranges(weight3)
    
    # Assign random correct ranges
    fid1_correct_range, fid1_correct_text = assign_correct_range(fid1_ranges)
    fid2_correct_range, fid2_correct_text = assign_correct_range(fid2_ranges)
    fid3_correct_range, fid3_correct_text = assign_correct_range(fid3_ranges)
    
    # Convert ranges to formatted strings for the prompt
    fid1_range_str = " ".join(fid1_ranges)
    fid2_range_str = " ".join(fid2_ranges)
    fid3_range_str = " ".join(fid3_ranges)
    
    # Create the prompt
    prompt_type1 = (
        f"You are given a report with a patient's age (in years) and height (in centimeters). "
        f"Your task is to predict the patient's weight (in kilograms) for this patient {fid_report1}:\n"
    )
    
    prompt_type2 = (
        f"You are given a report with a patient's age (in years) and height (in centimeters). "
        f"Your task is to predict the patient's weight (in kilograms) by selecting one of the given ranges for this patient: {fid_report1}:\n"
        f"What is the correct weight range?\n\n{fid1_range_str}"
    )
    
    prompt_type3 = (
        f"You are given a report with a patient's age (in years) and height (in centimeters). "
        f"Your task is to predict the patient's weight (in kilograms) by selecting one of the given ranges:\n\n"
        f"Examples:\n"
        f"1. For this patient: {fid_report2}\n"
        f"   - Weight ranges: {fid2_range_str}\n"
        f"   Answer: {fid2_correct_text}\n\n"
        f"2. For this patient {fid_report3}:\n"
        f"   - Weight ranges: {fid3_range_str}\n"
        f"   Answer: {fid3_correct_text}\n\n"
        f"Now for this patient {fid_report1}:\n"
        f"What is the correct weight range?\n\n{fid1_range_str}"
    )
    
    chat_completion1 = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": prompt_type1}],
    stream=False,
    )
    
    chat_completion2 = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt_type2}],
        stream=False,
    )
    
    chat_completion3 = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt_type3}],
        stream=False,
    )
    
    resp1 = chat_completion1.choices[0].to_dict()   
    resp2 = chat_completion2.choices[0].to_dict()
    resp3 = chat_completion3.choices[0].to_dict()
    
    output_report['weight_bands'].extend([fid1_range_str, fid1_range_str, fid1_range_str])
    output_report['response'].extend([resp1['message']['content'], resp2['message']['content'],  resp3['message']['content']])
    output_report['real_answer'].extend([fid1_correct_text, fid1_correct_text, fid1_correct_text])

In [432]:
# pd.DataFrame(output_report).to_csv('chatgpt4_reviews.csv', index=False)

In [463]:
"""After manual analysis of results, display results"""
results = pd.read_csv('chatgpt4_reviews.csv')

results_g1 = results[results['prompt_num'] == 1]
results_g2 = results[results['prompt_num'] == 2]
results_g3 = results[results['prompt_num'] == 3]

"""After manual analysis of results, display results"""
results = pd.read_csv('chatgpt4_reviews.csv')

results_g1 = results[results['prompt_num'] == 1]
results_g2 = results[results['prompt_num'] == 2]
results_g3 = results[results['prompt_num'] == 3]

g1_acc = results_g1[results_g1['human_assessment'] == 1].shape[0] / results_g1.shape[0]
g2_acc = results_g2[results_g2['human_assessment'] == 1].shape[0] / results_g2.shape[0]
g3_acc = results_g3[results_g3['human_assessment'] == 1].shape[0] / results_g3.shape[0]

g1_venture = results_g1[results_g1['ventured_guess'] == 1].shape[0] / results_g1.shape[0]
g2_venture = results_g2[results_g2['ventured_guess'] == 1].shape[0] / results_g2.shape[0]
g3_venture = results_g3[results_g3['ventured_guess'] == 1].shape[0] / results_g3.shape[0]

g1_acc_per_venture = results_g1[results_g1['human_assessment'] == 1].shape[0] / results_g1[results_g1['ventured_guess'] == 1].shape[0]
g2_acc_per_venture = results_g2[results_g2['human_assessment'] == 1].shape[0] / results_g2[results_g2['ventured_guess'] == 1].shape[0]
g3_acc_per_venture = results_g3[results_g3['human_assessment'] == 1].shape[0] / results_g3[results_g3['ventured_guess'] == 1].shape[0]

print(f"Results:\nprompt 1 acc: {g2_acc:.2f}\nprompt 2 acc: {g1_acc:.2f}\nprompt 3 acc: {g3_acc:.2f}\n")
print(f"prompt 1 attempt perc: {g2_venture:.2f}\nprompt 2 attempt perc: {g1_venture:.2f}\nprompt 3 attempt perc: {g3_venture:.2f}\n")
print(f"prompt 1 acc per attempt: {g2_acc_per_venture:.2f}\nprompt 2 acc per attempt: {g1_acc_per_venture:.2f}\nprompt 3 acc per attempt: {g3_acc_per_venture:.2f}")
# results.columns

Results:
prompt 1 acc: 0.00
prompt 2 acc: 0.05
prompt 3 acc: 0.45

prompt 1 attempt perc: 0.35
prompt 2 attempt perc: 0.05
prompt 3 attempt perc: 1.00

prompt 1 acc per attempt: 0.00
prompt 2 acc per attempt: 1.00
prompt 3 acc per attempt: 0.45
