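We'll create several patient cases: each row of the KTAS triage data is serialized into a natural-language description, and the cases are then grouped into six-patient sets in which exactly one patient has a different acuity level from the other five.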

In [1]:
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath('../..'))
import utils.prompts as prompts

In [4]:

# Load the triage table and serialize every row into a natural-language case.
# NOTE(review): the file read here is train.csv although the frame is named
# test_df -- confirm which split is actually intended.
dataset = 'KTAS-triage'
test_df = pd.read_csv('train.csv')

# One (serialized case, KTAS_expert value) pair per row, in file order
serialized_rows = [
    (
        prompts.format_row(row, dataset=dataset, serialization="natural"),
        row['KTAS_expert'],
    )
    for _, row in test_df.iterrows()
]

# Split the pairs into the two parallel lists
patient_cases = [case_text for case_text, _ in serialized_rows]
acuity_levels = [level for _, level in serialized_rows]

# Assemble the two-column table and persist it without the index column
result_df = pd.DataFrame({'patient_case': patient_cases, 'acuity': acuity_levels})
result_df.to_csv('patient_cases.csv', index=False)

In [5]:
import random
import json
import numpy as np
from tqdm import tqdm
import sys
sys.path.append('../..')
from utils.utils import query_llm

def create_patient_set(df, same_acuity, diff_acuity, n_same=5, random_state=None):
    """Build a shuffled patient set containing exactly one odd-one-out patient.

    Draws ``n_same`` rows whose ``acuity`` equals ``same_acuity`` plus one row
    whose ``acuity`` equals ``diff_acuity``, shuffles them together and reports
    the position of the separately drawn row in the shuffled result.

    Args:
        df: DataFrame with an ``acuity`` column; all other columns are carried
            through unchanged.
        same_acuity: Acuity value shared by the majority of the set.
        diff_acuity: Acuity value of the single odd patient.
        n_same: Number of majority patients to draw. Defaults to 5, giving the
            six-patient set this function originally produced.
        random_state: ``None`` (default) samples from NumPy's global random
            state, exactly as before. An int seed, ``numpy.random.Generator``
            or ``numpy.random.RandomState`` makes the draw reproducible.

    Returns:
        tuple: ``(patient_set, diff_index)`` where ``patient_set`` is the
        shuffled DataFrame with a fresh 0..n index and ``diff_index`` is the
        integer row position of the odd patient within it.

    Raises:
        ValueError: If ``df`` holds fewer than ``n_same`` rows with
            ``same_acuity`` or no row at all with ``diff_acuity``.
    """
    same_pool = df[df['acuity'] == same_acuity]
    diff_pool = df[df['acuity'] == diff_acuity]

    # Fail with a message that names the acuity level instead of letting
    # pandas raise a generic sampling error.
    if len(same_pool) < n_same:
        raise ValueError(
            f"Need {n_same} patients with acuity {same_acuity}, "
            f"but only {len(same_pool)} are available."
        )
    if diff_pool.empty:
        raise ValueError(f"No patients with acuity {diff_acuity} are available.")

    # Use ONE generator for all three draws so that a fixed seed does not
    # reuse the same random stream for selection and for shuffling.
    if random_state is None or isinstance(
        random_state, (np.random.Generator, np.random.RandomState)
    ):
        rng = random_state
    else:
        rng = np.random.default_rng(random_state)

    same_acuity_patients = same_pool.sample(n_same, random_state=rng)
    diff_acuity_patient = diff_pool.sample(1, random_state=rng)

    # After ignore_index the odd patient carries label ``n_same`` (it is the
    # last row), which lets us locate it after the shuffle without relying on
    # the acuity values being different.
    combined = pd.concat([same_acuity_patients, diff_acuity_patient], ignore_index=True)
    shuffled = combined.sample(frac=1, random_state=rng)
    diff_index = int(shuffled.index.get_loc(n_same))

    return shuffled.reset_index(drop=True), diff_index

# Generate the multi-patient benchmark file: each JSONL record holds six
# serialized patient cases and the position of the one that differs in acuity.

# Seed both random sources used below (random.choice here, pandas sampling
# inside create_patient_set via NumPy's global state) so that
# Restart & Run All reproduces the same file.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

num_sets = 20
patient_sets = []  # declared by the original cell; nothing is appended to it here
output_path = 'multi_patient_cases.jsonl'

# Possible (same_acuity, diff_acuity) pairs: the odd patient's acuity value is
# always exactly 1 lower than the value shared by the other five.
# NOTE(review): presumably a lower KTAS number means higher urgency, which is
# what the question text below relies on -- confirm against the dataset docs.
acuity_pairs = [(2, 1), (3, 2), (4, 3), (5, 4)]

with open(output_path, 'w') as f:
    for _ in tqdm(range(num_sets)):
        # Pick which pair of acuity levels this set contrasts
        same_acuity, diff_acuity = random.choice(acuity_pairs)

        patient_set, diff_index = create_patient_set(result_df, same_acuity, diff_acuity)

        record = {
            # Key was misspelled 'wuestion' before; readers expect 'question'.
            'question': 'Which patient is the most acutely ill and should be seen first?',
            'patient_cases': patient_set['patient_case'].tolist(),
            # Cast to a plain int so json.dumps can serialize it
            'correct_index': int(diff_index)
        }

        # One JSON object per line (JSONL)
        f.write(json.dumps(record) + '\n')

print(f"Created {num_sets} sets of patients with 5 having the same acuity and 1 having a lower acuity.")
print(f"Data saved to '{output_path}'")

# Sanity check: load the first JSONL record back and preview its contents
with open('multi_patient_cases.jsonl', 'r') as f:
    first_line = f.readline()
sample = json.loads(first_line)

print("\nSample set:")
print(f"Correct index: {sample['correct_index']}")
print("Patient cases:")
for i, case in enumerate(sample['patient_cases']):
    # Preview only: descriptions longer than 200 characters are cut and
    # marked with an ellipsis; shorter ones are shown in full.
    if len(case) > 200:
        preview = case[:200] + "..."
    else:
        preview = case
    print(f"\nPatient {i}:")
    print(preview)



100%|██████████| 20/20 [00:00<00:00, 1078.81it/s]

Created 20 sets of patients with 5 having the same acuity and 1 having a lower acuity.
Data saved to 'multi_patient_cases.jsonl'

Sample set:
Correct index: 4
Patient cases:

Patient 0:
A 68-year-old woman arrives at the emergency department with a chief complaint of 'vomiting'. She has temperature of 37.3°C, heart rate of 66.0 bpm, respiratory rate of 20.0 breaths/min, systolic bloo...

Patient 1:
A 35-year-old woman arrives at the emergency department via private vehicle with a chief complaint of 'epigastric pain'. She has temperature of 36.7°C, heart rate of 74.0 bpm, respiratory rate of 20.0...

Patient 2:
A 26-year-old woman who sustained an injury arrives at the emergency department via private vehicle with a chief complaint of 'laceration, chin'. She has temperature of 36.0°C, heart rate of 114.0 bpm...

Patient 3:
A 43-year-old man arrives at the emergency department with a chief complaint of 'headache'. He has temperature of 36.9°C, heart rate of 108.0 bpm, respiratory rate of


