CT Protocol Automation Analysis

This notebook processes patient data and generates automated CT protocols using the OpenAI API.

In [1]:
import pandas as pd
from utils import load_data, generate_protocol_recommendations


In [2]:
import os
from config import (
    INPUT_DATA_PATH, 
    OUTPUT_DATA_PATH, 
    OPENAI_API_KEY,
    MODEL_NAME
)
import openai
import json
from tqdm import tqdm

In [3]:
# Prefer the key from the environment. If the variable is unset or empty, keep the
# value already imported from config instead of overwriting it with None.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or OPENAI_API_KEY

In [4]:
# Load the full extraction table. The file's first line is skipped, so the second
# line supplies the column names. Columns without a real header are dropped.
def _has_real_header(column):
    """Return True when a column header is neither blank nor an 'Unnamed: N' placeholder.

    pandas substitutes 'Unnamed: N' for blank header cells, so both forms are
    treated as "empty column".
    """
    name = column.strip()
    return name != '' and not name.startswith('Unnamed')


def _parse_egfr(raw_value):
    """Normalise one raw eGFR cell.

    Returns:
        "no data" when the cell is missing (NaN/None), blank, or contains
        "no data" in any letter case / with surrounding whitespace;
        the numeric lower bound for threshold strings such as ">90" or "> 90"
        (an int when the bound is whole, so ">90" still yields 90);
        otherwise float(raw_value).

    Raises:
        ValueError: if the cell holds text that is not numeric.
    """
    if pd.isna(raw_value):
        return "no data"
    if isinstance(raw_value, str):
        text = raw_value.strip()
        if text == "" or text.lower() == "no data":
            return "no data"
        if text.startswith(">"):
            # Values above the reporting limit arrive as ">90"; keep the bound itself.
            bound = float(text[1:].strip())
            return int(bound) if bound.is_integer() else bound
        return float(text)
    # Numeric cells are converted as-is; values above 90 are not capped.
    return float(raw_value)


test_data = pd.read_csv(
    'data/Data-Extraction-Table.csv',
    encoding='latin-1',
    skiprows=1,
    usecols=_has_real_header,
)

# Print column names to see what we're working with
print("Available columns:", test_data.columns.tolist())

# Fixed output schema; used so an empty input still yields a well-formed table.
result_columns = ['Study_ID', 'Priority', 'Protocol', 'IV_Contrast', 'Oral_Contrast']

# Process every row; a failure on one row is reported and recorded as an empty
# recommendation so the remaining rows are still processed.
results = []
for idx, row in test_data.iterrows():
    try:
        # Extract eGFR, handling missing / "no data" / ">90"-style values
        egfr = _parse_egfr(row.get('eGFR (mL/min)'))

        # Fields handed to generate_protocol_recommendations. Missing values
        # (NaN) are passed through unchanged.
        patient_info = {
            'Study_ID': row['Study ID #'],
            'Location': row['Location [IP, ER, OP]'],
            'Age': row['Age'],
            'Sex': row['Sex'],
            'CT_Exam': row['CT Exam Requested'],
            'Clinical_Info': row['Clinical Information/Reason for Scan'],
            'Prior_Reaction': row['Previous adverse reaction to contrast (if YES, what type)'],
            'eGFR': egfr,
            'Creatinine': row['Creatinine (umol/L)']
        }

        # Generate recommendations
        completions = generate_protocol_recommendations(patient_info, egfr)

        # Store results
        results.append({
            'Study_ID': row['Study ID #'],
            'Priority': completions['priority'],
            'Protocol': completions['protocol'],
            'IV_Contrast': completions['iv_contrast'],
            'Oral_Contrast': completions['oral_contrast']
        })

    except Exception as e:
        print(f"\nError processing row {idx}: {str(e)}")
        results.append({
            # .get() so the handler itself cannot raise if the ID column is absent
            'Study_ID': row.get('Study ID #'),
            'Priority': None,
            'Protocol': None,
            'IV_Contrast': None,
            'Oral_Contrast': None
        })

# Convert results to DataFrame with an explicit column order
results_df = pd.DataFrame(results, columns=result_columns)

# Number the rows from 1 instead of 0
results_df.index = range(1, len(results_df) + 1)

print("\nResults:")
print(results_df[['Priority', 'Protocol', 'IV_Contrast', 'Oral_Contrast']])

Available columns: ['Study ID #', 'Location [IP, ER, OP]', 'Age', 'Sex', 'CT Exam Requested', 'Clinical Information/Reason for Scan', 'Previous adverse reaction to contrast (if YES, what type)', 'eGFR (mL/min)', 'Creatinine (umol/L)']

Results:
   Priority      Protocol IV_Contrast        Oral_Contrast
1         3  Enterography          C+  Other (3% sorbitol)
2         2           A/P          C+           Water base
3         4        Pelvis          C+                 None
4         1           A/P          C+                 None
5         3      Liver 3P   C+ and C-                 None


In [5]:
# Allow up to 300 rows to be printed before pandas truncates the table,
# then show the four recommendation columns.
pd.options.display.max_rows = 300
print(results_df.loc[:, ['Priority', 'Protocol', 'IV_Contrast', 'Oral_Contrast']])


   Priority      Protocol IV_Contrast        Oral_Contrast
1         3  Enterography          C+  Other (3% sorbitol)
2         2           A/P          C+           Water base
3         4        Pelvis          C+                 None
4         1           A/P          C+                 None
5         3      Liver 3P   C+ and C-                 None
