### I. Research Questions:
1. Which psychiatric diagnosis are associated with higher ICU readmission rates? -> Do patient with mood disorders (296.x), psychotic disorders (295.x) or substance use (303-305) have higher [30]-day readmission rates compared to those without?
2. Does discontinuation or poor psychotropic medications at discharge predict higher readmissions? -> Does the discontinuation of psychotropic medication upon discharge correlate with increased readmission risk?
3. Can we combine patient-level features to predict ICU readmission risk? -> which combination of demographics, ICU, severity scores and medication changes is most predictive of readmission?

In [5]:
import pandas as pd

# Load data
admissions = pd.read_csv('./data/hosp/admissions.csv')
patients = pd.read_csv('./data/hosp/patients.csv')
diagnoses = pd.read_csv('./data/hosp/diagnoses_icd.csv')
icustays = pd.read_csv('./data/icu/icustays.csv')
prescriptions = pd.read_csv('./data/hosp/prescriptions.csv')

print("Initial admissions:", admissions.shape[0], "Patients:", patients['subject_id'].nunique(), "Notes:", diagnoses.shape[0])

# adult only
patients_adult = patients[patients['anchor_age'] >= 18]
admissions = admissions[admissions['subject_id'].isin(patients_adult['subject_id'])].copy()
diagnoses = diagnoses[diagnoses['hadm_id'].isin(admissions['hadm_id'])].copy()

print("After excluding <18:", admissions.shape[0],
      "Patients:", admissions['subject_id'].nunique(),
      "Notes:", diagnoses.shape[0])

# Identify psychiatric conditions
psych_icd_codes = ('296', '295', '303', '304', '305', 'F20', 'F21', 'F22', 'F23', 'F24', 'F25',
                   'F28', 'F29', 'F30', 'F31', 'F32', 'F33', 'F34', 'F39', 'F10', 'F11', 'F12', 
                   'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F40', 'F41', 'F42', 'F43', 
                   'F44', 'F45', 'F48', 'F50', 'F51', 'F52', 'F53', 'F54', 'F59', 'F60', 'F61', 
                   'F62', 'F63', 'F64', 'F65', 'F66', 'F68', 'F69', 'F70', 'F71', 'F72', 'F73', 
                   'F74', 'F75', 'F78', 'F79', 'F80', 'F81', 'F82', 'F83', 'F84', 'F88', 'F89', 
                   'F90', 'F91', 'F92', 'F93', 'F94', 'F95', 'F98', 'F99')

diagnoses['psych_flag'] = diagnoses['icd_code'].astype(str).str.startswith(psych_icd_codes)
psych_admissions = diagnoses[diagnoses['psych_flag']]['hadm_id'].unique()
admissions = admissions[admissions['hadm_id'].isin(psych_admissions)].copy()
diagnoses = diagnoses[diagnoses['hadm_id'].isin(admissions['hadm_id'])].copy()
print("Psych admissions:", admissions.shape[0], "Patients:", admissions['subject_id'].nunique(), "Notes:", diagnoses.shape[0])

# Merge in patient demographics
admissions = admissions.merge(
    patients[['subject_id', 'gender', 'anchor_age']], 
    on='subject_id', how='left'
)

# Sort by patient and time
admissions = admissions.sort_values(['subject_id', 'admittime']).copy()

# Output frequency of admission types
print("\nAdmission types:")
print(admissions['admission_type'].value_counts())

# Output basic demographics
print("\nPatient demographics:")
print(admissions[['gender', 'anchor_age', 'race']].describe(include='all'))

# Save to CSV
admissions.to_csv('./data/psych_admissions.csv', index=False)
diagnoses.to_csv('./data/psych_diagnoses.csv', index=False)

Initial admissions: 431088 Patients: 299777 Notes: 4752265
After excluding <18: 431088 Patients: 180747 Notes: 4752265
Psych admissions: 126867 Patients: 60782 Notes: 1543747

Admission types:
admission_type
EW EMER.                       37316
EU OBSERVATION                 36198
OBSERVATION ADMIT              22028
URGENT                         11530
SURGICAL SAME DAY ADMISSION     6837
DIRECT OBSERVATION              5117
DIRECT EMER.                    4869
ELECTIVE                        2023
AMBULATORY OBSERVATION           949
Name: count, dtype: int64

Patient demographics:
        gender     anchor_age    race
count   126867  126867.000000  126867
unique       2            NaN      33
top          M            NaN   WHITE
freq     69067            NaN   81213
mean       NaN      50.493848     NaN
std        NaN      17.126590     NaN
min        NaN      18.000000     NaN
25%        NaN      38.000000     NaN
50%        NaN      51.000000     NaN
75%        NaN      62.000000 

In [10]:
# patients.csv contains unique subject_id and gender
patients = pd.read_csv("data/hosp/patients.csv")
psych_admissions = pd.read_csv("data/psych_admissions.csv")

# Get total unique patients
total_any = patients["subject_id"].nunique()

# Get gender distribution for all patients
gender_any = patients["gender"].value_counts()

# For psych patients
psych_subject_ids = psych_admissions["subject_id"].unique()
psych_patients = patients[patients["subject_id"].isin(psych_subject_ids)]

total_psych = psych_patients["subject_id"].nunique()
gender_psych = psych_patients["gender"].value_counts()

# Calculate percentages
gender_any_pct = (gender_any / total_any * 100).round(2)
gender_psych_pct = (gender_psych / total_psych * 100).round(2)

# Print result
for gender in ['M', 'F']:
    print(f"{gender}: {gender_any[gender]} ({gender_any_pct[gender]}%) | Psych: {gender_psych[gender]} ({gender_psych_pct[gender]}%)")

M: 141227 (47.11%) | Psych: 31976 (52.61%)
F: 158550 (52.89%) | Psych: 28806 (47.39%)


In [None]:
# Step 3: Define ICU readmissions
print("ICU admissions:", icustays.shape[0], "Patients:", icustays['subject_id'].nunique())

icu_admissions = icustays.merge(admissions[['hadm_id', 'dischtime', 'admission_type', 'gender', 'anchor_age', 'race']], 
                                 on='hadm_id', how='inner')
icu_admissions = icu_admissions.sort_values(['subject_id', 'intime']).copy()
icu_admissions['next_admission'] = icu_admissions.groupby('subject_id')['intime'].shift(-1)

icu_admissions['dischtime'] = pd.to_datetime(icu_admissions['dischtime'])
icu_admissions['next_admission'] = pd.to_datetime(icu_admissions['next_admission'])

# Flag 30-day ICU readmission
icu_admissions['readmitted_30d'] = (icu_admissions['next_admission'] - icu_admissions['dischtime']).dt.days <= 30
icu_admissions['readmitted_30d'] = icu_admissions['readmitted_30d'].fillna(False)

print("ICU Psych readmissions (30d):", icu_admissions['readmitted_30d'].sum(), 
      "Unique Patients:", icu_admissions[icu_admissions['readmitted_30d']]['subject_id'].nunique())
print(icu_admissions.head())
icu_admissions.to_csv('./data/icu_psy_readmissions.csv', index=False)

ICU admissions: 73141 Patients: 50934
ICU Psych readmissions (30d): 3363 Unique Patients: 2425
   subject_id   hadm_id   stay_id                       first_careunit  \
0    10000032  29079034  39553978   Medical Intensive Care Unit (MICU)   
1    10001217  24597018  37067082  Surgical Intensive Care Unit (SICU)   
2    10001217  27703517  34592300  Surgical Intensive Care Unit (SICU)   
3    10001884  26184834  37510196   Medical Intensive Care Unit (MICU)   
4    10002348  22725460  32610785                   Neuro Intermediate   

                         last_careunit               intime  \
0   Medical Intensive Care Unit (MICU)  2180-07-23 14:00:00   
1  Surgical Intensive Care Unit (SICU)  2157-11-20 19:18:02   
2  Surgical Intensive Care Unit (SICU)  2157-12-19 15:42:24   
3   Medical Intensive Care Unit (MICU)  2131-01-11 04:20:05   
4                   Neuro Intermediate  2112-11-30 23:24:00   

               outtime       los           dischtime     admission_type  \
0  218

In [13]:
# Step 4: Medication discontinuation
psych_meds = ['haloperidol', 'risperidone', 'quetiapine', 'olanzapine', 'lorazepam', 'diazepam', 'alprazolam',
              'fluoxetine', 'sertraline', 'citalopram', 'escitalopram', 'paroxetine', 'venlafaxine', 'duloxetine',
              'bupropion', 'mirtazapine', 'valproate', 'lithium']

# Total prescriptions before filtering
print("Total prescriptions:", prescriptions.shape[0], "Patients:", prescriptions['subject_id'].nunique())

# Filter prescriptions to adult psychiatric admissions
prescriptions = prescriptions[prescriptions['hadm_id'].isin(admissions['hadm_id'])].copy()
print("Prescriptions for psych admissions:", prescriptions.shape[0], "Patients:", prescriptions['subject_id'].nunique())
prescriptions.to_csv('./data/prescripted_patients.csv', index=False)

# Flag psychotropic medications
prescriptions['drug'] = prescriptions['drug'].astype(str).str.lower()
prescriptions['psych_med_flag'] = prescriptions['drug'].isin(psych_meds)
psych_prescriptions = prescriptions[prescriptions['psych_med_flag']].copy()
print("Psych prescriptions:", psych_prescriptions.shape[0], "Patients:", psych_prescriptions['subject_id'].nunique())
psych_prescriptions.to_csv('./data/psych_prescriptions.csv', index=False)

# Parse medication stop time
psych_prescriptions.loc[:, 'stoptime'] = pd.to_datetime(psych_prescriptions['stoptime'], errors='coerce')

# Determine last stop time per admission
last_prescriptions = psych_prescriptions.groupby('hadm_id')['stoptime'].max().reset_index()
last_prescriptions.rename(columns={'stoptime': 'last_psych_med_time'}, inplace=True)

# Only include admissions with psych meds
admissions_with_meds = admissions[admissions['hadm_id'].isin(last_prescriptions['hadm_id'])].copy()

# Merge last med stop time and calculate discontinuation
admissions_with_meds = admissions_with_meds.merge(last_prescriptions, on='hadm_id', how='left')
admissions_with_meds['med_discontinued'] = admissions_with_meds['last_psych_med_time'].isnull()

# Filter discontinued medication cases
discontinued_prescriptions = admissions_with_meds[admissions_with_meds['med_discontinued']].copy()

print("Psych Medication discontinued admissions:", discontinued_prescriptions.shape[0])
print("Unique patients with psych medication discontinued:", discontinued_prescriptions['subject_id'].nunique())

# Output enriched data
last_prescriptions.to_csv('./data/last_prescriptions_processed.csv', index=False)
discontinued_prescriptions.to_csv('./data/psych_meds_discontinued_with_demo.csv', index=False)

Total prescriptions: 15399811 Patients: 158422
Prescriptions for psych admissions: 4706768 Patients: 50439
Psych prescriptions: 198927 Patients: 35130
Psych Medication discontinued admissions: 13
Unique patients with psych medication discontinued: 13


In [7]:
import pandas as pd

chartevents = pd.read_csv("data/icu/chartevents.csv", usecols=["subject_id", "hadm_id", "charttime", "itemid", "valuenum"])
height_ids = [226730]
weight_ids = [226512]

height_df = chartevents[chartevents["itemid"].isin(height_ids)].dropna(subset=["valuenum"]).copy()
weight_df = chartevents[chartevents["itemid"].isin(weight_ids)].dropna(subset=["valuenum"]).copy()

height_first = height_df.sort_values(["subject_id", "hadm_id", "charttime"]).drop_duplicates(["hadm_id"])
weight_first = weight_df.sort_values(["subject_id", "hadm_id", "charttime"]).drop_duplicates(["hadm_id"])

height_first = height_first[["hadm_id", "valuenum"]].rename(columns={"valuenum": "height_cm"})
weight_first = weight_first[["hadm_id", "valuenum"]].rename(columns={"valuenum": "weight_kg"})

bmi_df = pd.merge(weight_first, height_first, on="hadm_id", how="inner")
bmi_df["height_m"] = bmi_df["height_cm"] / 100
bmi_df["bmi"] = bmi_df["weight_kg"] / (bmi_df["height_m"] ** 2)

bmi_df = bmi_df[["hadm_id", "bmi"]]
bmi_df.to_csv("data/derived/bmi.csv", index=False)
print("BMI data saved to data/derived/bmi.csv")


BMI data saved to data/derived/bmi.csv


In [9]:
import pandas as pd
import numpy as np

# Load required files
psych_admissions = pd.read_csv("./data/psych_admissions.csv")
charlson_df = pd.read_csv("./data/derived/charlson.csv")
sofa_df = pd.read_csv("./data/derived/first_day_sofa.csv")
sirs_df = pd.read_csv("./data/derived/sirs.csv")
admissions = pd.read_csv("./data/hosp/admissions.csv")
patients = pd.read_csv("./data/hosp/patients.csv")
icustays = pd.read_csv("./data/icu/icustays.csv")
prescriptions = pd.read_csv("./data/hosp/prescriptions.csv")
bmi_df = pd.read_csv("data/derived/bmi.csv")

# Preprocess and merge
psych_hadm_ids = set(psych_admissions['hadm_id'].unique())
demo = admissions.merge(patients, on='subject_id', how='left')
demo = demo[['subject_id', 'hadm_id', 'gender', 'anchor_age', 'race', 'admission_type']]
demo['is_psych'] = demo['hadm_id'].isin(psych_hadm_ids)
demo['is_readmitted'] = demo.duplicated(subset='subject_id', keep=False)
demo['has_icu_stay'] = demo['hadm_id'].isin(icustays['hadm_id'])

# Psych meds
psych_meds = ['haloperidol', 'risperidone', 'quetiapine', 'olanzapine', 'lorazepam', 'diazepam', 'alprazolam',
              'fluoxetine', 'sertraline', 'citalopram', 'escitalopram', 'paroxetine', 'venlafaxine', 'duloxetine',
              'bupropion', 'mirtazapine', 'valproate', 'lithium']
prescriptions['drug'] = prescriptions['drug'].astype(str).str.lower()
prescriptions['is_psych_med'] = prescriptions['drug'].isin(psych_meds)
psych_meds_hadm_ids = set(prescriptions[prescriptions['is_psych_med']]['hadm_id'].unique())
demo['has_psych_med'] = demo['hadm_id'].isin(psych_meds_hadm_ids)

# Merge scores
demo = demo.merge(charlson_df[['hadm_id', 'charlson_comorbidity_index']], on='hadm_id', how='left')
demo = demo.merge(sofa_df[['hadm_id', 'sofa']], on='hadm_id', how='left')
demo = demo.merge(sirs_df[['hadm_id', 'sirs']], on='hadm_id', how='left')
demo = demo.merge(bmi_df[["hadm_id", "bmi"]], on="hadm_id", how="left")

bmi_any = demo[np.isfinite(demo["bmi"])]["bmi"].mean()
bmi_psych = demo[(np.isfinite(demo["bmi"])) & (demo["is_psych"])]["bmi"].mean()

# Age groups
bins = [18, 30, 45, 60, 75, 90, 120]
labels = ['18-29', '30-44', '45-59', '60-74', '75-89', '90+']
demo['age_group'] = pd.cut(demo['anchor_age'], bins=bins, labels=labels, right=False)

# Build summary table
rows = []

# Age groups
for group in labels:
    any_patients = demo[demo['age_group'] == group]['subject_id'].nunique()
    psych_patients = demo[(demo['age_group'] == group) & (demo['is_psych'])]['subject_id'].nunique()
    rows.append(('Age Group: ' + group, any_patients, psych_patients))

# Gender
for gender in ['M', 'F']:
    any_patients = demo[demo['gender'] == gender]['subject_id'].nunique()
    psych_patients = demo[(demo['gender'] == gender) & (demo['is_psych'])]['subject_id'].nunique()
    rows.append(('Gender: ' + gender, any_patients, psych_patients))

# Admission type
for typ in demo['admission_type'].dropna().unique():
    any_patients = demo[demo['admission_type'] == typ]['subject_id'].nunique()
    psych_patients = demo[(demo['admission_type'] == typ) & (demo['is_psych'])]['subject_id'].nunique()
    rows.append(('Admission Type: ' + typ, any_patients, psych_patients))

# ICU stay
rows.append(('ICU Stay', demo[demo['has_icu_stay']]['subject_id'].nunique(),
             demo[(demo['has_icu_stay']) & (demo['is_psych'])]['subject_id'].nunique()))

# Readmission
rows.append(('Readmission', demo[demo['is_readmitted']]['subject_id'].nunique(),
             demo[(demo['is_readmitted']) & (demo['is_psych'])]['subject_id'].nunique()))

# Psych med
rows.append(('Psych Med', demo[demo['has_psych_med']]['subject_id'].nunique(),
             demo[(demo['has_psych_med']) & (demo['is_psych'])]['subject_id'].nunique()))

# Averages
rows.append(('Charlson Index (Avg)', demo['charlson_comorbidity_index'].mean(), demo[demo['is_psych']]['charlson_comorbidity_index'].mean()))
rows.append(('SOFA (Avg)', demo['sofa'].mean(), demo[demo['is_psych']]['sofa'].mean()))
rows.append(('SIRS (Avg)', demo['sirs'].mean(), demo[demo['is_psych']]['sirs'].mean()))

# BMI
rows.append(('BMI (Avg)', bmi_any, bmi_psych))


# Final table
final_df = pd.DataFrame(rows, columns=['Category', 'Any Patients', 'Psych Patients'])
final_df.to_csv('./data/summary_statistics.csv', index=False)