In [6]:
### FETCH COHORT FROM REMOTE DB ###

import pandas as pd
from src.db import attach_duckdb, load_sql, duckdb_to_df

# Announce the step, then attach the "mimic" database via the src.db helper.
# NOTE(review): the recorded output of this cell shows an "already exists"
# attach error when it is re-run in a live kernel, after which execution
# continued — confirm the helper is meant to tolerate repeated attaches.
print("Fetching Cohort...")
attach_duckdb("mimic")

# Load the cohort query text and materialise its result as a DataFrame.
cohort_query = load_sql("cohorts.sql")
cohort_df = duckdb_to_df(cohort_query)

# .tolist() converts the column to built-in Python scalars before the tuple is
# built; the tuple is interpolated into the features query further down.
hadm_id_values = cohort_df['hadm_id'].tolist()
cohort_ids = tuple(hadm_id_values)

print(len(cohort_ids), "patients in cohort fetched.")

Fetching Cohort...
Error attaching PostgreSQL: Binder Error: Failed to attach database: database with name "mimic" already exists


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

11726 patients in cohort fetched.


In [20]:
### FETCH FEATURES ###

print("Fetching Features...")
features_sql = load_sql("features.sql")

# Render the id list explicitly instead of relying on the tuple's repr:
# a one-element tuple formats as "(123,)" and an empty one as "()", neither
# of which is valid SQL. For two or more integer ids the text is identical to
# what formatting the tuple produced before. int() also guarantees that only
# integer literals are interpolated into the query text.
# NOTE(review): assumes hadm_id values are integers and that features.sql
# uses {cohort_ids} where a parenthesised list is expected — confirm.
if not cohort_ids:
    raise ValueError("Cohort is empty; there are no hadm_id values to fetch features for.")
cohort_ids_sql = "(" + ", ".join(str(int(hadm_id)) for hadm_id in cohort_ids) + ")"

features_df = duckdb_to_df(features_sql.format(cohort_ids=cohort_ids_sql))

# Inner join: cohort rows without a matching features row are dropped.
full_data = pd.merge(cohort_df, features_df, on='hadm_id', how='inner')

print("Features fetched. Full data shape:", full_data.shape)

Fetching Features...


: 

In [None]:
### DATA PREPROCESSING ###

# Measurements that each have a `<name>_base` and a `<name>_end` column.
params = ['hr', 'map', 'crea', 'lac']

# NOTE(review): the medians below are computed over the whole dataset, i.e.
# before the train/test split done later in the notebook, so held-out rows
# influence the imputed values. Consider imputing with training-set medians.
for p in params:
    # Fill missing base/end readings with the column median ...
    for suffix in ('base', 'end'):
        col = f'{p}_{suffix}'
        full_data[col] = full_data[col].fillna(full_data[col].median())
    # ... then derive the change between the end and the base reading.
    full_data[f'{p}_delta'] = full_data[f'{p}_end'] - full_data[f'{p}_base']

feature_cols = [f'{p}_delta' for p in params] + [f'{p}_base' for p in params] + ['age', 'gender']

# Encode gender as 1 for 'M' and 0 for anything else. The guard makes the cell
# safe to re-run: once the column is numeric, comparing it with 'M' again
# would turn every row into 0.
if not pd.api.types.is_numeric_dtype(full_data['gender']):
    full_data['gender'] = full_data['gender'].eq('M').astype(int)

In [None]:
### SPLIT DATA TO TRAINING AND TEST SETS ###

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Pull the target and the feature matrix out of the frame as NumPy arrays.
y = full_data['label'].values
X = full_data[feature_cols].values

# Hold out 20% of the rows for testing, stratified on the label, with a fixed
# random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training rows only, then apply the same transform to
# both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
### MODEL DEFINITION ###

from src.model import CardiacDataset, MortalityPredictor
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

import torch  # needed for seeding; the imports above only bring in torch submodules

# Seed PyTorch before the model is built so that initial weights are
# repeatable across kernel restarts. 42 matches the random_state used for the
# train/test split.
SEED = 42
torch.manual_seed(SEED)

# Wrap the scaled feature arrays and labels in the project's dataset class.
train_dataset = CardiacDataset(X_train_scaled, y_train)
test_dataset = CardiacDataset(X_test_scaled, y_test)

# A dedicated seeded generator keeps the training shuffle order repeatable;
# the test loader is not shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# One input unit per feature column.
model = MortalityPredictor(input_dim=X_train.shape[1])

# NOTE(review): BCELoss expects probabilities in [0, 1]; presumably
# MortalityPredictor ends in a sigmoid — confirm against src.model.
criterion = nn.BCELoss() # Binary Cross Entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
### MODEL TRAINING ###

EPOCHS = 20
print("Starting Training...")

# The number of batches per epoch is fixed, so look it up once.
num_batches = len(train_loader)

for epoch_idx in range(EPOCHS):
    model.train()
    running_loss = 0

    for inputs, targets in train_loader:
        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / num_batches
    print(f"Epoch {epoch_idx+1}/{EPOCHS} | Loss: {mean_loss:.4f}")

In [None]:
### MODEL EVALUATION ###

import torch
from sklearn.metrics import roc_auc_score, classification_report
import numpy as np

# Switch to evaluation mode and collect predictions batch by batch.
model.eval()
pred_batches = []
target_batches = []

# Gradients are not needed for inference.
with torch.no_grad():
    for inputs, targets in test_loader:
        pred_batches.append(model(inputs).numpy())
        target_batches.append(targets.numpy())

# Stitch the per-batch arrays together along the sample axis.
y_pred_probs = np.concatenate(pred_batches)
y_targets = np.concatenate(target_batches)

# Calculate AUC from the raw predicted scores.
auc = roc_auc_score(y_targets, y_pred_probs)
print(f"\nModel AUC-ROC: {auc:.4f}")

# Convert to binary predictions (Threshold 0.5)
above_threshold = y_pred_probs > 0.5
y_pred_binary = above_threshold.astype(int)
print("\nClassification Report:")
print(classification_report(y_targets, y_pred_binary))