## Evaluation simple encoder transformer
- trained using a small balanced dataset
- tested with original dataset

In [3]:
import os
import sys
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from torch import nn

print(torch.__version__)

notebook_dir = os.getcwd()

project_root = os.path.abspath(os.path.join(notebook_dir, "../.."))
print(project_root)
if project_root not in sys.path:
    sys.path.append(project_root)

EVAL = "02_simple_transformer"

2.2.2
/Users/damianstone/Documents/Code/machine-learning/dl-sepsis-prediction


## Load model

In [4]:
from architectures import TransformerClassifier

model = TransformerClassifier(input_dim=38, num_heads=2)
model.load_state_dict(torch.load("./saved/02_simple_transformer.pth"))



<All keys matched successfully>

## Testing data

In [None]:
from utils import get_data
from sklearn.model_selection import train_test_split

DATA_PATH = get_data.get_dataset_abspath()
load_path = os.path.join(DATA_PATH, "imputed_sofa.parquet")
imputed_df = pd.read_parquet(load_path)

X_tensor = torch.tensor(imputed_df.drop(columns=['SepsisLabel']).values, dtype=torch.float32)
y_tensor = torch.tensor(imputed_df['SepsisLabel'].values, dtype=torch.long)


## Testing loop

In [None]:
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from torchmetrics import Accuracy

batch_size = 512 
test_dataset = TensorDataset(X_tensor, y_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.eval()
test_loss, test_acc = 0, 0
loss_fn = nn.BCEWithLogitsLoss()
t_accuracy = Accuracy(task='binary')
all_y_logits, all_y_probs, all_y_pred, all_y_test = [], [], [], []

with torch.inference_mode():
    progress_bar = tqdm(test_loader, desc="Testing", leave=False)
    for X_batch, y_batch in progress_bar:
        y_logits = model(X_batch)
        y_probs = torch.sigmoid(y_logits)
        y_pred = torch.round(y_probs)
        
        loss = loss_fn(y_logits, y_batch.unsqueeze(1).float())
        acc = t_accuracy(y_pred, y_batch.unsqueeze(1).float())
        
        test_loss += loss.item()
        test_acc += acc.item()
        
        progress_bar.set_postfix({"Loss": loss.item(), "Acc": acc.item()})
        
        all_y_logits.append(y_logits.cpu())
        all_y_probs.append(y_probs.cpu())
        all_y_pred.append(y_pred.cpu())
        all_y_test.append(y_batch.cpu())

test_loss /= len(test_loader)
test_acc /= len(test_loader)

print(f"Test Loss: {test_loss:.5f} | Test Accuracy: {test_acc:.2f}%")

Testing:   0%|          | 0/3002 [00:00<?, ?it/s]

                                                                                   

Test Loss: 0.60385 | Test Accuracy: 0.73%




In [None]:
import pandas as pd
import numpy as np
from t_utils import save_eval_csv

all_y_logits = torch.cat(all_y_logits).numpy().flatten()
all_y_probs = torch.cat(all_y_probs).numpy().flatten()
all_y_pred = torch.cat(all_y_pred).numpy().flatten()
all_y_test = torch.cat(all_y_test).numpy().astype(int) 

df = pd.DataFrame({
    'y_logits': all_y_logits,
    'y_probs': all_y_probs,
    'y_pred': all_y_pred,
    'y_test': all_y_test
})


TypeError: cat(): argument 'tensors' (position 1) must be tuple of Tensors, not numpy.ndarray

In [15]:
save_eval_csv(df, EVAL)

In [13]:
accuracy = (df['y_pred'] == df['y_test']).mean() * 100
print(f"Total Accuracy: {accuracy:.2f}%")

Total Accuracy: 72.98%


In [16]:
from t_utils import save_plots, FEATURE_NAMES

y_test = df['y_test'].values
y_probs = df['y_probs'].values
y_pred = df['y_pred'].values

save_plots(
    y_test=y_test,
    y_probs=y_probs,
    y_pred=y_pred,
    model=model, 
    feature_names=FEATURE_NAMES,
    eval=EVAL,
    attention_weights=[]
)