<a href="https://colab.research.google.com/github/deburg0/ROS-sandbox/blob/main/Transformer-LSTM%20model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This Jupyter Notebook implements the hybrid Transformer–LSTM model described in the manuscript, designed for
 time-series classification of cell malignancy based on mechanistic simulation outputs.
 The model integrates transformer-based self-attention for capturing long-range dependencies
 with LSTM sequence modeling for temporal dynamics. The script supports .npz input datasets
 containing multi-stressor profiles (ROS, pH, temperature, ion channel conductances, etc.)
 and outputs predicted malignancy classes.

 Select T4 runtime


In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import pandas as pd



#### Model Definition

In [2]:
class TransformerLSTM(nn.Module):
    def __init__(self, input_dim, d_model=64, nhead=4, num_layers=2, lstm_hidden=64, num_classes=2):
        super(TransformerLSTM, self).__init__()
        self.input_fc = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.lstm = nn.LSTM(d_model, lstm_hidden, batch_first=True)
        self.fc_out = nn.Linear(lstm_hidden, num_classes)

    def forward(self, x):
        x = self.input_fc(x)  # (batch, seq_len, d_model)
        x = self.transformer_encoder(x)
        lstm_out, _ = self.lstm(x)
        out = self.fc_out(lstm_out[:, -1, :])  # take last time step
        return out


#### Load Data

In [3]:
def load_npz(path):
    data = np.load(path)
    X = data['X']  # shape: (samples, timesteps, features)
    y = data['y']  # shape: (samples,)
    return X, y


#### Train Function

In [4]:
def train_model(X, y, num_epochs=50, batch_size=16, lr=1e-3):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_samples, seq_len, n_features = X.shape

    model = TransformerLSTM(input_dim=n_features).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    dataset = TensorDataset(torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(loader):.4f}")

    return model


#### Evaluation

In [5]:
def evaluate_model(model, X, y):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    with torch.no_grad():
        outputs = model(torch.tensor(X, dtype=torch.float32).to(device))
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        acc = accuracy_score(y, preds)
        auc = roc_auc_score(y, preds)
        cm = confusion_matrix(y, preds)
    return acc, auc, cm, preds


In [6]:
# Upload .npz File
!wget https://raw.githubusercontent.com/deburg0/ROS-sandbox/main/omics_informed_demo.npz

--2026-01-30 19:49:48--  https://raw.githubusercontent.com/deburg0/ROS-sandbox/main/omics_informed_demo.npz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2309024 (2.2M) [application/octet-stream]
Saving to: ‘omics_informed_demo.npz’


2026-01-30 19:49:48 (85.6 MB/s) - ‘omics_informed_demo.npz’ saved [2309024/2309024]



In [7]:
# Path to NPZ time-series dataset (change this to your file)
npz_path = "omics_informed_demo.npz"
X, y = load_npz(npz_path)

In [8]:

model = train_model(X, y, num_epochs=50, batch_size=16, lr=1e-3)

acc, auc, cm, preds = evaluate_model(model, X, y)
print("Accuracy:", acc)
print("AUC:", auc)
print("Confusion Matrix:\n", cm)


Epoch 1/50, Loss: 0.3473
Epoch 2/50, Loss: 0.1008
Epoch 3/50, Loss: 0.0384
Epoch 4/50, Loss: 0.0160
Epoch 5/50, Loss: 0.0082
Epoch 6/50, Loss: 0.0050
Epoch 7/50, Loss: 0.0035
Epoch 8/50, Loss: 0.0027
Epoch 9/50, Loss: 0.0022
Epoch 10/50, Loss: 0.0019
Epoch 11/50, Loss: 0.0017
Epoch 12/50, Loss: 0.0015
Epoch 13/50, Loss: 0.0014
Epoch 14/50, Loss: 0.0013
Epoch 15/50, Loss: 0.0012
Epoch 16/50, Loss: 0.0011
Epoch 17/50, Loss: 0.0010
Epoch 18/50, Loss: 0.0010
Epoch 19/50, Loss: 0.0009
Epoch 20/50, Loss: 0.0009
Epoch 21/50, Loss: 0.0008
Epoch 22/50, Loss: 0.0008
Epoch 23/50, Loss: 0.0007
Epoch 24/50, Loss: 0.0007
Epoch 25/50, Loss: 0.0007
Epoch 26/50, Loss: 0.0006
Epoch 27/50, Loss: 0.0006
Epoch 28/50, Loss: 0.0006
Epoch 29/50, Loss: 0.0006
Epoch 30/50, Loss: 0.0005
Epoch 31/50, Loss: 0.0005
Epoch 32/50, Loss: 0.0005
Epoch 33/50, Loss: 0.0005
Epoch 34/50, Loss: 0.0005
Epoch 35/50, Loss: 0.0004
Epoch 36/50, Loss: 0.0004
Epoch 37/50, Loss: 0.0004
Epoch 38/50, Loss: 0.0004
Epoch 39/50, Loss: 0.



In [9]:
 # Save Model Weights

torch.save(model.state_dict(), 'model_weights.pth')


In [10]:
# Save predictions to Excel
df_preds = pd.DataFrame({
    "y_true": y,
    "y_pred": preds
})
df_preds.to_excel("transformer_lstm_results.xlsx", index=False)
print("Results saved to transformer_lstm_results.xlsx")

Results saved to transformer_lstm_results.xlsx


In [11]:
# Results are save to local disk which is transient