In [None]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import matplotlib.pyplot as plt
from joblib import load, dump
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd

# Read the serialized train/test splits and the activity scaler from the
# working directory. Each split unpacks into three objects (X, y, t).
# NOTE(review): joblib artifacts are pickle-based -- only load trusted files.
X_train, y_train, t_train = load('train_set.joblib')
X_test, y_test, t_test = load('test_set.joblib')
activity_scaler = load('activity_scaler.joblib')

# Work on a copy of the features with the target attached as column 'y',
# then split it per patient while keeping the original patient order.
train_df = X_train.assign(y=y_train.values)
groups = train_df.groupby(by='patient_id', sort=False)

class PatientSeriesDataset(Dataset):
    """One (input sequence, target) sample per patient group.

    For every ``(patient_id, frame)`` pair yielded by ``grouped_df`` the
    feature columns (everything except ``patient_id``, ``y`` and
    ``prev_act``) of all rows but the last form the input sequence, and the
    ``y`` value of the last row is the regression target.

    Groups with fewer than two rows cannot provide both an input step and a
    target; they would produce a zero-length sequence, which
    ``pack_padded_sequence`` rejects. Such groups are skipped and their ids
    are collected in ``self.skipped_ids``.

    Args:
        grouped_df: iterable of ``(patient_id, DataFrame)`` pairs, e.g. the
            result of ``DataFrame.groupby('patient_id')``.
    """

    def __init__(self, grouped_df):
        self.samples = []
        # Ids of patients dropped because their series was too short.
        self.skipped_ids = []
        for pid, grp in grouped_df:
            if len(grp) < 2:
                self.skipped_ids.append(pid)
                continue
            feats = (
                grp.drop(columns=['patient_id', 'y', 'prev_act'])
                .values.astype(np.float32)
            )
            target = torch.tensor(grp['y'].values[-1], dtype=torch.float32)
            # input = all but last row, target = y of the last row
            self.samples.append((torch.from_numpy(feats[:-1]), target))

    def __len__(self):
        """Return the number of usable patient samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the ``(sequence, target)`` tuple stored at ``idx``."""
        return self.samples[idx]

def collate_fn(batch):
    """Merge variable-length samples into one zero-padded batch.

    Args:
        batch: list of ``(sequence, target)`` pairs, where each sequence is
            a tensor whose first dimension is its length.

    Returns:
        Tuple ``(padded, targets, lengths)``: the sequences right-padded with
        zeros to the longest one (batch first), the stacked targets, and a
        tensor holding each sequence's original length.
    """
    sequences, labels = map(list, zip(*batch))
    padded_batch = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    seq_lens = torch.tensor([seq.size(0) for seq in sequences])
    stacked_labels = torch.stack(labels)
    return padded_batch, stacked_labels, seq_lens

# Build one sample per patient and serve them in shuffled mini-batches of 8;
# collate_fn pads the variable-length sequences within each batch.
train_ds = PatientSeriesDataset(grouped_df=groups)
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=8,
    shuffle=True,
    collate_fn=collate_fn,
)


class SimpleRNN(nn.Module):
    """Vanilla RNN regressor producing one scalar per input sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size=32, num_layers=1):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc  = nn.Linear(hidden_size, 1)

    def forward(self, x, lengths):
        """Predict one value per sequence from its last valid time step.

        Args:
            x: padded batch of shape (batch, max_len, input_size).
            lengths: true length of every sequence in ``x`` (all > 0), as a
                tensor on any device or as a plain sequence of ints.

        Returns:
            Tensor of shape (batch,) with one prediction per sequence.
        """
        # pack_padded_sequence requires the lengths to live on the CPU,
        # regardless of the device that holds ``x``.
        cpu_lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
        packed_in = pack_padded_sequence(
            x, cpu_lengths, batch_first=True, enforce_sorted=False
        )
        # For packed input, h_n holds the hidden state at each sequence's
        # last valid step, already restored to the original batch order.
        _, h_n = self.rnn(packed_in)
        last = h_n[-1]                        # top layer: (batch, hidden_size)
        return self.fc(last).squeeze(1)       # (batch,)

# The model's input width is the number of feature columns left once the
# identifier and 'prev_act' are removed (the same columns the dataset feeds).
n_feats = len(X_train.drop(columns=['patient_id', 'prev_act']).columns)
model = SimpleRNN(input_size=n_feats)

# Mean-squared-error objective, optimised with Adam at a 1e-3 learning rate.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


# Train for four epochs (epoch takes the values 1..4) and report the average
# batch loss after each one.
for epoch in range(1, 5):
    model.train()
    # Running sum of the per-batch losses for this epoch.
    total_loss = 0.0
    for seqs, targets, lengths in train_loader:
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        preds = model(seqs, lengths)
        loss  = criterion(preds, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean of the per-batch losses: every batch counts equally, so a smaller
    # final batch carries the same weight as a full one.
    print(f"Epoch {epoch:02d} — Train MSE: {total_loss/len(train_loader):.4f}")



Epoch 01 — Train MSE: 0.0454
Epoch 02 — Train MSE: 0.0157
Epoch 03 — Train MSE: 0.0153
Epoch 04 — Train MSE: 0.0146
