## Load Data

In [6]:
import pandas as pd
import numpy as np
import pickle
from pathlib import Path


def load_wesad_subject(subject_path: Path) -> pd.DataFrame:
    subject_path = Path(subject_path)

    # 1. Load the pickle file (WESAD pickles require 'latin1' encoding)
    with subject_path.open('rb') as file:
        data = pickle.load(file, encoding='latin1')

    # 2. Extract the chest signals
    chest_data = data['signal']['chest']

    # 3. Build DataFrame for chest sensors
    df = pd.DataFrame()
    df['ACC_x'] = chest_data['ACC'][:, 0]
    df['ACC_y'] = chest_data['ACC'][:, 1]
    df['ACC_z'] = chest_data['ACC'][:, 2]
    df['ECG']   = chest_data['ECG'].flatten()
    df['EMG']   = chest_data['EMG'].flatten()
    df['EDA']   = chest_data['EDA'].flatten()
    df['Temp']  = chest_data['Temp'].flatten()
    df['Resp']  = chest_data['Resp'].flatten()

    # 4. Labels (as provided in the pickle)
    labels = data['label']
    df['label'] = np.asarray(labels).flatten()

    # 5. Subject ID (fallback to folder name if not present)
    df['subject'] = data.get('subject', subject_path.parent.name)

    return df


base_path = Path(r"H:\Research\archive (1)\WESAD")
subject_id = "S2"
file_path = base_path / subject_id / f"{subject_id}.pkl"

print("Resolved path:", file_path)

if not file_path.exists():
    raise FileNotFoundError(
        f"File not found at {file_path}. Verify the drive/path and subject folder.")

df_s2 = load_wesad_subject(file_path)
print("Success! Loaded", subject_id)
print("Shape:", df_s2.shape)
print("Columns:", list(df_s2.columns)[:12], "...")
print("\nSample Data:\n", df_s2.head())

Resolved path: H:\Research\archive (1)\WESAD\S2\S2.pkl
Success! Loaded S2
Shape: (4255300, 10)
Columns: ['ACC_x', 'ACC_y', 'ACC_z', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp', 'label', 'subject'] ...

Sample Data:
     ACC_x   ACC_y   ACC_z       ECG       EMG       EDA       Temp      Resp  \
0  0.9554 -0.2220 -0.5580  0.021423 -0.004440  5.250549  30.120758 -1.148987   
1  0.9258 -0.2216 -0.5538  0.020325  0.004349  5.267334  30.129517 -1.124573   
2  0.9082 -0.2196 -0.5392  0.016525  0.005173  5.243301  30.138214 -1.152039   
3  0.8974 -0.2102 -0.5122  0.016708  0.007187  5.249405  30.129517 -1.158142   
4  0.8882 -0.2036 -0.4824  0.011673 -0.015152  5.286407  30.130951 -1.161194   

   label subject  
0      0      S2  
1      0      S2  
2      0      S2  
3      0      S2  
4      0      S2  
Success! Loaded S2
Shape: (4255300, 10)
Columns: ['ACC_x', 'ACC_y', 'ACC_z', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp', 'label', 'subject'] ...

Sample Data:
     ACC_x   ACC_y   ACC_z       ECG       EM

In [7]:
import numpy as np
from scipy import stats

def create_windows(df, window_seconds=2, stride_seconds=1, sampling_rate=700):
        
    # 1. Define window size in "steps" (rows)
    window_steps = int(window_seconds * sampling_rate)
    stride_steps = int(stride_seconds * sampling_rate)
    
    feature_cols = ['ACC_x', 'ACC_y', 'ACC_z', 'ECG', 'EMG', 'EDA', 'Temp', 'Resp']
    data = df[feature_cols].values
    labels = df['label'].values
    
    X_windows = []
    y_windows = []
    
    
    for i in range(0, len(df) - window_steps, stride_steps):
        

        window_data = data[i : i + window_steps]
        window_labels = labels[i : i + window_steps]
        mode_label = stats.mode(window_labels, keepdims=True)[0][0]    
        X_windows.append(window_data.transpose()) 
        y_windows.append(mode_label)
        
    return np.array(X_windows), np.array(y_windows)

X_s2, y_s2 = create_windows(df_s2, window_seconds=2, stride_seconds=0.25)

print(f"Original shape: {df_s2.shape}")
print(f"New X shape: {X_s2.shape}  <-- (Samples, Channels, TimeSteps)")
print(f"New y shape: {y_s2.shape}")



Original shape: (4255300, 10)
New X shape: (24308, 8, 1400)  <-- (Samples, Channels, TimeSteps)
New y shape: (24308,)


In [8]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# 1. Convert to PyTorch Tensors
# X shape: (Samples, Channels, TimeSteps) -> Float
# y shape: (Samples,) -> Long (Integers for classification)
tensor_x = torch.Tensor(X_s2) 
tensor_y = torch.Tensor(y_s2).long()

# 2. Split into Train and Test (80% Train, 20% Test)
# This lets us see if the model is actually learning or just memorizing.
X_train, X_test, y_train, y_test = train_test_split(tensor_x, tensor_y, test_size=0.2, random_state=42)

# 3. Create DataLoaders
batch_size = 32

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training batches: {len(train_loader)}")
print(f"Testing batches: {len(test_loader)}")

Training batches: 608
Testing batches: 152


In [16]:
import torch.nn as nn
import numpy as np

class LightweightModel(nn.Module):
    def __init__(self, input_channels, num_classes, window_size=1400):
        super(LightweightModel, self).__init__()
        
        # --- PART 1: The Client Encoder ---
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        
        # --- PART 2: The Server Classifier ---
        final_dim = window_size // 4
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * final_dim, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        features = self.encoder(x)
        output = self.classifier(features)
        return output

# Initialize Model
# Determine number of classes from labels to avoid out-of-bounds errors
num_classes = int(len(np.unique(y_s2)))
model = LightweightModel(input_channels=X_s2.shape[1], num_classes=num_classes, window_size=X_s2.shape[2])
print(f"Initialized model with num_classes={num_classes}")
print(model)

Initialized model with num_classes=7
LightweightModel(
  (encoder): Sequential(
    (0): Conv1d(8, 16, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=(2,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=11200, out_features=64, bias=True)
    (2): ReLU()
    (3): Linear(in_features=64, out_features=7, bias=True)
  )
)


In [10]:
import torch.optim as optim

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

print(f"Starting training on {device}...")

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Stats
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
    epoch_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {running_loss/len(train_loader):.4f} | Accuracy: {epoch_acc:.2f}%")

print("Training Complete.")

Starting training on cpu...


IndexError: Target 2 is out of bounds.