In [1]:
import numpy as np
import pandas as pd
import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

In [2]:
# Load two example sessions for a quick sanity check (adjust the paths if needed).
# Paths are relative to the notebook's working directory.
# NOTE(review): pd.read_csv treats the first line as a header by default, and the
# shapes printed below show each y with exactly one row fewer than its X —
# confirm the label CSVs really start with a header row.
X1 = np.load('deep_learning_data/mo_ew2_accdata_21_10_139-1336_x.npy')
y1 = pd.read_csv('deep_learning_data/mo_ew2_accdata_21_10_139-1336_y.csv')

X2 = np.load('deep_learning_data/mo_ew3_accdata_16_12_1419-1449_x.npy')
y2 = pd.read_csv('deep_learning_data/mo_ew3_accdata_16_12_1419-1449_y.csv')

In [3]:
# Check dataset info: array shapes and the set of distinct label values
# for each of the two sample sessions loaded above.
print("X1 shape:", X1.shape)
print("y1 unique:", np.unique(y1))
print("y1 shape:", y1.shape)
print("X2 shape:", X2.shape)
print("y2 unique:", np.unique(y2))
print("y2 shape:", y2.shape)

X1 shape: (1186, 24, 7)
y1 unique: [-1.  0.  1.]
y1 shape: (1185, 1)
X2 shape: (1182, 24, 7)
y2 unique: [-1.  0.  1.]
y2 shape: (1181, 1)


In [4]:
# Get paths for all X and y files
def pair_by_stem(x_paths, y_paths, x_suffix='_x.npy', y_suffix='_y.csv'):
    """Order feature and label paths so index i of both lists is the same session.

    Sorting the two lists independently also compares the differing suffixes,
    so when one session stem is a prefix of another (e.g. ``a`` and ``a_xz``)
    the two lists can come out in different orders. Keying both lists on the
    stem (path without the suffix) keeps them aligned.

    Args:
        x_paths: paths ending in ``x_suffix``.
        y_paths: paths ending in ``y_suffix``.
        x_suffix: trailing part of every feature path.
        y_suffix: trailing part of every label path.

    Returns:
        (x_files, y_files): two lists of equal length, sorted by stem.

    Raises:
        ValueError: if a session has a feature file without a label file,
            or a label file without a feature file.
    """
    def stem(path, suffix):
        return path[:-len(suffix)]

    x_by_stem = {stem(p, x_suffix): p for p in x_paths}
    y_by_stem = {stem(p, y_suffix): p for p in y_paths}

    # Symmetric difference = sessions present on one side only
    unmatched = sorted(set(x_by_stem) ^ set(y_by_stem))
    if unmatched:
        raise ValueError(f"Sessions without a matching X/y file: {unmatched}")

    stems = sorted(x_by_stem)
    return [x_by_stem[s] for s in stems], [y_by_stem[s] for s in stems]


# Sorted by session stem to ensure matching pairs
x_files, y_files = pair_by_stem(
    glob.glob('deep_learning_data/*_x.npy'),
    glob.glob('deep_learning_data/*_y.csv'),
)

print(f"Found {len(x_files)} dataset pairs")

Found 44 dataset pairs


In [5]:
# Sanity check: every feature file needs a label file. This compares counts
# only; it does not verify that the i-th X and i-th y share a session name.
assert len(x_files) == len(y_files), "Mismatch in number of X vs Y files!"

In [6]:
def load_data(x_path, y_path):
    """Load one session's windows and labels, truncated to a common length.

    Args:
        x_path: path to a ``.npy`` file of windows.
        y_path: path to a CSV file of labels (first line is read as the header,
            which is the pandas default).

    Returns:
        (X, y): the first ``n`` windows and the first ``n`` labels, where
        ``n = min(len(X), len(y))``. A single-column label file yields a 1-D
        array of shape ``(n,)``.
    """
    X = np.load(x_path)                     # shape (n_windows, 24, 7)
    y = pd.read_csv(y_path).to_numpy()      # always 2-D: (n_labels, n_columns)
    # Drop the column axis explicitly. A blanket squeeze() would also drop the
    # row axis for a one-row file, leaving a 0-d array that has no len().
    if y.ndim == 2 and y.shape[1] == 1:
        y = y[:, 0]                         # shape (n_labels,)
    # NOTE(review): truncating to the shorter length assumes any surplus
    # windows/labels sit at the END of the session — confirm against how the
    # files were produced.
    n = min(len(X), len(y))
    return X[:n], y[:n]

# Load & accumulate: one (X, y) pair per session, paired by position in the
# two file lists.
all_X, all_y = [], []
for xf, yf in zip(x_files, y_files):
    X, y = load_data(xf, yf)
    all_X.append(X)
    all_y.append(y)

# Concatenate across sessions along the window axis; session boundaries are
# not kept after this point.
X_all = np.concatenate(all_X, axis=0)       # (total_windows, 24, 7)
y_all = np.concatenate(all_y, axis=0)       # (total_windows,)

print(X_all.shape)
print(y_all.shape)

(53836, 24, 7)
(53836,)


In [7]:
# Summary of the pooled dataset: session count, array shapes, label values
# and per-class counts.
print(f"Total number of sessions: {len(x_files)}")
print(f"Final X shape: {X_all.shape}")
print(f"Final y shape: {y_all.shape}")
print(f"Unique labels in y: {np.unique(y_all)}")
# np.bincount only accepts non-negative ints, so labels {-1, 0, 1} are shifted
# to {0, 1, 2}; the counts are therefore ordered as [-1, 0, 1].
print(f"Label distribution: {np.bincount(y_all.astype(int) + 1)}")  # +1 to handle -1 labels

Total number of sessions: 44
Final X shape: (53836, 24, 7)
Final y shape: (53836,)
Unique labels in y: [-1.  0.  1.]
Label distribution: [17954 17943 17939]


In [8]:
# Train/Val/Test split (70%/15%/15%) with stratification on the class label.
# NOTE(review): the split is over individual windows pooled from all sessions,
# so windows of one session can land in train, val and test alike. If windows
# overlap in time or sessions differ systematically, this may inflate the
# val/test scores — consider a session-level split; confirm with the data owner.
X_temp, X_test, y_temp, y_test = train_test_split(
    X_all, y_all, test_size=0.15, stratify=y_all, random_state=42
)
# Now split the 85% into 70/15 overall:
# 0.15 / 0.85 of the remaining data equals 15% of the full dataset.
val_ratio = 0.15 / 0.85
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=val_ratio, stratify=y_temp, random_state=42
)

print(f"Train / Val / Test shapes:\n"
      f"  X_train: {X_train.shape}, y_train: {y_train.shape}\n"
      f"  X_val:   {X_val.shape},   y_val:   {y_val.shape}\n"
      f"  X_test:  {X_test.shape},  y_test:  {y_test.shape}")

Train / Val / Test shapes:
  X_train: (37684, 24, 7), y_train: (37684,)
  X_val:   (8076, 24, 7),   y_val:   (8076,)
  X_test:  (8076, 24, 7),  y_test:  (8076,)


In [9]:
# Z-score normalization (fit on train only, so no val/test statistics leak
# into the scaler)
scaler = StandardScaler()
# flatten time & channels: (N, T, C) → (N*T, C)
# so that one mean/std is estimated per channel across all windows and time steps
train_flat = X_train.reshape(-1, X_train.shape[-1])
scaler.fit(train_flat)

def zscore_normalize(X):
    """Standardise every channel of ``X`` with the module-level ``scaler``.

    The array is viewed as rows of channel vectors (last axis kept, all
    leading axes merged), passed through ``scaler.transform`` and then put
    back into the shape it arrived in.
    """
    n_channels = X.shape[-1]
    standardized = scaler.transform(X.reshape(-1, n_channels))
    return standardized.reshape(X.shape)

# Apply the train-fitted scaler to all three splits.
# Caution: this overwrites X_train/X_val/X_test in place, so re-running this
# cell alone normalises already-normalised data.
X_train = zscore_normalize(X_train)
X_val = zscore_normalize(X_val)
X_test = zscore_normalize(X_test)

print("Done! Your data is now ready for modeling.")

Done! Your data is now ready for modeling.


# CNN

In [10]:
def get_device():
    """Pick the torch device to train on: Apple MPS first, then CUDA, then CPU.

    Returns:
        torch.device: ``"mps"``, ``"cuda"`` or ``"cpu"``.
    """
    # Older PyTorch builds have no `torch.backends.mps` attribute at all;
    # treat that the same as "MPS not available" instead of raising.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# Resolve the device once; the training cells below move models and batches to it.
device = get_device()
print(f"Using device: {device}")

Using device: mps


In [11]:
# Peek at the validation labels (still in the original {-1, 0, 1} coding here).
y_val

array([ 0.,  1.,  1., ..., -1., -1.,  1.])

In [12]:
# X_*: (N_*, 24, 7), y_*: in {-1,0,1}. Remap to {0,1,2}:
def remap(y):
    """Translate labels to class indices: -1 → 0, 0 → 1, anything else → 2."""
    is_negative = y == -1
    is_zero = y == 0
    # Resolve the inner choice first (0 vs. "everything else"), then overlay -1.
    zero_or_rest = np.where(is_zero, 1, 2)
    return np.where(is_negative, 0, zero_or_rest)

# Convert the train/val splits to tensors: float32 features, int64 class
# indices (the dtype nn.CrossEntropyLoss expects for class-index targets).
# The test split is not converted in this cell.
Xtr = torch.tensor(X_train, dtype=torch.float32)
ytr = torch.tensor(remap(y_train), dtype=torch.long)
Xva = torch.tensor(X_val,   dtype=torch.float32)
yva = torch.tensor(remap(y_val),   dtype=torch.long)

In [13]:
# A simple function to make a loader
def make_loader(X, y, bs, shuffle=True):
    """Wrap feature and label tensors in a batched DataLoader.

    Args:
        X: feature tensor; first dimension indexes samples.
        y: label tensor with the same first dimension as ``X``.
        bs: batch size.
        shuffle: reshuffle the samples every epoch. Defaults to ``True`` (the
            previous fixed behaviour); pass ``False`` for validation/test
            loaders where a stable order is preferable.

    Returns:
        torch.utils.data.DataLoader yielding ``(X_batch, y_batch)`` pairs.
    """
    ds = TensorDataset(X, y)
    return DataLoader(ds, batch_size=bs, shuffle=shuffle)

# CNN

In [14]:
class CNN1D(nn.Module):
    """Small 1-D CNN classifier: two conv/ReLU/max-pool stages and a 2-layer head.

    The first dense layer is sized for a time axis of length 6 after the two
    poolings, i.e. for 24-step input windows (24 → 12 → 6).

    Args:
        in_channels: number of input channels per time step.
        num_classes: size of the output logit vector.
        num_filters: filters in the first conv layer (the second uses twice as many).
        kernel_size: convolution kernel width; padding is ``kernel_size // 2``.
        dropout: dropout probability applied after the hidden dense layer.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()
        half_kernel = kernel_size // 2
        wide = num_filters * 2
        self.conv1 = nn.Conv1d(in_channels, num_filters, kernel_size, padding=half_kernel)
        self.conv2 = nn.Conv1d(num_filters, wide, kernel_size, padding=half_kernel)
        self.pool = nn.MaxPool1d(2)
        self.relu = nn.ReLU()
        # two poolings halve the time axis twice: 24 → 12 → 6
        self.fc1 = nn.Linear(wide * 6, 64)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``(batch, time, channels)`` windows to ``(batch, num_classes)`` logits."""
        # Conv1d wants channels before time
        feats = x.transpose(1, 2)
        for conv in (self.conv1, self.conv2):
            feats = self.pool(self.relu(conv(feats)))
        feats = feats.view(feats.shape[0], -1)
        hidden = self.drop(self.relu(self.fc1(feats)))
        return self.fc2(hidden)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        (loss, accuracy): batch losses weighted by batch size and divided by
        the dataset size, and the fraction of correctly classified samples.
        Predictions are taken from the forward pass made before each
        optimizer step.
    """
    model.train()
    loss_sum, hits = 0, 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # weight by batch size so a smaller final batch does not skew the average
        loss_sum += batch_loss.item() * inputs.size(0)
        hits += (logits.argmax(1) == targets).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, hits / n_samples

def eval_one_epoch(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    Returns:
        (loss, accuracy): batch losses weighted by batch size and divided by
        the dataset size, and the fraction of correctly classified samples.
    """
    model.eval()
    loss_sum, hits = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            loss_sum += batch_loss.item() * inputs.size(0)
            hits += (logits.argmax(1) == targets).sum().item()
    n_samples = len(loader.dataset)
    return loss_sum / n_samples, hits / n_samples


In [15]:
def setup_directories():
    """Create the ``models`` and ``results`` output folders in the working directory.

    Folders that already exist are left as they are.
    """
    for folder in ('models', 'results'):
        Path(folder).mkdir(exist_ok=True)

In [16]:
# Hyperparameter grid for the baseline CNN search.
# Every combination is trained: 2 * 2 * 2 * 3 * 3 = 72 runs.
param_grid = {
    'num_filters': [16, 32],
    'kernel_size': [3, 5],
    'dropout':     [0.3, 0.5],
    'lr':          [1e-3, 5e-4, 1e-4],
    'batch_size':  [64, 128, 256]
}

In [17]:
# Setup directories
setup_directories()

max_epochs = 50  # hard cap on epochs per configuration
patience = 3     # consecutive epochs without a new best val accuracy before stopping

results = []
best_val_acc = 0
best_config = None

# Exhaustive grid search: one freshly initialised CNN1D per hyperparameter combination.
for nf in param_grid['num_filters']:
  for ks in param_grid['kernel_size']:
    for dr in param_grid['dropout']:
      for lr in param_grid['lr']:
        for bs in param_grid['batch_size']:
          print(f"\nTraining with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
          run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'

          # Prepare data loaders
          train_loader = make_loader(Xtr, ytr, bs)
          val_loader   = make_loader(Xva, yva, bs)

          # Instantiate model, loss, optimizer
          model = CNN1D(in_channels=7, num_classes=3,
                        num_filters=nf, kernel_size=ks,
                        dropout=dr).to(device)
          criterion = nn.CrossEntropyLoss()
          optimizer = optim.Adam(model.parameters(), lr=lr)

          best_val_acc = 0.0
          epochs_no_improve = 0
          # Start from a snapshot of this run's initial weights so the restore
          # step below can never pick up weights from a previous configuration.
          best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}
          history = {
              'train_loss': [], 'train_acc': [],
              'val_loss': [], 'val_acc': []
          }

          # Train up to max_epochs epochs with simple early stopping
          for epoch in range(max_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
            val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

            # Update history
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss)
            history['val_acc'].append(val_acc)

            print(f'Epoch {epoch+1}/{max_epochs}:')
            print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
            print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

            if val_acc > best_val_acc:
              best_val_acc = val_acc
              epochs_no_improve = 0
              # state_dict() returns references to the live parameter tensors,
              # which keep changing as training continues. Clone them so this
              # really is a frozen copy of the best epoch.
              best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}

              # Save best model weights
              torch.save(model.state_dict(), f'models/model_{run_tag}.pth')

              # Save training history
              # NOTE(review): written only on improvement, so the file on disk
              # stops at the best epoch — confirm that is intended.
              pd.DataFrame(history).to_csv(f'results/history_{run_tag}.csv', index=False)
            else:
              epochs_no_improve += 1
            if epochs_no_improve >= patience:
              print(f"Early stopping triggered after {epoch+1} epochs")
              break

          # restore best weights
          model.load_state_dict(best_weights)

          results.append({
            'num_filters': nf,
            'kernel_size': ks,
            'dropout':     dr,
            'lr':          lr,
            'batch_size':  bs,
            'best_val_acc': best_val_acc,
            'epochs_trained': epoch + 1
          })
          print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {best_val_acc:.3f} (stopped at epoch {epoch+1})")

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training with: nf=16, ks=3, dr=0.3, lr=1e-03, bs=64
Epoch 1/50:
Train Loss: 1.0951, Train Acc: 0.3582
Val Loss: 1.0882, Val Acc: 0.3772
Epoch 2/50:
Train Loss: 1.0761, Train Acc: 0.4006
Val Loss: 1.0751, Val Acc: 0.4019
Epoch 3/50:
Train Loss: 1.0587, Train Acc: 0.4237
Val Loss: 1.0645, Val Acc: 0.4143
Epoch 4/50:
Train Loss: 1.0382, Train Acc: 0.4460
Val Loss: 1.0440, Val Acc: 0.4425
Epoch 5/50:
Train Loss: 1.0191, Train Acc: 0.4645
Val Loss: 1.0376, Val Acc: 0.4373
Epoch 6/50:
Train Loss: 1.0015, Train Acc: 0.4794
Val Loss: 1.0257, Val Acc: 0.4565
Epoch 7/50:
Train Loss: 0.9861, Train Acc: 0.4958
Val Loss: 1.0220, Val Acc: 0.4646
Epoch 8/50:
Train Loss: 0.9742, Train Acc: 0.5070
Val Loss: 1.0220, Val Acc: 0.4641
Epoch 9/50:
Train Loss: 0.9637, Train Acc: 0.5153
Val Loss: 1.0080, Val Acc: 0.4730
Epoch 10/50:
Train Loss: 0.9512, Train Acc: 0.5238
Val Loss: 1.0112, Val Acc: 0.4776
Epoch 11/50:
Train Loss: 0.9415, Train Acc: 0.5330
Val Loss: 1.0047, Val Acc: 0.4870
Epoch 12/50:
Train Lo

# Deeper CNN

In [23]:
class ResidualBlock(nn.Module):
    """Two Conv1d + BatchNorm layers wrapped by a residual connection.

    The shortcut is the identity when the channel count is unchanged, and a
    1x1 convolution followed by batch norm when it is not.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the block.
        kernel_size: width of both convolutions; padding is ``kernel_size // 2``.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        pad = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, padding=pad)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, padding=pad)
        self.bn2 = nn.BatchNorm1d(out_channels)

        # Skip connection: empty Sequential acts as identity
        if in_channels == out_channels:
            projection = []
        else:
            projection = [
                nn.Conv1d(in_channels, out_channels, 1),
                nn.BatchNorm1d(out_channels),
            ]
        self.skip = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + skip(x))``."""
        shortcut = self.skip(x)
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += shortcut
        return F.relu(y)

class DeepCNN1D(nn.Module):
    """Residual 1-D CNN: stem conv, two pooled residual stages, 3-layer dense head.

    The first dense layer is sized for a time axis of length 6 after the two
    poolings, i.e. for 24-step input windows (24 → 12 → 6).

    Args:
        in_channels: number of input channels per time step.
        num_classes: size of the output logit vector.
        num_filters: filters of the stem conv; the residual stages use 2x and 4x.
        kernel_size: kernel width of the stem conv and of both residual stages.
        dropout: dropout probability used after each hidden dense layer.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()
        stem_ch = num_filters
        mid_ch = num_filters * 2
        top_ch = num_filters * 4

        # Initial convolution
        self.conv1 = nn.Conv1d(in_channels, stem_ch, kernel_size, padding=kernel_size // 2)
        self.bn1 = nn.BatchNorm1d(stem_ch)

        # Residual blocks
        self.res1 = ResidualBlock(stem_ch, mid_ch, kernel_size)
        self.res2 = ResidualBlock(mid_ch, top_ch, kernel_size)

        # Pooling (shared by both stages)
        self.pool = nn.MaxPool1d(2)

        # Fully connected layers; after two pools: 24→12→6
        self.fc1 = nn.Linear(top_ch * 6, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``(batch, time, channels)`` windows to ``(batch, num_classes)`` logits."""
        # Conv1d wants (batch, channels, time)
        h = F.relu(self.bn1(self.conv1(x.permute(0, 2, 1))))

        # Each residual stage is preceded by a 2x max-pool
        for stage in (self.res1, self.res2):
            h = stage(self.pool(h))

        h = h.view(h.size(0), -1)

        # Dense → BatchNorm → ReLU → Dropout, twice
        for dense, norm in ((self.fc1, self.bn_fc1), (self.fc2, self.bn_fc2)):
            h = self.drop(F.relu(norm(dense(h))))

        return self.fc3(h)

In [24]:
def setup_directories():
    """Create the ``deeper_models`` and ``deeper_results`` output folders.

    Folders are created in the working directory; existing ones are left as
    they are. Note that this definition replaces any earlier
    ``setup_directories`` in the notebook namespace.
    """
    for folder in ('deeper_models', 'deeper_results'):
        Path(folder).mkdir(exist_ok=True)

In [26]:
# Hyperparameter grid for the residual CNN (same values as the baseline CNN search)
param_grid = {
    'num_filters': [16, 32],
    'kernel_size': [3, 5],
    'dropout':     [0.3, 0.5],
    'lr':          [1e-3, 5e-4, 1e-4],
    'batch_size':  [64, 128, 256]
}

# Setup directories
setup_directories()

max_epochs = 50  # hard cap on epochs per configuration
patience = 3     # consecutive epochs without a new best val accuracy before stopping

results = []
best_val_acc = 0
best_config = None

# Exhaustive grid search: one freshly initialised DeepCNN1D per hyperparameter combination.
for nf in param_grid['num_filters']:
  for ks in param_grid['kernel_size']:
    for dr in param_grid['dropout']:
      for lr in param_grid['lr']:
        for bs in param_grid['batch_size']:
          print(f"\nTraining with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
          run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'

          # Prepare data loaders
          train_loader = make_loader(Xtr, ytr, bs)
          val_loader   = make_loader(Xva, yva, bs)

          # Instantiate model, loss, optimizer
          model = DeepCNN1D(in_channels=7, num_classes=3,
                        num_filters=nf, kernel_size=ks,
                        dropout=dr).to(device)
          criterion = nn.CrossEntropyLoss()
          optimizer = optim.Adam(model.parameters(), lr=lr)

          best_val_acc = 0.0
          epochs_no_improve = 0
          # Start from a snapshot of this run's initial weights so the restore
          # step below can never pick up weights from a previous configuration.
          best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}
          history = {
              'train_loss': [], 'train_acc': [],
              'val_loss': [], 'val_acc': []
          }

          # Train up to max_epochs epochs with simple early stopping
          for epoch in range(max_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
            val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

            # Update history
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss)
            history['val_acc'].append(val_acc)

            # The denominator follows the real loop bound (the message used to
            # say 20 while the loop ran for up to 50 epochs).
            print(f'Epoch {epoch+1}/{max_epochs}:')
            print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
            print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

            if val_acc > best_val_acc:
              best_val_acc = val_acc
              epochs_no_improve = 0
              # state_dict() returns references to the live parameter tensors,
              # which keep changing as training continues. Clone them so this
              # really is a frozen copy of the best epoch.
              best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}

              # Save best model weights
              torch.save(model.state_dict(), f'deeper_models/model_{run_tag}.pth')

              # Save training history
              # NOTE(review): written only on improvement, so the file on disk
              # stops at the best epoch — confirm that is intended.
              pd.DataFrame(history).to_csv(f'deeper_results/history_{run_tag}.csv', index=False)
            else:
              epochs_no_improve += 1
            if epochs_no_improve >= patience:
              print(f"Early stopping triggered after {epoch+1} epochs")
              break

          # restore best weights
          model.load_state_dict(best_weights)

          results.append({
            'num_filters': nf,
            'kernel_size': ks,
            'dropout':     dr,
            'lr':          lr,
            'batch_size':  bs,
            'best_val_acc': best_val_acc,
            'epochs_trained': epoch + 1
          })
          print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {best_val_acc:.3f} (stopped at epoch {epoch+1})")

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('deeper_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training with: nf=16, ks=3, dr=0.3, lr=1e-03, bs=64
Epoch 1/20:
Train Loss: 1.1032, Train Acc: 0.3678
Val Loss: 1.0762, Val Acc: 0.4090
Epoch 2/20:
Train Loss: 1.0609, Train Acc: 0.4226
Val Loss: 1.0507, Val Acc: 0.4319
Epoch 3/20:
Train Loss: 1.0254, Train Acc: 0.4608
Val Loss: 1.0148, Val Acc: 0.4718
Epoch 4/20:
Train Loss: 0.9907, Train Acc: 0.4911
Val Loss: 0.9970, Val Acc: 0.4778
Epoch 5/20:
Train Loss: 0.9602, Train Acc: 0.5177
Val Loss: 0.9951, Val Acc: 0.4916
Epoch 6/20:
Train Loss: 0.9304, Train Acc: 0.5414
Val Loss: 0.9705, Val Acc: 0.5113
Epoch 7/20:
Train Loss: 0.9035, Train Acc: 0.5615
Val Loss: 0.9781, Val Acc: 0.5165
Epoch 8/20:
Train Loss: 0.8803, Train Acc: 0.5797
Val Loss: 0.9399, Val Acc: 0.5337
Epoch 9/20:
Train Loss: 0.8533, Train Acc: 0.6023
Val Loss: 0.9413, Val Acc: 0.5430
Epoch 10/20:
Train Loss: 0.8297, Train Acc: 0.6152
Val Loss: 0.9380, Val Acc: 0.5448
Epoch 11/20:
Train Loss: 0.8038, Train Acc: 0.6308
Val Loss: 0.9631, Val Acc: 0.5417
Epoch 12/20:
Train Lo

# Symmetric TCN

In [30]:
# Temporal Convolutional Network (TCN)
class TemporalBlock(nn.Module):
    """Residual block of two dilated Conv1d + BatchNorm layers.

    Both sides of the sequence are padded by ``(kernel_size - 1) * dilation // 2``,
    which keeps the sequence length unchanged whenever
    ``(kernel_size - 1) * dilation`` is even (e.g. any odd kernel size). The
    shortcut is the identity when the channel count is unchanged, and a 1x1
    convolution followed by batch norm when it is not.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the block.
        kernel_size: width of both convolutions.
        dilation: dilation of both convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        # Half of the dilated receptive-field span on each side of the sequence
        padding = ((kernel_size - 1) * dilation) // 2
        conv_kwargs = dict(padding=padding, dilation=dilation)

        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, **conv_kwargs)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, **conv_kwargs)
        self.bn2 = nn.BatchNorm1d(out_channels)

        # Skip connection: empty Sequential acts as identity
        if in_channels != out_channels:
            self.skip = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 1),
                nn.BatchNorm1d(out_channels),
            )
        else:
            self.skip = nn.Sequential()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + skip(x))``."""
        shortcut = self.skip(x)
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += shortcut
        return F.relu(hidden)

class TCN(nn.Module):
    """Temporal convolutional classifier: stem conv, 3 dilated blocks, pooled dense head.

    The temporal blocks use dilations 1, 2 and 4 and double the channel count
    at every stage. The time axis is removed by global average pooling before
    the dense head.

    Args:
        in_channels: number of input channels per time step.
        num_classes: size of the output logit vector.
        num_filters: filters of the stem conv; the blocks use 2x, 4x and 8x.
        kernel_size: kernel width of the stem conv and of every temporal block.
        dropout: dropout probability used after each hidden dense layer.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()
        widths = [num_filters * factor for factor in (1, 2, 4, 8)]

        # Initial convolution
        self.conv1 = nn.Conv1d(in_channels, widths[0], kernel_size, padding=kernel_size // 2)
        self.bn1 = nn.BatchNorm1d(widths[0])

        # Temporal blocks with increasing dilation
        self.tcn1 = TemporalBlock(widths[0], widths[1], kernel_size, dilation=1)
        self.tcn2 = TemporalBlock(widths[1], widths[2], kernel_size, dilation=2)
        self.tcn3 = TemporalBlock(widths[2], widths[3], kernel_size, dilation=4)

        # Global average pooling over the time axis
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Fully connected layers
        self.fc1 = nn.Linear(widths[3], 128)
        self.bn_fc1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``(batch, time, channels)`` windows to ``(batch, num_classes)`` logits."""
        # Conv1d wants (batch, channels, time)
        h = F.relu(self.bn1(self.conv1(x.permute(0, 2, 1))))

        for block in (self.tcn1, self.tcn2, self.tcn3):
            h = block(h)

        # Collapse the time axis, then drop the leftover singleton dimension
        h = self.global_pool(h)
        h = h.view(h.size(0), -1)

        # Dense → BatchNorm → ReLU → Dropout, twice
        for dense, norm in ((self.fc1, self.bn_fc1), (self.fc2, self.bn_fc2)):
            h = self.drop(F.relu(norm(dense(h))))

        return self.fc3(h)

# Setup directories for TCN
def setup_tcn_directories():
    """Create the ``tcn_models`` and ``tcn_results`` output folders.

    Folders are created in the working directory; existing ones are left as
    they are.
    """
    for folder in ('tcn_models', 'tcn_results'):
        Path(folder).mkdir(exist_ok=True)

# %%
# TCN hyperparameter search
param_grid = {
    'num_filters': [16, 32],
    'kernel_size': [3, 5],
    'dropout':     [0.3, 0.5],
    'lr':          [1e-3, 5e-4, 1e-4],
    'batch_size':  [64, 128, 256]
}

# Setup directories
setup_tcn_directories()

max_epochs = 50  # hard cap on epochs per configuration
patience = 3     # consecutive epochs without a new best val accuracy before stopping

results = []
best_val_acc = 0
best_config = None

# Exhaustive grid search: one freshly initialised TCN per hyperparameter combination.
for nf in param_grid['num_filters']:
  for ks in param_grid['kernel_size']:
    for dr in param_grid['dropout']:
      for lr in param_grid['lr']:
        for bs in param_grid['batch_size']:
          print(f"\nTraining TCN with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
          run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'

          # Prepare data loaders
          train_loader = make_loader(Xtr, ytr, bs)
          val_loader   = make_loader(Xva, yva, bs)

          # Instantiate model, loss, optimizer
          model = TCN(in_channels=7, num_classes=3,
                     num_filters=nf, kernel_size=ks,
                     dropout=dr).to(device)
          criterion = nn.CrossEntropyLoss()
          optimizer = optim.Adam(model.parameters(), lr=lr)

          best_val_acc = 0.0
          epochs_no_improve = 0
          # Start from a snapshot of this run's initial weights so the restore
          # step below can never pick up weights from a previous configuration.
          best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}
          history = {
              'train_loss': [], 'train_acc': [],
              'val_loss': [], 'val_acc': []
          }

          # Train up to max_epochs epochs with early stopping
          for epoch in range(max_epochs):
            train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
            val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

            # Update history
            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss)
            history['val_acc'].append(val_acc)

            print(f'Epoch {epoch+1}/{max_epochs}:')
            print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
            print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

            if val_acc > best_val_acc:
              best_val_acc = val_acc
              epochs_no_improve = 0
              # state_dict() returns references to the live parameter tensors,
              # which keep changing as training continues. Clone them so this
              # really is a frozen copy of the best epoch.
              best_weights = {k: v.detach().clone() for k, v in model.state_dict().items()}

              # Save best model weights
              torch.save(model.state_dict(), f'tcn_models/model_{run_tag}.pth')

              # Save training history
              # NOTE(review): written only on improvement, so the file on disk
              # stops at the best epoch — confirm that is intended.
              pd.DataFrame(history).to_csv(f'tcn_results/history_{run_tag}.csv', index=False)
            else:
              epochs_no_improve += 1
            if epochs_no_improve >= patience:
              print(f"Early stopping triggered after {epoch+1} epochs")
              break

          # restore best weights
          model.load_state_dict(best_weights)

          results.append({
            'num_filters': nf,
            'kernel_size': ks,
            'dropout':     dr,
            'lr':          lr,
            'batch_size':  bs,
            'best_val_acc': best_val_acc,
            'epochs_trained': epoch + 1
          })
          print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {best_val_acc:.3f} (stopped at epoch {epoch+1})")

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('tcn_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 TCN configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training TCN with: nf=16, ks=3, dr=0.3, lr=1e-03, bs=64
Epoch 1/50:
Train Loss: 1.0801, Train Acc: 0.4025
Val Loss: 1.0442, Val Acc: 0.4401
Epoch 2/50:
Train Loss: 1.0165, Train Acc: 0.4657
Val Loss: 0.9772, Val Acc: 0.5007
Epoch 3/50:
Train Loss: 0.9643, Train Acc: 0.5141
Val Loss: 0.9519, Val Acc: 0.5162
Epoch 4/50:
Train Loss: 0.9116, Train Acc: 0.5544
Val Loss: 0.8968, Val Acc: 0.5623
Epoch 5/50:
Train Loss: 0.8638, Train Acc: 0.5858
Val Loss: 0.8682, Val Acc: 0.5765
Epoch 6/50:
Train Loss: 0.8220, Train Acc: 0.6146
Val Loss: 0.8577, Val Acc: 0.5826
Epoch 7/50:
Train Loss: 0.7773, Train Acc: 0.6409
Val Loss: 0.8350, Val Acc: 0.6092
Epoch 8/50:
Train Loss: 0.7403, Train Acc: 0.6628
Val Loss: 0.8517, Val Acc: 0.6007
Epoch 9/50:
Train Loss: 0.7044, Train Acc: 0.6850
Val Loss: 0.8522, Val Acc: 0.6091
Epoch 10/50:
Train Loss: 0.6646, Train Acc: 0.7083
Val Loss: 0.8015, Val Acc: 0.6374
Epoch 11/50:
Train Loss: 0.6327, Train Acc: 0.7269
Val Loss: 0.8635, Val Acc: 0.6216
Epoch 12/50:
Trai

# Deeper TCN

In [35]:
class ImprovedTemporalBlock(nn.Module):
    """Dilated residual block: two Conv1d + BatchNorm layers plus a shortcut.

    Symmetric padding of ``(kernel_size - 1) * dilation // 2`` per side keeps
    the sequence length unchanged whenever ``(kernel_size - 1) * dilation`` is
    even (e.g. any odd kernel size). When the channel count changes, the
    shortcut is a 1x1 convolution followed by batch norm; otherwise it is the
    identity.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the block.
        kernel_size: width of both convolutions.
        dilation: dilation of both convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        span = (kernel_size - 1) * dilation
        padding = span // 2

        self.conv1 = nn.Conv1d(
            in_channels, out_channels, kernel_size, padding=padding, dilation=dilation
        )
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(
            out_channels, out_channels, kernel_size, padding=padding, dilation=dilation
        )
        self.bn2 = nn.BatchNorm1d(out_channels)

        # Skip connection with 1x1 convolution for channel matching
        shortcut_layers = []
        if in_channels != out_channels:
            shortcut_layers.append(nn.Conv1d(in_channels, out_channels, 1))
            shortcut_layers.append(nn.BatchNorm1d(out_channels))
        self.skip = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + skip(x))``."""
        identity = self.skip(x)
        branch = self.conv1(x)
        branch = F.relu(self.bn1(branch))
        branch = self.bn2(self.conv2(branch))
        branch += identity
        return F.relu(branch)

class ImprovedTCN(nn.Module):
    """Deeper TCN classifier: stem conv, 5 dilated blocks, pooled 4-layer dense head.

    The temporal blocks use dilations 1, 2, 4, 8 and 16 and double the channel
    count at every stage (up to ``32 * num_filters``). The time axis is removed
    by global average pooling before the dense head.

    Args:
        in_channels: number of input channels per time step.
        num_classes: size of the output logit vector.
        num_filters: filters of the stem conv.
        kernel_size: kernel width of the stem conv and of every temporal block.
        dropout: dropout probability used after each hidden dense layer.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()
        # Channel width entering/leaving each stage: nf, 2nf, 4nf, 8nf, 16nf, 32nf
        w = [num_filters * (2 ** power) for power in range(6)]

        # Initial convolution
        self.conv1 = nn.Conv1d(in_channels, w[0], kernel_size, padding=kernel_size // 2)
        self.bn1 = nn.BatchNorm1d(w[0])

        # Temporal blocks with dilation doubling at every stage
        self.tcn1 = ImprovedTemporalBlock(w[0], w[1], kernel_size, dilation=1)
        self.tcn2 = ImprovedTemporalBlock(w[1], w[2], kernel_size, dilation=2)
        self.tcn3 = ImprovedTemporalBlock(w[2], w[3], kernel_size, dilation=4)
        self.tcn4 = ImprovedTemporalBlock(w[3], w[4], kernel_size, dilation=8)
        self.tcn5 = ImprovedTemporalBlock(w[4], w[5], kernel_size, dilation=16)

        # Global average pooling over the time axis
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # Deeper fully connected layers
        self.fc1 = nn.Linear(w[5], 256)
        self.bn_fc1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn_fc2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 64)
        self.bn_fc3 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``(batch, time, channels)`` windows to ``(batch, num_classes)`` logits."""
        # Conv1d wants (batch, channels, time)
        h = F.relu(self.bn1(self.conv1(x.permute(0, 2, 1))))

        for block in (self.tcn1, self.tcn2, self.tcn3, self.tcn4, self.tcn5):
            h = block(h)

        # Collapse the time axis, then drop the leftover singleton dimension
        h = self.global_pool(h)
        h = h.view(h.size(0), -1)

        # Dense → BatchNorm → ReLU → Dropout, three times
        hidden_layers = (
            (self.fc1, self.bn_fc1),
            (self.fc2, self.bn_fc2),
            (self.fc3, self.bn_fc3),
        )
        for dense, norm in hidden_layers:
            h = self.drop(F.relu(norm(dense(h))))

        return self.fc4(h)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Returns:
        (loss, accuracy): sum of ``batch loss * batch size`` divided by the
        dataset size, and the share of samples whose arg-max logit equals the
        target. Predictions come from the forward pass made before each
        optimizer step.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        scores = model(batch_x)
        step_loss = criterion(scores, batch_y)
        step_loss.backward()
        optimizer.step()

        running_loss += step_loss.item() * batch_x.size(0)
        predicted = scores.argmax(1)
        n_correct += (predicted == batch_y).sum().item()

    dataset_size = len(loader.dataset)
    return running_loss / dataset_size, n_correct / dataset_size

def eval_one_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` in eval mode with gradients disabled.

    Returns:
        (loss, accuracy): sum of ``batch loss * batch size`` divided by the
        dataset size, and the share of samples whose arg-max logit equals the
        target.
    """
    model.eval()
    running_loss = 0
    n_correct = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            scores = model(batch_x)
            running_loss += criterion(scores, batch_y).item() * batch_x.size(0)
            predicted = scores.argmax(1)
            n_correct += (predicted == batch_y).sum().item()
    dataset_size = len(loader.dataset)
    return running_loss / dataset_size, n_correct / dataset_size

def setup_directories():
    """Create the model and result output folders if they do not exist yet.

    Paths are relative, so the folders are created under the current
    working directory.
    """
    for folder in ('improved_tcn_models', 'improved_tcn_results'):
        Path(folder).mkdir(exist_ok=True)

# Hyperparameter grid for the ImprovedTCN search
param_grid = {
    'num_filters': [32, 64],
    'kernel_size': [3, 7],
    'dropout': [0.0, 0.3],
    'lr': [5e-4, 1e-3, 2e-3],
    'batch_size': [32, 64, 128]
}

MAX_EPOCHS = 50  # upper bound on epochs per configuration
PATIENCE = 3     # stop after this many consecutive epochs without a val-accuracy gain

# Setup directories
setup_directories()

results = []
best_val_acc = 0    # best validation accuracy over ALL configurations
best_config = None  # hyperparameters of the configuration that reached it

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (nf, ks, dr, lr, bs)
    for nf in param_grid['num_filters']
    for ks in param_grid['kernel_size']
    for dr in param_grid['dropout']
    for lr in param_grid['lr']
    for bs in param_grid['batch_size']
]

for nf, ks, dr, lr, bs in configs:
    print(f"\nTraining with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
    run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'
    history_path = f'improved_tcn_results/history_{run_tag}.csv'

    # Prepare data loaders
    train_loader = make_loader(Xtr, ytr, bs)
    val_loader = make_loader(Xva, yva, bs)

    # Instantiate model, loss, optimizer
    model = ImprovedTCN(
        in_channels=7,
        num_classes=3,
        num_filters=nf,
        kernel_size=ks,
        dropout=dr
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases —
    # confirm against the installed version before upgrading.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=2, verbose=True
    )

    # Per-run tracker, kept separate from the global `best_val_acc` so the
    # cross-configuration best is not overwritten by every new run.
    run_best_val_acc = 0.0
    best_weights = None
    epochs_no_improve = 0
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    # Train up to MAX_EPOCHS epochs with early stopping
    for epoch in range(MAX_EPOCHS):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

        # Update learning rate
        scheduler.step(val_acc)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f'Epoch {epoch+1}/{MAX_EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > run_best_val_acc:
            run_best_val_acc = val_acc
            epochs_no_improve = 0
            # state_dict() hands back references to the live tensors, which
            # later optimiser steps overwrite; clone them to get a real snapshot.
            best_weights = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            # Save best model weights
            torch.save(model.state_dict(), f'improved_tcn_models/model_{run_tag}.pth')

            # Save training history (kept here so an interrupted run leaves a file)
            pd.DataFrame(history).to_csv(history_path, index=False)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Rewrite the history so it also covers the epochs after the best one.
    pd.DataFrame(history).to_csv(history_path, index=False)

    # Restore best weights (skipped if no epoch ever improved on 0.0)
    if best_weights is not None:
        model.load_state_dict(best_weights)

    config = {
        'num_filters': nf,
        'kernel_size': ks,
        'dropout': dr,
        'lr': lr,
        'batch_size': bs,
    }
    results.append({
        **config,
        'best_val_acc': run_best_val_acc,
        'epochs_trained': epoch + 1
    })
    print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {run_best_val_acc:.3f} (stopped at epoch {epoch+1})")

    # Track the best configuration across the whole grid
    if run_best_val_acc > best_val_acc:
        best_val_acc = run_best_val_acc
        best_config = config

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('improved_tcn_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training with: nf=32, ks=3, dr=0.0, lr=5e-04, bs=32
Epoch 1/50:
Train Loss: 1.0775, Train Acc: 0.3948
Val Loss: 1.0732, Val Acc: 0.3978
Epoch 2/50:
Train Loss: 1.0215, Train Acc: 0.4609
Val Loss: 0.9698, Val Acc: 0.5037
Epoch 3/50:
Train Loss: 0.9582, Train Acc: 0.5163
Val Loss: 0.9289, Val Acc: 0.5359
Epoch 4/50:
Train Loss: 0.8961, Train Acc: 0.5627
Val Loss: 0.8708, Val Acc: 0.5837
Epoch 5/50:
Train Loss: 0.8364, Train Acc: 0.6011
Val Loss: 0.8424, Val Acc: 0.5958
Epoch 6/50:
Train Loss: 0.7776, Train Acc: 0.6387
Val Loss: 0.8147, Val Acc: 0.6209
Epoch 7/50:
Train Loss: 0.7175, Train Acc: 0.6743
Val Loss: 0.7980, Val Acc: 0.6348
Epoch 8/50:
Train Loss: 0.6633, Train Acc: 0.7059
Val Loss: 0.7603, Val Acc: 0.6534
Epoch 9/50:
Train Loss: 0.6048, Train Acc: 0.7388
Val Loss: 0.7518, Val Acc: 0.6682
Epoch 10/50:
Train Loss: 0.5532, Train Acc: 0.7660
Val Loss: 0.7776, Val Acc: 0.6641
Epoch 11/50:
Train Loss: 0.4967, Train Acc: 0.7921
Val Loss: 0.7513, Val Acc: 0.6783
Epoch 12/50:
Train Lo

KeyboardInterrupt: 

## Deeper TCN with Spatial Dropout

In [None]:
class ImprovedSpatialTemporalBlock(nn.Module):
    """Residual block of two dilated 1-D convolutions with channel-wise dropout.

    Both convolutions are padded symmetrically so the sequence length is
    preserved and the input (optionally projected by a 1x1 convolution) can
    be added back onto the output.

    Args:
        in_channels: number of channels in the input.
        out_channels: number of channels produced by both convolutions.
        kernel_size: convolution kernel width.
        dilation: dilation shared by both convolutions.
        dropout: probability of zeroing an entire channel.

    Raises:
        ValueError: if ``(kernel_size - 1) * dilation`` is odd. Symmetric
            padding then cannot preserve the sequence length and the
            residual addition in ``forward`` would fail.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout):
        super().__init__()
        span = (kernel_size - 1) * dilation
        if span % 2 != 0:
            raise ValueError(
                "(kernel_size - 1) * dilation must be even for symmetric "
                f"length-preserving padding, got kernel_size={kernel_size}, "
                f"dilation={dilation}"
            )
        # Symmetric padding for better temporal modeling
        padding = span // 2

        # Activations here are 3-D (batch, channels, length). Dropout2d on 3-D
        # input only works through a legacy path that emits a warning and is
        # slated to change; Dropout1d is the layer meant for this layout.
        # Fall back to Dropout2d on PyTorch versions that predate Dropout1d.
        channel_dropout = getattr(nn, 'Dropout1d', nn.Dropout2d)

        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=padding, dilation=dilation)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.drop1 = channel_dropout(dropout)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                              padding=padding, dilation=dilation)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.drop2 = channel_dropout(dropout)

        # Skip connection with 1x1 convolution for channel matching;
        # an empty Sequential acts as the identity when channels already match.
        self.skip = nn.Sequential()
        if in_channels != out_channels:
            self.skip = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 1),
                nn.BatchNorm1d(out_channels)
            )

    def forward(self, x):
        """Apply the block to ``x`` of shape (batch, in_channels, length).

        Returns a tensor of shape (batch, out_channels, length).
        """
        residual = self.skip(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.drop1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.drop2(out)
        out += residual
        out = F.relu(out)
        return out

class ImprovedTCN(nn.Module):
    """Five-block dilated convolutional classifier with a three-stage FC head.

    A stem convolution feeds five ``ImprovedSpatialTemporalBlock`` stages
    (``tcn1`` .. ``tcn5``). Each stage doubles the channel count and the
    dilation (1, 2, 4, 8, 16). The result is average-pooled over time and
    classified by fully connected layers.

    Args:
        in_channels: channels per time step of the input.
        num_classes: size of the output layer.
        num_filters: channel count after the stem convolution.
        kernel_size: kernel width used by the stem and every block.
        dropout: dropout probability for the blocks and the FC head.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()

        # Stem convolution
        self.conv1 = nn.Conv1d(
            in_channels, num_filters, kernel_size, padding=kernel_size // 2
        )
        self.bn1 = nn.BatchNorm1d(num_filters)

        # tcn1..tcn5: width doubles each stage, dilation is 2**stage
        width = num_filters
        for stage in range(5):
            setattr(
                self,
                f'tcn{stage + 1}',
                ImprovedSpatialTemporalBlock(
                    width, width * 2, kernel_size,
                    dilation=2 ** stage, dropout=dropout,
                ),
            )
            width = width * 2

        # Global average pooling over the time axis
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # FC head; `width` now equals num_filters * 32
        self.fc1 = nn.Linear(width, 256)
        self.bn_fc1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn_fc2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 64)
        self.bn_fc3 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, time, channels) to (batch, num_classes) scores."""
        # Conv1d wants channels before time, so swap the last two axes.
        feats = x.permute(0, 2, 1)

        # Stem: conv → batch-norm → ReLU
        feats = F.relu(self.bn1(self.conv1(feats)))

        # Temporal blocks, applied in order
        for block in (self.tcn1, self.tcn2, self.tcn3, self.tcn4, self.tcn5):
            feats = block(feats)

        # Collapse the time axis, then flatten to (batch, features)
        pooled = self.global_pool(feats)
        hidden = pooled.view(pooled.size(0), -1)

        # Three identical FC stages: linear → batch-norm → ReLU → dropout
        fc_stages = (
            (self.fc1, self.bn_fc1),
            (self.fc2, self.bn_fc2),
            (self.fc3, self.bn_fc3),
        )
        for linear, norm in fc_stages:
            hidden = self.drop(F.relu(norm(linear(hidden))))

        return self.fc4(hidden)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for a single pass over ``loader``.

    Args:
        model: network to optimise; put into training mode first.
        loader: yields ``(inputs, targets)`` batches and exposes ``.dataset``.
        criterion: loss callable applied to ``(outputs, targets)``.
        optimizer: stepped after every batch.
        device: target device for each batch.

    Returns:
        Tuple ``(loss, accuracy)``, each divided by ``len(loader.dataset)``.
    """
    model.train()
    running_loss, hits = 0, 0

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        scores = model(batch_x)
        step_loss = criterion(scores, batch_y)
        step_loss.backward()
        optimizer.step()

        # Scale by batch size so the final division gives a per-sample mean
        # (assumes the criterion averages over the batch).
        running_loss += step_loss.item() * batch_x.size(0)
        predictions = scores.argmax(1)
        hits += (predictions == batch_y).sum().item()

    total = len(loader.dataset)
    return running_loss / total, hits / total

def eval_one_epoch(model, loader, criterion, device):
    """Evaluate ``model`` over ``loader`` with gradients disabled.

    Args:
        model: network to score; put into eval mode first.
        loader: yields ``(inputs, targets)`` batches and exposes ``.dataset``.
        criterion: loss callable applied to ``(outputs, targets)``.
        device: target device for each batch.

    Returns:
        Tuple ``(loss, accuracy)``, each divided by ``len(loader.dataset)``.
    """
    model.eval()
    running_loss, hits = 0, 0

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            scores = model(batch_x)
            step_loss = criterion(scores, batch_y)

            running_loss += step_loss.item() * batch_x.size(0)
            predictions = scores.argmax(1)
            hits += (predictions == batch_y).sum().item()

    total = len(loader.dataset)
    return running_loss / total, hits / total

def setup_directories():
    """Create the spatial-dropout TCN model and result folders if missing.

    Paths are relative, so the folders are created under the current
    working directory.
    """
    for folder in ('improved_spatial_tcn_models', 'improved_spatial_tcn_results'):
        Path(folder).mkdir(exist_ok=True)

# Hyperparameter grid for the spatial-dropout ImprovedTCN search
param_grid = {
    'num_filters': [64],
    'kernel_size': [3],
    'dropout': [0.1, 0.3],
    'lr': [1e-3, 2e-3],
    'batch_size': [64, 128]
}

MAX_EPOCHS = 50  # upper bound on epochs per configuration
PATIENCE = 3     # stop after this many consecutive epochs without a val-accuracy gain

# Setup directories
setup_directories()

results = []
best_val_acc = 0    # best validation accuracy over ALL configurations
best_config = None  # hyperparameters of the configuration that reached it

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (nf, ks, dr, lr, bs)
    for nf in param_grid['num_filters']
    for ks in param_grid['kernel_size']
    for dr in param_grid['dropout']
    for lr in param_grid['lr']
    for bs in param_grid['batch_size']
]

for nf, ks, dr, lr, bs in configs:
    print(f"\nTraining with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
    run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'
    history_path = f'improved_spatial_tcn_results/history_{run_tag}.csv'

    # Prepare data loaders
    train_loader = make_loader(Xtr, ytr, bs)
    val_loader = make_loader(Xva, yva, bs)

    # Instantiate model, loss, optimizer
    model = ImprovedTCN(
        in_channels=7,
        num_classes=3,
        num_filters=nf,
        kernel_size=ks,
        dropout=dr
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases —
    # confirm against the installed version before upgrading.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=2, verbose=True
    )

    # Per-run tracker, kept separate from the global `best_val_acc` so the
    # cross-configuration best is not overwritten by every new run.
    run_best_val_acc = 0.0
    best_weights = None
    epochs_no_improve = 0
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    # Train up to MAX_EPOCHS epochs with early stopping
    for epoch in range(MAX_EPOCHS):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

        # Update learning rate
        scheduler.step(val_acc)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f'Epoch {epoch+1}/{MAX_EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > run_best_val_acc:
            run_best_val_acc = val_acc
            epochs_no_improve = 0
            # state_dict() hands back references to the live tensors, which
            # later optimiser steps overwrite; clone them to get a real snapshot.
            best_weights = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            # Save best model weights
            torch.save(model.state_dict(), f'improved_spatial_tcn_models/model_{run_tag}.pth')

            # Save training history (kept here so an interrupted run leaves a file)
            pd.DataFrame(history).to_csv(history_path, index=False)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Rewrite the history so it also covers the epochs after the best one.
    pd.DataFrame(history).to_csv(history_path, index=False)

    # Restore best weights (skipped if no epoch ever improved on 0.0)
    if best_weights is not None:
        model.load_state_dict(best_weights)

    config = {
        'num_filters': nf,
        'kernel_size': ks,
        'dropout': dr,
        'lr': lr,
        'batch_size': bs,
    }
    results.append({
        **config,
        'best_val_acc': run_best_val_acc,
        'epochs_trained': epoch + 1
    })
    print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {run_best_val_acc:.3f} (stopped at epoch {epoch+1})")

    # Track the best configuration across the whole grid
    if run_best_val_acc > best_val_acc:
        best_val_acc = run_best_val_acc
        best_config = config

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('improved_spatial_tcn_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))

# Causal TCN

In [33]:
# Temporal Convolutional Network (TCN)
class CausalTemporalBlock(nn.Module):
    """Residual block of two left-padded (causal) dilated 1-D convolutions.

    The convolutions themselves use no padding. ``forward`` pads
    ``(kernel_size - 1) * dilation`` zeros on the left before each one, so
    the output keeps the input length and position ``t`` never sees inputs
    later than ``t``.

    Args:
        in_channels: number of channels in the input.
        out_channels: number of channels produced by both convolutions.
        kernel_size: convolution kernel width.
        dilation: dilation shared by both convolutions.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        # Kept so forward() can work out the left padding.
        self.kernel_size = kernel_size
        self.dilation = dilation

        # padding=0 on purpose: causal padding is applied by hand in forward()
        self.conv1 = nn.Conv1d(
            in_channels, out_channels, kernel_size, padding=0, dilation=dilation
        )
        self.bn1 = nn.BatchNorm1d(out_channels)

        self.conv2 = nn.Conv1d(
            out_channels, out_channels, kernel_size, padding=0, dilation=dilation
        )
        self.bn2 = nn.BatchNorm1d(out_channels)

        # Shortcut: 1x1 projection when the channel count changes,
        # otherwise an empty Sequential, which acts as the identity.
        if in_channels != out_channels:
            self.skip = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 1),
                nn.BatchNorm1d(out_channels)
            )
        else:
            self.skip = nn.Sequential()

    def forward(self, x):
        """Apply the block to ``x`` of shape (batch, in_channels, length)."""
        shortcut = self.skip(x)

        # Left-only padding keeps each output step blind to future inputs.
        left_pad = (self.kernel_size - 1) * self.dilation

        hidden = F.pad(x, (left_pad, 0))
        hidden = F.relu(self.bn1(self.conv1(hidden)))

        hidden = F.pad(hidden, (left_pad, 0))
        hidden = self.bn2(self.conv2(hidden))

        # Align the shortcut to the last `hidden.size(2)` steps before adding.
        hidden += shortcut[:, :, -hidden.size(2):]
        return F.relu(hidden)

class TCN(nn.Module):
    """Causal temporal convolutional classifier.

    A left-padded stem convolution is followed by three
    ``CausalTemporalBlock`` stages with dilations 1, 2 and 4. Each stage
    doubles the channel count. The result is average-pooled over time and
    classified by a two-stage FC head.

    Args:
        in_channels: channels per time step of the input.
        num_classes: size of the output layer.
        num_filters: channel count after the stem convolution.
        kernel_size: kernel width used by the stem and every block.
        dropout: dropout probability used in the FC head only.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout):
        super().__init__()

        # Stem; padding=0 because forward() pads on the left by hand
        self.conv1 = nn.Conv1d(in_channels, num_filters, kernel_size, padding=0)
        self.bn1 = nn.BatchNorm1d(num_filters)

        # Channel widths at the input/output of each temporal block
        w1, w2, w3 = num_filters * 2, num_filters * 4, num_filters * 8
        self.tcn1 = CausalTemporalBlock(num_filters, w1, kernel_size, dilation=1)
        self.tcn2 = CausalTemporalBlock(w1, w2, kernel_size, dilation=2)
        self.tcn3 = CausalTemporalBlock(w2, w3, kernel_size, dilation=4)

        # Global average pooling over the time axis
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        # FC head
        self.fc1 = nn.Linear(w3, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, time, channels) to (batch, num_classes) scores."""
        # Conv1d wants channels before time, so swap the last two axes.
        seq = x.permute(0, 2, 1)

        # Causal stem: pad kernel_size - 1 zeros on the left only
        left_pad = self.conv1.kernel_size[0] - 1
        seq = F.pad(seq, (left_pad, 0))
        seq = F.relu(self.bn1(self.conv1(seq)))

        # Temporal blocks, applied in order
        for block in (self.tcn1, self.tcn2, self.tcn3):
            seq = block(seq)

        # Collapse the time axis, then flatten to (batch, features)
        pooled = self.global_pool(seq)
        feats = pooled.view(pooled.size(0), -1)

        # Two FC stages: linear → batch-norm → ReLU → dropout
        feats = self.drop(F.relu(self.bn_fc1(self.fc1(feats))))
        feats = self.drop(F.relu(self.bn_fc2(self.fc2(feats))))

        return self.fc3(feats)

# Setup directories for Causal TCN
def setup_tcn_directories():
    """Create the causal TCN model and result folders if missing.

    Paths are relative, so the folders are created under the current
    working directory.
    """
    for folder in ('causal_tcn_models', 'causal_tcn_results'):
        Path(folder).mkdir(exist_ok=True)

# %%
# TCN hyperparameter search
param_grid = {
    'num_filters': [16, 32],
    'kernel_size': [3, 5],
    'dropout':     [0.3, 0.5],
    'lr':          [1e-3, 5e-4, 1e-4],
    'batch_size':  [64, 128, 256]
}

MAX_EPOCHS = 50  # upper bound on epochs per configuration
PATIENCE = 3     # stop after this many consecutive epochs without a val-accuracy gain

# Setup directories
setup_tcn_directories()

results = []
best_val_acc = 0    # best validation accuracy over ALL configurations
best_config = None  # hyperparameters of the configuration that reached it

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (nf, ks, dr, lr, bs)
    for nf in param_grid['num_filters']
    for ks in param_grid['kernel_size']
    for dr in param_grid['dropout']
    for lr in param_grid['lr']
    for bs in param_grid['batch_size']
]

for nf, ks, dr, lr, bs in configs:
    print(f"\nTraining Causal TCN with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
    run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'
    history_path = f'causal_tcn_results/history_{run_tag}.csv'

    # Prepare data loaders
    train_loader = make_loader(Xtr, ytr, bs)
    val_loader   = make_loader(Xva, yva, bs)

    # Instantiate model, loss, optimizer
    model = TCN(in_channels=7, num_classes=3,
                num_filters=nf, kernel_size=ks,
                dropout=dr).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Per-run tracker, kept separate from the global `best_val_acc` so the
    # cross-configuration best is not overwritten by every new run.
    run_best_val_acc = 0.0
    best_weights = None
    epochs_no_improve = 0
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    # Train up to MAX_EPOCHS epochs with early stopping
    for epoch in range(MAX_EPOCHS):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f'Epoch {epoch+1}/{MAX_EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > run_best_val_acc:
            run_best_val_acc = val_acc
            epochs_no_improve = 0
            # state_dict() hands back references to the live tensors, which
            # later optimiser steps overwrite; clone them to get a real snapshot.
            best_weights = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            # Save best model weights
            torch.save(model.state_dict(), f'causal_tcn_models/model_{run_tag}.pth')

            # Save training history (kept here so an interrupted run leaves a file)
            pd.DataFrame(history).to_csv(history_path, index=False)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Rewrite the history so it also covers the epochs after the best one.
    pd.DataFrame(history).to_csv(history_path, index=False)

    # Restore best weights (skipped if no epoch ever improved on 0.0)
    if best_weights is not None:
        model.load_state_dict(best_weights)

    config = {
        'num_filters': nf,
        'kernel_size': ks,
        'dropout':     dr,
        'lr':          lr,
        'batch_size':  bs,
    }
    results.append({
        **config,
        'best_val_acc': run_best_val_acc,
        'epochs_trained': epoch + 1
    })
    print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {run_best_val_acc:.3f} (stopped at epoch {epoch+1})")

    # Track the best configuration across the whole grid
    if run_best_val_acc > best_val_acc:
        best_val_acc = run_best_val_acc
        best_config = config

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('causal_tcn_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 TCN configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training Causal TCN with: nf=16, ks=3, dr=0.3, lr=1e-03, bs=64
Epoch 1/50:
Train Loss: 1.0904, Train Acc: 0.3836
Val Loss: 1.0461, Val Acc: 0.4307
Epoch 2/50:
Train Loss: 1.0335, Train Acc: 0.4499
Val Loss: 0.9878, Val Acc: 0.4892
Epoch 3/50:
Train Loss: 0.9845, Train Acc: 0.4963
Val Loss: 1.0081, Val Acc: 0.4833
Epoch 4/50:
Train Loss: 0.9409, Train Acc: 0.5304
Val Loss: 0.9827, Val Acc: 0.5093
Epoch 5/50:
Train Loss: 0.9019, Train Acc: 0.5572
Val Loss: 0.9365, Val Acc: 0.5207
Epoch 6/50:
Train Loss: 0.8662, Train Acc: 0.5828
Val Loss: 0.8905, Val Acc: 0.5593
Epoch 7/50:
Train Loss: 0.8316, Train Acc: 0.6076
Val Loss: 0.8707, Val Acc: 0.5744
Epoch 8/50:
Train Loss: 0.7926, Train Acc: 0.6329
Val Loss: 0.8529, Val Acc: 0.5905
Epoch 9/50:
Train Loss: 0.7579, Train Acc: 0.6549
Val Loss: 0.8727, Val Acc: 0.5874
Epoch 10/50:
Train Loss: 0.7261, Train Acc: 0.6741
Val Loss: 0.8469, Val Acc: 0.6003
Epoch 11/50:
Train Loss: 0.6915, Train Acc: 0.6943
Val Loss: 0.8830, Val Acc: 0.5929
Epoch 12/5

# Hybrid CNN LSTM

In [32]:
# Hybrid CNN-LSTM Model
class SimplifiedHybridCNNLSTM(nn.Module):
    """CNN front-end, single-layer LSTM and attention pooling for classification.

    Two conv → batch-norm → ReLU → max-pool stages shorten the sequence. A
    unidirectional LSTM processes the result. A learned softmax weighting
    over time steps collapses the LSTM outputs into one vector, which a
    small FC head classifies.

    Args:
        in_channels: channels per time step of the input.
        num_classes: size of the output layer.
        num_filters: channels of the first convolution (the second uses twice as many).
        kernel_size: kernel width of both convolutions.
        dropout: dropout probability in the FC head.
        hidden_size: LSTM hidden state size.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout, hidden_size=64):
        super().__init__()
        conv_width = num_filters * 2
        same_pad = kernel_size // 2

        # Convolutional feature extractor
        self.conv1 = nn.Conv1d(in_channels, num_filters, kernel_size, padding=same_pad)
        self.bn1 = nn.BatchNorm1d(num_filters)
        self.conv2 = nn.Conv1d(num_filters, conv_width, kernel_size, padding=same_pad)
        self.bn2 = nn.BatchNorm1d(conv_width)
        self.pool = nn.MaxPool1d(2)

        # One unidirectional LSTM layer over the pooled feature sequence
        self.lstm = nn.LSTM(
            input_size=conv_width,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )

        # One scalar attention score per time step
        self.attention = nn.Linear(hidden_size, 1)

        # Classification head
        self.fc1 = nn.Linear(hidden_size, 64)
        self.bn_fc1 = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map ``x`` (permuted internally to channels-first) to class scores."""
        # Channels-first for the convolutions
        feats = x.permute(0, 2, 1)
        feats = self.pool(F.relu(self.bn1(self.conv1(feats))))
        feats = self.pool(F.relu(self.bn2(self.conv2(feats))))

        # Back to (batch, steps, features) for the batch_first LSTM
        steps = feats.permute(0, 2, 1)
        lstm_out, _ = self.lstm(steps)

        # Softmax over the step axis, then a weighted sum of LSTM outputs
        scores = self.attention(lstm_out)
        attention_weights = F.softmax(scores, dim=1)
        context = (attention_weights * lstm_out).sum(dim=1)

        # Classification
        hidden = F.relu(self.bn_fc1(self.fc1(context)))
        return self.fc2(self.drop(hidden))

# %%
# Setup directories for Hybrid model
def setup_hybrid_directories():
    """Create the hybrid CNN-LSTM model and result folders if missing.

    Paths are relative, so the folders are created under the current
    working directory.
    """
    for folder in ('hybrid_models', 'hybrid_results'):
        Path(folder).mkdir(exist_ok=True)

# %%
# Hybrid model hyperparameter search
param_grid = {
    'num_filters': [32, 64],
    'kernel_size': [3],
    'dropout':     [0.0, 0.3],
    'lr':          [1e-3],
    'batch_size':  [32, 64],
    'hidden_size': [64, 128]  # LSTM hidden size
}

MAX_EPOCHS = 50  # upper bound on epochs per configuration
PATIENCE = 3     # stop after this many consecutive epochs without a val-accuracy gain

# Setup directories
setup_hybrid_directories()

results = []
best_val_acc = 0    # best validation accuracy over ALL configurations
best_config = None  # hyperparameters of the configuration that reached it

# Flattened grid, enumerated in the same order as six nested loops would be.
configs = [
    (nf, ks, dr, lr, bs, hs)
    for nf in param_grid['num_filters']
    for ks in param_grid['kernel_size']
    for dr in param_grid['dropout']
    for lr in param_grid['lr']
    for bs in param_grid['batch_size']
    for hs in param_grid['hidden_size']
]

for nf, ks, dr, lr, bs, hs in configs:
    print(f"\nTraining Hybrid with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}, hs={hs}")
    run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}_hs{hs}'
    history_path = f'hybrid_results/history_{run_tag}.csv'

    # Prepare data loaders
    train_loader = make_loader(Xtr, ytr, bs)
    val_loader   = make_loader(Xva, yva, bs)

    # Instantiate model, loss, optimizer
    model = SimplifiedHybridCNNLSTM(
        in_channels=7,
        num_classes=3,
        num_filters=nf,
        kernel_size=ks,
        dropout=dr,
        hidden_size=hs
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Per-run tracker, kept separate from the global `best_val_acc` so the
    # cross-configuration best is not overwritten by every new run.
    run_best_val_acc = 0.0
    best_weights = None
    epochs_no_improve = 0
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    # Train up to MAX_EPOCHS epochs with early stopping
    for epoch in range(MAX_EPOCHS):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f'Epoch {epoch+1}/{MAX_EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > run_best_val_acc:
            run_best_val_acc = val_acc
            epochs_no_improve = 0
            # state_dict() hands back references to the live tensors, which
            # later optimiser steps overwrite; clone them to get a real snapshot.
            best_weights = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            # Save best model weights
            torch.save(model.state_dict(), f'hybrid_models/model_{run_tag}.pth')

            # Save training history (kept here so an interrupted run leaves a file)
            pd.DataFrame(history).to_csv(history_path, index=False)
        else:
            epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Rewrite the history so it also covers the epochs after the best one.
    pd.DataFrame(history).to_csv(history_path, index=False)

    # Restore best weights (skipped if no epoch ever improved on 0.0)
    if best_weights is not None:
        model.load_state_dict(best_weights)

    config = {
        'num_filters': nf,
        'kernel_size': ks,
        'dropout': dr,
        'lr': lr,
        'batch_size': bs,
        'hidden_size': hs,
    }
    results.append({
        **config,
        'best_val_acc': run_best_val_acc,
        'epochs_trained': epoch + 1
    })
    print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}, hs={hs} → {run_best_val_acc:.3f} (stopped at epoch {epoch+1})")

    # Track the best configuration across the whole grid
    if run_best_val_acc > best_val_acc:
        best_val_acc = run_best_val_acc
        best_config = config

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('hybrid_results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 Hybrid configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training Hybrid with: nf=32, ks=3, dr=0.0, lr=1e-03, bs=32, hs=64
Epoch 1/50:
Train Loss: 1.0915, Train Acc: 0.3794
Val Loss: 1.0831, Val Acc: 0.3925
Epoch 2/50:
Train Loss: 1.0751, Train Acc: 0.4004
Val Loss: 1.0733, Val Acc: 0.3943
Epoch 3/50:
Train Loss: 1.0633, Train Acc: 0.4176
Val Loss: 1.0721, Val Acc: 0.4064
Epoch 4/50:
Train Loss: 1.0532, Train Acc: 0.4269
Val Loss: 1.0416, Val Acc: 0.4417
Epoch 5/50:
Train Loss: 1.0431, Train Acc: 0.4411
Val Loss: 1.0436, Val Acc: 0.4402
Epoch 6/50:
Train Loss: 1.0311, Train Acc: 0.4532
Val Loss: 1.0252, Val Acc: 0.4534
Epoch 7/50:
Train Loss: 1.0140, Train Acc: 0.4727
Val Loss: 1.0096, Val Acc: 0.4807
Epoch 8/50:
Train Loss: 1.0002, Train Acc: 0.4808
Val Loss: 0.9993, Val Acc: 0.4843
Epoch 9/50:
Train Loss: 0.9876, Train Acc: 0.4937
Val Loss: 1.0006, Val Acc: 0.4827
Epoch 10/50:
Train Loss: 0.9776, Train Acc: 0.5025
Val Loss: 0.9740, Val Acc: 0.5067
Epoch 11/50:
Train Loss: 0.9712, Train Acc: 0.5077
Val Loss: 0.9759, Val Acc: 0.5050
Epoch 1