In [1]:
import copy
import glob
import itertools
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Load two example sessions for a first look (adjust the paths if needed).
# Each session is a pair of files: a feature array (.npy) and a label table (.csv).
X1, y1 = (
    np.load('deep_learning_data/mo_ew2_accdata_21_10_139-1336_x.npy'),
    pd.read_csv('deep_learning_data/mo_ew2_accdata_21_10_139-1336_y.csv'),
)

X2, y2 = (
    np.load('deep_learning_data/mo_ew3_accdata_16_12_1419-1449_x.npy'),
    pd.read_csv('deep_learning_data/mo_ew3_accdata_16_12_1419-1449_y.csv'),
)

In [3]:
# Report, for each sample session, the feature shape, the distinct label values
# and the label-table shape.
for tag, features, labels in (("1", X1, y1), ("2", X2, y2)):
    print(f"X{tag} shape:", features.shape)
    print(f"y{tag} unique:", np.unique(labels))
    print(f"y{tag} shape:", labels.shape)

X1 shape: (1186, 24, 7)
y1 unique: [-1.  0.  1.]
y1 shape: (1185, 1)
X2 shape: (1182, 24, 7)
y2 unique: [-1.  0.  1.]
y2 shape: (1181, 1)


In [4]:
# Collect every feature file (*_x.npy) and every label file (*_y.csv) in the data folder.
# Both lists are sorted so they can be paired up by position further down.
x_files = sorted(glob.glob('deep_learning_data/*_x.npy'))
y_files = sorted(glob.glob('deep_learning_data/*_y.csv'))

print(f"Found {len(x_files)} dataset pairs")

Found 44 dataset pairs


In [5]:
# The X and Y lists are paired by position, so they must have the same length ...
assert len(x_files) == len(y_files), "Mismatch in number of X vs Y files!"

# ... and every positional pair must share the same session prefix. Equal counts
# alone would not catch one missing X file offset by one stray Y file.
for x_path, y_path in zip(x_files, y_files):
    x_stem = x_path[:-len('_x.npy')]
    y_stem = y_path[:-len('_y.csv')]
    assert x_stem == y_stem, f"Unpaired files: {x_path} vs {y_path}"

In [6]:
def load_data(x_path, y_path):
    """Load one session's windows and labels, trimmed to a common length.

    Parameters
    ----------
    x_path : str or path-like
        ``.npy`` file holding the windows; the first axis indexes windows.
    y_path : str or path-like
        CSV file holding the labels. It is read with ``pd.read_csv`` defaults,
        so its first line is interpreted as a header.

    Returns
    -------
    tuple of (X, y)
        Both truncated at the end to ``min(len(X), len(y))`` entries.
    """
    X = np.load(x_path)                      # shape (n_windows, 24, 7)
    # squeeze() collapses a one-row label file to a 0-d array, on which len()
    # raises TypeError; atleast_1d keeps y one-dimensional in that case as well.
    y = np.atleast_1d(pd.read_csv(y_path).values.squeeze())  # shape (n_labels,)
    # NOTE(review): the notebook output for both sample sessions shows X with exactly
    # one more row than the label table (1186 vs 1185, 1182 vs 1181). If the CSVs have
    # no header line, read_csv is consuming the first label as the header and every
    # label is shifted by one window -- confirm against the raw files.
    n = min(len(X), len(y))
    return X[:n], y[:n]

# Read every (features, labels) session pair, keeping the per-session arrays.
sessions = [load_data(xf, yf) for xf, yf in zip(x_files, y_files)]
all_X = [session_X for session_X, _ in sessions]
all_y = [session_y for _, session_y in sessions]

# Stack the sessions along the window axis.
X_all = np.concatenate(all_X, axis=0)       # (total_windows, 24, 7)
y_all = np.concatenate(all_y, axis=0)       # (total_windows,)

print(X_all.shape)
print(y_all.shape)

(53836, 24, 7)
(53836,)


In [7]:
# Sanity report on the pooled dataset.
label_values = np.unique(y_all)
# np.bincount needs non-negative ints, so the labels are shifted up by one first
# (-1 lands in bin 0).
label_counts = np.bincount(y_all.astype(int) + 1)

print(f"Total number of sessions: {len(x_files)}")
print(f"Final X shape: {X_all.shape}")
print(f"Final y shape: {y_all.shape}")
print(f"Unique labels in y: {label_values}")
print(f"Label distribution: {label_counts}")

Total number of sessions: 44
Final X shape: (53836, 24, 7)
Final y shape: (53836,)
Unique labels in y: [-1.  0.  1.]
Label distribution: [17954 17943 17939]


In [8]:
# Stratified 70% / 15% / 15% train / validation / test partition.
# NOTE(review): the split is made per window, not per session; if windows from the
# same session overlap or are strongly correlated, validation/test scores may be
# optimistic -- confirm how the windows were generated.

# Stage 1: hold out 15% of all windows as the test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.15,
    stratify=y_all,
    random_state=42,
)

# Stage 2: 15% of the full data equals 0.15 / 0.85 of what is left after stage 1.
val_ratio = 0.15 / 0.85
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=val_ratio,
    stratify=y_temp,
    random_state=42,
)

print("Train / Val / Test shapes:")
print(f"  X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"  X_val:   {X_val.shape},   y_val:   {y_val.shape}")
print(f"  X_test:  {X_test.shape},  y_test:  {y_test.shape}")

Train / Val / Test shapes:
  X_train: (37684, 24, 7), y_train: (37684,)
  X_val:   (8076, 24, 7),   y_val:   (8076,)
  X_test:  (8076, 24, 7),  y_test:  (8076,)


In [9]:
# Z-score normalisation: the statistics come from the training split only and are
# then applied unchanged to the validation and test splits.
n_channels = X_train.shape[-1]
# Merge the window and time axes so the scaler sees one row per time step:
# (N, T, C) -> (N*T, C)
train_flat = X_train.reshape(-1, n_channels)
scaler = StandardScaler().fit(train_flat)


def zscore_normalize(X):
    """Standardise the last axis of ``X`` with the train-fitted ``scaler``.

    ``X`` is flattened to 2-D for the scaler and reshaped back, so the result
    has the same shape as the input.
    """
    original_shape = X.shape
    flat_scaled = scaler.transform(X.reshape(-1, original_shape[-1]))
    return flat_scaled.reshape(original_shape)


# NOTE: this overwrites X_train / X_val / X_test, so re-running this cell without
# re-running the split cell would normalise data that is already normalised.
X_train, X_val, X_test = (
    zscore_normalize(split) for split in (X_train, X_val, X_test)
)

print("Done! Your data is now ready for modeling.")

Done! Your data is now ready for modeling.


# CNN

In [10]:
def get_device():
    """Choose the compute device: Apple MPS first, then CUDA, otherwise CPU.

    Returns
    -------
    torch.device
        ``mps``, ``cuda`` or ``cpu``, in that order of preference.
    """
    # torch.backends.mps does not exist on older PyTorch builds; treat a missing
    # backend as "MPS unavailable" rather than failing with AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


device = get_device()
print(f"Using device: {device}")

Using device: mps


In [11]:
# Display the validation labels (NumPy abbreviates the repr of a long array).
y_val

array([ 0.,  1.,  1., ..., -1., -1.,  1.])

In [12]:
# Features are (N, 24, 7); labels arrive as -1 / 0 / 1 and are shifted to the
# class indices 0 / 1 / 2.
def remap(y):
    """Map label -1 to 0 and label 0 to 1; every other value becomes 2."""
    is_minus_one = (y == -1)
    is_zero = (y == 0)
    zero_or_other = np.where(is_zero, 1, 2)
    return np.where(is_minus_one, 0, zero_or_other)

def _to_tensors(features, labels):
    """Return (float32 feature tensor, int64 tensor of remapped labels)."""
    feature_tensor = torch.tensor(features, dtype=torch.float32)
    label_tensor = torch.tensor(remap(labels), dtype=torch.long)
    return feature_tensor, label_tensor


Xtr, ytr = _to_tensors(X_train, y_train)
Xva, yva = _to_tensors(X_val, y_val)

In [13]:
# A simple function to make a loader
def make_loader(X, y, bs, shuffle=True):
    """Wrap feature and label tensors in a ``DataLoader``.

    Parameters
    ----------
    X, y : torch.Tensor
        Tensors with the same first dimension; paired through ``TensorDataset``.
    bs : int
        Batch size.
    shuffle : bool, default True
        Reshuffle the samples every epoch. The default keeps the previous
        behaviour; pass ``False`` for evaluation loaders where a fixed order
        is preferable.

    Returns
    -------
    torch.utils.data.DataLoader
    """
    ds = TensorDataset(X, y)
    return DataLoader(ds, batch_size=bs, shuffle=shuffle)

In [14]:
class CNN1D(nn.Module):
    """Two-stage 1D CNN classifier for inputs shaped (batch, time, channels).

    Architecture: [Conv1d -> ReLU -> MaxPool1d(2)] x 2, then
    Linear(64) -> ReLU -> Dropout -> Linear(num_classes). The forward pass
    returns raw logits (no softmax).

    Parameters
    ----------
    in_channels : int
        Number of input channels (size of the last input axis).
    num_classes : int
        Number of output logits.
    num_filters : int
        Filters in the first convolution; the second uses twice as many.
    kernel_size : int
        Convolution kernel width; both convolutions pad by ``kernel_size // 2``.
    dropout : float
        Dropout probability applied after the first fully connected layer.
    seq_len : int, default 24
        Number of time steps per input window. Used to size the first fully
        connected layer; the default matches the previously hard-coded length.

    Raises
    ------
    ValueError
        If ``seq_len`` is too short to survive both pooling stages.
    """

    def __init__(self, in_channels, num_classes, num_filters, kernel_size, dropout, seq_len=24):
        super().__init__()
        pad = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels, num_filters, kernel_size, padding=pad)
        self.conv2 = nn.Conv1d(num_filters, num_filters * 2, kernel_size, padding=pad)
        self.pool = nn.MaxPool1d(2)
        self.relu = nn.ReLU()

        # Follow the time dimension through both conv + pool stages instead of
        # hard-coding it (for seq_len=24 this gives 24 -> 12 -> 6).
        pooled_len = seq_len
        for _ in range(2):
            conv_len = pooled_len + 2 * pad - kernel_size + 1
            pooled_len = conv_len // 2
        if pooled_len < 1:
            raise ValueError(
                f"seq_len={seq_len} is too short for two conv+pool stages "
                f"with kernel_size={kernel_size}"
            )

        flat_dim = (num_filters * 2) * pooled_len
        self.fc1 = nn.Linear(flat_dim, 64)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for x of shape (batch, time, channels)."""
        # Conv1d expects channels before time: (batch, time, channels) -> (batch, channels, time)
        x = x.permute(0, 2, 1)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch axis for the fully connected head.
        x = x.flatten(start_dim=1)
        x = self.drop(self.relu(self.fc1(x)))
        return self.fc2(x)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report the epoch metrics.

    Parameters
    ----------
    model : nn.Module
        Put into training mode and updated in place.
    loader : iterable of (inputs, targets) batches
    criterion : callable(outputs, targets) -> scalar loss tensor
        Assumed to use mean reduction (the ``nn.CrossEntropyLoss`` default).
    optimizer : torch.optim.Optimizer
    device : torch.device
        Each batch is moved here before the forward pass.

    Returns
    -------
    (mean_loss, accuracy) : tuple of float
        Both averaged over the samples actually processed. Returns
        ``(0.0, 0.0)`` if the loader yields no batches.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        batch_size = xb.size(0)
        # The loss is a per-batch mean, so weight it by the batch size to get a
        # correct per-sample average when the last batch is smaller.
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(1) == yb).sum().item()
        n_seen += batch_size

    # Divide by the samples seen rather than len(loader.dataset): the two differ
    # when the loader skips samples (e.g. drop_last=True or a subset sampler).
    denom = max(n_seen, 1)
    return loss_sum / denom, n_correct / denom

def eval_one_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Parameters
    ----------
    model : nn.Module
        Put into evaluation mode (it is left in that mode on return).
    loader : iterable of (inputs, targets) batches
    criterion : callable(outputs, targets) -> scalar loss tensor
        Assumed to use mean reduction (the ``nn.CrossEntropyLoss`` default).
    device : torch.device
        Each batch is moved here before the forward pass.

    Returns
    -------
    (mean_loss, accuracy) : tuple of float
        Both averaged over the samples actually processed. Returns
        ``(0.0, 0.0)`` if the loader yields no batches.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            batch_size = xb.size(0)
            # Weight the per-batch mean loss by the batch size so a smaller
            # final batch does not skew the per-sample average.
            loss_sum += criterion(logits, yb).item() * batch_size
            n_correct += (logits.argmax(1) == yb).sum().item()
            n_seen += batch_size

    # Divide by the samples seen rather than len(loader.dataset): the two differ
    # when the loader skips samples (e.g. drop_last=True or a subset sampler).
    denom = max(n_seen, 1)
    return loss_sum / denom, n_correct / denom


In [15]:
def setup_directories():
    """Create the ``models`` and ``results`` folders in the working directory if they are missing."""
    for folder in ('models', 'results'):
        Path(folder).mkdir(exist_ok=True)

In [16]:
# Hyperparameter search space; every combination of these values is one configuration.
param_grid = dict(
    num_filters=[16, 32],
    kernel_size=[3, 5],
    dropout=[0.3, 0.5],
    lr=[1e-3, 5e-4, 1e-4],
    batch_size=[64, 128, 256],
)

In [None]:
# Exhaustive grid search over param_grid with early stopping on validation accuracy.
# For each configuration the best checkpoint and the per-epoch history are written
# to disk; a summary of all configurations is saved at the end.
setup_directories()

MAX_EPOCHS = 50   # upper bound on epochs per configuration
PATIENCE = 3      # stop after this many consecutive epochs without a val-accuracy gain
SEED = 42         # re-seeded per configuration so runs are repeatable and comparable

results = []
best_val_acc = 0.0    # best validation accuracy across ALL configurations
best_config = None    # hyperparameters that achieved best_val_acc

grid = itertools.product(
    param_grid['num_filters'],
    param_grid['kernel_size'],
    param_grid['dropout'],
    param_grid['lr'],
    param_grid['batch_size'],
)

for nf, ks, dr, lr, bs in grid:
    print(f"\nTraining with: nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs}")
    run_tag = f'nf{nf}_ks{ks}_dr{dr}_lr{lr:.0e}_bs{bs}'

    # Same seed for every configuration: identical initialisation RNG stream and
    # shuffling, so differences come from the hyperparameters.
    torch.manual_seed(SEED)

    # Prepare data loaders
    train_loader = make_loader(Xtr, ytr, bs)
    val_loader = make_loader(Xva, yva, bs)

    # Instantiate model, loss, optimizer
    model = CNN1D(in_channels=7, num_classes=3,
                  num_filters=nf, kernel_size=ks,
                  dropout=dr).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    config_best_acc = 0.0
    epochs_no_improve = 0
    # Snapshot the initial weights so best_weights is always defined for THIS
    # configuration, even if no epoch ever improves on 0.0.
    best_weights = copy.deepcopy(model.state_dict())
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    # Train up to MAX_EPOCHS epochs with simple early stopping
    for epoch in range(MAX_EPOCHS):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

        # Update history and persist it every epoch, so the CSV also contains the
        # epochs after the last improvement and survives an interrupted run.
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        pd.DataFrame(history).to_csv(f'results/history_{run_tag}.csv', index=False)

        print(f'Epoch {epoch+1}/{MAX_EPOCHS}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > config_best_acc:
            config_best_acc = val_acc
            epochs_no_improve = 0
            # state_dict() returns references to the live parameter tensors, which
            # keep changing as training continues; deep-copy to freeze this epoch.
            best_weights = copy.deepcopy(model.state_dict())

            # Save best model weights
            torch.save(best_weights, f'models/model_{run_tag}.pth')
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= PATIENCE:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

    # restore best weights
    model.load_state_dict(best_weights)

    config = {
        'num_filters': nf,
        'kernel_size': ks,
        'dropout':     dr,
        'lr':          lr,
        'batch_size':  bs,
    }
    results.append({
        **config,
        'best_val_acc': config_best_acc,
        'epochs_trained': epoch + 1
    })

    # Track the overall winner across configurations.
    if config_best_acc > best_val_acc:
        best_val_acc = config_best_acc
        best_config = config

    print(f"nf={nf}, ks={ks}, dr={dr}, lr={lr:.0e}, bs={bs} → {config_best_acc:.3f} (stopped at epoch {epoch+1})")

# Save all results
results_df = pd.DataFrame(results)
results_df.to_csv('results/hyperparameter_search_results.csv', index=False)

# Summarize top 10 configs
print("\nTop 10 configurations:")
print(results_df.sort_values('best_val_acc', ascending=False).head(10).to_string(index=False))


Training with: nf=16, ks=3, dr=0.3, lr=1e-03, bs=64
Epoch 1/50:
Train Loss: 1.0951, Train Acc: 0.3582
Val Loss: 1.0882, Val Acc: 0.3772
Epoch 2/50:
Train Loss: 1.0761, Train Acc: 0.4006
Val Loss: 1.0751, Val Acc: 0.4019
Epoch 3/50:
Train Loss: 1.0587, Train Acc: 0.4237
Val Loss: 1.0645, Val Acc: 0.4143
Epoch 4/50:
Train Loss: 1.0382, Train Acc: 0.4460
Val Loss: 1.0440, Val Acc: 0.4425
Epoch 5/50:
Train Loss: 1.0191, Train Acc: 0.4645
Val Loss: 1.0376, Val Acc: 0.4373
Epoch 6/50:
Train Loss: 1.0015, Train Acc: 0.4794
Val Loss: 1.0257, Val Acc: 0.4565
Epoch 7/50:
Train Loss: 0.9861, Train Acc: 0.4958
Val Loss: 1.0220, Val Acc: 0.4646
Epoch 8/50:
Train Loss: 0.9742, Train Acc: 0.5070
Val Loss: 1.0220, Val Acc: 0.4641
Epoch 9/50:
Train Loss: 0.9637, Train Acc: 0.5153
Val Loss: 1.0080, Val Acc: 0.4730
Epoch 10/50:
Train Loss: 0.9512, Train Acc: 0.5238
Val Loss: 1.0112, Val Acc: 0.4776
Epoch 11/50:
Train Loss: 0.9415, Train Acc: 0.5330
Val Loss: 1.0047, Val Acc: 0.4870
Epoch 12/50:
Train Lo