# Solution 1: 1D CNN Architecture

**Hypothesis**: CNNs might better capture local patterns in sensor data compared to RNNs.

**Approach**:
- Use 1D Convolutional layers to extract features
- Global Average Pooling instead of RNN hidden states
- Direct oversampling for balanced dataset

In [1]:
# Set seed for reproducibility
SEED = 42

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import random
import numpy as np
np.random.seed(SEED)
random.seed(SEED)

import torch
torch.manual_seed(SEED)
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
else:
    device = torch.device("cpu")

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
import pickle

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

PyTorch version: 2.9.0
Device: cpu


In [2]:
# Load and preprocess data
df = pd.read_csv("pirate_pain_train.csv")
target = pd.read_csv("pirate_pain_train_labels.csv")

# Encode categorical features
number_cols = ['n_legs', 'n_hands', 'n_eyes']
for col in number_cols:
    df[col] = df[col].astype('category').cat.codes

# Normalize joint columns
joint_cols = ["joint_" + str(i).zfill(2) for i in range(31)]
for col in joint_cols:
    df[col] = df[col].astype(np.float32)

minmax_scaler = MinMaxScaler()
df[joint_cols] = minmax_scaler.fit_transform(df[joint_cols])

data_cols = number_cols + joint_cols

# Map labels
label_mapping = {'no_pain': 0, 'low_pain': 1, 'high_pain': 2}
target['label'] = target['label'].map(label_mapping)

print("Data loaded and preprocessed")
print(f"Features: {len(data_cols)}")
print(f"Samples: {len(df['sample_index'].unique())}")

Data loaded and preprocessed
Features: 34
Samples: 661


In [3]:
# Build sequences
WINDOW_SIZE = 300
STRIDE = 50

def build_sequences(df, target_df, window=300, stride=50):
    dataset = []
    labels = []
    
    for id in df['sample_index'].unique():
        temp = df[df['sample_index'] == id][data_cols].values
        label = target_df[target_df['sample_index'] == id]['label'].values[0]
        
        padding_len = window - len(temp) % window
        padding = np.zeros((padding_len, len(data_cols)), dtype='float32')
        temp = np.concatenate((temp, padding))
        
        idx = 0
        while idx + window <= len(temp):
            dataset.append(temp[idx:idx + window])
            labels.append(label)
            idx += stride
    
    return np.array(dataset), np.array(labels)

# Split data
unique_users = df['sample_index'].unique()
random.seed(SEED)
random.shuffle(unique_users)

train_users, val_users = train_test_split(unique_users, test_size=0.2, random_state=SEED)

df_train = df[df['sample_index'].isin(unique_users[train_users])]
df_val = df[df['sample_index'].isin(unique_users[val_users])]

X_train, y_train = build_sequences(df_train, target, WINDOW_SIZE, STRIDE)
X_val, y_val = build_sequences(df_val, target, WINDOW_SIZE, STRIDE)

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")

Training sequences: (528, 300, 34)
Validation sequences: (133, 300, 34)


In [4]:
# Direct oversampling to balance dataset
train_class_counts = np.bincount(y_train.astype(int))
print("Original distribution:", train_class_counts)

target_count = train_class_counts[0]
duplication_factors = np.ceil(target_count / train_class_counts).astype(int)

X_train_balanced = []
y_train_balanced = []

for cls in range(len(train_class_counts)):
    cls_indices = np.where(y_train == cls)[0]
    for _ in range(duplication_factors[cls]):
        X_train_balanced.append(X_train[cls_indices])
        y_train_balanced.append(y_train[cls_indices])

X_train_balanced = np.concatenate(X_train_balanced, axis=0)
y_train_balanced = np.concatenate(y_train_balanced, axis=0)

balanced_counts = np.bincount(y_train_balanced.astype(int))
print("Balanced distribution:", balanced_counts)
print(f"Total samples: {len(y_train_balanced)}")

Original distribution: [407  72  49]
Balanced distribution: [407 432 441]
Total samples: 1280


In [5]:
# 1D CNN Model
class CNN1DClassifier(nn.Module):
    def __init__(self, input_size, num_classes, dropout=0.3):
        super().__init__()
        
        # Input: (batch, seq_len, features) -> need to transpose to (batch, features, seq_len) for Conv1d
        
        # Convolutional layers
        self.conv1 = nn.Conv1d(input_size, 64, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(64)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(128)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)
        
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(2)
        self.dropout = nn.Dropout(dropout)
        
        # Global Average Pooling
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        
        # Classifier
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )
    
    def forward(self, x):
        # x shape: (batch, seq_len, features)
        # Transpose to (batch, features, seq_len)
        x = x.transpose(1, 2)
        
        # Conv blocks
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.dropout(x)
        
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = self.dropout(x)
        
        x = self.relu(self.bn3(self.conv3(x)))
        
        # Global pooling
        x = self.global_pool(x).squeeze(-1)
        
        # Classifier
        x = self.fc(x)
        return x

# Initialize model
model = CNN1DClassifier(input_size=X_train.shape[-1], num_classes=3, dropout=0.3).to(device)
print("CNN Model created")
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

CNN Model created
Parameters: 184,771


In [6]:
# Create dataloaders
BATCH_SIZE = 32

train_ds = TensorDataset(torch.from_numpy(X_train_balanced).float(), torch.from_numpy(y_train_balanced).long())
val_ds = TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).long())

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

Train batches: 40
Val batches: 5


In [7]:
# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))

def train_epoch(model, loader, criterion, optimizer, scaler, device):
    model.train()
    total_loss = 0
    all_preds, all_labels = [], []
    
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        
        with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        
        total_loss += loss.item() * inputs.size(0)
        all_preds.extend(outputs.argmax(1).cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
    
    avg_loss = total_loss / len(loader.dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    return avg_loss, f1

def val_epoch(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            total_loss += loss.item() * inputs.size(0)
            all_preds.extend(outputs.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    avg_loss = total_loss / len(loader.dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    return avg_loss, f1, all_preds, all_labels

print("Training functions ready")

Training functions ready


In [8]:
# Train model
print("=" * 70)
print("Training 1D CNN Model")
print("=" * 70)

EPOCHS = 30
best_f1 = 0
history = {'train_loss': [], 'train_f1': [], 'val_loss': [], 'val_f1': []}

for epoch in range(1, EPOCHS + 1):
    train_loss, train_f1 = train_epoch(model, train_loader, criterion, optimizer, scaler, device)
    val_loss, val_f1, val_preds, val_labels = val_epoch(model, val_loader, criterion, device)
    
    history['train_loss'].append(train_loss)
    history['train_f1'].append(train_f1)
    history['val_loss'].append(val_loss)
    history['val_f1'].append(val_f1)
    
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), 'models/cnn_best.pt')
    
    if epoch % 5 == 0 or epoch == 1:
        print(f"Epoch {epoch:3d}/{EPOCHS} | Train: Loss={train_loss:.4f}, F1={train_f1:.4f} | Val: Loss={val_loss:.4f}, F1={val_f1:.4f}")

print("\n" + "=" * 70)
print(f"Best validation F1: {best_f1:.4f}")
print("=" * 70)

Training 1D CNN Model
Epoch   1/30 | Train: Loss=0.7900, F1=0.6453 | Val: Loss=0.5033, F1=0.7919
Epoch   1/30 | Train: Loss=0.7900, F1=0.6453 | Val: Loss=0.5033, F1=0.7919
Epoch   5/30 | Train: Loss=0.2884, F1=0.8924 | Val: Loss=0.7488, F1=0.7773
Epoch   5/30 | Train: Loss=0.2884, F1=0.8924 | Val: Loss=0.7488, F1=0.7773
Epoch  10/30 | Train: Loss=0.2193, F1=0.9126 | Val: Loss=0.5660, F1=0.8686
Epoch  10/30 | Train: Loss=0.2193, F1=0.9126 | Val: Loss=0.5660, F1=0.8686
Epoch  15/30 | Train: Loss=0.1782, F1=0.9359 | Val: Loss=0.9088, F1=0.8097
Epoch  15/30 | Train: Loss=0.1782, F1=0.9359 | Val: Loss=0.9088, F1=0.8097
Epoch  20/30 | Train: Loss=0.2237, F1=0.9157 | Val: Loss=1.1940, F1=0.6404
Epoch  20/30 | Train: Loss=0.2237, F1=0.9157 | Val: Loss=1.1940, F1=0.6404
Epoch  25/30 | Train: Loss=0.1783, F1=0.9328 | Val: Loss=1.4101, F1=0.7594
Epoch  25/30 | Train: Loss=0.1783, F1=0.9328 | Val: Loss=1.4101, F1=0.7594
Epoch  30/30 | Train: Loss=0.1021, F1=0.9618 | Val: Loss=1.3477, F1=0.7717

Be

In [9]:
# Final evaluation
model.load_state_dict(torch.load('models/cnn_best.pt'))
_, val_f1, val_preds, val_labels = val_epoch(model, val_loader, criterion, device)

print("\nüìä FINAL RESULTS (1D CNN):")
print(f"Validation F1: {val_f1:.4f}")

print("\nüìã Per-class metrics:")
print(classification_report(val_labels, val_preds, target_names=['no_pain', 'low_pain', 'high_pain'], digits=4))

unique_preds, counts = np.unique(val_preds, return_counts=True)
print("\nüéØ Prediction distribution:")
for cls, count in zip(unique_preds, counts):
    print(f"  Class {cls}: {count} predictions ({count/len(val_preds)*100:.1f}%)")

if len(unique_preds) >= 3:
    print("\n‚úÖ SUCCESS: Model predicts ALL 3 classes!")
elif len(unique_preds) == 2:
    print("\n‚ö†Ô∏è  PARTIAL: Predicts 2 out of 3 classes")
else:
    print("\n‚ùå FAILED: Stuck on 1 class")


üìä FINAL RESULTS (1D CNN):
Validation F1: 0.8910

üìã Per-class metrics:
              precision    recall  f1-score   support

     no_pain     0.9423    0.9423    0.9423       104
    low_pain     0.7917    0.8636    0.8261        22
   high_pain     0.4000    0.2857    0.3333         7

    accuracy                         0.8947       133
   macro avg     0.7113    0.6972    0.7006       133
weighted avg     0.8888    0.8947    0.8910       133


üéØ Prediction distribution:
  Class 0: 104 predictions (78.2%)
  Class 1: 24 predictions (18.0%)
  Class 2: 5 predictions (3.8%)

‚úÖ SUCCESS: Model predicts ALL 3 classes!
