# Solution 2: Binary Classification (Pain vs No-Pain)

**Hypothesis**: The 3-class problem may be too difficult. Simplify to binary: pain (low+high) vs no-pain.

**Approach**:
- Merge class 1 (low_pain) and class 2 (high_pain) into single "pain" class
- Use same GRU architecture but simpler problem
- Direct oversampling for balanced dataset

In [1]:
# Set seed for reproducibility
SEED = 42

import os
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import random
import numpy as np
np.random.seed(SEED)
random.seed(SEED)

import torch
torch.manual_seed(SEED)
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
else:
    device = torch.device("cpu")

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

PyTorch version: 2.9.0
Device: cpu


In [2]:
# Load and preprocess data
df = pd.read_csv("pirate_pain_train.csv")
target = pd.read_csv("pirate_pain_train_labels.csv")

# Encode categorical features
number_cols = ['n_legs', 'n_hands', 'n_eyes']
for col in number_cols:
    df[col] = df[col].astype('category').cat.codes

# Normalize joint columns
joint_cols = ["joint_" + str(i).zfill(2) for i in range(31)]
for col in joint_cols:
    df[col] = df[col].astype(np.float32)

minmax_scaler = MinMaxScaler()
df[joint_cols] = minmax_scaler.fit_transform(df[joint_cols])
data_cols = number_cols + joint_cols

# ‚ö†Ô∏è KEY CHANGE: Binary classification
# Map: no_pain -> 0, low_pain -> 1, high_pain -> 1
label_mapping = {'no_pain': 0, 'low_pain': 1, 'high_pain': 1}
target['label'] = target['label'].map(label_mapping)

print("‚úÖ Binary classification setup:")
print("  Class 0: no_pain")
print("  Class 1: pain (low + high)")
print(f"\nClass distribution:")
for cls, count in target['label'].value_counts().items():
    print(f"  Class {cls}: {count} samples ({count/len(target)*100:.1f}%)")

‚úÖ Binary classification setup:
  Class 0: no_pain
  Class 1: pain (low + high)

Class distribution:
  Class 0: 511 samples (77.3%)
  Class 1: 150 samples (22.7%)


In [3]:
# Build sequences
WINDOW_SIZE = 300
STRIDE = 50

def build_sequences(df, target_df, window=300, stride=50):
    dataset = []
    labels = []
    
    for id in df['sample_index'].unique():
        temp = df[df['sample_index'] == id][data_cols].values
        label = target_df[target_df['sample_index'] == id]['label'].values[0]
        
        padding_len = window - len(temp) % window
        padding = np.zeros((padding_len, len(data_cols)), dtype='float32')
        temp = np.concatenate((temp, padding))
        
        idx = 0
        while idx + window <= len(temp):
            dataset.append(temp[idx:idx + window])
            labels.append(label)
            idx += stride
    
    return np.array(dataset), np.array(labels)

# Split data
unique_users = df['sample_index'].unique()
random.seed(SEED)
random.shuffle(unique_users)

train_users, val_users = train_test_split(unique_users, test_size=0.2, random_state=SEED)

df_train = df[df['sample_index'].isin(unique_users[train_users])]
df_val = df[df['sample_index'].isin(unique_users[val_users])]

X_train, y_train = build_sequences(df_train, target, WINDOW_SIZE, STRIDE)
X_val, y_val = build_sequences(df_val, target, WINDOW_SIZE, STRIDE)

train_counts = np.bincount(y_train.astype(int))
val_counts = np.bincount(y_val.astype(int))

print(f"\nTraining sequences: {X_train.shape}")
print(f"  Class 0 (no_pain): {train_counts[0]} ({train_counts[0]/len(y_train)*100:.1f}%)")
print(f"  Class 1 (pain): {train_counts[1]} ({train_counts[1]/len(y_train)*100:.1f}%)")

print(f"\nValidation sequences: {X_val.shape}")
print(f"  Class 0 (no_pain): {val_counts[0]} ({val_counts[0]/len(y_val)*100:.1f}%)")
print(f"  Class 1 (pain): {val_counts[1]} ({val_counts[1]/len(y_val)*100:.1f}%)")


Training sequences: (528, 300, 34)
  Class 0 (no_pain): 407 (77.1%)
  Class 1 (pain): 121 (22.9%)

Validation sequences: (133, 300, 34)
  Class 0 (no_pain): 104 (78.2%)
  Class 1 (pain): 29 (21.8%)


In [4]:
# Balance dataset via oversampling
target_count = max(train_counts)
duplication_factors = np.ceil(target_count / train_counts).astype(int)

X_train_balanced = []
y_train_balanced = []

for cls in range(len(train_counts)):
    cls_indices = np.where(y_train == cls)[0]
    for _ in range(duplication_factors[cls]):
        X_train_balanced.append(X_train[cls_indices])
        y_train_balanced.append(y_train[cls_indices])

X_train_balanced = np.concatenate(X_train_balanced, axis=0)
y_train_balanced = np.concatenate(y_train_balanced, axis=0)

balanced_counts = np.bincount(y_train_balanced.astype(int))
print(f"Balanced training set:")
print(f"  Class 0: {balanced_counts[0]} ({balanced_counts[0]/len(y_train_balanced)*100:.1f}%)")
print(f"  Class 1: {balanced_counts[1]} ({balanced_counts[1]/len(y_train_balanced)*100:.1f}%)")
print(f"  Total: {len(y_train_balanced)} samples")

Balanced training set:
  Class 0: 407 (45.7%)
  Class 1: 484 (54.3%)
  Total: 891 samples


In [5]:
# GRU Model (same as before, but for 2 classes)
class RecurrentClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_rate=0.2):
        super().__init__()
        
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        dropout_val = dropout_rate if num_layers > 1 else 0
        
        self.rnn = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_val
        )
        
        self.classifier = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        _, hidden = self.rnn(x)
        logits = self.classifier(hidden[-1])
        return logits

# Initialize model
model = RecurrentClassifier(
    input_size=X_train.shape[-1],
    hidden_size=128,
    num_layers=2,
    num_classes=2,  # Binary classification!
    dropout_rate=0.2
).to(device)

print(f"Model created for binary classification")
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

Model created for binary classification
Parameters: 162,306


In [6]:
# Create dataloaders
BATCH_SIZE = 16

train_ds = TensorDataset(torch.from_numpy(X_train_balanced).float(), torch.from_numpy(y_train_balanced).long())
val_ds = TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).long())

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

Train batches: 56
Val batches: 9


In [7]:
# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))

def train_epoch(model, loader, criterion, optimizer, scaler, device):
    model.train()
    total_loss = 0
    all_preds, all_labels = [], []
    
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        
        with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        total_loss += loss.item() * inputs.size(0)
        all_preds.extend(outputs.argmax(1).cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
    
    avg_loss = total_loss / len(loader.dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    return avg_loss, f1

def val_epoch(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            total_loss += loss.item() * inputs.size(0)
            all_preds.extend(outputs.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    avg_loss = total_loss / len(loader.dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    return avg_loss, f1, all_preds, all_labels

In [8]:
# Train model
print("=" * 70)
print("Training Binary Classification Model (Pain vs No-Pain)")
print("=" * 70)

EPOCHS = 30
best_f1 = 0

for epoch in range(1, EPOCHS + 1):
    train_loss, train_f1 = train_epoch(model, train_loader, criterion, optimizer, scaler, device)
    val_loss, val_f1, val_preds, val_labels = val_epoch(model, val_loader, criterion, device)
    
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), 'models/binary_best.pt')
    
    if epoch % 5 == 0 or epoch == 1:
        print(f"Epoch {epoch:3d}/{EPOCHS} | Train: Loss={train_loss:.4f}, F1={train_f1:.4f} | Val: Loss={val_loss:.4f}, F1={val_f1:.4f}")

print("\n" + "=" * 70)
print(f"Best validation F1: {best_f1:.4f}")
print("=" * 70)

Training Binary Classification Model (Pain vs No-Pain)
Epoch   1/30 | Train: Loss=0.6923, F1=0.4675 | Val: Loss=0.7475, F1=0.0781
Epoch   1/30 | Train: Loss=0.6923, F1=0.4675 | Val: Loss=0.7475, F1=0.0781
Epoch   5/30 | Train: Loss=0.6899, F1=0.3824 | Val: Loss=0.7580, F1=0.0781
Epoch   5/30 | Train: Loss=0.6899, F1=0.3824 | Val: Loss=0.7580, F1=0.0781
Epoch  10/30 | Train: Loss=0.6901, F1=0.3824 | Val: Loss=0.7469, F1=0.0781
Epoch  10/30 | Train: Loss=0.6901, F1=0.3824 | Val: Loss=0.7469, F1=0.0781
Epoch  15/30 | Train: Loss=0.6898, F1=0.3824 | Val: Loss=0.7469, F1=0.0781
Epoch  15/30 | Train: Loss=0.6898, F1=0.3824 | Val: Loss=0.7469, F1=0.0781
Epoch  20/30 | Train: Loss=0.6898, F1=0.3824 | Val: Loss=0.7478, F1=0.0781
Epoch  20/30 | Train: Loss=0.6898, F1=0.3824 | Val: Loss=0.7478, F1=0.0781
Epoch  25/30 | Train: Loss=0.6899, F1=0.3824 | Val: Loss=0.7389, F1=0.0781
Epoch  25/30 | Train: Loss=0.6899, F1=0.3824 | Val: Loss=0.7389, F1=0.0781
Epoch  30/30 | Train: Loss=0.6900, F1=0.3824 

In [9]:
# Final evaluation
model.load_state_dict(torch.load('models/binary_best.pt'))
_, val_f1, val_preds, val_labels = val_epoch(model, val_loader, criterion, device)

print("\nüìä FINAL RESULTS (Binary Classification):")
print(f"Validation F1: {val_f1:.4f}")

print("\nüìã Per-class metrics:")
print(classification_report(val_labels, val_preds, target_names=['no_pain', 'pain'], digits=4))

print("\nüìà Confusion Matrix:")
cm = confusion_matrix(val_labels, val_preds)
print(cm)
print(f"\n  True Negatives (no_pain predicted correctly): {cm[0,0]}")
print(f"  False Positives (no_pain predicted as pain): {cm[0,1]}")
print(f"  False Negatives (pain predicted as no_pain): {cm[1,0]}")
print(f"  True Positives (pain predicted correctly): {cm[1,1]}")

unique_preds, counts = np.unique(val_preds, return_counts=True)
print("\nüéØ Prediction distribution:")
for cls, count in zip(unique_preds, counts):
    print(f"  Class {cls}: {count} predictions ({count/len(val_preds)*100:.1f}%)")

if len(unique_preds) == 2:
    print("\n‚úÖ SUCCESS: Model predicts BOTH classes!")
    print("Binary classification is working properly.")
else:
    print("\n‚ùå FAILED: Still stuck on 1 class even with binary problem")


üìä FINAL RESULTS (Binary Classification):
Validation F1: 0.0781

üìã Per-class metrics:
              precision    recall  f1-score   support

     no_pain     0.0000    0.0000    0.0000       104
        pain     0.2180    1.0000    0.3580        29

    accuracy                         0.2180       133
   macro avg     0.1090    0.5000    0.1790       133
weighted avg     0.0475    0.2180    0.0781       133


üìà Confusion Matrix:
[[  0 104]
 [  0  29]]

  True Negatives (no_pain predicted correctly): 0
  False Positives (no_pain predicted as pain): 104
  False Negatives (pain predicted as no_pain): 0
  True Positives (pain predicted correctly): 29

üéØ Prediction distribution:
  Class 1: 133 predictions (100.0%)

‚ùå FAILED: Still stuck on 1 class even with binary problem
