# 03 - LSTM Training
Train ActivityLSTM on pre-extracted ResNet-18 features.

**Runtime:** GPU recommended for faster training.

In [None]:
# Colab Setup
# Detects whether the notebook runs on Google Colab and, if so, mounts Drive,
# fetches the project repository and installs its requirements. In both cases
# it leaves DATA_ROOT pointing at the dataset location used by later cells.
import os
# Heuristic: treat the session as Colab when the COLAB_GPU environment variable
# is set or a /content directory exists.
IN_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IN_COLAB:
    # Imported lazily because google.colab is only importable inside Colab.
    from google.colab import drive
    drive.mount('/content/drive')

    REPO_DIR = '/content/Driver-Activity-Recognition'
    # Clone only when the checkout is missing, so re-running the cell is cheap.
    if not os.path.exists(REPO_DIR):
        !git clone https://github.com/batuhne/Driver-Activity-Recognition.git {REPO_DIR}

    # Work from the repo root so `src.*` imports and requirements.txt resolve.
    os.chdir(REPO_DIR)
    !pip install -q -r requirements.txt
    # Dataset (and, later, results) live on the mounted Drive.
    DATA_ROOT = '/content/drive/MyDrive/DriveAndAct'
else:
    # Local run: data is expected relative to the current working directory.
    DATA_ROOT = './data'

# Echo the resolved locations so a wrong path is visible before training starts.
print(f'Working directory: {os.getcwd()}')
print(f'Data root: {DATA_ROOT}')

In [None]:
import torch
from src.utils import load_config, set_seed
from src.train import train

# Start from the project defaults returned by load_config(), then layer the
# notebook-specific overrides for this run on top of them.
config = load_config()

if IN_COLAB:
    config['data']['root'] = DATA_ROOT
    # Route every artifact to Drive so it survives a Colab session reset
    drive_output = os.path.join(DATA_ROOT, 'results')
    config['output'].update({
        'checkpoint_dir': os.path.join(drive_output, 'checkpoints'),
        'log_dir': os.path.join(drive_output, 'logs'),
        'figure_dir': os.path.join(drive_output, 'figures'),
    })

# Model config — Run 5: capacity balance (192 sits between 128 and 256)
config['model'].update({
    'use_layernorm': True,
    'bidirectional': True,
    'pooling': 'attention',
    'lstm_hidden': 192,     # Run 4 used 128, Run 3 used 256; target ~2.0M params
    'lstm_dropout': 0.4,    # Run 4 used 0.3, Run 3 used 0.5
})

# Training config — Run 5: class-weighted loss instead of a weighted sampler
config['training'].update({
    'loss_type': 'ce',
    'mixup_alpha': 0.2,
    'noise_std': 0.1,
    'weight_decay': 0.0005,
    'epochs': 80,
    'early_stop_patience': 15,
    'lr': 0.0005,
    'scheduler_type': 'plateau',     # Back to plateau (cosine restart hurt in Run 4)
    'scheduler_factor': 0.5,
    'scheduler_patience': 5,
    'use_weighted_sampler': False,   # KEY CHANGE: sampler disabled
    'en_beta': 0.999,                # Higher beta OK for loss weights (vs 0.99 for sampler)
})

# Summarise the hardware and the effective settings before any training starts.
print(f'GPU available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
print(f"Model: BiLSTM={config['model']['bidirectional']}, Pooling={config['model']['pooling']}")
print(f"LSTM hidden={config['model']['lstm_hidden']}, dropout={config['model']['lstm_dropout']}")
print(f"Loss: {config['training']['loss_type']}, Mixup alpha={config['training']['mixup_alpha']}")
print(f"Scheduler: {config['training']['scheduler_type']}, LR={config['training']['lr']}")
print(f"Sampler: {'WeightedRandom' if config['training']['use_weighted_sampler'] else 'DISABLED (shuffle=True)'}")
print(f"EN beta={config['training']['en_beta']} ({'sampler weights' if config['training']['use_weighted_sampler'] else 'loss weights'})")

In [None]:
# Run 5b Diagnostics: verify sqrt-dampened loss weights and model size
import csv
import torch
import numpy as np
from src.utils import compute_effective_number_weights
from src.models import ActivityLSTM

# Read the integer class label of every training sample from the feature manifest.
feature_dir = os.path.join(config['data']['root'], config['features']['save_dir'])
manifest_path = os.path.join(feature_dir, 'train', 'manifest.csv')
# newline='' lets the csv module handle line endings itself, as its docs require.
with open(manifest_path, newline='') as f:
    labels = [int(row['label']) for row in csv.DictReader(f)]

# Fail early with a clear message: an empty manifest would otherwise surface as
# an opaque max() error here and a division by zero in the ratio printed below.
if not labels:
    raise ValueError(f"No training samples found in manifest: {manifest_path}")

# Class count is inferred from the highest label id
# (assumes label ids are 0-based — TODO confirm against the dataset code).
num_classes = max(labels) + 1
print(f"Training samples: {len(labels)}, Unique classes: {len(set(labels))}, num_classes: {num_classes}")

# Loss weights: effective-number (EN) weights at the configured beta, then sqrt dampening
beta = config['training']['en_beta']
# NOTE(review): raw_w is used as a torch tensor below — confirm the helper's return type.
raw_w = compute_effective_number_weights(labels, num_classes, beta=beta)
print(f"\nRaw EN weights (beta={beta}): min={raw_w.min():.4f}, max={raw_w.max():.4f}, ratio={raw_w.max()/raw_w.min():.1f}x")

# The square root compresses the max/min spread; dividing by the mean rescales
# the dampened weights so they average to 1.
dampened_w = torch.sqrt(raw_w)
dampened_w = dampened_w / dampened_w.mean()
print(f"Sqrt dampened:               min={dampened_w.min():.4f}, max={dampened_w.max():.4f}, ratio={dampened_w.max()/dampened_w.min():.1f}x")
# Report the sampler state from the config instead of a hard-coded string, so the
# diagnostic cannot contradict the settings that train() will actually receive.
sampler_status = 'WeightedRandom' if config['training']['use_weighted_sampler'] else 'DISABLED (shuffle=True)'
print(f"Sampler: {sampler_status}")

# Model param count (target ~2.0M): build a throwaway model with the same
# hyperparameters purely to count its parameters.
model_check = ActivityLSTM(
    input_dim=config['model']['feature_dim'],
    hidden_dim=config['model']['lstm_hidden'],
    num_layers=config['model']['lstm_layers'],
    num_classes=num_classes,
    lstm_dropout=config['model']['lstm_dropout'],
    fc_dropout=config['model']['fc_dropout'],
    use_layernorm=config['model'].get('use_layernorm', False),
    bidirectional=config['model'].get('bidirectional', False),
    pooling=config['model'].get('pooling', 'last'),
)
total_params = sum(p.numel() for p in model_check.parameters())
print(f"\nModel params: {total_params:,} (target ~2.0M)")
print(f"Params/sample ratio: {total_params/len(labels):.0f} (target ~305)")
# Drop the throwaway model so it does not linger in memory during training.
del model_check

In [None]:
# Train the model
# Calls the project's train() with the config assembled in the cells above and
# keeps its return value in `model` (presumably the trained ActivityLSTM —
# confirm against src/train.py).
model = train(config)

## Monitor Training
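Launch TensorBoard to monitor training progress in real time. Because the `train(config)` cell above blocks until training finishes, run the TensorBoard cell below before starting training if you want to watch the curves update live; run afterwards, it shows the completed run.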

In [None]:
# TensorBoard (works in Colab and Jupyter)
# Load the notebook extension, then open TensorBoard on the log directory stored
# in config['output']['log_dir']; IPython expands the {...} expression before
# the magic runs.
%load_ext tensorboard
%tensorboard --logdir {config['output']['log_dir']}