# 03 - LSTM Training
Train ActivityLSTM on pre-extracted ResNet-18 features.

**Runtime:** GPU recommended for faster training.

In [None]:
# Colab Setup
import os
IN_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    REPO_DIR = '/content/Driver-Activity-Recognition'
    if not os.path.exists(REPO_DIR):
        !git clone https://github.com/batuhne/Driver-Activity-Recognition.git {REPO_DIR}

    os.chdir(REPO_DIR)
    !pip install -q -r requirements.txt
    DATA_ROOT = '/content/drive/MyDrive/DriveAndAct'
else:
    DATA_ROOT = './data'

print(f'Working directory: {os.getcwd()}')
print(f'Data root: {DATA_ROOT}')

In [None]:
import torch
from src.utils import load_config, set_seed
from src.train import train

# Start from the project's default configuration and override it in place.
config = load_config()

if IN_COLAB:
    config['data']['root'] = DATA_ROOT
    # Keep every output artifact on Drive so it persists between sessions.
    drive_output = os.path.join(DATA_ROOT, 'results')
    config['output'].update({
        'checkpoint_dir': os.path.join(drive_output, 'checkpoints'),
        'log_dir': os.path.join(drive_output, 'logs'),
        'figure_dir': os.path.join(drive_output, 'figures'),
    })

# Model config — Run 4: reduced capacity to fight overfitting
config['model'].update({
    'use_layernorm': True,
    'bidirectional': True,
    'pooling': 'attention',      # "last" | "mean" | "attention"
    'lstm_hidden': 128,          # Reduced from 256 (~850K params vs 3.1M)
    'lstm_dropout': 0.3,         # Reduced from 0.5 (smaller model)
})

# Training config — Run 4: cosine scheduler + effective number sampling
config['training'].update({
    'loss_type': 'ce',
    'mixup_alpha': 0.2,
    'noise_std': 0.1,
    'weight_decay': 0.0005,
    'epochs': 80,
    'early_stop_patience': 15,
    'lr': 0.0005,                # Reduced from 0.001
    'scheduler_type': 'cosine_warm',
    'cosine_T0': 10,
    'cosine_T_mult': 2,
    'cosine_eta_min': 1e-6,
    'en_beta': 0.99,             # Effective Number sampling
})

# Echo the hardware and the effective settings so the run is self-describing.
print(f'GPU available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
print(f"Model: BiLSTM={config['model']['bidirectional']}, Pooling={config['model']['pooling']}")
print(f"LSTM hidden={config['model']['lstm_hidden']}, dropout={config['model']['lstm_dropout']}")
print(f"Loss: {config['training']['loss_type']}, Mixup alpha={config['training']['mixup_alpha']}")
print(f"Scheduler: {config['training']['scheduler_type']}, LR={config['training']['lr']}")
print(f"EN sampling beta={config['training']['en_beta']}")

In [None]:
# Run 4 Diagnostics: verify EN weights and model size before training
import csv
import numpy as np
from src.utils import compute_effective_number_weights
from src.models import ActivityLSTM

# Read training labels from the manifest: each row's 'label' column is parsed
# as an integer class id.
feature_dir = os.path.join(config['data']['root'], config['features']['save_dir'])
manifest_path = os.path.join(feature_dir, 'train', 'manifest.csv')
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects passed to a reader.
with open(manifest_path, newline='') as f:
    labels = [int(row['label']) for row in csv.DictReader(f)]

# Fail early with a clear message; an empty manifest would otherwise surface
# later as an opaque error (e.g. division by zero in the params/sample ratio).
if not labels:
    raise ValueError(f"No training samples found in manifest: {manifest_path}")

# NOTE(review): this counts the distinct labels present in the train split, so
# it assumes every class appears at least once there — confirm against the
# class count used by the training code.
num_classes = len(set(labels))
print(f"Training samples: {len(labels)}, Classes: {num_classes}")

# Compare old vs new weights
from src.utils import compute_class_weights_from_labels
old_w = compute_class_weights_from_labels(labels, num_classes)
new_w = compute_effective_number_weights(labels, num_classes, beta=config['training']['en_beta'])

# The max/min ratio shows how strongly each scheme favours one class over another.
print(f"\nOld (sklearn balanced) — min: {old_w.min():.4f}, max: {old_w.max():.4f}, ratio: {old_w.max()/old_w.min():.1f}x")
print(f"New (EN beta={config['training']['en_beta']}) — min: {new_w.min():.4f}, max: {new_w.max():.4f}, ratio: {new_w.max()/new_w.min():.1f}x")

# Model param count: build a throwaway model with the current config purely to
# count its parameters before committing to a training run.
model_check = ActivityLSTM(
    input_dim=config['model']['feature_dim'],
    hidden_dim=config['model']['lstm_hidden'],
    num_layers=config['model']['lstm_layers'],
    num_classes=num_classes,
    lstm_dropout=config['model']['lstm_dropout'],
    fc_dropout=config['model']['fc_dropout'],
    use_layernorm=config['model'].get('use_layernorm', False),
    bidirectional=config['model'].get('bidirectional', False),
    pooling=config['model'].get('pooling', 'last'),
)
total_params = sum(p.numel() for p in model_check.parameters())
print(f"\nModel params: {total_params:,} (target ~850K)")
print(f"Params/sample ratio: {total_params/len(labels):.0f} (target <150)")
# Drop the diagnostic model; the training cell is not passed this instance.
del model_check

In [None]:
# Train the model
# Calls the project's `train` entry point (imported from src.train) with the
# config dict assembled in the earlier cell. The return value is kept in
# `model` — presumably the trained model; confirm against src.train.
model = train(config)

## Monitor Training
Launch TensorBoard to monitor training progress in real time.

In [None]:
# TensorBoard (works in Colab and Jupyter)
%load_ext tensorboard
%tensorboard --logdir results/logs