# 03 - LSTM Training (Run 6a)
Train ActivityLSTM on fine-tuned ResNet-18 features.

**Prerequisite:** Run notebook 03a first to fine-tune the CNN and re-extract the features.

**Runtime:** GPU recommended for faster training.

In [None]:
# Colab Setup
# Detects whether the notebook is running on Google Colab and, if so, mounts
# Google Drive, clones the project repository and installs its requirements.
# Defines IN_COLAB and DATA_ROOT, which the configuration cell reads.
import os
# Treated as Colab when the COLAB_GPU environment variable is present or a
# /content directory exists.
IN_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    REPO_DIR = '/content/Driver-Activity-Recognition'
    # Clone only when the checkout is missing, so re-running the cell reuses it.
    if not os.path.exists(REPO_DIR):
        !git clone https://github.com/batuhne/Driver-Activity-Recognition.git {REPO_DIR}

    # Switch to the repository root so the relative requirements.txt path resolves.
    os.chdir(REPO_DIR)
    !pip install -q -r requirements.txt
    # Dataset location on the mounted Drive.
    DATA_ROOT = '/content/drive/MyDrive/DriveAndAct'
else:
    # Outside Colab, use a data directory relative to the current working directory.
    DATA_ROOT = './data'

# Echo the resolved locations so a wrong path is visible before training.
print(f'Working directory: {os.getcwd()}')
print(f'Data root: {DATA_ROOT}')

In [None]:
import torch
from src.utils import load_config, set_seed
from src.train import train

config = load_config()

if IN_COLAB:
    # On Colab, read data from Drive and write every output artefact back to
    # Drive so results persist between sessions.
    config['data']['root'] = DATA_ROOT
    drive_output = os.path.join(DATA_ROOT, 'results')
    for output_key, subdir in (
        ('checkpoint_dir', 'checkpoints'),
        ('log_dir', 'logs'),
        ('figure_dir', 'figures'),
    ):
        config['output'][output_key] = os.path.join(drive_output, subdir)

# ==================== Run 6a: LSTM on Fine-Tuned Features ====================
# Same LSTM recipe as Run 3 (best MPCA=39.2%); the only intended change is the
# feature source: features_finetuned (produced by notebook 03a) replaces the
# original frozen features.

# KEY: use fine-tuned features
config['features']['save_dir'] = 'features_finetuned'

# Model overrides (Run 3 values)
model_overrides = {
    'use_layernorm': True,
    'bidirectional': True,
    'pooling': 'attention',
    'lstm_hidden': 256,
    'lstm_dropout': 0.3,
}
for option, value in model_overrides.items():
    config['model'][option] = value

# Training overrides (Run 3 values unless noted)
training_overrides = {
    'mode': 'feature_based',          # fast path, no video I/O
    'batch_size': 32,
    'lr': 0.001,
    'loss_type': 'ce',
    'label_smoothing': 0.1,
    'mixup_alpha': 0.0,               # keep simple
    'noise_std': 0.0,
    'weight_decay': 0.0001,
    'epochs': 50,
    'early_stop_patience': 12,
    'scheduler_type': 'plateau',
    'scheduler_factor': 0.5,
    'scheduler_patience': 5,
    'gradient_clip': 1.0,
    'use_weighted_sampler': True,
    'en_beta': 0.99,
    'num_workers': 2,                 # Colab compatible
}
for option, value in training_overrides.items():
    config['training'][option] = value

# Report the hardware and the effective configuration before training starts.
print(f'GPU available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
print(f"\n--- Run 6a LSTM Config ---")
print(f"Features: {config['features']['save_dir']}")
print(f"Mode: {config['training']['mode']}")
print(f"LSTM: h={config['model']['lstm_hidden']}, BiLSTM={config['model']['bidirectional']}, pool={config['model']['pooling']}")
print(f"LR: {config['training']['lr']}, batch={config['training']['batch_size']}")
print(f"Sampler: {'WeightedRandom' if config['training']['use_weighted_sampler'] else 'DISABLED'}")

In [None]:
# Run 6a Diagnostics: verify fine-tuned features exist and model size
import csv
import numpy as np
from src.utils import compute_effective_number_weights
from src.models import ActivityLSTM

# Check features directory
# Pre-flight check for Run 6a: confirms the fine-tuned feature export exists,
# summarises the training labels, shows one feature array's shape and reports
# the parameter count of the LSTM that the config describes.
feature_dir = os.path.join(config['data']['root'], config['features']['save_dir'])
manifest_path = os.path.join(feature_dir, 'train', 'manifest.csv')

if not os.path.exists(manifest_path):
    print(f"ERROR: {manifest_path} not found!")
    print("Run notebook 03a first to fine-tune CNN and re-extract features.")
else:
    # newline='' is the csv module's documented way to open files, so quoted
    # fields containing line breaks are parsed correctly.
    with open(manifest_path, newline='') as f:
        labels = [int(row['label']) for row in csv.DictReader(f)]

    if not labels:
        # A header-only / empty manifest would make max() below raise
        # ValueError; report it like the missing-manifest case instead.
        print(f"ERROR: {manifest_path} contains no samples!")
        print("Run notebook 03a first to fine-tune CNN and re-extract features.")
    else:
        # Size the classifier by the highest label id, so it still covers ids
        # that are absent from the training split.
        num_classes = max(labels) + 1
        print(f"Features dir: {feature_dir}")
        print(f"Training samples: {len(labels)}, Unique classes: {len(set(labels))}, num_classes: {num_classes}")

        # Check a sample feature shape. The file name is a fixed probe; if it
        # is absent, skip this check rather than abort the remaining diagnostics.
        sample_path = os.path.join(feature_dir, 'train', 'seg_000000.npy')
        if os.path.exists(sample_path):
            sample = np.load(sample_path)
            print(f"Feature shape: {sample.shape}, dtype: {sample.dtype}")
        else:
            print(f"WARNING: {sample_path} not found; skipping feature shape check.")

        # Model param count: build a throwaway model from the config, count
        # its parameters, then drop it.
        model_check = ActivityLSTM(
            input_dim=config['model']['feature_dim'],
            hidden_dim=config['model']['lstm_hidden'],
            num_layers=config['model']['lstm_layers'],
            num_classes=num_classes,
            lstm_dropout=config['model']['lstm_dropout'],
            fc_dropout=config['model']['fc_dropout'],
            use_layernorm=config['model'].get('use_layernorm', False),
            bidirectional=config['model'].get('bidirectional', False),
            pooling=config['model'].get('pooling', 'last'),
        )
        total_params = sum(p.numel() for p in model_check.parameters())
        print(f"\nLSTM params: {total_params:,}")
        del model_check

In [None]:
# Train the model
# Calls the project's train() with the config assembled in the cells above and
# keeps its return value as `model`.
# NOTE(review): checkpoints and logs are presumably written to the
# config['output'] directories — confirm in src.train.
model = train(config)

In [None]:
# GPU memory summary (run after training)
# Prints peak and current allocated CUDA memory next to the card's total
# memory, plus the peak as a percentage of the total. Does nothing without CUDA.
if torch.cuda.is_available():
    bytes_per_gib = 1024**3
    device_props = torch.cuda.get_device_properties(0)
    total_mem = device_props.total_memory / bytes_per_gib
    current_mem = torch.cuda.memory_allocated() / bytes_per_gib
    peak_mem = torch.cuda.max_memory_allocated() / bytes_per_gib
    print(f"GPU Memory: peak={peak_mem:.2f} GB, current={current_mem:.2f} GB, total={total_mem:.1f} GB")
    print(f"Utilization: {peak_mem/total_mem*100:.0f}%")

## Monitor Training
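Launch TensorBoard to monitor training progress in real time. Because the training cell above blocks until it finishes, start this cell before training to follow a run live; run afterwards, it shows the logged curves of the completed run.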

In [None]:
# TensorBoard (works in Colab and Jupyter)
# Loads the TensorBoard notebook extension, then starts it on the log
# directory stored in config['output']['log_dir'].
%load_ext tensorboard
%tensorboard --logdir {config['output']['log_dir']}