# 03 - End-to-End CNN+LSTM Training (Run 6a)
Fine-tune ResNet-18 layer4 + LSTM end-to-end on Drive&Act Kinect IR.

**Key change from Run 3:** CNN layer4 unfrozen with differential LR (CNN=1e-5, LSTM=1e-3).
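Everything else matches Run 3 (best MPCA=39.2%), except for the settings end-to-end training adds: batch size 8 with 4-step gradient accumulation (effective batch 32), a 3-epoch CNN warmup, and mixed precision.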

**Runtime:** GPU required (T4 16GB). Mixed precision enabled.

In [None]:
# Colab Setup
# Detects whether the notebook is running on Google Colab. On Colab it mounts
# Google Drive, clones the project repo if it is not already present, installs
# the repo's requirements, and points DATA_ROOT at the dataset folder on Drive.
# Anywhere else it only sets DATA_ROOT to a local ./data directory.
import os
# Colab heuristic: the COLAB_GPU environment variable is set or /content exists.
IN_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    REPO_DIR = '/content/Driver-Activity-Recognition'
    # Skip the clone when a checkout already exists at REPO_DIR.
    if not os.path.exists(REPO_DIR):
        !git clone https://github.com/batuhne/Driver-Activity-Recognition.git {REPO_DIR}

    # Work from the repo root so the relative requirements.txt path resolves
    # (presumably also what lets the later `from src...` imports work).
    os.chdir(REPO_DIR)
    !pip install -q -r requirements.txt
    DATA_ROOT = '/content/drive/MyDrive/DriveAndAct'
else:
    # Local run: data is looked up relative to the current working directory.
    DATA_ROOT = './data'

# Echo the resolved locations so a wrong path is visible before training.
print(f'Working directory: {os.getcwd()}')
print(f'Data root: {DATA_ROOT}')

In [None]:
import torch
from src.utils import load_config, set_seed
from src.train import train

# Start from the project defaults; on Colab, read data from Drive and write
# every output there as well.
config = load_config()
if IN_COLAB:
    config['data']['root'] = DATA_ROOT
    # Checkpoints, logs and figures live on Drive so they persist between sessions
    drive_output = os.path.join(DATA_ROOT, 'results')
    config['output'].update(
        checkpoint_dir=os.path.join(drive_output, 'checkpoints'),
        log_dir=os.path.join(drive_output, 'logs'),
        figure_dir=os.path.join(drive_output, 'figures'),
    )

# ==================== Run 6a: End-to-End CNN Fine-Tuning ====================
# Starts from the Run 3 recipe (best MPCA=39.2%) and unfreezes ResNet-18
# layer4 so the CNN is trained end-to-end together with the LSTM.

# Model config — same as Run 3, plus the new freeze mode
config['model'].update({
    'use_layernorm': True,
    'bidirectional': True,
    'pooling': 'attention',
    'lstm_hidden': 256,             # Run 3 value
    'lstm_dropout': 0.3,            # Run 3 value
    'freeze_mode': 'layer4',        # NEW: only layer4 trainable
})

# Training config — end-to-end mode
config['training'].update({
    'mode': 'end_to_end',           # NEW: end-to-end training
    'batch_size': 8,                # Reduced for GPU memory
    'accumulation_steps': 4,        # Effective batch = 32
    'lr': 0.001,                    # LSTM LR (Run 3)
    'cnn_lr': 1e-5,                 # NEW: CNN fine-tuning LR
    'cnn_warmup_epochs': 3,         # NEW: freeze CNN for 3 epochs
    'use_amp': True,                # NEW: mixed precision
    'loss_type': 'ce',
    'label_smoothing': 0.1,
    'mixup_alpha': 0.0,             # Disabled for video frames
    'noise_std': 0.0,               # Disabled — CNN augmentation sufficient
    'weight_decay': 0.0001,         # Run 3 value
    'epochs': 50,
    'early_stop_patience': 12,
    'scheduler_type': 'plateau',
    'scheduler_factor': 0.5,
    'scheduler_patience': 5,
    'gradient_clip': 1.0,
    'use_weighted_sampler': True,   # Run 3 value
    'en_beta': 0.99,                # Run 3 value
    'num_workers': 2,               # Colab compatible
})

# Environment and configuration summary, printed before training as a sanity
# check of the hardware and of the Run 6a overrides applied to `config`.
print(f'GPU available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    # The device-properties object exposes its capacity in bytes as
    # `total_memory` (there is no `total_mem` attribute); shown here in GiB.
    print(f'GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')
print(f"\n--- Run 6a Config ---")
print(f"Mode: {config['training']['mode']}")
print(f"CNN freeze_mode: {config['model']['freeze_mode']}")
print(f"CNN LR: {config['training']['cnn_lr']}, LSTM LR: {config['training']['lr']}")
print(f"CNN warmup: {config['training']['cnn_warmup_epochs']} epochs")
# Effective batch size = per-step batch size x gradient accumulation steps.
print(f"Batch: {config['training']['batch_size']} x {config['training']['accumulation_steps']} accum = {config['training']['batch_size'] * config['training']['accumulation_steps']} effective")
print(f"AMP: {config['training']['use_amp']}")
print(f"LSTM: h={config['model']['lstm_hidden']}, BiLSTM={config['model']['bidirectional']}, pool={config['model']['pooling']}")
print(f"Sampler: {'WeightedRandom' if config['training']['use_weighted_sampler'] else 'DISABLED'}")

In [None]:
# Run 6a diagnostics: build a throwaway model from the current config, report
# which CNN layers are trainable, the parameter counts, and a rough input-size
# estimate, then discard the model again.
import torch
from src.models import CNNLSTMModel


def _count_params(module, trainable_only=False):
    """Return the number of scalar parameters in *module*.

    With ``trainable_only=True`` only parameters with ``requires_grad`` set
    are counted.
    """
    return sum(
        p.numel()
        for p in module.parameters()
        if p.requires_grad or not trainable_only
    )


freeze_mode = config['model']['freeze_mode']
model_check = CNNLSTMModel(
    num_classes=34,  # approximate
    hidden_dim=config['model']['lstm_hidden'],
    num_layers=config['model']['lstm_layers'],
    lstm_dropout=config['model']['lstm_dropout'],
    fc_dropout=config['model']['fc_dropout'],
    use_layernorm=config['model'].get('use_layernorm', False),
    bidirectional=config['model'].get('bidirectional', False),
    pooling=config['model'].get('pooling', 'last'),
    freeze_mode=freeze_mode,
)

# Whole-model totals
total_params = _count_params(model_check)
trainable_params = _count_params(model_check, trainable_only=True)
frozen_params = total_params - trainable_params

print(f"=== CNNLSTMModel (freeze_mode={freeze_mode}) ===")
print(f"Total params:     {total_params:>10,}")
print(f"Trainable params: {trainable_params:>10,}")
print(f"Frozen params:    {frozen_params:>10,}")
print()

# Per-child breakdown of the CNN feature extractor; children that own no
# parameters are left out of the listing.
print("CNN Layer Status:")
for idx, (name, child) in enumerate(model_check.cnn.features.named_children()):
    child_params = _count_params(child)
    child_trainable = _count_params(child, trainable_only=True)
    if child_params <= 0:
        continue
    status = "frozen" if child_trainable <= 0 else "TRAINABLE"
    print(f"  [{idx}] {name:>2}: {child_params:>10,} params  [{status}]")

# LSTM parameter count
lstm_params = _count_params(model_check.lstm)
print(f"\nLSTM params: {lstm_params:,}")

# Input-size estimate: batch x seq x 3 x 224 x 224 values at 4 bytes each, in MiB
batch = config['training']['batch_size']
seq = 16
input_mb = batch * seq * 3 * 224 * 224 * 4 / 1024**2
print(f"\nMemory estimate (batch={batch}, seq={seq}, AMP={config['training']['use_amp']}):")
print(f"  Input tensor: ~{input_mb:.0f} MB")
print("  Estimated peak: ~4-6 GB (T4 has 16 GB)")

# Drop the reference to the diagnostic model before the real training run
del model_check

In [None]:
# Train the model
# Calls the project's `train` entry point with the Run 6a `config` built in the
# cells above and keeps whatever it returns as `model`.
model = train(config)

In [None]:
# GPU memory summary (run after training)
# Reports the allocator's peak and current usage next to the device capacity,
# all converted from bytes to GiB. Does nothing when CUDA is unavailable.
if torch.cuda.is_available():
    peak_mem = torch.cuda.max_memory_allocated() / 1024**3
    current_mem = torch.cuda.memory_allocated() / 1024**3
    # Device capacity is exposed as `total_memory`; `total_mem` does not exist
    # on the properties object and raised AttributeError.
    total_mem = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU Memory: peak={peak_mem:.2f} GB, current={current_mem:.2f} GB, total={total_mem:.1f} GB")
    print(f"Utilization: {peak_mem/total_mem*100:.0f}%")

## Monitor Training
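Launch TensorBoard to monitor training progress in real time. Notebook cells run one at a time, so run this cell before the training cell if you want to watch the curves while training is still in progress.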

In [None]:
# TensorBoard (works in Colab and Jupyter)
# Loads the TensorBoard notebook extension, then opens it on the log directory
# stored in config['output']['log_dir']; IPython expands the {...} expression
# before the magic runs.
%load_ext tensorboard
%tensorboard --logdir {config['output']['log_dir']}