## 1. Environment Setup

In [None]:
# Check GPU availability
# Runs the NVIDIA status tool in the runtime's shell. If the command is not
# found or reports no devices, the runtime most likely has no GPU attached
# (later cells pass --device cuda to the training script).
!nvidia-smi

In [None]:
# Clone repository from GitHub
!git clone https://github.com/hmolhem/nthu-driver-drowsiness-ROI.git
%cd nthu-driver-drowsiness-ROI

In [None]:
# Install dependencies (PyTorch already in Colab, just need extras)
!pip install -q pyyaml tqdm

# Verify PyTorch and CUDA
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

## 2. Mount Google Drive & Link Dataset

In [None]:
# Mount Google Drive
# Attaches Drive at /content/drive. Later cells read the dataset from, and
# write results to, paths under /content/drive/MyDrive, so this cell must run
# before them. Colab may ask for authorization when mounting.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Link dataset from Google Drive to project
import os

# Update this path if your dataset is in a different Drive location
drive_dataset_path = '/content/drive/MyDrive/drowsiness-dataset/archive'
project_dataset_path = '/content/nthu-driver-drowsiness-ROI/datasets/archive'

# Create symbolic link
!mkdir -p /content/nthu-driver-drowsiness-ROI/datasets
!ln -s {drive_dataset_path} {project_dataset_path}

# Verify dataset
print("\nDataset folders:")
!ls -lh /content/nthu-driver-drowsiness-ROI/datasets/archive/

print("\nSample counts:")
!find /content/nthu-driver-drowsiness-ROI/datasets/archive/drowsy -type f | wc -l
!find /content/nthu-driver-drowsiness-ROI/datasets/archive/notdrowsy -type f | wc -l

## 3. Verify Data Splits

In [None]:
# Check if splits exist, if not create them
import os

if not os.path.exists('data/splits/train.csv'):
    print("Creating data splits...")
    !python src/data/create_splits.py
else:
    print("Data splits already exist")

# Verify splits
!ls -lh data/splits/
!wc -l data/splits/*.csv

## 4. Create Results Directory in Google Drive

In [None]:
# Create results directory in Google Drive for persistence
drive_results_path = '/content/drive/MyDrive/drowsiness-results'
!mkdir -p {drive_results_path}/checkpoints
!mkdir -p {drive_results_path}/logs

# Link to project
!ln -s {drive_results_path}/checkpoints /content/nthu-driver-drowsiness-ROI/checkpoints

print("Results will be saved to Google Drive:")
print(f"  {drive_results_path}")

## 5. Train ResNet50 Baseline

In [None]:
# Train ResNet50 on GPU
# Runs the repository's baseline training script with the ResNet50 config.
# The relative paths assume the working directory is the repository root.
# --device cuda presumably selects GPU training, so a GPU runtime is needed.
!python src/training/train_baseline.py \
    --config configs/baseline_resnet50.yaml \
    --device cuda

## 6. Train EfficientNet Baseline (Optional)

In [None]:
# Train EfficientNet-B0 on GPU
# Optional second baseline: same training script as the ResNet50 run, with the
# EfficientNet config instead.
# The relative paths assume the working directory is the repository root.
# --device cuda presumably selects GPU training, so a GPU runtime is needed.
!python src/training/train_baseline.py \
    --config configs/baseline_efficientnet.yaml \
    --device cuda

## 7. Check Training Results

In [None]:
# List saved checkpoints
# The trailing slash makes ls list the directory contents even when
# checkpoints is a symlink to the Drive folder.
print("Saved checkpoints:")
!ls -lh /content/nthu-driver-drowsiness-ROI/checkpoints/

# Show checkpoint sizes
# One line per entry in the checkpoints directory (the glob is expanded by
# the shell, so this prints an error if the directory is empty).
!du -h /content/nthu-driver-drowsiness-ROI/checkpoints/*

## 8. Quick Evaluation on Test Set

In [None]:
# Quick test set evaluation
# Loads the best ResNet50 checkpoint, runs it over the test split and prints
# aggregate and per-class metrics. Relative paths assume the working
# directory is the repository root.
import sys

import torch

# Make the repository's src/ packages importable; the membership check keeps
# re-runs of this cell from appending duplicate entries.
src_dir = '/content/nthu-driver-drowsiness-ROI/src'
if src_dir not in sys.path:
    sys.path.append(src_dir)

from data.dataset import create_dataloaders
from data.transforms import get_val_transforms
from models.classifier import create_model
from training.metrics import MetricsCalculator
from utils.config import load_config

# Use the GPU when one is attached, otherwise fall back to CPU so the
# evaluation still runs on a CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Single source of truth for the class order used by the metrics and the report
class_names = ['notdrowsy', 'drowsy']

# Load config and model
config = load_config('configs/baseline_resnet50.yaml')
model = create_model(config)

# Load best checkpoint
# map_location lets a checkpoint that was saved on GPU load on any device
checkpoint = torch.load('checkpoints/baseline_resnet50_best.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

# Load test data
dataloaders = create_dataloaders(
    'data/splits/train.csv',
    'data/splits/val.csv',
    'data/splits/test.csv',
    data_root='datasets/archive',
    val_transform=get_val_transforms(),
    batch_size=32
)
test_loader = dataloaders['test']

# Evaluate
metrics_calc = MetricsCalculator(num_classes=len(class_names), class_names=class_names)

with torch.no_grad():
    # The loader yields a third per-batch item that is not needed here
    for images, labels, _ in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest output along dim 1
        preds = outputs.argmax(dim=1)
        metrics_calc.update(preds, labels)

# Print results
test_metrics = metrics_calc.compute()
print("\n" + "="*60)
print("TEST SET RESULTS (ResNet50)")
print("="*60)
print(f"Accuracy: {test_metrics['accuracy']:.4f}")
print(f"Macro-F1: {test_metrics['f1_macro']:.4f}")
print(f"Precision (macro): {test_metrics['precision_macro']:.4f}")
print(f"Recall (macro): {test_metrics['recall_macro']:.4f}")
print("\nPer-class metrics:")
for class_name in class_names:
    print(f"  {class_name}:")
    print(f"    Precision: {test_metrics[f'precision_{class_name}']:.4f}")
    print(f"    Recall: {test_metrics[f'recall_{class_name}']:.4f}")
    print(f"    F1: {test_metrics[f'f1_{class_name}']:.4f}")
print("\nConfusion Matrix:")
print(test_metrics['confusion_matrix'])
print("="*60)

## 9. Package Results for Download

In [None]:
# Create zip of results
!zip -r /content/drowsiness_results.zip \
    /content/nthu-driver-drowsiness-ROI/checkpoints/ \
    -x "*.git*"

print("\nResults packaged!")
!ls -lh /content/drowsiness_results.zip

In [None]:
# Send the packaged archive to the browser as a download
from google.colab import files

archive_path = '/content/drowsiness_results.zip'
files.download(archive_path)

# Follow-up instructions for the local machine, printed one per line
followup_lines = (
    "\nDownload started! Check your browser's downloads folder.",
    "\nExtract this zip to your local project root:",
    "  nthu-driver-drowsiness-ROI/",
)
for followup_line in followup_lines:
    print(followup_line)

## 10. Summary of Files Created

**Files saved to Google Drive:**
- `My Drive/drowsiness-results/checkpoints/baseline_resnet50_best.pth`
- `My Drive/drowsiness-results/checkpoints/baseline_resnet50_last.pth`
- `My Drive/drowsiness-results/checkpoints/baseline_efficientnet_best.pth` (if trained)
- `My Drive/drowsiness-results/checkpoints/baseline_efficientnet_last.pth` (if trained)

**Downloaded to local machine:**
- `drowsiness_results.zip` (contains all checkpoints)

**Next steps on local machine:**
1. Extract `drowsiness_results.zip` to project root
2. Verify checkpoints in `checkpoints/` folder
3. Run evaluation scripts
4. Generate reports and visualizations
5. Proceed to ROI model development

**Training complete! 🎉**