# Google Colab TPU Training (Folds 2-4)

This notebook is designed to train the **ReSupModel** on Google Colab using TPU.

### 🚀 Before you start:
1.  Go to **Runtime** > **Change runtime type**.
2.  Select **TPU v2** (or available TPU) as the Hardware accelerator.
3.  Click **Save**.
4.  Make sure you have `project_code.zip` and `data.zip` in your Google Drive under `MyDrive/mouse-facial-expressions` (or update the path below).

In [None]:
# 1. Install Dependencies
# We use --ignore-installed blinker to avoid conflicts with Colab pre-installed packages
!pip install lightning mlflow torchmetrics python-dotenv --ignore-installed blinker

# Install Torch XLA for TPU (Universal method for Colab)
!pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html

# Verify XLA installation
import torch_xla.core.xla_model as xm
print(f"TPU Device: {xm.xla_device()}")

In [None]:
# 2. Connect Google Drive & Setup Data
from google.colab import drive
import os
import shutil

# Mount Drive
drive.mount('/content/drive')

# Define your Drive path (Change this if your files are elsewhere!)
DRIVE_PATH = '/content/drive/MyDrive/mouse-facial-expressions' 

# Setup Workspace in /content (Faster I/O than Drive)
%cd /content

if os.path.exists(DRIVE_PATH):
    print(f"Copying files from {DRIVE_PATH}...")
    # Copy zip files
    shutil.copy(f'{DRIVE_PATH}/project_code.zip', '/content/project_code.zip')
    shutil.copy(f'{DRIVE_PATH}/data.zip', '/content/data.zip')
    
    print("Unzipping...")
    !unzip -q -o project_code.zip
    !unzip -q -o data.zip
    
    # Fix for Pickle/Pathlib error: Move CSV to data folder
    if os.path.exists('dataset_df_fixed.csv'):
        print("Applying fix for dataset_df.csv...")
        target_dir = '/content/data/processed/task-1.1'
        os.makedirs(target_dir, exist_ok=True)
        shutil.move('dataset_df_fixed.csv', f'{target_dir}/dataset_df.csv')
        print("dataset_df.csv restored successfully!")
    
    print("Done! Workspace is ready in /content")
    
    # Create .env file for Colab paths
    print("Creating .env file for Colab...")
    with open('.env', 'w') as f:
        f.write("MFE_RAW_VIDEO_FOLDER=/content/data/raw_videos\n")
        f.write("MFE_PROCESSED_VIDEO_FOLDER=/content/data/processed_videos\n")
        f.write("MFE_EXTRACTED_FRAMES_FOLDER=/content/data/extracted_frames\n")
        f.write("MFE_DLC_FACIAL_LABELS_FOLDER=/content/data/dlc_labels\n")
        f.write("MFE_DLC_FACIAL_PROJECT_PATH=/content/data/dlc_project\n")
        f.write("MFE_TASKS=/content/data/processed\n")
        f.write("MFE_RAW_CSV_FOLDER=/content/data/raw_csvs\n")
        f.write("MFE_VERSION=1.1\n")
    print(".env file created successfully!")
    
else:
    print(f"Error: Path {DRIVE_PATH} not found. Please check your Drive path.")

In [None]:
# 3. Run Training (Folds 2-4)
import sys
import os

# Verify we are in the right directory
if not os.path.exists('mouse_facial_expressions'):
    print("Error: 'mouse_facial_expressions' folder not found in current directory!")
    print("Current directory contents:", os.listdir('.'))
else:
    print("Found package. Starting training...")
    
    # Run training module directly with PYTHONPATH set
    !PYTHONPATH=. python3 -m mouse_facial_expressions.models.train_task1_resup_model \
        --folds "2,3,4" \
        --epochs 20 \
        --learning_rate 0.0001 \
        --num_frames 10 \
        --frame_stride 2 \
        --train_batch_size 128 \
        --test_batch_size 128 \
        --use_soft_labels True \
        --label_smoothing 0.2 \
        --dropout 0.5 \
        --dataset_version "1.1" \
        --train_augmentation "TrivialAugmentWide" \
        --seed 97531 \
        --accelerator "auto"

In [None]:
# 4. Download Results
from google.colab import files

print("Zipping checkpoints...")
!zip -r checkpoints_folds234.zip models/checkpoints_resup

print("Downloading...")
files.download('checkpoints_folds234.zip')