# Google Colab (Skip locally)

In [None]:
#@title Mount your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Enable the autoreload extension in mode 2 so that imported modules are
# reloaded before each cell executes (edits to the .py files are picked up live).
%load_ext autoreload
%autoreload 2

# Mount the user's Google Drive at /content/gdrive. The google.colab package is
# only available on Colab, which is why this cell must be skipped elsewhere.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
#@title Link your assignment folder & install requirements
#@markdown Enter the path to the assignment folder in your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
# NOTE(review): despite the title, this cell does not install any packages.
import sys
import os
import shutil
import warnings

# Path of the assignment's src folder inside the mounted Drive (Colab form field).
folder = "/content/gdrive/MyDrive/IFT6135/HW2_2025/src" #@param {type:"string"}
# Symlink that folder to /content/src. stderr is discarded, so a failure
# (e.g. the link already exists from a previous run) is silent.
!ln -Ts "$folder" /content/src 2> /dev/null

# Add the assignment folder to Python path
# (inserted at the front, and only once, so re-running the cell is harmless).
if '/content/src' not in sys.path:
  sys.path.insert(0, '/content/src')

# Check if CUDA is available
# (only a warning is emitted; execution continues without a GPU).
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

# Imports & Setup

In [1]:
%matplotlib inline
import warnings

from main import train_models
from arguments import Arguments
import os
from plotter import plot_all, plot_metrics
from checkpointing import get_all_checkpoints_per_trials

# Architectures that every experiment cell below iterates over.
models = ['lstm', 'gpt']
# NOTE(review): not referenced by any cell visible in this notebook —
# presumably the seeds are handled inside train_models; confirm.
seeds = [0, 42, 7]
# Set to True when running on Google Colab; experiments 1 and 6 then set
# num_workers to 2.
colab = False

# Experiments

## Experiment 1 (Sanity Check)

In [None]:
# Sanity check: train each model with a fresh Arguments() object, logging
# under logs/experiment1 and using the model name as the experiment name.
experiment1 = Arguments()
if colab: experiment1.num_workers = 2
experiment1.log_dir = "logs/experiment1"

# NOTE(review): `results` is never filled in this cell (the return value of
# train_models is discarded), so plot_all below receives an empty dict.
# Confirm whether the per-model metrics were meant to be collected here.
results = {}
for model in models:
    experiment1.model = model
    
    experiment1.exp_name = model
    train_models(experiment1)

plot_all(results, save_path=experiment1.log_dir, mode="std")

In [None]:
# Recursively archive logs/experiment1 into experiment1.zip.
!zip -r experiment1.zip logs/experiment1

## Experiment 2 (Scaling Data Size : Training Ratio)

In [None]:
# Experiment 2: for each model, sweep the training ratio r_train over
# 0.1, 0.2, ..., 0.9. Each model logs into its own sub-directory of log_dir.
log_dir = "logs/experiment2"
experiment2 = Arguments()
experiment2.verbose = False

for model in models:
    experiment2.model = model
    experiment2.log_dir = os.path.join(log_dir, model)
    for tenth in range(1, 10):
        # Convert the integer step into the ratio used for training and naming.
        r = tenth / 10
        experiment2.r_train = r
        experiment2.exp_name = f"{model}_r_{r}"
        train_models(experiment2)

In [None]:
# Recursively archive logs/experiment2 into experiment2.zip.
!zip -r experiment2.zip logs/experiment2

In [12]:
# Experiment 2 analysis: reload the saved metrics of every (model, r) run,
# plot the per-model curves, then plot total training time against r.
experiment2 = Arguments()
experiment2.verbose = False

# NOTE(review): this is one directory above the "logs/experiment2" used by the
# training cell — presumably this cell was run from a different working
# directory; confirm before re-running.
log_dir = "../logs/experiment2"
configurations = [step / 10 for step in range(1, 10)]
labels = [f"r_{ratio}" for ratio in configurations]

performances, results = {}, {}
for model in models:
    results[model] = {}
    performances[model] = {}
    for configuration, label in zip(configurations, labels):
        exp_name = f"{model}_{label}"
        experiment2.exp_name = exp_name
        experiment2.log_dir = os.path.join(log_dir, model)

        # One checkpoint directory per trial, named by trial index (0 and 1).
        checkpoints = [
            os.path.join(log_dir, model, exp_name, str(trial))
            for trial in range(2)
        ]

        models_path, metrics = get_all_checkpoints_per_trials(
            checkpoints,
            exp_name,
            just_files=True,
            verbose=experiment2.verbose,
        )
        results[model][experiment2.exp_name] = metrics
        performances[model][configuration] = metrics["performance"]

    # Per-model plots are saved in that model's own log directory.
    plot_all(results[model], save_path=experiment2.log_dir, mode="mean")

plot_metrics(
    performances,
    axis_labels=("r", "Time (s)"),
    key_metric="total_elapsed",
    mode="std",
    file_name="Training_Time",
    save_path=log_dir,
    show=experiment2.verbose,
)

## Experiment 3 (Scaling Data Size : P)

In [None]:
# Experiment 3: train every model with p = 11 and operation orders [2, 3],
# logging under logs/experiment3 with the model name as the experiment name.
experiment3 = Arguments()
experiment3.verbose = False
experiment3.log_dir = "logs/experiment3"

# Problem-size overrides for this experiment.
experiment3.p = 11
experiment3.operation_orders = [2, 3]

# Reset here, but neither written to nor read within this cell.
results = {}
for model in models:
    experiment3.exp_name = model
    experiment3.model = model
    train_models(experiment3)

In [None]:
# Recursively archive logs/experiment3 into experiment3.zip.
!zip -r experiment3.zip logs/experiment3

## Experiment 4 (Scaling Model Size)

In [None]:
# Experiment 4: grid over model size. For each model, train with
# num_layers L in {1, 2, 3} and width d in {64, 128, 256}, where d is used
# for both the embedding size and the hidden size.
experiment4 = Arguments()
# Base directory for this experiment. It must be bound to `log_dir`, the name
# the loop below reads; otherwise the loop picks up a stale value left by an
# earlier cell (or fails on a fresh kernel) and the logs land in the wrong place.
log_dir = "logs/experiment4"
experiment4.verbose = False

for model in models:
    experiment4.model = model
    # Each model logs into its own sub-directory of logs/experiment4.
    experiment4.log_dir = os.path.join(log_dir, model)
    for L in [1, 2, 3]:
        experiment4.num_layers = L
        for d in [2**6, 2**7, 2**8]:
            experiment4.embedding_size = d
            experiment4.hidden_size = d
            experiment4.exp_name = f"{model}_L_{L}_d_{d}"
            train_models(experiment4)

In [None]:
# Recursively archive logs/experiment4 into experiment4.zip.
!zip -r experiment4.zip logs/experiment4

## Experiment 5 (Scaling Compute)

In [None]:
# Experiment 5: for each model, sweep the batch size over 32, 64, 128, 256, 512
# with n_steps set to 2 * 10**4 + 1. Each model logs into its own
# sub-directory of log_dir.
log_dir = "logs/experiment5"
experiment5 = Arguments()
experiment5.verbose = False
experiment5.n_steps = 2 * 10**4 + 1

for model in models:
    experiment5.model = model
    experiment5.log_dir = os.path.join(log_dir, model)
    for exponent in range(5, 10):
        # Batch sizes are the powers of two 2**5 .. 2**9.
        B = 2 ** exponent
        experiment5.batch_size = B
        experiment5.exp_name = f"{model}_B_{B}"
        train_models(experiment5)

In [None]:
# Recursively archive logs/experiment5 into experiment5.zip.
!zip -r experiment5.zip logs/experiment5

## Experiment 6 (Regularisation)

In [None]:
# Experiment 6: for each model, sweep weight_decay over 0.25, 0.5, 0.75, 1.0.
experiment6 = Arguments()
if colab: experiment6.num_workers = 2
# Constant base directory. The per-model path is always derived from this
# value rather than from experiment6.log_dir, which would otherwise grow on
# every iteration (logs/experiment6/lstm, then logs/experiment6/lstm/gpt).
log_dir = "logs/experiment6"

for model in models:
    experiment6.model = model
    # Each model logs into its own sub-directory of logs/experiment6.
    experiment6.log_dir = os.path.join(log_dir, model)
    for wd in [0.25, 0.5, 0.75, 1.0]:
        experiment6.weight_decay = wd

        experiment6.exp_name = f"{model}_wd_{wd}"
        train_models(experiment6)

In [None]:
# Recursively archive logs/experiment6 into experiment6.zip.
!zip -r experiment6.zip logs/experiment6