# Google Colab (Skip locally)

In [None]:
#@title Mount your Google Drive
# Colab-only cell: sets up notebook magics and mounts Google Drive.
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.

# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell executes, so edits to the assignment's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Mount Google Drive at /content/gdrive; the next Colab cell points into this
# mount to locate the assignment folder.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
#@title Link your assignment folder & install requirements
#@markdown Enter the path to the assignment folder in your Google Drive
# Colab-only cell: exposes the assignment folder from Google Drive as
# /content/src and makes it importable.
# NOTE(review): the title mentions installing requirements, but this cell
# contains no install command.
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
import sys
import os
import shutil
import warnings

# Path of the assignment sources inside the mounted Drive (editable through the
# Colab form field generated by the #@param annotation).
folder = "/content/gdrive/MyDrive/IFT6135/HW2_2025/src" #@param {type:"string"}
# Symlink the Drive folder to /content/src. "$folder" is expanded from the
# Python variable above. -T treats /content/src as the link name itself rather
# than as a directory to create the link inside. stderr is discarded, so a
# failure (e.g. the link already exists on a rerun) is silent.
!ln -Ts "$folder" /content/src 2> /dev/null

# Add the assignment folder to Python path
# (guarded so that re-running the cell does not insert the entry twice)
if '/content/src' not in sys.path:
  sys.path.insert(0, '/content/src')

# Check if CUDA is available
# Only a warning is emitted; execution continues without a GPU.
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

# Imports & Setup

In [None]:
%matplotlib inline
import warnings

# Check if CUDA is available
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

from main import train_models
from arguments import Arguments
import os
from plotter import generate_plots

models = ['lstm', 'gpt']
colab = False

# Experiments

## Experiment 1 (Sanity Check)

In [None]:
# Sanity check: train every model with n_steps set to 50, logging all runs
# under a single directory, then plot the collected results.
experiment1 = Arguments()
experiment1.log_dir = "logs/experiment1"
experiment1.n_steps = 50
if colab:
    experiment1.num_workers = 2

results = {}
for model in models:
    # The run is named after the model itself.
    experiment1.model = experiment1.exp_name = model
    # train_models is unpacked as a 3-tuple; only the middle element is kept.
    _, results[model], _ = train_models(experiment1)

generate_plots(results, save_path=experiment1.log_dir, mode="std")

## Experiment 2 (Scaling Data Size : Training Ratio)

In [None]:
# Experiment 2: train each model at several values of r_train
# (the training ratio, per the section title).
experiment2 = Arguments()
if colab: experiment2.num_workers = 2

# Fixed root for this experiment. Each model's sub-directory is derived from
# this root rather than from experiment2.log_dir, because joining onto the
# attribute itself accumulates across iterations (the second model would log
# to logs/experiment2/lstm/gpt instead of logs/experiment2/gpt).
experiment2_log_root = "logs/experiment2"
experiment2.log_dir = experiment2_log_root

results = {}
for model in models:
    experiment2.model = model
    experiment2.log_dir = os.path.join(experiment2_log_root, model)
    for r in [0.1, 0.3, 0.5, 0.7, 0.9]:
        experiment2.r_train = r

        experiment2.exp_name = f"{model}_r_{r}"
        # NOTE(review): results[model] is overwritten for every ratio, so only
        # the last ratio's return value stays in memory; presumably each run is
        # also persisted under log_dir by train_models -- confirm.
        results[model] = train_models(experiment2)

## Experiment 3 (Scaling Data Size : P)

In [None]:
# Experiment 3: train every model with p fixed to 11, logging all runs under
# one directory.
experiment3 = Arguments()
experiment3.log_dir = "logs/experiment3"
if colab:
    experiment3.num_workers = 2

experiment3.p = 11
# todo: add the rest of the arguments

results = {}
for model in models:
    # The run is named after the model itself.
    experiment3.model = experiment3.exp_name = model
    # The full return value of train_models is stored for each model.
    results[model] = train_models(experiment3)

## Experiment 4 (Scaling Model Size)

In [None]:
# Experiment 4: for each model, sweep the number of layers and the
# embedding/hidden width (both set to the same value d).
experiment4 = Arguments()
if colab: experiment4.num_workers = 2

# Fixed root for this experiment. Each model's sub-directory is derived from
# this root rather than from experiment4.log_dir, because joining onto the
# attribute itself accumulates across iterations (the second model would log
# to logs/experiment4/lstm/gpt instead of logs/experiment4/gpt).
experiment4_log_root = "logs/experiment4"
experiment4.log_dir = experiment4_log_root

for model in models:
    experiment4.model = model
    experiment4.log_dir = os.path.join(experiment4_log_root, model)
    for L in [1, 2, 3]:
        experiment4.num_layers = L
        for d in [2**6, 2**7, 2**8]:
            # Embedding and hidden sizes are scaled together.
            experiment4.embedding_size = d
            experiment4.hidden_size = d

            # One run per (model, L, d) combination, named accordingly.
            experiment4.exp_name = f"{model}_L_{L}_d_{d}"
            train_models(experiment4)

## Experiment 5 (Scaling Compute)

In [None]:
# Experiment 5: for each model, sweep the batch size over powers of two
# from 2**5 to 2**9.
experiment5 = Arguments()
if colab: experiment5.num_workers = 2

# Fixed root for this experiment. Each model's sub-directory is derived from
# this root rather than from experiment5.log_dir, because joining onto the
# attribute itself accumulates across iterations (the second model would log
# to logs/experiment5/lstm/gpt instead of logs/experiment5/gpt).
experiment5_log_root = "logs/experiment5"
experiment5.log_dir = experiment5_log_root

for model in models:
    experiment5.model = model
    experiment5.log_dir = os.path.join(experiment5_log_root, model)
    for B in [2**5, 2**6, 2**7, 2**8, 2**9]:
        experiment5.batch_size = B

        # One run per (model, batch size) combination, named accordingly.
        experiment5.exp_name = f"{model}_B_{B}"
        train_models(experiment5)

## Experiment 6 (Regularisation)

In [None]:
# Experiment 6: for each model, sweep the weight_decay setting.
experiment6 = Arguments()
if colab: experiment6.num_workers = 2

# Fixed root for this experiment. Each model's sub-directory is derived from
# this root rather than from experiment6.log_dir, because joining onto the
# attribute itself accumulates across iterations (the second model would log
# to logs/experiment6/lstm/gpt instead of logs/experiment6/gpt).
experiment6_log_root = "logs/experiment6"
experiment6.log_dir = experiment6_log_root

for model in models:
    experiment6.model = model
    experiment6.log_dir = os.path.join(experiment6_log_root, model)
    for wd in [0.25, 0.5, 0.75, 1.0]:
        experiment6.weight_decay = wd

        # One run per (model, weight decay) combination, named accordingly.
        experiment6.exp_name = f"{model}_wd_{wd}"
        train_models(experiment6)