# Google Colab (Skip locally)

In [None]:
# Mount your Google Drive
%matplotlib inline
%load_ext autoreload
%autoreload 2

from google.colab import drive
import sys
import os
import shutil
import warnings

# Mount Google Drive inside the Colab VM at /content/gdrive.
drive.mount('/content/gdrive')

# Drive folder that holds the assignment code. The trailing `#@param` marker is a
# Colab form annotation (it exposes `folder` as an editable string field) -- keep it.
folder = "/content/gdrive/MyDrive/IFT6135/HW1_2025/assignment1_release" #@param {type:"string"}
# Symlink the Drive folder to /content/assignment. `-T` treats the link name as a
# plain file rather than a directory to link into; stderr goes to /dev/null, so any
# failure (e.g. the link already existing when the cell is re-run) is silent.
!ln -Ts "$folder" /content/assignment 2> /dev/null

# Add the assignment folder to Python path.
# It is inserted at the front of sys.path so modules in the linked folder are found
# first; the membership test avoids a duplicate entry when the cell is re-run.
if '/content/assignment' not in sys.path:
  sys.path.insert(0, '/content/assignment')

# Check if CUDA is available.
# This only emits a warning -- nothing here stops execution when no GPU is present.
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

# Imports & Setup

In [None]:
%matplotlib inline
import warnings

from main import train_models
from arguments import Arguments
import os

# Model identifiers looped over by the experiment cells; each one is assigned to
# `args.model` (and used in `args.exp_name`) before `train_models(args)` is called.
models = ['lstm', 'gpt']

# Experiments

# Sanity Check

## Experiment 1

In [None]:
# Sanity check: one training run per model, using an Arguments() instance whose
# only overrides are the log directory, the model and the experiment name.
args = Arguments()
args.log_dir = './logs/experiment1'

results = {}  # model name -> return value of train_models for that model
for model in models:
    # The model name doubles as the experiment name for this run.
    args.model = args.exp_name = model
    results[model] = train_models(args)

# Experiment 2 (Scaling Data Size : Training Ratio)

In [None]:
# Experiment 2: train every model at several training ratios (`r_train`).
args = Arguments()

# Keep the experiment root in its own variable. Joining the model name onto
# `args.log_dir` itself compounds across iterations of the outer loop: the second
# model would log to ./logs/experiment2/lstm/gpt instead of ./logs/experiment2/gpt.
base_log_dir = './logs/experiment2'
args.log_dir = base_log_dir

results = {}
for model in models:
    args.model = model
    args.log_dir = os.path.join(base_log_dir, model)  # one sub-directory per model
    for r in [0.1, 0.3, 0.5, 0.7, 0.9]:
        args.r_train = r

        args.exp_name = f"{model}_r_{r}"
        # NOTE(review): `results[model]` is overwritten for every ratio, so only the
        # last run (r = 0.9) stays in `results`. Left as-is because code outside this
        # cell may rely on the `results[model]` shape -- confirm whether the
        # per-ratio return values are needed in memory.
        results[model] = train_models(args)

# Experiment 3 (Scaling Data Size : P)

In [None]:
# Experiment 3: one training run per model with `p` fixed to 11.
# NOTE(review): the section heading describes scaling over P, but only p = 11 is
# configured here; the TODO below is still open.
args = Arguments()
args.log_dir = "logs/experiment3"
args.p = 11
# TODO: add the rest of the arguments

results = {}  # model name -> return value of train_models for that model
for model in models:
    # The model name doubles as the experiment name for this run.
    args.model = args.exp_name = model
    results[model] = train_models(args)

# Experiment 4 (Scaling Model Size)

In [None]:
# Experiment 4: grid over number of layers (L) and width (d) for every model.
args = Arguments()

# Keep the experiment root in its own variable. Joining the model name onto
# `args.log_dir` itself compounds across iterations of the outer loop: the second
# model would log to logs/experiment4/lstm/gpt instead of logs/experiment4/gpt.
base_log_dir = "logs/experiment4"
args.log_dir = base_log_dir

for model in models:
    args.model = model
    args.log_dir = os.path.join(base_log_dir, model)  # one sub-directory per model
    for L in [1, 2, 3]:
        args.num_layers = L
        for d in [2**6, 2**7, 2**8]:
            # Embedding and hidden sizes are scaled together.
            args.embedding_size = d
            args.hidden_size = d

            args.exp_name = f"{model}_L_{L}_d_{d}"
            # Return value is not kept in this cell.
            train_models(args)

# Experiment 5 (Scaling Compute)

In [None]:
# Experiment 5: sweep the batch size (B) for every model.
args = Arguments()

# Keep the experiment root in its own variable. Joining the model name onto
# `args.log_dir` itself compounds across iterations of the outer loop: the second
# model would log to logs/experiment5/lstm/gpt instead of logs/experiment5/gpt.
base_log_dir = "logs/experiment5"
args.log_dir = base_log_dir

for model in models:
    args.model = model
    args.log_dir = os.path.join(base_log_dir, model)  # one sub-directory per model
    for B in [2**5, 2**6, 2**7, 2**8, 2**9]:
        args.batch_size = B

        args.exp_name = f"{model}_B_{B}"
        # Return value is not kept in this cell.
        train_models(args)

# Experiment 6 (Regularisation)

In [None]:
# Experiment 6: sweep the weight decay (wd) for every model.
args = Arguments()

# Keep the experiment root in its own variable. Joining the model name onto
# `args.log_dir` itself compounds across iterations of the outer loop: the second
# model would log to logs/experiment6/lstm/gpt instead of logs/experiment6/gpt.
base_log_dir = "logs/experiment6"
args.log_dir = base_log_dir

for model in models:
    args.model = model
    args.log_dir = os.path.join(base_log_dir, model)  # one sub-directory per model
    for wd in [0.25, 0.5, 0.75, 1.0]:
        args.weight_decay = wd

        args.exp_name = f"{model}_wd_{wd}"
        # Return value is not kept in this cell.
        train_models(args)