In [None]:
#@title Imports
%load_ext autoreload
%aimport core_utils
%aimport custom_model
%aimport training_utils
%aimport eval_utils
%autoreload 1

import core_utils
import custom_model
import training_utils
import eval_utils

import torch

In [None]:
#@title Setup
# Runs the project's Colab setup helper before any training or checkpoint
# loading happens. What it configures is defined in core_utils and is not
# visible from this notebook.
# NOTE(review): the checkpoint path used further down lives under
# /content/drive/MyDrive, so presumably this is where Google Drive gets
# mounted -- confirm in core_utils.colab_setup.
core_utils.colab_setup()

In [None]:
#@title Training

# Every training scheme used in this notebook, keyed by a short name. Each
# value is the complete `cfg` dict handed to `training_utils.train`, so picking
# a different scheme means changing SCHEME below instead of (un)commenting
# whole blocks of code.
TRAINING_SCHEMES = {
    # Sinusoidal activation trained with chapter curriculum
    # NOTE(review): "save_every" is 100 here but 5_000 in every other scheme.
    # If it is an epoch interval that is 550 save points over 55_000 epochs --
    # confirm this is intentional.
    "sin_chapter": {
        "activation_fn": "x_plus_sin2",
        "curriculum": "chapter",
        "epochs": 55_000,
        "save_every": 100,
        "max_waves": 5,
        "seed": 42,
    },
    # Sinusoidal activation trained with interspersed curriculum
    "sin_interspersed": {
        "activation_fn": "x_plus_sin2",
        "curriculum": "interspersed",
        "epochs": 55_000,
        "save_every": 5_000,
        "max_waves": 5,
        "seed": 42,
        "curriculum_config": {"interspersed_every": 10},
    },
    # Sinusoidal activation trained with standard incremental curriculum
    "sin_standard": {
        "activation_fn": "x_plus_sin2",
        "curriculum": "standard",
        "epochs": 55_000,
        "save_every": 5_000,
        "max_waves": 5,
        "seed": 42,
    },
    # GeLU activation trained with chapter curriculum.
    # "activation_fn" is deliberately left out, exactly as in the original
    # variant; presumably training_utils.train then falls back to GeLU --
    # confirm against training_utils.
    "gelu_chapter": {
        "curriculum": "chapter",
        "epochs": 55_000,
        "save_every": 5_000,
        "max_waves": 5,
        "seed": 42,
    },
}

# Name of the scheme to run; must be one of the TRAINING_SCHEMES keys.
SCHEME = "sin_chapter"

if SCHEME not in TRAINING_SCHEMES:
    raise ValueError(
        f"Unknown training scheme {SCHEME!r}; "
        f"choose one of {sorted(TRAINING_SCHEMES)}"
    )

model = training_utils.train(
    # Shallow copy, so that any top-level key `train` might add to its cfg
    # does not show up in the table above when it is inspected afterwards.
    cfg=dict(TRAINING_SCHEMES[SCHEME]),
    use_wandb=True,
)

In [None]:
#@title Load trained checkpoint
# Builds a fresh GPT2Regressor and restores its weights from a checkpoint file.
# NOTE: this rebinds `model`, replacing whatever the Training cell returned.
NUM_WAVES = 5

# Only "grid_len" and "rand_len" are read in this cell (to size the model's
# maximum sequence length); the remaining keys are kept unchanged.
cfg = {
    "batch_size": 32,
    "grid_len":    2 * NUM_WAVES + 1, # must be strictly greater than 2 * n_waves
    "rand_len":    2 * NUM_WAVES + 1,
    "lr":          1e-4,
    "epochs":      55_000,
    "name": f"sine-fit-run_WAVES={NUM_WAVES}_norm"
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = custom_model.GPT2Regressor(
    input_dim=1,
    output_dim=1,
    # NOTE(review): the factor 2 presumably accounts for one x token and one
    # y token per point -- confirm against custom_model.GPT2Regressor.
    max_seq_len=2*(cfg["grid_len"] + cfg["rand_len"]),
    activation_fn="x_plus_sin2", # Comment line to switch to GeLU
)

weights_path = "/content/drive/MyDrive/sinusoidal_icl/checkpoints/sinusoidal_fit_max_waves_5_x_plus_sin2_activation_chapter_curriculum/sine-fit-run_chapter_curriculum_WAVES=5_sin_act_final.pth"

# The file is loaded into load_state_dict, i.e. it is expected to hold only
# tensors. weights_only=True restricts unpickling to tensors and plain
# containers, so a tampered checkpoint cannot execute arbitrary code.
state_dict = torch.load(weights_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)

# Switch to inference mode (disables dropout etc.) before the evaluation cell.
# eval() returns the module itself, so the cell still displays the model repr.
model.eval()

In [None]:
#@title Evaluation
# Evaluates whichever `model` is currently bound in the kernel (the most
# recently executed of the training / checkpoint-loading cells).
# NOTE(review): the positional arguments 100 and 5 are not named at this call
# site. 5 matches NUM_WAVES / "max_waves" used earlier in the notebook, and 100
# is presumably a number of evaluation samples -- confirm both against
# eval_utils.evaluate_model's signature and consider passing them by keyword.
eval_utils.evaluate_model(model, 100, 5)