# TinyTransformer: Train and Evaluate
Set your arguments below and run the desired cell.
- Train cell: trains the model and automatically evaluates on test pairs using solutions.json.
- Eval-only cell: loads a checkpoint and evaluates on test pairs.
- Optional single-example inference cell, followed by a convenience Train + Eval combo cell at the end.

In [None]:
# %cd /content/mdlARC/
from pathlib import Path
import argparse
import importlib
import utils, train

# Refresh the project modules so edits made while iterating are picked up.
# The tuple order is deliberate: utils goes first because train imports utils.
for _mod in (utils, train):
    importlib.reload(_mod)

# Editable arguments: adjust the values here, then run the cell you need.
args = dict(
    # Run settings
    data_path=Path("assets/script-tests/grouped-tasks-00d62c1b/challenges.json"),
    batch_size=6,
    epochs=500,
    lr=3e-4,
    weight_decay=0.01,
    grad_clip=1.0,
    max_steps=0,  # 0 disables
    num_workers=0,
    device="cuda",  # 'cuda' | 'mps' | 'cpu'
    seed=42,
    # Paths
    save_path=Path("runs/tiny.pt"),
    # Use Path('runs/tiny.pt') to load a checkpoint; otherwise use None.
    checkpoint_path=Path("runs/tiny.pt"),
    # Each cell below overrides eval_only on its own copy of these args.
    eval_only=True,
    # Single-example inference: task id to run, e.g. "3aa6fb7a", "00d62c1b",
    # "e0fb7511" or "00576224".
    inference_task_id="00d62c1b",
    inference_pair_index=0,
    max_new_tokens=1024,
    # Visibility toggles
    log_train_strings=False,
    log_train_limit=10,
    log_inference_prompt=True,
    log_eval_strings=True,
    log_eval_limit=10,
    plot_inference_grids=True,
)


def make_namespace(d):
    """Build an ``argparse.Namespace`` from a plain settings mapping.

    Values stored under the path-like keys ``data_path``, ``save_path`` and
    ``checkpoint_path`` are converted to ``pathlib.Path`` unless they are
    ``None`` or already a ``Path``; keys that are absent are skipped.
    The conversion is applied to a shallow copy, so the caller's mapping
    is left untouched.

    Args:
        d: Mapping of option names (strings) to values.

    Returns:
        An ``argparse.Namespace`` with one attribute per key of ``d``.
    """
    path_keys = ("data_path", "save_path", "checkpoint_path")
    # Work on a copy: normalising in place would silently rewrite the
    # dictionary the caller handed in.
    options = dict(d)
    for key in path_keys:
        value = options.get(key)
        if value is not None and not isinstance(value, Path):
            options[key] = Path(value)
    return argparse.Namespace(**options)


In [None]:
# Training only
# Take a private copy of the shared args with eval_only switched off.
cfg = {**args, "eval_only": False}
ns = make_namespace(cfg)
# build_model_and_data hands back everything the training call needs.
(
    model,
    dataset,
    dataloader,
    device,
    data_path,
) = train.build_model_and_data(ns)
train.train_model(
    ns,
    model=model,
    dataloader=dataloader,
    dataset=dataset,
    device=device,
    data_path=data_path,
)


In [None]:
# Eval-only across test pairs (requires a checkpoint or weights already in memory)
# Take a private copy of the shared args with eval_only switched on.
cfg = {**args, "eval_only": True}
ns = make_namespace(cfg)
# The dataloader is unpacked as well, although evaluate_model is not given it.
(
    model,
    dataset,
    dataloader,
    device,
    data_path,
) = train.build_model_and_data(ns)
train.evaluate_model(
    ns,
    model=model,
    dataset=dataset,
    device=device,
    data_path=data_path,
)


In [None]:
# Optional: Single-example inference by task id and pair index
# Set args['inference_task_id'] above (e.g., '00576224'), then run this cell.
cfg = dict(args)
cfg["eval_only"] = True
# Validate before the model/data build so a missing task id fails immediately,
# and raise explicitly: an `assert` is stripped when Python runs with -O.
if cfg["inference_task_id"] is None:
    raise ValueError("Set inference_task_id in args first.")
ns = make_namespace(cfg)
model, dataset, dataloader, device, data_path = train.build_model_and_data(ns)
# The inference options are read from cfg and passed as explicit keyword arguments.
train.run_inference(
    model=model,
    dataset=dataset,
    task_id=cfg["inference_task_id"],
    pair_index=cfg["inference_pair_index"],
    device=device,
    max_new_tokens=cfg["max_new_tokens"],
    log_prompt=cfg["log_inference_prompt"],
    plot_grids_flag=cfg["plot_inference_grids"],
)


In [None]:
# Train + Eval combo (convenience)
# Take a private copy of the shared args with eval_only switched off.
cfg = {**args, "eval_only": False}
ns = make_namespace(cfg)
(
    model,
    dataset,
    dataloader,
    device,
    data_path,
) = train.build_model_and_data(ns)

# Step 1: train.
train.train_model(
    ns,
    model=model,
    dataloader=dataloader,
    dataset=dataset,
    device=device,
    data_path=data_path,
)

# Step 2: evaluate the same in-memory model, reusing the same namespace.
# NOTE(review): the notebook header says the train cell already evaluates
# automatically -- confirm whether this explicit call repeats that evaluation.
train.evaluate_model(
    ns,
    model=model,
    dataset=dataset,
    device=device,
    data_path=data_path,
)
