## Setup and Configuration

In [1]:
# Install dependencies
!pip install -q "transformers>=4.45.0" "datasets" "accelerate" "peft" "huggingface_hub"

In [2]:
import math
import random
import time
from dataclasses import dataclass
from typing import List, Tuple, Dict

import torch
from datasets import Dataset as HFDataset
from getpass import getpass
from huggingface_hub import login

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from peft import (
    LoraConfig,
    get_peft_model,
)

# 1. LOGIN + CONFIG
print("Paste your Hugging Face access token (with read permissions):")
hf_token = getpass("HF token: ")
login(hf_token)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", DEVICE)

# NOTE:
# For real experiment on a big GPU, replace these with:
#   SMALL_MODEL_ID = "google/gemma-3n-E4B"
#   BIG_MODEL_ID   = "openai/gpt-oss-20b"
SMALL_MODEL_ID = "gpt2"
BIG_MODEL_ID = "gpt2-medium"
# SMALL_MODEL_ID = "EleutherAI/gpt-neo-125M"
# BIG_MODEL_ID   = "EleutherAI/gpt-neo-1.3B"

BOARD_SIZE = 3
TRAIN_SAMPLES = 300
VAL_SAMPLES   = 50
TEST_SAMPLES  = 50
MAX_MOVES_GENERATION = 32
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

Paste your Hugging Face access token (with read permissions):
HF token: ··········
Using device: cuda


## Sliding Puzzle Logic and BFS Solver

In [3]:
GOAL_STATE = tuple([1,2,3,4,5,6,7,8,0])  # 0 = blank

def is_solvable(board: Tuple[int, ...]) -> bool:
    arr = [x for x in board if x != 0]
    inv_count = 0
    for i in range(len(arr)):
        for j in range(i+1, len(arr)):
            if arr[i] > arr[j]:
                inv_count += 1
    return inv_count % 2 == 0

def random_board_with_max_depth(max_depth=6) -> Tuple[int, ...]:
    """
    Start from the GOAL_STATE and apply a short random walk.
    This guarantees the puzzle is solvable and at most `max_depth` moves away.
    """
    state = list(GOAL_STATE)
    steps = random.randint(1, max_depth)
    for _ in range(steps):
        neighbors = get_neighbors(tuple(state))
        nxt, mv = random.choice(neighbors)
        state = list(nxt)
    return tuple(state)

def random_solvable_board() -> Tuple[int, ...]:
    while True:
        arr = list(range(9))
        random.shuffle(arr)
        board = tuple(arr)
        if board != GOAL_STATE and is_solvable(board):
            return board

def get_neighbors(board: Tuple[int, ...]) -> List[Tuple[Tuple[int, ...], str]]:
    idx0 = board.index(0)
    row, col = divmod(idx0, BOARD_SIZE)
    neighbors = []

    # up
    if row > 0:
        new_idx = idx0 - BOARD_SIZE
        new_board = list(board)
        new_board[idx0], new_board[new_idx] = new_board[new_idx], new_board[idx0]
        neighbors.append((tuple(new_board), "U"))
    # down
    if row < BOARD_SIZE - 1:
        new_idx = idx0 + BOARD_SIZE
        new_board = list(board)
        new_board[idx0], new_board[new_idx] = new_board[new_idx], new_board[idx0]
        neighbors.append((tuple(new_board), "D"))
    # left
    if col > 0:
        new_idx = idx0 - 1
        new_board = list(board)
        new_board[idx0], new_board[new_idx] = new_board[new_idx], new_board[idx0]
        neighbors.append((tuple(new_board), "L"))
    # right
    if col < BOARD_SIZE - 1:
        new_idx = idx0 + 1
        new_board = list(board)
        new_board[idx0], new_board[new_idx] = new_board[new_idx], new_board[idx0]
        neighbors.append((tuple(new_board), "R"))
    return neighbors

def bfs_solve(start: Tuple[int, ...]) -> str:
    if start == GOAL_STATE:
        return ""
    from collections import deque
    queue = deque([start])
    parent: Dict[Tuple[int, ...], Tuple[Tuple[int, ...], str]] = {start: (None, "")}

    while queue:
        state = queue.popleft()
        for nxt, move in get_neighbors(state):
            if nxt not in parent:
                parent[nxt] = (state, move)
                if nxt == GOAL_STATE:
                    path = []
                    cur = nxt
                    while parent[cur][0] is not None:
                        prev, mv = parent[cur]
                        path.append(mv)
                        cur = prev
                    path.reverse()
                    return "".join(path)
                queue.append(nxt)
    return ""  # should not happen

def format_board(board: Tuple[int, ...]) -> str:
    return " ".join(str(x) for x in board)

def make_example(board: Tuple[int, ...]) -> Tuple[str, str]:
    moves = bfs_solve(board)
    prompt = (
        "You are solving a 3x3 sliding puzzle.\n"
        "The board is given row by row as 9 numbers, where 0 is the blank.\n"
        "Goal board is: 1 2 3 4 5 6 7 8 0\n"
        f"Start board: {format_board(board)}\n"
        "Output ONLY the sequence of moves as letters U, D, L, R with no spaces.\n"
        "Moves:"
    )
    target = moves
    return prompt, target

# sanity check
test_board = random_solvable_board()
print("Example board:", format_board(test_board))
print("BFS solution:", bfs_solve(test_board))

Example board: 2 5 7 8 6 1 4 0 3
BFS solution: URDLLURURDLULDRDLURRD


## Dataset Generation

In [4]:
def build_split(n_samples: int, max_depth: int = 6):
    prompts = []
    targets = []
    boards = []
    for _ in range(n_samples):
        b = random_board_with_max_depth(max_depth=max_depth)
        p, t = make_example(b)
        prompts.append(p)
        targets.append(t)
        boards.append(b)
    return prompts, targets, boards

train_prompts, train_targets, train_boards = build_split(TRAIN_SAMPLES)
val_prompts,   val_targets,   val_boards   = build_split(VAL_SAMPLES)
test_prompts,  test_targets,  test_boards  = build_split(TEST_SAMPLES)

print("First train prompt:\n", train_prompts[0])
print("First train target:\n", train_targets[0])

train_hf = HFDataset.from_dict({"prompt": train_prompts, "target": train_targets})
val_hf   = HFDataset.from_dict({"prompt": val_prompts,   "target": val_targets})
test_hf  = HFDataset.from_dict({"prompt": test_prompts,  "target": test_targets})

First train prompt:
 You are solving a 3x3 sliding puzzle.
The board is given row by row as 9 numbers, where 0 is the blank.
Goal board is: 1 2 3 4 5 6 7 8 0
Start board: 1 2 3 4 5 6 7 0 8
Output ONLY the sequence of moves as letters U, D, L, R with no spaces.
Moves:
First train target:
 R


## Tokenizer and LoRA Model Setup

In [5]:
tokenizer = AutoTokenizer.from_pretrained(SMALL_MODEL_ID)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    SMALL_MODEL_ID,
)
base_model.to(DEVICE)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364




## Tokenization for Training

In [6]:
def preprocess_fn(examples):
    texts = []
    for p, t in zip(examples["prompt"], examples["target"]):
        full_text = p + " " + t
        texts.append(full_text)
    return tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=256,
    )

train_tokenized = train_hf.map(preprocess_fn, batched=True, remove_columns=["prompt", "target"])
val_tokenized   = val_hf.map(preprocess_fn,   batched=True, remove_columns=["prompt", "target"])

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

## LoRA Model Training

In [7]:
output_dir = "small_gpt2_sliding_puzzle_lora"

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_steps=10,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    save_total_limit=2,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    data_collator=data_collator,
)

print("Starting LoRA training (small GPT-2)...")
trainer.train()

trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)
print("LoRA fine-tuned adapter saved to:", output_dir)

Starting LoRA training (small GPT-2)...


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,3.9525
20,3.9389
30,3.8871
40,3.8321
50,3.8073
60,3.7897
70,3.8249


LoRA fine-tuned adapter saved to: small_gpt2_sliding_puzzle_lora


## Evaluation Helpers

In [8]:
@dataclass
class EvalResult:
    solve_rate: float
    avg_extra_moves: float
    avg_invalid_moves: float
    avg_time_ms: float
    n: int

def build_prompt_from_board(board: Tuple[int, ...]) -> str:
    p, _ = make_example(board)
    return p

def extract_moves_from_output(prompt: str, generated_text: str) -> str:
    idx = generated_text.rfind("Moves:")
    if idx == -1:
        tail = generated_text
    else:
        tail = generated_text[idx + len("Moves:"):]
    moves = "".join(ch for ch in tail if ch in "UDLR")
    return moves

def evaluate_model_on_puzzles(
    model,
    tokenizer,
    puzzles: List[Tuple[int, ...]],
    true_targets: List[str],
    max_new_tokens: int = 32,
) -> EvalResult:
    model.eval()
    solve_count = 0
    total_extra_moves = 0
    total_invalid = 0
    total_time = 0.0

    for board, true_moves in zip(puzzles, true_targets):
        prompt = build_prompt_from_board(board)
        inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
        model.to(DEVICE)

        start = time.perf_counter()
        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                temperature=None,
            )
        end = time.perf_counter()
        total_time += (end - start)

        full = tokenizer.decode(out[0], skip_special_tokens=True)
        pred_moves = extract_moves_from_output(prompt, full)

        invalid_count = 0
        state = list(board)
        for mv in pred_moves:
            idx0 = state.index(0)
            row, col = divmod(idx0, BOARD_SIZE)
            valid = True
            if mv == "U" and row > 0:
                new_idx = idx0 - BOARD_SIZE
            elif mv == "D" and row < BOARD_SIZE - 1:
                new_idx = idx0 + BOARD_SIZE
            elif mv == "L" and col > 0:
                new_idx = idx0 - 1
            elif mv == "R" and col < BOARD_SIZE - 1:
                new_idx = idx0 + 1
            else:
                valid = False
            if not valid:
                invalid_count += 1
                continue
            state[idx0], state[new_idx] = state[new_idx], state[idx0]

        final_state = tuple(state)
        solved = (final_state == GOAL_STATE)
        if solved:
            solve_count += 1

        extra_moves = max(0, len(pred_moves) - len(true_moves))
        total_extra_moves += extra_moves
        total_invalid += invalid_count

    n = len(puzzles)
    return EvalResult(
        solve_rate=solve_count / n,
        avg_extra_moves=total_extra_moves / n,
        avg_invalid_moves=total_invalid / n,
        avg_time_ms=(total_time / n) * 1000.0,
        n=n,
    )

def pretty_print_results(name: str, res: EvalResult):
    print(f"=== {name} ===")
    print(f"Test puzzles       : {res.n}")
    print(f"Solve rate         : {res.solve_rate*100:.1f}%")
    print(f"Avg extra moves    : {res.avg_extra_moves:.2f}")
    print(f"Avg invalid moves  : {res.avg_invalid_moves:.2f}")
    print(f"Avg solve time     : {res.avg_time_ms:.1f} ms")
    print()

## Evaluation and Comparison

In [9]:
print("Loading BASE small model for comparison...")
base_small = AutoModelForCausalLM.from_pretrained(SMALL_MODEL_ID).to(DEVICE)

print("Evaluating BASE small model (gpt2)...")
base_small_res = evaluate_model_on_puzzles(
    base_small,
    tokenizer,
    test_boards,
    test_targets,
    max_new_tokens=MAX_MOVES_GENERATION,
)
pretty_print_results("Small model (base gpt2)", base_small_res)

print("Evaluating LoRA FINE-TUNED small model (gpt2)...")
ft_small_res = evaluate_model_on_puzzles(
    model,
    tokenizer,
    test_boards,
    test_targets,
    max_new_tokens=MAX_MOVES_GENERATION,
)
pretty_print_results("Small model (LoRA fine-tuned gpt2)", ft_small_res)

print("Loading BIG baseline model (gpt2-medium)...")
big_tokenizer = AutoTokenizer.from_pretrained(BIG_MODEL_ID)
if big_tokenizer.pad_token is None:
    big_tokenizer.pad_token = big_tokenizer.eos_token

big_model = AutoModelForCausalLM.from_pretrained(BIG_MODEL_ID).to(DEVICE)

print("Evaluating BIG baseline model (gpt2-medium)...")
big_res = evaluate_model_on_puzzles(
    big_model,
    big_tokenizer,
    test_boards,
    test_targets,
    max_new_tokens=MAX_MOVES_GENERATION,
)
pretty_print_results("Big baseline (gpt2-medium)", big_res)

print("FINAL COMPARISON")
pretty_print_results("Small model (base gpt2)", base_small_res)
pretty_print_results("Small model (LoRA fine-tuned gpt2)", ft_small_res)
pretty_print_results("Big baseline (gpt2-medium)", big_res)
print("Use these numbers to discuss:")
print("- Solve rate (accuracy)")
print("- Extra moves vs optimal BFS solution")
print("- Invalid moves (constraint satisfaction)")
print("- Average solve time per puzzle (efficiency)")
print("- Improvement from base -> fine-tuned")
print("- Comparison of small vs bigger model")

Loading BASE small model for comparison...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Evaluating BASE small model (gpt2)...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

=== Small model (base gpt2) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 0.00
Avg invalid moves  : 0.00
Avg solve time     : 758.4 ms

Evaluating LoRA FINE-TUNED small model (gpt2)...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

=== Small model (LoRA fine-tuned gpt2) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 0.00
Avg invalid moves  : 0.00
Avg solve time     : 724.6 ms

Loading BIG baseline model (gpt2-medium)...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Evaluating BIG baseline model (gpt2-medium)...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

=== Big baseline (gpt2-medium) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 2.02
Avg invalid moves  : 0.44
Avg solve time     : 577.5 ms

FINAL COMPARISON
=== Small model (base gpt2) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 0.00
Avg invalid moves  : 0.00
Avg solve time     : 758.4 ms

=== Small model (LoRA fine-tuned gpt2) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 0.00
Avg invalid moves  : 0.00
Avg solve time     : 724.6 ms

=== Big baseline (gpt2-medium) ===
Test puzzles       : 50
Solve rate         : 10.0%
Avg extra moves    : 2.02
Avg invalid moves  : 0.44
Avg solve time     : 577.5 ms

Use these numbers to discuss:
- Solve rate (accuracy)
- Extra moves vs optimal BFS solution
- Invalid moves (constraint satisfaction)
- Average solve time per puzzle (efficiency)
- Improvement from base -> fine-tuned
- Comparison of small vs bigger model
