# Notebook 02 — Task 1 (Playing by Mine Prediction)

In this notebook, I train **three separate Task‑1 models** (one per difficulty):

- Easy: 22×22, 50 mines
- Medium: 22×22, 80 mines
- Hard: 22×22, 100 mines

I’m not mounting Google Drive here (it kept hanging for me). Instead, I unzip my project bundle beforehand so the repo lives at `/content/repo/` (the setup cell below expects it to already be there), and then I train.

I save:
- datasets as `.npz` (under `models/task1/datasets/`)
- checkpoints as `models/task1/checkpoints/task1_{easy,medium,hard}.pt`



In [1]:
# Colab installs
# NOTE: I avoid re-installing torch in Colab because it can create checkpoint loading issues
# if the runtime's torch version changes mid-session.
# The packages below are installed without version pins, so the exact versions
# depend on whatever pip resolves at the time this cell runs.
%pip install -q numpy matplotlib tqdm scikit-learn


In [1]:
# Set repo root (the bundle is expected to be already unzipped under /content/repo)
import sys
from pathlib import Path


def _has_repo_layout(path: Path) -> bool:
    """Return True when `path` directly contains both `minesweeper` and `models`."""
    return (path / 'minesweeper').exists() and (path / 'models').exists()


repo_root = Path('/content/repo')

# Zip extraction sometimes creates one extra top-level folder (and macOS archives add a
# sibling `__MACOSX/`), so I look for the single child directory that has the expected
# layout instead of requiring exactly one child directory overall.
# The `is_dir()` guard keeps a missing /content/repo from failing inside `iterdir()`
# before the explanatory error below can be raised.
if not _has_repo_layout(repo_root) and repo_root.is_dir():
    nested = [p for p in sorted(repo_root.iterdir()) if p.is_dir() and _has_repo_layout(p)]
    if len(nested) == 1:
        repo_root = nested[0]

if not _has_repo_layout(repo_root):
    raise FileNotFoundError(
        f"Bad repo_root: {repo_root}\n"
        "I expected `minesweeper/` and `models/` directly inside /content/repo "
        "(or inside exactly one of its sub-folders)."
    )

# Re-running this cell should not keep stacking duplicate entries onto sys.path.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
print(f"Repo root: {repo_root}")



## Step 1 — Generate / load datasets

I generate one `.npz` dataset per difficulty. I keep the datasets under `models/task1/datasets/` so I don’t have to regenerate them every run.



In [None]:
import json
from pathlib import Path

from models.dataset_cache import dataset_dir_for_task, ensure_npz
from models.task1.dataset import generate_task1_dataset_npz
from models.task1 import load_task1_npz

# Datasets are kept under models/task1/datasets/ so they stay next to the task code.
DATA_DIR = dataset_dir_for_task(repo_root=repo_root, task='task1')
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Board size and mine count for each difficulty level.
DIFFICULTIES = {
    'easy': dict(height=22, width=22, num_mines=50),
    'medium': dict(height=22, width=22, num_mines=80),
    'hard': dict(height=22, width=22, num_mines=100),
}

# Number of games requested per difficulty; these can be scaled up for better generalization.
NUM_GAMES = dict(easy=1000, medium=2000, hard=3000)

datasets = {}
for level, board in DIFFICULTIES.items():
    npz_path = DATA_DIR / f'task1_{level}_teacher_logic.npz'

    # Arguments forwarded to the generator through `ensure_npz`.
    gen_kwargs = {
        'height': board['height'],
        'width': board['width'],
        'num_mines': board['num_mines'],
        'num_games': NUM_GAMES[level],
        'teacher': 'logic',
        'allow_mine_triggers': True,
        'max_clicks_per_game': 512,
        'seed': 0,
    }
    ensure_npz(
        out_path=npz_path,
        generator=generate_task1_dataset_npz,
        generator_kwargs=gen_kwargs,
        force=False,
        verbose=True,
    )

    loaded = load_task1_npz(npz_path)
    loaded_meta = json.loads(loaded.meta_json)
    n_samples = loaded.x_visible.shape[0]
    print(f"{level}: samples={n_samples} games={loaded_meta.get('num_games')}")
    datasets[level] = loaded



## Step 2 — Train models (masked loss)

I train on unrevealed cells only (that’s where the uncertainty is).



I keep the training loop pretty straightforward:

- model outputs per-cell mine logits
- loss is masked so I only train on unrevealed cells
- I train one model per difficulty and save checkpoints under `models/task1/checkpoints/`



In [None]:
from dataclasses import asdict

import torch
from torch.utils.data import DataLoader

from models.metrics import (
    masked_bce_with_logits,
    masked_binary_confusion_from_logits,
    binary_metrics_from_confusion,
    pos_weight_from_targets,
)
from models.task1 import Task1Dataset
from models.task1.model import MinePredictor, MinePredictorConfig


def _add_conf(dst: dict, src: dict) -> None:
    for k in ('tp', 'fp', 'tn', 'fn', 'n'):
        dst[k] = int(dst.get(k, 0) or 0) + int(src.get(k, 0) or 0)


def train_one_model(
    *,
    name: str,
    npz,
    cfg: MinePredictorConfig,
    epochs: int = 15,
    batch_size: int = 64,
    lr: float = 3e-4,
    weight_decay: float = 1e-2,
    val_frac: float = 0.1,
    seed: int = 0,
    threshold: float = 0.5,
    use_pos_weight: bool = True,
    early_stop_patience: int = 4,
    early_stop_min_delta: float = 1e-4,
):
    """Train one `MinePredictor` with a masked BCE loss and return it.

    The dataset is split into train/validation with a seeded permutation. Each
    epoch runs a training pass (with AMP when CUDA is available) and a
    validation pass, prints loss/accuracy/precision/recall/F1 for both, and
    tracks the best validation F1 for early stopping.

    Args:
        name: Label used only as a prefix in the printed log lines.
        npz: Loaded dataset object handed to `Task1Dataset`.
        cfg: Model configuration passed to `MinePredictor`.
        epochs: Maximum number of epochs.
        batch_size: Batch size for both loaders.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        val_frac: Fraction of samples held out for validation; the resulting
            count is clamped to `[0, len(dataset)]`.
        seed: Seeds the split, the training shuffle order and torch's global
            RNG (so weight initialisation is repeatable).
        threshold: Decision threshold forwarded to
            `masked_binary_confusion_from_logits`.
        use_pos_weight: When true, a `pos_weight` is computed per batch via
            `pos_weight_from_targets` and passed to the loss.
        early_stop_patience: Stop after this many consecutive epochs without a
            validation-F1 improvement.
        early_stop_min_delta: Minimum F1 gain that counts as an improvement.

    Returns:
        The trained model. When a validation split exists, the weights from
        the epoch with the best validation F1 are restored; otherwise the
        final-epoch weights are kept.

    Raises:
        ValueError: If the split leaves no training samples.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Seed the global RNG so model initialisation (and any stochastic layers) is
    # repeatable; previously `seed` only affected the train/val split.
    torch.manual_seed(int(seed))

    ds = Task1Dataset(npz)
    g = torch.Generator().manual_seed(seed)
    perm = torch.randperm(len(ds), generator=g)
    # Clamp so a negative or >1 fraction cannot produce a nonsensical slice.
    n_val = max(0, min(len(ds), int(len(ds) * val_frac)))
    val_idx = perm[:n_val].tolist()
    train_idx = perm[n_val:].tolist()
    if not train_idx:
        raise ValueError(
            f'[{name}] no training samples: dataset has {len(ds)} sample(s) and val_frac={val_frac}'
        )
    has_val = n_val > 0

    # The shuffle order is drawn from the seeded generator instead of the global RNG.
    train_loader = DataLoader(
        torch.utils.data.Subset(ds, train_idx),
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        generator=g,
    )
    val_loader = DataLoader(torch.utils.data.Subset(ds, val_idx), batch_size=batch_size, shuffle=False, num_workers=0)

    model = MinePredictor(cfg).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Mixed precision is only enabled on CUDA; on CPU the scaler/autocast are no-ops.
    use_amp = torch.cuda.is_available()
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    best_f1 = -1.0
    best_state = None
    patience = 0

    for epoch in range(1, int(epochs) + 1):
        # ---- training pass ----
        model.train()
        tr_loss = 0.0
        tr_steps = 0
        tr_conf = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0, 'n': 0}

        for batch in train_loader:
            x = batch['x'].to(device)
            y = batch['y'].to(device)
            m = batch['mask'].to(device)

            opt.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=use_amp):
                logits = model(x)
                pw = pos_weight_from_targets(y, m) if bool(use_pos_weight) else None
                loss = masked_bce_with_logits(logits, y, m, pos_weight=pw)

            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

            tr_loss += float(loss.item())
            tr_steps += 1
            _add_conf(tr_conf, masked_binary_confusion_from_logits(logits.detach(), y, m, threshold=float(threshold)))

        tr_loss /= max(1, tr_steps)
        tr_m = binary_metrics_from_confusion(tr_conf['tp'], tr_conf['fp'], tr_conf['tn'], tr_conf['fn'])

        # ---- validation pass ----
        model.eval()
        va_loss = 0.0
        va_steps = 0
        va_conf = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0, 'n': 0}
        with torch.no_grad():
            for batch in val_loader:
                x = batch['x'].to(device)
                y = batch['y'].to(device)
                m = batch['mask'].to(device)
                logits = model(x)
                pw = pos_weight_from_targets(y, m) if bool(use_pos_weight) else None
                va_loss += float(masked_bce_with_logits(logits, y, m, pos_weight=pw).item())
                va_steps += 1
                _add_conf(va_conf, masked_binary_confusion_from_logits(logits, y, m, threshold=float(threshold)))

        va_loss /= max(1, va_steps)
        va_m = binary_metrics_from_confusion(va_conf['tp'], va_conf['fp'], va_conf['tn'], va_conf['fn'])

        print(
            f'[{name}] epoch {epoch}/{epochs} | '
            f'train loss {tr_loss:.4f} acc {tr_m["acc"]:.3f} prec {tr_m["precision"]:.3f} rec {tr_m["recall"]:.3f} f1 {tr_m["f1"]:.3f} | '
            f'val loss {va_loss:.4f} acc {va_m["acc"]:.3f} prec {va_m["precision"]:.3f} rec {va_m["recall"]:.3f} f1 {va_m["f1"]:.3f}'
        )

        if not has_val:
            # Without validation data the F1 above is computed from an empty confusion
            # matrix, so it must not drive early stopping or best-weight restoration.
            continue

        cur_f1 = float(va_m.get('f1', 0.0) or 0.0)
        if cur_f1 > (best_f1 + float(early_stop_min_delta)):
            best_f1 = cur_f1
            # Clone so later optimizer steps do not overwrite the snapshot.
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
            patience = 0
        else:
            patience += 1
            if patience >= int(early_stop_patience):
                print(f'[{name}] early stop: no val f1 improvement for {early_stop_patience} epoch(s). best_f1={best_f1:.4f}')
                break

    if best_state is not None:
        model.load_state_dict(best_state)

    return model



In [None]:
import json
from dataclasses import asdict
from pathlib import Path

import torch

from models.task1.model import MinePredictorConfig

# Checkpoints are written to models/task1/checkpoints/task1_{easy,medium,hard}.pt
CKPT_DIR = Path(repo_root) / 'models' / 'task1' / 'checkpoints'
CKPT_DIR.mkdir(parents=True, exist_ok=True)

# Hyper-parameters shared by all three difficulty models.
TRAINING_CFG = dict(epochs=15, batch_size=64, lr=3e-4, weight_decay=1e-2, val_frac=0.1, seed=0, early_stop_patience=4)

# Maps difficulty name -> path of the checkpoint written for it.
trained = {}
for name, diff in DIFFICULTIES.items():
    cfg = MinePredictorConfig(height=diff['height'], width=diff['width'])
    model = train_one_model(name=name, npz=datasets[name], cfg=cfg, **TRAINING_CFG)

    ckpt_path = CKPT_DIR / f'task1_{name}.pt'
    meta = json.loads(datasets[name].meta_json)

    # Save the weights as CPU tensors: a state dict pickled with CUDA tensors cannot be
    # loaded on a CPU-only machine unless the loader remembers to pass `map_location`.
    cpu_state_dict = {k: v.detach().cpu() for k, v in model.state_dict().items()}

    torch.save(
        {
            'task': 'task1_mine_prediction',
            'difficulty': name,
            'difficulty_cfg': diff,
            'dataset_meta': meta,
            'model_cfg': asdict(cfg),
            'state_dict': cpu_state_dict,
        },
        ckpt_path,
    )
    print(f'Saved {name} -> {ckpt_path}')
    trained[name] = str(ckpt_path)

trained

