Imports + versions

In [1]:
# [CELL 11B-00] Imports + versions
import os, json, time, math, random
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Record the versions of the three core libraries in the cell output (provenance for this run).
print("[11B-00] torch:", torch.__version__)
print("[11B-00] pandas:", pd.__version__)
print("[11B-00] numpy:", np.__version__)


[11B-00] torch: 2.9.1+cpu
[11B-00] pandas: 2.3.3
[11B-00] numpy: 2.4.0


REPO_ROOT + load configs (single source of truth = repo artifacts)

In [2]:
# [CELL 11B-01] REPO_ROOT + load configs (single source of truth = repo artifacts)
# Repository root. The original workstation path stays the default, so existing
# runs are unaffected; set the MOOC_REPO_ROOT environment variable to run the
# notebook from another checkout without editing this cell.
REPO_ROOT = Path(os.environ.get("MOOC_REPO_ROOT", r"C:\mooc-coldstart-session-meta")).resolve()
# Timestamp identifying this execution; used later to name the report directory.
RUN_TAG = time.strftime("%Y%m%d_%H%M%S")

# Upstream artifacts this notebook treats as its single source of truth.
cfg_path   = REPO_ROOT / "data" / "processed" / "supervised" / "dataloader_config_20251229_163357_20251229_232834.json"
san_path   = REPO_ROOT / "data" / "processed" / "supervised" / "sanity_metrics_20251229_163357_20251229_232834.json"
gaps_path  = REPO_ROOT / "data" / "processed" / "normalized_events" / "session_gap_thresholds.json"

print("[11B-01] REPO_ROOT:", REPO_ROOT)
print("[11B-01] RUN_TAG:", RUN_TAG)
print("[11B-01] Expect config:", cfg_path)
print("[11B-01] Expect sanity:", san_path)
print("[11B-01] Expect gaps:", gaps_path)

# Fail fast, naming the missing file, before anything is parsed.
assert cfg_path.exists(), f"Missing: {cfg_path}"
assert san_path.exists(), f"Missing: {san_path}"
assert gaps_path.exists(), f"Missing: {gaps_path}"

DL_CFG = json.loads(cfg_path.read_text(encoding="utf-8"))
SANITY = json.loads(san_path.read_text(encoding="utf-8"))
GAPS   = json.loads(gaps_path.read_text(encoding="utf-8"))

# Top-level keys are logged so schema drift in the artifacts is visible in the output.
print("[11B-01] Loaded dataloader_config keys:", list(DL_CFG.keys()))
print("[11B-01] Loaded sanity_metrics keys:", list(SANITY.keys()))
print("[11B-01] Loaded session_gap_thresholds keys:", list(GAPS.keys()))

def infer_gap_minutes(d: dict, name: str) -> int:
    """Return the session-gap threshold stored in ``d`` as whole minutes.

    An explicit ``gap_minutes`` entry takes precedence. Otherwise
    ``primary_threshold_seconds`` is divided by 60 and rounded, and the
    conversion is logged together with ``primary_threshold_label`` (if any).

    Args:
        d: Threshold mapping for one domain.
        name: Domain label used only in log and error messages.

    Raises:
        KeyError: if ``d`` contains neither key.
    """
    if "gap_minutes" in d:
        return int(d["gap_minutes"])

    if "primary_threshold_seconds" not in d:
        raise KeyError(f"[11B-01] {name}: cannot infer gap minutes. keys={list(d.keys())}")

    minutes = int(round(int(d["primary_threshold_seconds"]) / 60))
    label = d.get("primary_threshold_label", None)
    print(f"[11B-01] {name}: gap_minutes from primary_threshold_seconds={d['primary_threshold_seconds']} -> {minutes}m | label={label}")
    return minutes

# Resolve both session-gap thresholds (target first, then source, so the log
# lines keep their order) and pin them to the values this pipeline expects.
gap_target_m, gap_source_m = (
    infer_gap_minutes(GAPS[_domain], _domain) for _domain in ("target", "source")
)
assert gap_target_m == 30, f"target gap mismatch: got {gap_target_m}m"
assert gap_source_m == 10, f"source gap mismatch: got {gap_source_m}m"
print("[11B-01] ✅ Session gaps confirmed: target=30m, source=10m")

PROTO_RAW = DL_CFG["protocol"]

# Normalize protocol to match Notebook 06 constants
PROTO = {
    "K_LIST": [5, 10, 20],
    "MAX_PREFIX_LEN": int(PROTO_RAW["max_prefix_len"]),
}
# The long-session cap settings all live under one nested policy mapping.
_long_session_policy = PROTO_RAW["source_long_session_policy"]
PROTO.update(
    CAP_ENABLED=bool(_long_session_policy["enabled"]),
    CAP_SESSION_LEN=int(_long_session_policy["cap_session_len"]),
    CAP_STRATEGY=str(_long_session_policy["cap_strategy"]),
)

# Guard against protocol drift relative to the values hard-wired downstream.
assert PROTO["MAX_PREFIX_LEN"] == 20, "Protocol drift: MAX_PREFIX_LEN != 20"
assert PROTO["CAP_ENABLED"] is True, "Protocol drift: CAP_ENABLED != True"
assert PROTO["CAP_SESSION_LEN"] == 200, "Protocol drift: CAP_SESSION_LEN != 200"
assert PROTO["CAP_STRATEGY"] == "take_last", "Protocol drift: CAP_STRATEGY != take_last"

print("[11B-01] ✅ PROTO:", PROTO)

print("\n[11B-01] CHECKPOINT A")
print("Paste: inferred gaps + PROTO dict")


[11B-01] REPO_ROOT: C:\mooc-coldstart-session-meta
[11B-01] RUN_TAG: 20260104_114435
[11B-01] Expect config: C:\mooc-coldstart-session-meta\data\processed\supervised\dataloader_config_20251229_163357_20251229_232834.json
[11B-01] Expect sanity: C:\mooc-coldstart-session-meta\data\processed\supervised\sanity_metrics_20251229_163357_20251229_232834.json
[11B-01] Expect gaps: C:\mooc-coldstart-session-meta\data\processed\normalized_events\session_gap_thresholds.json
[11B-01] Loaded dataloader_config keys: ['target', 'source', 'protocol']
[11B-01] Loaded sanity_metrics keys: ['run_tag_target', 'run_tag_source', 'created_at', 'target', 'source', 'notes']
[11B-01] Loaded session_gap_thresholds keys: ['generated_from_run_tag', 'generated_at', 'target', 'source', 'decision_notes']
[11B-01] target: gap_minutes from primary_threshold_seconds=1800 -> 30m | label=30m
[11B-01] source: gap_minutes from primary_threshold_seconds=600 -> 10m | label=10m
[11B-01] ✅ Session gaps confirmed: target=30m, source=10m

Paths: TARGET tensors + vocab, SOURCE pretrained checkpoint

In [3]:
# [CELL 11B-02] Paths: TARGET tensors + vocab, SOURCE pretrained checkpoint
# Run tags pinning which upstream artifacts this notebook consumes.
target_run_tag = "20251229_163357"
src_pretrain_run_tag = "20260103_220933"

# The three TARGET tensor splits and the item vocab share one directory.
_tensor_target_dir = REPO_ROOT / "data" / "processed" / "tensor_target"
target_train_pt = _tensor_target_dir / f"target_tensor_train_{target_run_tag}.pt"
target_val_pt   = _tensor_target_dir / f"target_tensor_val_{target_run_tag}.pt"
target_test_pt  = _tensor_target_dir / f"target_tensor_test_{target_run_tag}.pt"
target_vocab_js = _tensor_target_dir / f"target_vocab_items_{target_run_tag}.json"

# Checkpoint written by the 11A source-pretraining notebook.
src_ckpt_pt = REPO_ROOT.joinpath(
    "reports", "11A_transfer_pretrain_source", src_pretrain_run_tag, "model_pretrained_source.pt"
)

# Log each expected path, then stop at the first one that is absent.
_required_artifacts = (target_train_pt, target_val_pt, target_test_pt, target_vocab_js, src_ckpt_pt)
for p in _required_artifacts:
    print("[11B-02] Expect:", p)
    assert p.exists(), f"Missing artifact: {p}"

print("[11B-02] ✅ All required artifacts exist")

print("\n[11B-02] CHECKPOINT B")
print("Confirm the 5 artifact paths exist (pt/json + pretrained checkpoint).")


[11B-02] Expect: C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_train_20251229_163357.pt
[11B-02] Expect: C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_val_20251229_163357.pt
[11B-02] Expect: C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_test_20251229_163357.pt
[11B-02] Expect: C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_vocab_items_20251229_163357.json
[11B-02] Expect: C:\mooc-coldstart-session-meta\reports\11A_transfer_pretrain_source\20260103_220933\model_pretrained_source.pt
[11B-02] ✅ All required artifacts exist

[11B-02] CHECKPOINT B
Confirm the 5 artifact paths exist (pt/json + pretrained checkpoint).


Load TARGET tensors + vocab

In [4]:
# [CELL 11B-03] Load TARGET tensors + vocab
# Parse the TARGET item-vocabulary JSON whose path was resolved in cell 11B-02.
target_vocab = json.loads(target_vocab_js.read_text(encoding="utf-8"))

def infer_vocab_size(vocab: dict, name: str) -> int:
    """Work out how many ids the vocabulary described by ``vocab`` spans.

    An explicit ``vocab_size`` entry is used when present. Otherwise, if
    ``vocab["vocab"]`` is a dict (treated as a token->id map), the size is the
    largest id plus one, and that derivation is logged.

    Args:
        vocab: Parsed vocabulary JSON.
        name: Label used only in log and error messages.

    Raises:
        KeyError: if neither source of the size is available.
    """
    if "vocab_size" in vocab:
        return int(vocab["vocab_size"])

    token_to_id = vocab.get("vocab")
    if not isinstance(token_to_id, dict):
        raise KeyError(f"[11B-03] {name}: cannot infer vocab_size. keys={list(vocab.keys())}")

    # token->id map
    size = int(max(token_to_id.values())) + 1
    print(f"[11B-03] {name}: vocab_size from max(vocab['vocab'])+1 = {size} (token->id)")
    return size

VOCAB_SIZE_TARGET = infer_vocab_size(target_vocab, "TARGET")
# PAD/UNK ids fall back to 0 and 1 when the vocab file does not declare them.
PAD_ID_TARGET = int(target_vocab.get("pad_id", 0))
UNK_ID_TARGET = int(target_vocab.get("unk_id", 1))

print("[11B-03] VOCAB_SIZE_TARGET:", VOCAB_SIZE_TARGET)
print("[11B-03] PAD_ID_TARGET:", PAD_ID_TARGET, "| UNK_ID_TARGET:", UNK_ID_TARGET)

def load_pt(path: Path):
    """Deserialize a ``torch.save`` artifact onto the CPU and log its path.

    ``weights_only=False`` lets ``torch.load`` unpickle arbitrary objects, so
    this should only be pointed at artifacts produced by this project.

    Returns:
        Whatever object was stored in the file.
    """
    loaded = torch.load(path, weights_only=False, map_location="cpu")
    print(f"[11B-03] torch.load OK (weights_only=False): {path}")
    return loaded

tr = load_pt(target_train_pt)
va = load_pt(target_val_pt)
te = load_pt(target_test_pt)

# Required keys from 05B tensor target
need_keys = ["input_ids", "attn_mask", "labels"]
for k in need_keys:
    assert k in tr and k in va and k in te, f"Missing key '{k}' in tensors."

print("[11B-03] TARGET train shapes:", tuple(tr["input_ids"].shape), tuple(tr["attn_mask"].shape), tuple(tr["labels"].shape))
print("[11B-03] TARGET val shapes  :", tuple(va["input_ids"].shape), tuple(va["labels"].shape))
print("[11B-03] TARGET test shapes :", tuple(te["input_ids"].shape), tuple(te["labels"].shape))

# Validate every split, not only train: the prefix length must match the
# protocol, the mask must align element-wise with the ids (lengths are derived
# from it later), and there must be exactly one label per row.
for _split_name, _split in (("train", tr), ("val", va), ("test", te)):
    _ids_shape = tuple(_split["input_ids"].shape)
    assert len(_ids_shape) == 2, f"{_split_name}: input_ids must be 2-D, got shape {_ids_shape}"
    assert _ids_shape[1] == PROTO["MAX_PREFIX_LEN"] == 20, f"{_split_name}: seq_len mismatch vs protocol"
    assert tuple(_split["attn_mask"].shape) == _ids_shape, (
        f"{_split_name}: attn_mask shape {tuple(_split['attn_mask'].shape)} != input_ids shape {_ids_shape}"
    )
    assert tuple(_split["labels"].shape) == (_ids_shape[0],), (
        f"{_split_name}: labels shape {tuple(_split['labels'].shape)} != ({_ids_shape[0]},)"
    )

print("\n[11B-03] CHECKPOINT C")
print("Paste: VOCAB_SIZE_TARGET + PAD/UNK + the three shape lines.")


[11B-03] TARGET: vocab_size from max(vocab['vocab'])+1 = 747 (token->id)
[11B-03] VOCAB_SIZE_TARGET: 747
[11B-03] PAD_ID_TARGET: 0 | UNK_ID_TARGET: 1
[11B-03] torch.load OK (weights_only=False): C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_train_20251229_163357.pt
[11B-03] torch.load OK (weights_only=False): C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_val_20251229_163357.pt
[11B-03] torch.load OK (weights_only=False): C:\mooc-coldstart-session-meta\data\processed\tensor_target\target_tensor_test_20251229_163357.pt
[11B-03] TARGET train shapes: (1944, 20) (1944, 20) (1944,)
[11B-03] TARGET val shapes  : (189, 20) (189,)
[11B-03] TARGET test shapes : (200, 20) (200,)

[11B-03] CHECKPOINT C
Paste: VOCAB_SIZE_TARGET + PAD/UNK + the three shape lines.


Metrics (reuse Notebook 06 protocol exactly)

In [5]:
# [CELL 11B-04] Metrics (reuse Notebook 06 protocol exactly)
# Ranking cut-offs for the metrics below, taken from the normalized protocol dict.
K_LIST = PROTO["K_LIST"]

def metrics_from_ranks(ranks: np.ndarray, K_LIST):
    """Aggregate per-example ranks into HR@k, MRR@k and NDCG@k.

    Args:
        ranks: 1-based rank of the true item for each example; ``inf`` marks a
            miss. Any array-like is accepted (list, tuple, ndarray), including
            an empty one.
        K_LIST: Iterable of integer cut-offs ``k``.

    Returns:
        Dict with ``HR@k``, ``MRR@k`` and ``NDCG@k`` for every ``k``, each the
        mean over all examples (``0.0`` when there are no examples).
    """
    # Coerce once so plain Python sequences work and comparisons are vectorized.
    ranks = np.asarray(ranks, dtype=np.float64)
    n = ranks.size
    out = {}
    for k in K_LIST:
        within_k = ranks <= k  # False for inf, i.e. misses never count
        # 1/inf and 1/log2(inf) are both 0.0, so misses need no special-casing.
        rr = np.where(within_k, 1.0 / ranks, 0.0)
        nd = np.where(within_k, 1.0 / np.log2(ranks + 1.0), 0.0)
        out[f"HR@{k}"] = float(within_k.mean()) if n else 0.0
        out[f"MRR@{k}"] = float(rr.mean()) if n else 0.0
        out[f"NDCG@{k}"] = float(nd.mean()) if n else 0.0
    return out

@torch.no_grad()
def eval_target_model(model: nn.Module, input_ids: torch.Tensor, attn_mask: torch.Tensor, labels: torch.Tensor,
                      pad_id: int, K_LIST, batch_size: int = 512):
    """Score next-item predictions and return HR/MRR/NDCG at each cut-off.

    The model is switched to eval mode and left in it. Rows whose label equals
    ``pad_id`` are skipped, and ``pad_id`` is excluded from the ranking.

    Args:
        model: Called as ``model(input_ids, lengths)``; must return ``[B, V]`` logits.
        input_ids: ``[N, T]`` item ids.
        attn_mask: ``[N, T]`` mask; row sums (clamped to >= 1) are used as lengths.
        labels: ``[N]`` target item ids.
        pad_id: Padding id.
        K_LIST: Iterable of integer cut-offs.
        batch_size: Number of rows scored per forward pass.

    Returns:
        The dict produced by ``metrics_from_ranks`` plus ``_n_examples``, the
        number of rows that were actually ranked.
    """
    model.eval()

    # Run on whatever device the model lives on (falls back to the input's
    # device for a parameter-free model).
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else input_ids.device

    max_k = max(K_LIST)
    rank_chunks = []

    for start in range(0, input_ids.size(0), batch_size):
        stop = start + batch_size
        x = input_ids[start:stop].to(model_device)
        m = attn_mask[start:stop].to(model_device)
        y_np = labels[start:stop].cpu().numpy()

        lengths = m.sum(dim=1).clamp(min=1).long()
        logits = model(x, lengths)  # [B, V]
        logits[:, pad_id] = -1e9    # PAD excluded from ranking

        # torch.topk raises if k exceeds the vocabulary size, so clamp it.
        topk = min(max_k, logits.size(1))
        recs_np = torch.topk(logits, k=topk, dim=1).indices.cpu().numpy()  # [B, topk]

        # Top-k indices are unique per row, so each row has at most one hit and
        # argmax returns its (0-based) position.
        hit = recs_np == y_np[:, None]
        batch_ranks = np.where(hit.any(axis=1), hit.argmax(axis=1) + 1.0, np.inf)
        rank_chunks.append(batch_ranks[y_np != pad_id])

    if rank_chunks:
        ranks = np.concatenate(rank_chunks).astype(np.float64)
    else:
        ranks = np.empty(0, dtype=np.float64)

    out = metrics_from_ranks(ranks, K_LIST)
    # Counts every ranked row, hits and misses alike.
    out["_n_examples"] = int(ranks.shape[0])
    return out

# Marker in the cell output that the metric helpers above were defined.
print("[11B-04] ✅ Metric functions ready")


[11B-04] ✅ Metric functions ready


Model: GRU4RecDropout (same shape as 11A pretrain; transfer GRU weights)

In [6]:
# [CELL 11B-05] Model: GRU4RecDropout (same shape as 11A pretrain; transfer GRU weights)
def set_seed(seed: int):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

class GRU4RecDropout(nn.Module):
    """Next-item recommender: embedding -> dropout -> single-layer GRU -> linear head.

    The final GRU hidden state of each (length-packed) sequence is projected to
    one logit per vocabulary entry.
    """

    def __init__(self, vocab_size: int, emb_dim: int, hidden_dim: int, dropout: float, pad_id: int):
        """Create the layers.

        Args:
            vocab_size: Number of item ids (embedding rows and output logits).
            emb_dim: Embedding width, also the GRU input size.
            hidden_dim: GRU hidden size.
            dropout: Dropout probability applied to the embeddings.
            pad_id: Padding id, registered as the embedding's ``padding_idx``.
        """
        super().__init__()
        self.vocab_size, self.pad_id = vocab_size, pad_id
        # Layers are created in this order so seeded initialization is unchanged.
        self.emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim, padding_idx=pad_id)
        self.drop = nn.Dropout(p=dropout)
        self.gru = nn.GRU(emb_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, input_ids: torch.Tensor, lengths: torch.Tensor):
        """Return ``[B, V]`` logits for ``[B, T]`` ids with per-row ``lengths``."""
        embedded = self.emb(input_ids)            # [B, T, E]
        embedded = self.drop(embedded)
        # Packing makes the GRU stop at each row's true length; lengths must be on CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, final_hidden = self.gru(packed)        # [1, B, H]
        return self.fc(final_hidden.squeeze(0))   # [B, V]

# Marker in the cell output that the model class above was defined.
print("[11B-05] ✅ GRU4RecDropout defined")


[11B-05] ✅ GRU4RecDropout defined


Load SOURCE pretrained checkpoint (robust extraction)

In [7]:
# [CELL 11B-06] Load SOURCE pretrained checkpoint (robust extraction)
def extract_state_dict(obj):
    """Locate the parameter mapping inside a loaded checkpoint object.

    Args:
        obj: Object returned by ``torch.load``.

    Returns:
        ``(state_dict, key)`` where ``key`` is the container key the mapping was
        found under (``"state_dict"``, ``"model_state_dict"`` or ``"model"``),
        or ``"root"`` when ``obj`` itself is a mapping of tensors.

    Raises:
        TypeError: if no state dict can be identified, including when ``obj``
            is an empty dict.
    """
    if isinstance(obj, dict):
        for k in ("state_dict", "model_state_dict", "model"):
            candidate = obj.get(k)
            if isinstance(candidate, dict):
                return candidate, k
        # sometimes it's already a state dict. The emptiness check matters:
        # all() is vacuously true on {}, which would otherwise let an empty
        # checkpoint through and make the later transfer a silent no-op.
        if obj and all(isinstance(v, torch.Tensor) for v in obj.values()):
            return obj, "root"
    raise TypeError(f"Unrecognized checkpoint format: type={type(obj)} keys={list(obj.keys()) if isinstance(obj, dict) else None}")

# NOTE(review): weights_only=False unpickles arbitrary objects; only load
# checkpoints produced by this project with it.
ckpt_obj = torch.load(src_ckpt_pt, map_location="cpu", weights_only=False)
# src_sd_key records which container key (or "root") the weights were found under.
src_sd, src_sd_key = extract_state_dict(ckpt_obj)

# Log enough of the checkpoint layout to compare parameter names with the target model.
print("[11B-06] Loaded pretrained checkpoint:", src_ckpt_pt)
print("[11B-06] state_dict source key:", src_sd_key)
print("[11B-06] #keys in src_sd:", len(src_sd))
print("[11B-06] sample keys:", list(src_sd.keys())[:8])

print("\n[11B-06] CHECKPOINT D")
print("Paste: src_sd_key + #keys + sample keys (first ~8).")


[11B-06] Loaded pretrained checkpoint: C:\mooc-coldstart-session-meta\reports\11A_transfer_pretrain_source\20260103_220933\model_pretrained_source.pt
[11B-06] state_dict source key: state_dict
[11B-06] #keys in src_sd: 7
[11B-06] sample keys: ['emb.weight', 'gru.weight_ih_l0', 'gru.weight_hh_l0', 'gru.bias_ih_l0', 'gru.bias_hh_l0', 'out.weight', 'out.bias']

[11B-06] CHECKPOINT D
Paste: src_sd_key + #keys + sample keys (first ~8).


Init TARGET model + transfer GRU weights (+ optional PAD/UNK embedding rows only)

In [8]:
# [CELL 11B-07] Init TARGET model + transfer GRU weights (+ optional PAD/UNK embedding rows only)
FINETUNE_SEED = 42
# Seed before the model is built so the non-transferred layers initialize reproducibly.
set_seed(FINETUNE_SEED)

# Fine-tuning hyper-parameters plus provenance of the transferred weights.
# This dict is saved verbatim with the reports.
FT_CFG = {
    "emb_dim": 64,
    "hidden_dim": 128,
    "dropout": 0.3,
    "batch_size": 256,
    "lr": 1e-3,
    "weight_decay": 1e-5,
    "grad_clip": 1.0,
    "max_epochs": 50,
    "early_stop_metric": "HR@20",
    "patience": 7,
    "min_delta": 1e-4,
    "seed": FINETUNE_SEED,
    "transfer_mode": "copy_gru_only_plus_pad_unk_emb_rows",
    "src_pretrain_run_tag": src_pretrain_run_tag,
    "src_checkpoint": str(src_ckpt_pt),
}

device = torch.device("cpu")

model = GRU4RecDropout(
    vocab_size=VOCAB_SIZE_TARGET,
    emb_dim=FT_CFG["emb_dim"],
    hidden_dim=FT_CFG["hidden_dim"],
    dropout=FT_CFG["dropout"],
    pad_id=PAD_ID_TARGET,
).to(device)

# state_dict() tensors share storage with the live parameters, so the in-place
# copy_ calls below update the model directly.
tgt_sd = model.state_dict()

# Bookkeeping for the transfer summary. skipped_policy holds tensors that match
# by name and shape but are deliberately not transferred.
copied, skipped_shape, skipped_missing, skipped_policy = [], [], [], []

for k, v in src_sd.items():
    if k not in tgt_sd:
        skipped_missing.append(k)
        continue
    if tgt_sd[k].shape != v.shape:
        skipped_shape.append((k, tuple(v.shape), tuple(tgt_sd[k].shape)))
        continue
    # transfer_mode is GRU-only: item embeddings and the output head are indexed
    # by vocabulary id, so a same-shaped source tensor would still be wrong for
    # the TARGET vocabulary. Only the recurrent weights are copied.
    if not k.startswith("gru."):
        skipped_policy.append(k)
        continue
    tgt_sd[k].copy_(v)
    copied.append(k)

# A "transfer" run in which a GRU tensor kept its random init (name or shape
# mismatch with the checkpoint) would silently become train-from-scratch.
gru_not_copied = [k for k in tgt_sd if k.startswith("gru.") and k not in copied]
assert not gru_not_copied, f"GRU weights not transferred from source checkpoint: {gru_not_copied}"

# Explicitly re-init output head (always) because vocab differs.
with torch.no_grad():
    model.fc.reset_parameters()

# Optional: PAD/UNK rows from src embedding if matching key exists and shape allows.
# The [:2] slice is only meaningful when the TARGET PAD/UNK ids are 0 and 1.
# NOTE(review): it also assumes the SOURCE vocab uses ids 0/1 for PAD/UNK — confirm against 11A.
padunk_copied = False
if "emb.weight" in src_sd and "emb.weight" in tgt_sd:
    src_emb, tgt_emb = src_sd["emb.weight"], tgt_sd["emb.weight"]
    same_width = src_emb.shape[1] == tgt_emb.shape[1]
    enough_rows = src_emb.shape[0] >= 2 and tgt_emb.shape[0] >= 2
    target_ids_are_0_1 = {PAD_ID_TARGET, UNK_ID_TARGET} == {0, 1}
    if same_width and enough_rows and target_ids_are_0_1:
        tgt_emb[:2].copy_(src_emb[:2])
        padunk_copied = True

model.load_state_dict(tgt_sd)

print("[11B-07] Transfer summary:")
print("  copied_keys:", len(copied))
print("  skipped_missing:", len(skipped_missing))
print("  skipped_shape:", len(skipped_shape))
print("  pad/unk emb rows copied:", padunk_copied)
if skipped_shape[:5]:
    print("  skipped_shape sample:", skipped_shape[:5])
if skipped_policy:
    print("  skipped_policy (non-GRU, same shape):", skipped_policy)

print("\n[11B-07] CHECKPOINT E")
print("Paste: copied_keys/skipped counts + whether pad/unk emb rows copied.")


[11B-07] Transfer summary:
  copied_keys: 4
  skipped_missing: 2
  skipped_shape: 1
  pad/unk emb rows copied: True
  skipped_shape sample: [('emb.weight', (1620, 64), (747, 64))]

[11B-07] CHECKPOINT E
Paste: copied_keys/skipped counts + whether pad/unk emb rows copied.


Build TARGET dataloaders

In [9]:
# [CELL 11B-08] Build TARGET dataloaders
# Each split is consumed as an (input_ids, attn_mask, labels) triple of int64 tensors.
_TENSOR_FIELDS = ("input_ids", "attn_mask", "labels")

# Whole-split tuples: evaluation slices these directly instead of using a DataLoader.
val_tensors  = tuple(va[field].long() for field in _TENSOR_FIELDS)
test_tensors = tuple(te[field].long() for field in _TENSOR_FIELDS)

train_ds = TensorDataset(*(tr[field].long() for field in _TENSOR_FIELDS))
val_ds   = TensorDataset(*(va[field].long() for field in _TENSOR_FIELDS))
test_ds  = TensorDataset(*(te[field].long() for field in _TENSOR_FIELDS))

# Only training is batched and shuffled; the last partial batch is kept.
train_loader = DataLoader(train_ds, shuffle=True, drop_last=False, batch_size=FT_CFG["batch_size"])

def make_lengths(attn_mask: torch.Tensor) -> torch.Tensor:
    """Turn a ``[B, T]`` attention mask into per-row lengths (int64, at least 1).

    The floor of 1 keeps all-zero rows usable as a sequence length.
    """
    token_counts = torch.sum(attn_mask, dim=1)
    return torch.clamp(token_counts, min=1).long()

# len(train_loader) is the number of mini-batches per epoch.
print("[11B-08] ✅ DataLoaders ready | train_batches:", len(train_loader))


[11B-08] ✅ DataLoaders ready | train_batches: 8


Fine-tune on TARGET with early stopping on HR@20

In [10]:
# [CELL 11B-09] Fine-tune on TARGET with early stopping on HR@20
opt = torch.optim.Adam(model.parameters(), lr=FT_CFG["lr"], weight_decay=FT_CFG["weight_decay"])

# Early-stopping state: best validation score so far, the epoch that produced
# it, a CPU snapshot of those weights, and the count of epochs without improvement.
best_metric, best_epoch, best_state, bad_epochs = -1.0, -1, None, 0

t0 = time.time()
for epoch in range(1, FT_CFG["max_epochs"] + 1):
    model.train()
    losses = []

    for x, m, y in train_loader:
        x, m, y = x.to(device), m.to(device), y.to(device)
        lengths = make_lengths(m)

        opt.zero_grad()
        logits = model(x, lengths)
        # Rows labelled with the PAD id contribute nothing to the loss.
        loss = F.cross_entropy(logits, y, ignore_index=PAD_ID_TARGET)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), FT_CFG["grad_clip"])
        opt.step()

        losses.append(float(loss.item()))

    # Validate once per epoch on the whole validation split.
    val_res = eval_target_model(model, *val_tensors, PAD_ID_TARGET, K_LIST)
    metric = float(val_res[FT_CFG["early_stop_metric"]])
    mean_loss = float(np.mean(losses)) if losses else float("nan")

    dt = time.time() - t0
    print(f"[11B-09] epoch={epoch:02d} loss={mean_loss:.4f} elapsed={dt:.1f}s | VAL: {val_res}")

    # An epoch only counts as an improvement if it beats the best by more than min_delta.
    improved = metric > best_metric + FT_CFG["min_delta"]
    if improved:
        best_metric, best_epoch, bad_epochs = metric, epoch, 0
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    else:
        bad_epochs += 1

    if bad_epochs >= FT_CFG["patience"]:
        print(f"[11B-09] ✅ Early stop at epoch={epoch} (best epoch={best_epoch}, best {FT_CFG['early_stop_metric']}={best_metric:.6f})")
        break

# Roll back to the best snapshot so later cells evaluate and save that model.
assert best_state is not None, "No best_state captured (unexpected)."
model.load_state_dict(best_state)
print(f"[11B-09] ✅ Restored best weights from epoch={best_epoch} with best {FT_CFG['early_stop_metric']}={best_metric:.6f}")

print("\n[11B-09] CHECKPOINT F")
print("Paste: best_epoch + best_metric + last 2 epoch lines.")


[11B-09] epoch=01 loss=6.6204 elapsed=0.5s | VAL: {'HR@5': 0.015873015873015872, 'MRR@5': 0.008994708994708995, 'NDCG@5': 0.010676098205322749, 'HR@10': 0.026455026455026454, 'MRR@10': 0.010338456370202401, 'NDCG@10': 0.014032517935467271, 'HR@20': 0.037037037037037035, 'MRR@20': 0.010963088939279416, 'NDCG@20': 0.016572515054434803, '_n_examples': 189}
[11B-09] epoch=02 loss=6.5098 elapsed=0.8s | VAL: {'HR@5': 0.021164021164021163, 'MRR@5': 0.014109347442680775, 'NDCG@5': 0.015873015873015872, 'HR@10': 0.026455026455026454, 'MRR@10': 0.014638447971781305, 'NDCG@10': 0.017402459398507344, 'HR@20': 0.07407407407407407, 'MRR@20': 0.018135819670907386, 'NDCG@20': 0.029675130697266714, '_n_examples': 189}
[11B-09] epoch=03 loss=6.4069 elapsed=1.2s | VAL: {'HR@5': 0.021164021164021163, 'MRR@5': 0.014109347442680775, 'NDCG@5': 0.015873015873015872, 'HR@10': 0.05291005291005291, 'MRR@10': 0.018060804568741075, 'NDCG@10': 0.02586080022338621, 'HR@20': 0.10052910052910052, 'MRR@20': 0.021178947

Final eval on TARGET VAL/TEST (best model)

In [11]:
# [CELL 11B-10] Final eval on TARGET VAL/TEST (best model)
# Score the restored best model on the held-out splits; each tuple unpacks to
# (input_ids, attn_mask, labels).
val_final = eval_target_model(model, *val_tensors, PAD_ID_TARGET, K_LIST)
test_final = eval_target_model(model, *test_tensors, PAD_ID_TARGET, K_LIST)

print("[11B-10] TARGET VAL (transfer-finetune):", val_final)
print("[11B-10] TARGET TEST (transfer-finetune):", test_final)

print("\n[11B-10] CHECKPOINT G")
print("Paste VAL/TEST metrics dicts before writing reports.")


[11B-10] TARGET VAL (transfer-finetune): {'HR@5': 0.06878306878306878, 'MRR@5': 0.046296296296296294, 'NDCG@5': 0.0519760096232157, 'HR@10': 0.12698412698412698, 'MRR@10': 0.05337826488620139, 'NDCG@10': 0.07010594499394271, 'HR@20': 0.20105820105820105, 'MRR@20': 0.05799168906692618, 'NDCG@20': 0.08816282865768477, '_n_examples': 189}
[11B-10] TARGET TEST (transfer-finetune): {'HR@5': 0.09, 'MRR@5': 0.055, 'NDCG@5': 0.06377071188430579, 'HR@10': 0.15, 'MRR@10': 0.06272619047619048, 'NDCG@10': 0.0828935307213819, 'HR@20': 0.23, 'MRR@20': 0.06820425152333047, 'NDCG@20': 0.1030238813429633, '_n_examples': 200}

[11B-10] CHECKPOINT G
Paste VAL/TEST metrics dicts before writing reports.


Write reports + update meta.json

In [12]:
# [CELL 11B-11] Write reports + update meta.json
report_dir = REPO_ROOT / "reports" / "11B_transfer_finetune_target" / RUN_TAG
report_dir.mkdir(parents=True, exist_ok=True)

# Every artifact path is defined once and reused for both writing and the
# meta.json record, so the two cannot drift apart.
model_path = report_dir / "model_transfer_finetuned_target.pt"
val_metrics_path = report_dir / "metrics_target_val.json"
test_metrics_path = report_dir / "metrics_target_test.json"
finetune_cfg_path = report_dir / "finetune_cfg.json"

# Save model
torch.save({"state_dict": model.state_dict(), "cfg": FT_CFG}, model_path)

# Save metrics + cfg
val_metrics_path.write_text(json.dumps(val_final, indent=2), encoding="utf-8")
test_metrics_path.write_text(json.dumps(test_final, indent=2), encoding="utf-8")
finetune_cfg_path.write_text(json.dumps(FT_CFG, indent=2), encoding="utf-8")

# Update meta.json (append-only)
meta_path = REPO_ROOT / "meta.json"
meta = {}
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding="utf-8"))
# Refuse to touch a registry with an unexpected structure rather than overwrite it.
assert isinstance(meta, dict), f"meta.json must contain a JSON object, got {type(meta).__name__}"

runs = meta.get("runs", [])
assert isinstance(runs, list), f"meta.json 'runs' must be a list, got {type(runs).__name__}"
runs.append({
    "run_tag": RUN_TAG,
    "notebook": "11B_transfer_finetune_target.ipynb",
    "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
    "artifacts": {
        "report_dir": str(report_dir),
        "model": str(model_path),
        "val_metrics": str(val_metrics_path),
        "test_metrics": str(test_metrics_path),
        "cfg": str(finetune_cfg_path),
    },
    "inputs": {
        "target_run_tag": target_run_tag,
        "source_pretrain_run_tag": src_pretrain_run_tag,
        "source_checkpoint": str(src_ckpt_pt),
        "dataloader_config": str(cfg_path),
        "sanity_metrics": str(san_path),
        "session_gap_thresholds": str(gaps_path),
    },
    "results": {
        "target_val": val_final,
        "target_test": test_final,
    }
})
meta["runs"] = runs

# Write to a sibling temp file and swap it in: an interrupted write then cannot
# truncate the registry and lose the history of earlier runs.
meta_tmp_path = meta_path.with_name(meta_path.name + ".tmp")
meta_tmp_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
meta_tmp_path.replace(meta_path)

print("[11B-11] ✅ Saved model:", model_path)
print("[11B-11] ✅ Wrote report files under:", report_dir)
print("[11B-11] ✅ Updated meta.json:", meta_path)

print("\n[11B-11] CHECKPOINT H")
print("Paste: report_dir path + confirm meta.json updated.")


[11B-11] ✅ Saved model: C:\mooc-coldstart-session-meta\reports\11B_transfer_finetune_target\20260104_114435\model_transfer_finetuned_target.pt
[11B-11] ✅ Wrote report files under: C:\mooc-coldstart-session-meta\reports\11B_transfer_finetune_target\20260104_114435
[11B-11] ✅ Updated meta.json: C:\mooc-coldstart-session-meta\meta.json

[11B-11] CHECKPOINT H
Paste: report_dir path + confirm meta.json updated.
