In [1]:
!pip install torch transformers datasets huggingface_hub transformer-lens pandas

Defaulting to user installation because normal site-packages is not writeable
[0m

In [2]:
%pip install -q devinterp

[0mNote: you may need to restart the kernel to use updated packages.


In [33]:
import math, re, glob, warnings, pathlib, random
import typing, tqdm, torch, numpy as np
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformer_lens import HookedTransformer, HookedTransformerConfig
import os, signal, sys, pandas as pd, tqdm
from huggingface_hub import list_repo_files
import re, math, json, glob, torch, tqdm, pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from huggingface_hub import hf_hub_download, upload_file, login, list_repo_files
from transformer_lens import HookedTransformer, HookedTransformerConfig
from devinterp.optim import SGLD
from devinterp.slt.sampler import estimate_learning_coeff_with_summary
from devinterp.utils import default_nbeta

from devinterp.optim import SGLD
from devinterp.slt.sampler import estimate_learning_coeff_with_summary
from devinterp.utils import default_nbeta, plot_trace

warnings.filterwarnings("ignore")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device →", DEVICE)
OUTPUT_CSV  = "ckpt_loss_perturbation_scan.csv"

Device → cuda


In [5]:
# Cell 4: evaluation / LLC helpers

def ce_per_token(model, loader):
    model.eval()
    total, n = 0.0, 0
    with torch.no_grad():
        for batch in loader:
            x = batch["input_ids"].to(DEVICE)
            y = batch["labels"].to(DEVICE)
            logits = model(x)
            loss = F.cross_entropy(
                logits.view(-1, logits.size(-1)),
                y.view(-1),
                reduction="sum",
            )
            total += loss.item()
            n     += y.numel()
    return total / n

# a proper evaluate function for devinterp:
def evaluate_one(model, batch):
    # returns (loss_tensor, aux_dict)
    logits = model(batch["input_ids"].to(DEVICE))
    loss = F.cross_entropy(
        logits.view(-1, logits.size(-1)),
        batch["labels"].view(-1).to(DEVICE),
        reduction="mean"
    )
    return loss, {}

def perturb_state(state_dict, frac=1e-2):
    new = {}
    for k, v in state_dict.items():
        if torch.is_floating_point(v):
            sigma = frac * v.norm() / math.sqrt(v.numel())
            new[k] = v + torch.randn_like(v) * sigma
        else:
            new[k] = v
    return new

def llc_mean(model):
    stats = estimate_learning_coeff_with_summary(
        model,
        loader           = val_loader,
        evaluate         = evaluate_one,   # <— returns torch scalar!
        sampling_method  = SGLD,
        optimizer_kwargs = dict(
            lr           = 1e-3,
            localization = 200.0,
            nbeta        = default_nbeta(val_loader),
        ),
        num_chains         = 20,
        num_draws          = 200,
        num_burnin_steps   = 0,
        num_steps_bw_draws = 1,
        device             = DEVICE,
        online             = False,
        verbose            = False,
    )
    return float(stats["llc/mean"])


In [11]:
# Cell 3 — Build your validation DataLoader (512 seqs of length 1024)

from torch.utils.data import Dataset, DataLoader

tokenizer = model.tokenizer   # << use the tokenizer attached to your instantiated model

class PileValidDataset(Dataset):
    def __init__(self, lines, seq_len=1024):
        toks = []
        for ln in lines:
            toks.extend(tokenizer(ln, truncation=False)["input_ids"])
        self.seqs = [toks[i:i+seq_len] for i in range(0, len(toks), seq_len)]
        if len(self.seqs[-1]) < seq_len:
            self.seqs.pop()
    def __len__(self):
        return len(self.seqs)
    def __getitem__(self, idx):
        x = torch.tensor(self.seqs[idx][:-1], dtype=torch.long)
        y = torch.tensor(self.seqs[idx][1:],  dtype=torch.long)
        return {"input_ids": x, "labels": y}

# Load 512 lines from Pile
from huggingface_hub import hf_hub_download

jsonl_path = hf_hub_download(
    repo_id   = "stanford-crfm/DSIR-filtered-pile-50M",
    filename  = "train_6.jsonl",
    repo_type = "dataset"
)

valid_lines = []
with open(jsonl_path, "r") as fp:
    for _ in range(512):
        valid_lines.append(__import__('json').loads(next(fp))["contents"])

val_ds = PileValidDataset(valid_lines, seq_len=1024)
val_loader = DataLoader(val_ds, batch_size=16, shuffle=False)
print("Validation loader ready, examples:", len(val_ds))


Validation loader ready, examples: 264


In [7]:
# Cell 4 — Instantiate your model

CFG = HookedTransformerConfig(
    n_layers                     = 2,
    n_heads                      = 8,
    d_head                       = 32,
    d_model                      = 256,
    n_ctx                        = 1024,
    d_vocab                      = 5000,
    attn_only                    = True,
    tokenizer_name               = "georgeyw/TinyStories-tokenizer-5k",
    normalization_type           = "LN",
    positional_embedding_type    = "shortformer",
)



In [28]:
# Cell 3 — Config & checkpoint listing
REPO_ID     = "timaeus/tetrahedron-3m-og"
CKPT_DIR    = "checkpoints"
KEEP_EVERY  = 200
RANGE_START = 100
RANGE_END   = 15000
STEP_RX     = re.compile(rf"{CKPT_DIR}/checkpoint_(\d+)\.pth$")
def list_ckpts(repo):
    files = list_repo_files(repo)
    pairs = [(int(m.group(1)), f) for f in files if (m:=STEP_RX.match(f))]
    return sorted(
        (s, f) for s, f in pairs
        if (s%KEEP_EVERY==0) and RANGE_START<=s<=RANGE_END
    )
ckpts = list_ckpts(REPO_ID)
print("Using checkpoints:", [s for s,_ in ckpts])


Using checkpoints: [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000, 3200, 3400, 3600, 3800, 4000, 4200, 4400, 4600, 4800, 5000, 5200, 5400, 5600, 5800, 6000, 6200, 6400, 6600, 6800, 7000, 7200, 7400, 7600, 7800, 8000, 8200, 8400, 8600, 8800, 9000, 9200, 9400, 9600, 9800, 10000, 10200, 10400, 10600, 10800, 11000, 11200, 11400, 11600, 11800, 12000, 12200, 12400, 12600, 12800, 13000, 13200, 13400, 13600, 13800, 14000, 14200, 14400, 14600, 14800, 15000]


In [34]:
# Cell 6 — main loop: compute & append
records = []

for step, hf_path in tqdm.tqdm(ckpts, desc="checkpoints"):
    # 1) load
    local = hf_hub_download(REPO_ID, hf_path, repo_type="model")
    sd = torch.load(local, map_location="cpu")
    model.load_state_dict(sd, strict=True)

    # 2) reference loss
    loss = ce_per_token(model, val_loader)

    # 3) perturbed loss
    model.load_state_dict(perturb_state(sd), strict=False)
    loss_pert = ce_per_token(model, val_loader)

    # 4) LLC on clean weights
    model.load_state_dict(sd, strict=True)
    llc = llc_mean(model)

    rec = {
        "step":      step,
        "loss":      loss,
        "loss_pert": loss_pert,
        "d_loss":    loss_pert - loss,
        "llc":       llc
    }
    records.append(rec)

    # append **just this row** to CSV
    pd.DataFrame([rec]).to_csv(
        OUTPUT_CSV, mode="a", header=False, index=False
    )

    # try pushing to the Hub, but don’t crash if we’re not logged in
    try:
        upload_file(
            OUTPUT_CSV,
            path_in_repo="ckpt_loss_perturbation_scan.csv",
            repo_id=HF_REPO,
            repo_type="dataset",
            commit_message=f"checkpoint {step}",
        )
    except Exception as e:
        print(f"⚠️  upload failed (not logged in?): {e}")

    torch.cuda.empty_cache()

print("✅ Done — final results in", OUTPUT_CSV)


checkpoints:   0%|          | 0/75 [00:00<?, ?it/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   1%|▏         | 1/75 [04:09<5:07:15, 249.13s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0000400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   3%|▎         | 2/75 [08:18<5:03:29, 249.44s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0000600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   4%|▍         | 3/75 [12:28<4:59:31, 249.61s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0000800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   5%|▌         | 4/75 [16:38<4:55:18, 249.56s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   7%|▋         | 5/75 [20:47<4:51:10, 249.57s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0001200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   8%|▊         | 6/75 [24:57<4:47:02, 249.60s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:   9%|▉         | 7/75 [29:06<4:42:51, 249.58s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0001600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  11%|█         | 8/75 [33:17<4:38:53, 249.76s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0001800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  12%|█▏        | 9/75 [37:27<4:34:49, 249.84s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0002000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  13%|█▎        | 10/75 [41:36<4:30:34, 249.76s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0002200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  15%|█▍        | 11/75 [45:46<4:26:21, 249.72s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0002400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  16%|█▌        | 12/75 [49:55<4:22:11, 249.71s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0002600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  17%|█▋        | 13/75 [54:05<4:18:02, 249.72s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0002800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  19%|█▊        | 14/75 [58:15<4:13:50, 249.68s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0003000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  20%|██        | 15/75 [1:02:25<4:09:54, 249.90s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0003200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  21%|██▏       | 16/75 [1:06:36<4:05:53, 250.05s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0003400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  23%|██▎       | 17/75 [1:10:46<4:01:48, 250.14s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0003600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  24%|██▍       | 18/75 [1:14:56<3:57:39, 250.17s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0003800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  25%|██▌       | 19/75 [1:19:07<3:53:32, 250.23s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0004000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  27%|██▋       | 20/75 [1:23:17<3:49:20, 250.19s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0004200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  28%|██▊       | 21/75 [1:27:27<3:45:05, 250.11s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0004400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  29%|██▉       | 22/75 [1:31:37<3:40:55, 250.11s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0004600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  31%|███       | 23/75 [1:35:47<3:36:45, 250.11s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0004800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  32%|███▏      | 24/75 [1:39:56<3:32:27, 249.94s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0005000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  33%|███▎      | 25/75 [1:44:06<3:28:16, 249.92s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0005200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  35%|███▍      | 26/75 [1:48:16<3:24:06, 249.92s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0005400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  36%|███▌      | 27/75 [1:52:26<3:19:55, 249.91s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0005600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  37%|███▋      | 28/75 [1:56:36<3:15:42, 249.83s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0005800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  39%|███▊      | 29/75 [2:00:45<3:11:29, 249.77s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0006000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  40%|████      | 30/75 [2:04:55<3:07:15, 249.67s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0006200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  41%|████▏     | 31/75 [2:09:04<3:03:03, 249.63s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0006400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  43%|████▎     | 32/75 [2:13:14<2:58:55, 249.66s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0006600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  44%|████▍     | 33/75 [2:17:24<2:54:53, 249.84s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0006800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  45%|████▌     | 34/75 [2:21:34<2:50:42, 249.81s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0007000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  47%|████▋     | 35/75 [2:25:44<2:46:31, 249.79s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0007200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  48%|████▊     | 36/75 [2:29:53<2:42:19, 249.73s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0007400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  49%|████▉     | 37/75 [2:34:03<2:38:11, 249.78s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0007600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  51%|█████     | 38/75 [2:38:13<2:34:04, 249.84s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0007800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  52%|█████▏    | 39/75 [2:42:24<2:30:08, 250.23s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0008000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  53%|█████▎    | 40/75 [2:46:34<2:25:51, 250.04s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0008200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  55%|█████▍    | 41/75 [2:50:44<2:21:37, 249.92s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0008400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  56%|█████▌    | 42/75 [2:54:53<2:17:23, 249.81s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0008600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  57%|█████▋    | 43/75 [2:59:03<2:13:12, 249.78s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0008800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  59%|█████▊    | 44/75 [3:03:13<2:09:02, 249.75s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0009000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  60%|██████    | 45/75 [3:07:22<2:04:52, 249.75s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0009200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  61%|██████▏   | 46/75 [3:11:32<2:00:41, 249.70s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0009400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  63%|██████▎   | 47/75 [3:15:41<1:56:31, 249.68s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0009600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  64%|██████▍   | 48/75 [3:19:51<1:52:23, 249.76s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0009800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  65%|██████▌   | 49/75 [3:24:01<1:48:11, 249.69s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0010000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  67%|██████▋   | 50/75 [3:28:10<1:44:00, 249.63s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0010200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  68%|██████▊   | 51/75 [3:32:20<1:39:51, 249.63s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0010400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  69%|██████▉   | 52/75 [3:36:30<1:35:41, 249.62s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0010600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  71%|███████   | 53/75 [3:40:39<1:31:31, 249.62s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0010800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  72%|███████▏  | 54/75 [3:44:49<1:27:20, 249.56s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0011000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  73%|███████▎  | 55/75 [3:48:58<1:23:11, 249.57s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0011200.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  75%|███████▍  | 56/75 [3:53:08<1:19:02, 249.60s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0011400.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  76%|███████▌  | 57/75 [3:57:18<1:14:53, 249.62s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0011600.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  77%|███████▋  | 58/75 [4:01:27<1:10:44, 249.66s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0011800.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  79%|███████▊  | 59/75 [4:05:37<1:06:34, 249.64s/it]

⚠️  upload failed (not logged in?): name 'HF_REPO' is not defined


checkpoint_0012000.pth:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda
Moving model to device:  cuda


checkpoints:  79%|███████▊  | 59/75 [4:08:49<1:07:28, 253.04s/it]


KeyboardInterrupt: 

SyntaxError: invalid syntax (<ipython-input-25-f67f8e96d4cc>, line 1)

In [44]:
for step, file_in_repo in ckpts:
    # load & move to device…
    loss = ce_per_token(model, val_loader)
    loss_pert = ce_per_token(model, val_loader)  # perturb step can stay

    print(f"step={step:>4}, loss={loss:.3f}, Δ={loss_pert-loss:.3f}")


step= 500, loss=4.539, Δ=0.000
step=1000, loss=4.539, Δ=0.000
step=1500, loss=4.539, Δ=0.000
step=2000, loss=4.539, Δ=0.000
step=2500, loss=4.539, Δ=0.000


In [32]:
# Cell 7 — Save & inspect

df = pd.DataFrame(records).sort_values("step")
print(df)
df.to_csv("ckpt_loss_perturbation_scan.csv", index=False)
print("Saved → ckpt_loss_perturbation_scan.csv")


checkpoints:   0%|          | 0/5 [00:00<?, ?it/s]

Moving model to device:  cuda


checkpoints:   0%|          | 0/5 [00:01<?, ?it/s]

Moving model to device:  cuda
Moving model to device:  cuda





TypeError: llc_mean.<locals>.<lambda>() takes 1 positional argument but 2 were given