In [None]:
import os

# Both switches are exported before any later cell imports torch.
os.environ["PYTORCH_DISABLE_DYNAMO"] = "1"
os.environ["TORCH_COMPILE_DISABLE"] = "1"

# %pip (instead of !pip) installs into the environment of the running kernel.

# Install PyTorch 2.2.2 with CUDA 11.8 (Colab default)
%pip install torch==2.2.2+cu118 --index-url https://download.pytorch.org/whl/cu118

# Remaining dependencies. Only numpy and safetensors are version-pinned;
# TODO: pin deepsnap (commit hash), PyDrive and ogb as well for reproducible runs.
%pip install -q git+https://github.com/snap-stanford/deepsnap.git
%pip install -q PyDrive
%pip install ogb
%pip install numpy==1.26.4
# Install a specific version of safetensors
%pip install safetensors==0.4.3

Looking in indexes: https://download.pytorch.org/whl/cu118
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import os

from google.colab import drive

# Mount Google Drive under /content/drive (force_remount=True is passed through to Colab).
drive.mount('/content/drive', force_remount=True)

# Location of the precomputed node-embedding file; build_parser() uses it as the
# default for --emb. Plain string literal: there are no placeholders to format.
emb_filename = "/content/drive/MyDrive/CS145/neurips/FINAL-CODE/artifacts/node_embeddings.pt"


Mounted at /content/drive


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Contrastive link prediction on ogbl-ddi with precomputed negative sampling.
Colab-friendly; robust to Jupyter's extra CLI args. OGB evaluator compliant.

Usage (in Colab):
  !python train_ddi_mlp_contrastive_precomp.py \
      --emb /content/emb.pt --epochs 3 --batch_size 2048 --pool_size 1000
"""

# ---- IMPORTANT: disable Dynamo/compile BEFORE importing torch (Colab quirk) ----
# setdefault() only writes a value when the variable is not already present, so a
# value exported earlier in the session (e.g. by the setup cell) is left untouched.
import os
os.environ.setdefault("PYTORCH_DISABLE_DYNAMO", "1")
os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")

import argparse
import json
import random
import re
import warnings
from pathlib import Path
from typing import Dict, Tuple, List

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import math
from math import log, exp



# ---------- OGB import (install on the fly if needed) ----------
def ensure_ogb_installed():
    """Import OGB's link-prediction API, pip-installing ``ogb`` first if it is missing.

    Returns:
        tuple: ``(LinkPropPredDataset, Evaluator)`` from ``ogb.linkproppred``.

    Raises:
        subprocess.CalledProcessError: the fallback ``pip install`` exited non-zero.
        ImportError: ``ogb`` still cannot be imported after the installation attempt.
    """
    try:
        import ogb  # noqa: F401
    except ImportError:
        # Only a missing package is worth a pip install; any other import-time
        # failure is propagated instead of being masked by a pointless reinstall.
        import importlib
        import subprocess
        import sys as _sys
        print("[Info] Installing ogb ...")
        subprocess.check_call([_sys.executable, "-m", "pip", "install", "-q", "ogb"])
        # The package was created after the interpreter started: drop the import
        # system's cached directory listings so the import below can find it.
        importlib.invalidate_caches()
    from ogb.linkproppred import LinkPropPredDataset, Evaluator
    return LinkPropPredDataset, Evaluator

# Resolved once when this cell runs; eval_hits() and main() use these two names as globals.
LinkPropPredDataset, Evaluator = ensure_ogb_installed()


# ---------------- Utils ----------------

def set_seed(seed: int):
    """Seed Python, NumPy and PyTorch (CPU + all CUDA devices) and make cuDNN deterministic.

    Also exports PYTHONHASHSEED and disables the cuDNN autotuner (benchmark mode).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    os.environ["PYTHONHASHSEED"] = str(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Suppress the FutureWarning whose text begins with the torch.load / weights_only
# notice below. `message` is interpreted as a regular expression, so re.escape()
# is applied to match the backticks and other punctuation literally.
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=re.escape("You are using `torch.load` with `weights_only=False`"),
)

def edge_to_np_pairs(edge) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split an edge list into its two endpoint columns.

    `edge` is a torch.Tensor or anything np.asarray() accepts, of shape [E, 2]
    (checked with an assert). Both returned arrays have dtype np.int64.
    """
    if torch.is_tensor(edge):
        assert edge.dim() == 2 and edge.size(1) == 2
        pairs = edge.cpu().numpy()
    else:
        pairs = np.asarray(edge)
        assert pairs.ndim == 2 and pairs.shape[1] == 2

    pairs = pairs.astype(np.int64)
    heads, tails = pairs[:, 0], pairs[:, 1]
    return heads, tails

def build_global_adj(split, num_nodes: int) -> List[set]:
    """Build one neighbour set per node from the train, valid and test edges (both directions).

    Parts missing from `split`, or lacking an "edge" entry, are skipped. The result is
    used by the training negative sampler to avoid drawing known positives.
    """
    neighbours = [set() for _ in range(num_nodes)]
    for part_name in ("train", "valid", "test"):
        if part_name not in split or "edge" not in split[part_name]:
            continue
        heads, tails = edge_to_np_pairs(split[part_name]["edge"])
        for src, dst in zip(heads.tolist(), tails.tolist()):
            # undirected: record the edge on both endpoints
            neighbours[src].add(dst)
            neighbours[dst].add(src)
    return neighbours


def precompute_negative_pools(
    adj: List[set],
    num_nodes: int,
    u_nodes: np.ndarray,
    pool_size: int,
    rng: np.random.Generator,
) -> Dict[int, np.ndarray]:
    r"""
    For each anchor u in u_nodes, precompute a reservoir (<= pool_size) of negatives v
    sampled from V \ ({u} ∪ N(u)). Stored sparsely in a dict for speed/memory.

    Args:
        adj: adj[u] is the set of neighbours N(u); indexed by node id.
        num_nodes: |V|; candidate negatives are the ids 0 .. num_nodes-1.
        u_nodes: anchor node ids (duplicates allowed; each distinct id is processed once).
        pool_size: maximum number of negatives kept per anchor.
        rng: NumPy generator used for sampling without replacement.

    Returns:
        Dict mapping each distinct anchor id to an int64 array of negatives. The array
        is empty when every node is either the anchor itself or one of its neighbours.
    """
    # (The docstring is a raw string: "\ " is otherwise an invalid escape sequence.)
    all_nodes = np.arange(num_nodes, dtype=np.int64)
    neg_pool: Dict[int, np.ndarray] = {}
    anchors = np.unique(np.asarray(u_nodes).astype(np.int64))
    for u in tqdm(anchors, desc="Precomputing negative pools"):
        u = int(u)
        # Mask out the anchor and its neighbours; whatever stays True is a valid negative.
        mask = np.ones(num_nodes, dtype=bool)
        neighbours = adj[u]
        mask[np.fromiter(neighbours, dtype=np.int64, count=len(neighbours))] = False
        mask[u] = False
        candidates = all_nodes[mask]
        if candidates.size == 0:
            negs = np.empty((0,), dtype=np.int64)
        else:
            take = min(pool_size, candidates.size)
            idx = rng.choice(candidates.size, size=take, replace=False)
            negs = candidates[idx]
        neg_pool[u] = negs.astype(np.int64, copy=False)
    return neg_pool


class RoundRobinPicker:
    """Rolling pointer per u; returns exactly k_neg per u (wraps/shuffles if needed).

    The pool arrays in `neg_pool` are shuffled IN PLACE whenever a pool is exhausted,
    so the dict passed to the constructor is mutated by pick().
    """
    def __init__(self, neg_pool: Dict[int, np.ndarray]):
        self.neg_pool = neg_pool
        # Next unread position inside each anchor's pool.
        self.ptr = {u: 0 for u in neg_pool.keys()}

    def pick(self, u_batch: np.ndarray, k_neg: int, rng: np.random.Generator) -> np.ndarray:
        """Return a [B, k_neg] int64 array of negatives, one row per anchor in u_batch.

        Rows for anchors with no pool (missing key or empty array) are filled with -1.
        When fewer than k_neg unread entries remain, the unread tail is served first,
        the pool is reshuffled, and the row is completed from the start of the new
        order (repeating entries if the pool itself is smaller than k_neg).
        """
        B = u_batch.shape[0]
        out = np.full((B, k_neg), -1, dtype=np.int64)
        for i, u in enumerate(u_batch.astype(np.int64).tolist()):
            pool = self.neg_pool.get(u, None)
            if pool is None or pool.size == 0:
                continue  # stays -1
            p = self.ptr.get(u, 0)
            if p + k_neg <= pool.size:
                # Copy into `out` BEFORE any shuffle: pool[p:p+k] is a view, and an
                # in-place shuffle would silently change what was selected.
                out[i, :] = pool[p:p + k_neg]
                p += k_neg
                if p == pool.size:
                    rng.shuffle(pool)
                    p = 0
                self.ptr[u] = p
            else:
                # Same aliasing hazard: materialise the unread tail before reshuffling.
                first = pool[p:].copy()
                rest_need = k_neg - first.size
                rng.shuffle(pool)
                sel = np.concatenate([first, pool[:min(rest_need, pool.size)]])
                if sel.size < k_neg:  # extremely small pool → allow repeats
                    reps = int(np.ceil(k_neg / max(1, pool.size)))
                    sel = np.tile(pool, reps)[:k_neg]
                self.ptr[u] = rest_need % max(1, pool.size)
                out[i, :] = sel
        return out


# --------------- Model -----------------

# class Projector(nn.Module):
#     def __init__(self, in_dim: int, hidden: int = 512, proj: int = 256, dropout: float = 0.1):
#         super().__init__()
#         self.net = nn.Sequential(
#             nn.Linear(in_dim, hidden),
#             nn.ReLU(inplace=True),
#             nn.Dropout(dropout),
#             nn.Linear(hidden, proj),
#         )

#     def forward(self, x):
#         z = self.net(x)
#         return F.normalize(z, dim=-1)

class Projector(nn.Module):
    """Two-layer MLP head: Linear -> ReLU -> Dropout -> Linear, L2-normalised on the last dim.

    Author's experiment notes: BatchNorm after the first layer made results worse;
    GELU and a final LayerNorm were also tried and are not used.
    """

    def __init__(self, in_dim: int, hidden: int = 1024, proj: int = 256, dropout: float = 0.1):
        super().__init__()
        # Registration order (fc1, act, drop, fc2) fixes the state_dict key order.
        self.fc1 = nn.Linear(in_dim, hidden)
        self.act = nn.ReLU(inplace=True)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden, proj)

    def forward(self, x):
        hidden_act = self.drop(self.act(self.fc1(x)))
        projected = self.fc2(hidden_act)
        return F.normalize(projected, dim=-1)



class Projector_Three(nn.Module):
    """Three-layer MLP head: two (Linear -> ReLU -> Dropout) stages, then a final Linear.

    The output is L2-normalised along the last dimension. The same ReLU and Dropout
    modules are reused for both hidden stages.
    """

    def __init__(self, in_dim: int, hidden: int = 1024, proj: int = 256, dropout: float = 0.1):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden)
        self.act = nn.ReLU(inplace=True)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, proj)     # final projection

    def forward(self, x):
        out = x
        for layer in (self.fc1, self.fc2):
            out = self.drop(self.act(layer(out)))
        return F.normalize(self.fc3(out), dim=-1)




# --------------- OGB-compliant Eval ------------------

@torch.no_grad()
def eval_hits(model, emb, split, batch_size: int, device: torch.device):
    """
    OGB-compliant evaluation for ogbl-ddi.

    For both 'valid' and 'test':
      - y_pred_pos: 1-D (num_pos,)
      - y_pred_neg: 1-D (num_neg,) over a GLOBAL list of negative edges

    We honor whatever format 'edge_neg' provides by converting it to a global [Nneg,2] list:
      * [Nneg, 2]                    -> as is
      * [E, M, 2]                    -> reshape(-1,2)
      * [E, M] (tails only)          -> pair with heads from 'edge' and reshape(-1,2)
      * ragged object rows of tails  -> pair with heads per row and concat

    Args:
        model: callable mapping the embedding matrix to projected vectors; it is called
            once on the full `emb` (the caller decides train/eval mode).
        emb: node embedding tensor, one row per node.
        split: mapping with 'valid' and 'test' entries, each holding 'edge' and 'edge_neg'.
        batch_size: unused; kept so existing call sites keep working.
        device: unused; kept so existing call sites keep working.

    Returns:
        (val_res, test_res): the dicts returned by the OGB evaluator for each part.

    Raises:
        ValueError: 'edge_neg' has a layout that is not one of the four listed above,
            or a tails-only array has more rows than there are positive edges.
            (Such input would otherwise be scored against zero negatives, which makes
            the reported metric meaningless.)
    """
    evaluator = Evaluator(name="ogbl-ddi")

    # Project all nodes once (avoids repeated forward passes)
    Z = model(emb).detach().float().cpu().numpy()  # [N, P]

    def score_pairs_np(pairs_np: np.ndarray, chunk: int = 200_000) -> np.ndarray:
        """Dot-product score of every (u, v) row, computed in chunks to bound memory."""
        if pairs_np.size == 0:
            return np.empty((0,), dtype=np.float32)
        u = pairs_np[:, 0].astype(np.int64, copy=False)
        v = pairs_np[:, 1].astype(np.int64, copy=False)
        out = np.empty(u.shape[0], dtype=np.float32)
        for i in range(0, u.shape[0], chunk):
            sl = slice(i, min(i + chunk, u.shape[0]))
            Zu = Z[u[sl]]  # [b, P]
            Zv = Z[v[sl]]  # [b, P]
            out[sl] = np.einsum("bp,bp->b", Zu, Zv, optimize=True).astype(np.float32)
        return out

    def negatives_to_pairs(neg_raw: np.ndarray, pos_np: np.ndarray, part: str) -> np.ndarray:
        """Convert any supported 'edge_neg' layout to a global [Nneg, 2] int64 array."""
        if neg_raw.ndim == 2 and neg_raw.shape[1] == 2:
            # Canonical DDI: global negative edge list
            return neg_raw.astype(np.int64, copy=False)

        if neg_raw.ndim == 3 and neg_raw.shape[-1] == 2:
            # Per-positive pairs -> flatten to global list
            return neg_raw.reshape(-1, 2).astype(np.int64, copy=False)

        if neg_raw.ndim == 2:
            # [E, M] tails-only: pair with corresponding heads, then flatten
            E, M = neg_raw.shape
            if E > pos_np.shape[0]:
                raise ValueError(
                    f"'{part}' edge_neg has {E} rows of tails but only "
                    f"{pos_np.shape[0]} positive edges to take heads from"
                )
            heads = np.repeat(pos_np[:E, 0].astype(np.int64, copy=False), repeats=M)
            tails = neg_raw.reshape(-1).astype(np.int64, copy=False)
            return np.stack([heads, tails], axis=1)

        if neg_raw.dtype == object or neg_raw.ndim == 1:
            # Ragged rows of tails -> pair per row with head, then concat
            pairs_list = []
            for i, r in enumerate(list(neg_raw)):
                if r is None:
                    continue
                # reshape(-1) also accepts a row that is a single scalar tail
                r = np.asarray(r, dtype=np.int64).reshape(-1)
                if r.size:
                    heads = np.full(r.size, pos_np[i, 0], dtype=np.int64)
                    pairs_list.append(np.stack([heads, r], axis=1))
            if pairs_list:
                return np.concatenate(pairs_list, axis=0)
            return np.empty((0, 2), dtype=np.int64)

        # Unknown format: fail loudly instead of evaluating against no negatives.
        raise ValueError(
            f"Unsupported 'edge_neg' layout for '{part}': "
            f"shape={neg_raw.shape}, dtype={neg_raw.dtype}"
        )

    def build_split(part: str):
        """Return (y_pred_pos, y_pred_neg) as 1-D float32 arrays for one split part."""
        # Positives → 1-D
        pos = split[part]["edge"]
        pos_np = pos.cpu().numpy().astype(np.int64) if torch.is_tensor(pos) else np.asarray(pos, dtype=np.int64)
        assert pos_np.ndim == 2 and pos_np.shape[1] == 2, "positive edges must be [E,2]"
        y_pred_pos = score_pairs_np(pos_np)  # (E,)

        # Negatives → convert to global [Nneg,2], then score → 1-D
        neg = split[part]["edge_neg"]
        neg_raw = neg.cpu().numpy() if torch.is_tensor(neg) else np.asarray(neg)
        y_pred_neg = score_pairs_np(negatives_to_pairs(neg_raw, pos_np, part))  # (Nneg,)
        return y_pred_pos.astype(np.float32), y_pred_neg.astype(np.float32)

    results = []
    for part in ("valid", "test"):
        y_pos, y_neg = build_split(part)
        # Guard rails: enforce pure 1-D float32 arrays for the official evaluator
        y_pos = np.asarray(y_pos, dtype=np.float32).reshape(-1)
        y_neg = np.asarray(y_neg, dtype=np.float32).reshape(-1)
        results.append((y_pos, y_neg))

    (ypp_v, ypn_v), (ypp_t, ypn_t) = results
    val_res  = evaluator.eval({"y_pred_pos": ypp_v, "y_pred_neg": ypn_v})
    test_res = evaluator.eval({"y_pred_pos": ypp_t, "y_pred_neg": ypn_t})
    return val_res, test_res


# --------------- Train -----------------

# def build_parser():
#     ap = argparse.ArgumentParser()
#     ap.add_argument("--emb", type=str, default=emb_filename)
#     ap.add_argument("--dataset_root", type=str, default="./ogb_data")
#     ap.add_argument("--out_dir", type=str, default="./ddi_mlp_contrastive_precomp")
#     ap.add_argument("--epochs", type=int, default=100)
#     ap.add_argument("--batch_size", type=int, default=4096)
#     ap.add_argument("--k_neg", type=int, default=100)
#     ap.add_argument("--pool_size", type=int, default=2000, help="Neg reservoir size per u")
#     ap.add_argument("--hidden", type=int, default=512)
#     ap.add_argument("--proj", type=int, default=256)
#     ap.add_argument("--dropout", type=float, default=0.1)
#     ap.add_argument("--tau", type=float, default=0.07)
#     ap.add_argument("--symmetric", type=int, default=0, help="0=one-sided InfoNCE, 1=also v->u")
#     ap.add_argument("--lr", type=float, default=1e-3)
#     ap.add_argument("--weight_decay", type=float, default=1e-4)
#     ap.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu")
#     ap.add_argument("--seed", type=int, default=42)
#     return ap

# tau = 0.7, lr = 5e-3, k_neg = 400, pool_size = 4000 -> best epoch at 5 (hits@20=0.4705) without eta_min in cosine scheduler
# tau = 0.7, lr = 1e-4, k_neg = 400, pool_size = 4000 -> best epoch at 16 (hits@20=0.5148) with eta_min=args.lr * 0.05 in cosine scheduler
def build_parser():
    """Create the command-line parser holding every training hyper-parameter.

    The default for --emb is read from the module-level `emb_filename` at call time.
    Alternative defaults tried earlier: --pool_size 4500, --lr 5e-3.
    """
    default_device = "cuda" if torch.cuda.is_available() else "cpu"
    results_dir = "/content/drive/MyDrive/CS145/neurips/FINAL-CODE/results/main_ablations/semantic_only"

    # (flag, add_argument keyword options) in the order they are registered
    option_table = [
        ("--emb",          dict(type=str, default=emb_filename)),
        ("--dataset_root", dict(type=str, default="./ogb_data")),
        ("--out_dir",      dict(type=str, default=results_dir)),
        ("--epochs",       dict(type=int, default=500)),
        ("--batch_size",   dict(type=int, default=4096)),
        ("--k_neg",        dict(type=int, default=200)),
        ("--pool_size",    dict(type=int, default=3000, help="Neg reservoir size per u")),
        ("--hidden",       dict(type=int, default=512)),
        ("--proj",         dict(type=int, default=256)),
        ("--dropout",      dict(type=float, default=0.05)),
        ("--tau",          dict(type=float, default=0.07)),
        ("--symmetric",    dict(type=int, default=1, help="0=one-sided InfoNCE, 1=also v->u")),
        ("--lr",           dict(type=float, default=1e-4)),
        ("--weight_decay", dict(type=float, default=5e-4)),
        ("--device",       dict(type=str, default=default_device)),
        ("--seed",         dict(type=int, default=42)),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser

def _info_nce_loss(model, emb, anchors, positives, negatives, scale, device):
    """InfoNCE cross-entropy for one direction (anchor -> positive vs. k sampled negatives).

    Args:
        model: projector applied to raw embedding rows.
        emb: [N, D] embedding tensor on `device`.
        anchors, positives: int arrays of shape [B] with node ids.
        negatives: int array of shape [B, k] with node ids.
        scale: scalar tensor multiplying the logits (1 / temperature).
        device: device the index tensors are moved to.

    Returns:
        Scalar loss tensor; the positive always sits in column 0 of the logits.
    """
    n_rows, k = negatives.shape
    a_t = torch.from_numpy(anchors.astype(np.int64)).long().to(device)
    p_t = torch.from_numpy(positives.astype(np.int64)).long().to(device)
    n_t = torch.from_numpy(negatives.reshape(-1).astype(np.int64)).long().to(device)

    z_anchor = model(emb[a_t])                          # [B, P]
    z_pos = model(emb[p_t])                             # [B, P]
    z_neg = model(emb[n_t]).reshape(n_rows, k, -1)      # [B, k, P]

    pos = torch.sum(z_anchor * z_pos, dim=-1, keepdim=True)   # [B, 1]
    neg = torch.einsum("bd,bkd->bk", z_anchor, z_neg)         # [B, k]

    logits = scale * torch.cat([pos, neg], dim=1)
    labels = torch.zeros(n_rows, dtype=torch.long, device=device)
    return F.cross_entropy(logits, labels)


def main(args):
    """Train the projector with (optionally symmetric) InfoNCE and track the best checkpoint.

    Reads `args` as produced by build_parser(). Optional attributes `num_layers`
    (default 3) and `warmup_epochs` (default 10) are honoured when present.

    Side effects: downloads/loads ogbl-ddi under args.dataset_root, writes
    `model_best.pt` (whenever validation improves) and `metrics.json` to args.out_dir,
    and prints progress to stdout.
    """
    num_layers = getattr(args, "num_layers", 3)
    warmup_epochs = getattr(args, "warmup_epochs", 10)

    set_seed(args.seed)
    device = torch.device(args.device)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Data
    dataset = LinkPropPredDataset(name="ogbl-ddi", root=args.dataset_root)
    split = dataset.get_edge_split()

    train_edges = split["train"]["edge"]  # [E,2]
    u_train_np, v_train_np = edge_to_np_pairs(train_edges)

    # Embeddings
    # NOTE(security): torch.load unpickles the file; only point --emb at trusted files.
    emb = torch.load(args.emb, map_location="cpu")
    if not torch.is_tensor(emb):
        emb = torch.tensor(emb)
    emb = emb.float().to(device)
    num_nodes, in_dim = emb.shape

    # Adjacency & negative pools (for training sampler only)
    # NOTE(review): the adjacency includes valid/test edges, so the training sampler
    # is informed by held-out positives — confirm this is acceptable for reporting.
    adj = build_global_adj(split, num_nodes)
    rng = np.random.default_rng(args.seed)
    neg_pool = precompute_negative_pools(adj, num_nodes, u_train_np, pool_size=args.pool_size, rng=rng)

    #============================================
    #print negative pool stats
    sizes = np.array([len(v) for v in neg_pool.values()])

    print(f"#anchors: {len(sizes)}")
    print(f"min={sizes.min()}  max={sizes.max()}  median={np.median(sizes):.1f}  mean={sizes.mean():.1f}")
    print("p05/25/50/75/95 =", np.percentile(sizes, [5,25,50,75,95]).astype(int))

    # how many anchors have empty pools or too few for k_neg?
    k = args.k_neg
    num_empty = (sizes == 0).sum()
    lt_k = (sizes < k).sum()
    print(f"empty pools: {num_empty}  (<k_neg: {lt_k})  (k_neg={k}, pool_size={args.pool_size})")
    #=============================================

    picker = RoundRobinPicker(neg_pool)

    # Reverse direction (v -> u): build its pools once, up front, rather than lazily
    # in the middle of the first training batch.
    picker_v = None
    if args.symmetric:
        neg_pool_v = precompute_negative_pools(adj, num_nodes, v_train_np, args.pool_size, rng)
        picker_v = RoundRobinPicker(neg_pool_v)

    # Model/opt
    if num_layers == 2:
        model = Projector(in_dim=in_dim, hidden=args.hidden, proj=args.proj, dropout=args.dropout).to(device)
    else:
        model = Projector_Three(in_dim=in_dim, hidden=args.hidden, proj=args.proj, dropout=args.dropout).to(device)

    # Learnable temperature (stored as log(1/tau))
    logit_scale = torch.nn.Parameter(torch.tensor(math.log(1/args.tau), dtype=torch.float32, device=device))

    # Include it in the optimizer without weight decay
    opt = torch.optim.AdamW(
        [{"params": model.parameters(), "weight_decay": args.weight_decay},
         {"params": [logit_scale],      "weight_decay": 0.0}],
        lr=args.lr,
    )

    # Uniquify in case any tensor appears in multiple param groups
    params_in_opt = {p for g in opt.param_groups for p in g['params']}
    total_opt = sum(p.numel() for p in params_in_opt)
    trainable_opt = sum(p.numel() for p in params_in_opt if p.requires_grad)
    print(f"Params in optimizer: {total_opt:,}  ({total_opt/1e6:.2f}M), "
      f"trainable: {trainable_opt:,}  ({trainable_opt/1e6:.2f}M)")

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=max(1, args.epochs - warmup_epochs), eta_min=args.lr * 0.05)

    order = np.arange(u_train_np.shape[0], dtype=np.int64)
    best_val = float("-inf")
    best_test = None      # stays None if no epoch is run
    primary = None
    best_path = out_dir / "model_best.pt"

    for epoch in range(1, args.epochs + 1):
        # --- warmup ---
        # Set the LR BEFORE training the epoch, so epoch 1 really starts at lr/warmup_epochs
        # and the ramp reaches args.lr in epoch `warmup_epochs`.
        if epoch <= warmup_epochs:
            for g in opt.param_groups:
                g['lr'] = args.lr * epoch / warmup_epochs

        np.random.shuffle(order)
        model.train()
        running = 0.0
        steps = 0

        for start in tqdm(range(0, order.size, args.batch_size), desc=f"Epoch {epoch}/{args.epochs}"):
            idx = order[start:start+args.batch_size]
            u_batch = u_train_np[idx]
            v_pos   = v_train_np[idx]

            v_neg = picker.pick(u_batch, args.k_neg, rng)   # [B, k_neg]

            # Rows whose anchor has no negatives come back as -1 and are dropped.
            keep = (v_neg[:, 0] != -1)
            if not np.any(keep):
                continue
            u_batch = u_batch[keep]; v_pos = v_pos[keep]; v_neg = v_neg[keep]

            # Clamp keeps the effective temperature within [1/100, 100].
            scale = logit_scale.clamp(min=math.log(1/100), max=math.log(100)).exp()
            if start == 0:
                print(f"  temp ≈ {1/scale.item():.4f} (scale={scale.item():.2f})")

            loss = _info_nce_loss(model, emb, u_batch, v_pos, v_neg, scale, device)

            if picker_v is not None:
                u_neg = picker_v.pick(v_pos, args.k_neg, rng)  # [B,k]
                keep2 = (u_neg[:, 0] != -1)
                if np.any(keep2):
                    # v is the anchor, u the positive, u_neg the negatives
                    loss_rev = _info_nce_loss(
                        model, emb, v_pos[keep2], u_batch[keep2], u_neg[keep2], scale, device
                    )
                    loss = 0.5 * (loss + loss_rev)

            opt.zero_grad(set_to_none=True)
            loss.backward()
            opt.step()

            running += float(loss.item()); steps += 1

        # Cosine decay takes over once warm-up has reached the full LR; no step after
        # the final epoch so the recorded LR is the one the last epoch trained with.
        if warmup_epochs <= epoch < args.epochs:
            scheduler.step()

        avg_loss = running / max(1, steps)

        # Eval
        model.eval()
        val_res, test_res = eval_hits(model, emb, split, batch_size=32768, device=device)
        val_hits  = {k: v for k, v in val_res.items()  if k.startswith("hits@")}
        test_hits = {k: v for k, v in test_res.items() if k.startswith("hits@")}
        # Track hits@20 when available, otherwise the smallest reported K.
        primary = "hits@20" if "hits@20" in val_hits else f"hits@{sorted(int(k.split('@')[1]) for k in val_hits)[0]}"
        primary_test = "hits@20" if "hits@20" in test_hits else f"hits@{sorted(int(k.split('@')[1]) for k in test_hits)[0]}"

        if float(val_hits.get(primary, -1.0)) > best_val:
            best_val = float(val_hits[primary])
            best_test = float(test_hits[primary_test])
            torch.save({"epoch": epoch,
                        "best_val": best_val,
                        "best_test": best_test,
                        "train_loss": avg_loss,
                        "model": model.state_dict(),
                        "opt": opt.state_dict(),
                        "scheduler": scheduler.state_dict(),
                        "logit_scale": float(logit_scale.detach().cpu()),   # keep learned τ
                        "config": vars(args),
                        "val": val_res,
                        "test": test_res,
                        "rng": {
                            "py": random.getstate(),
                            "np": np.random.get_state(),
                            "torch": torch.get_rng_state(),
                            "cuda": torch.cuda.get_rng_state_all() if torch.cuda.is_available() else None,
                        },
                        "primary": primary},
                       best_path)
            print(f"Epoch {epoch}: Best Val {best_val}, Best Test {best_test}")

    # Unclamped values of the learned temperature parameter (hence the "_raw" keys).
    raw_logit_scale = float(logit_scale.detach().cpu().item())
    raw_scale = float(math.exp(raw_logit_scale))
    raw_tau   = float(math.exp(-raw_logit_scale))

    final_lr = float(opt.param_groups[0]["lr"])
    final_metrics = {
        "config": vars(args),
        "primary": primary,            # which metric you tracked (e.g., hits@20)
        "best_val": best_val,          # best validation score seen
        "best_test": best_test,        # test score at the best-validation epoch
        "final": {                     # end-of-run snapshot
            "epoch": args.epochs,
            "tau_raw": raw_tau,
            "scale_raw": raw_scale,
            "lr": final_lr,
        },
    }
    with open(out_dir / "metrics.json", "w") as f:
        json.dump(final_metrics, f, indent=2)
    print(json.dumps(final_metrics, indent=2))
    print(f"Best Val {best_val}, Best Test {best_test} Done.")



if __name__ == "__main__":
    # Entry point when run as a script or as a notebook cell: build the parser,
    # read the arguments, then start training.
    parser = build_parser()
    # Use parse_known_args so Colab's hidden "-f ..." doesn't crash argparse
    # (unrecognised arguments are returned separately and discarded here).
    args, _ = parser.parse_known_args()
    main(args)


  sampled from V \ ({u} ∪ N(u)). Stored sparsely in a dict for speed/memory.


Downloading http://snap.stanford.edu/ogb/data/linkproppred/ddi.zip


Downloaded 0.04 GB: 100%|██████████| 46/46 [00:01<00:00, 24.70it/s]


Extracting ./ogb_data/ddi.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 54.81it/s]

Saving...



Precomputing negative pools: 100%|██████████| 4107/4107 [00:00<00:00, 4256.89it/s]


#anchors: 4107
min=1789  max=3000  median=3000.0  mean=2956.7
p05/25/50/75/95 = [2652 3000 3000 3000 3000]
empty pools: 0  (<k_neg: 0)  (k_neg=200, pool_size=3000)
Params in optimizer: 787,713  (0.79M), trainable: 787,713  (0.79M)


Epoch 1/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0700 (scale=14.29)



Precomputing negative pools:   0%|          | 0/4124 [00:00<?, ?it/s][A
Precomputing negative pools:  10%|▉         | 396/4124 [00:00<00:00, 3959.28it/s][A
Precomputing negative pools:  19%|█▉        | 792/4124 [00:00<00:00, 3729.77it/s][A
Precomputing negative pools:  28%|██▊       | 1166/4124 [00:00<00:00, 3549.38it/s][A
Precomputing negative pools:  39%|███▉      | 1603/4124 [00:00<00:00, 3856.02it/s][A
Precomputing negative pools:  50%|█████     | 2066/4124 [00:00<00:00, 4125.27it/s][A
Precomputing negative pools:  60%|██████    | 2481/4124 [00:00<00:00, 4042.74it/s][A
Precomputing negative pools:  72%|███████▏  | 2971/4124 [00:00<00:00, 4314.08it/s][A
Precomputing negative pools:  84%|████████▍ | 3465/4124 [00:00<00:00, 4506.98it/s][A
Precomputing negative pools: 100%|██████████| 4124/4124 [00:00<00:00, 4258.71it/s]
Epoch 1/500: 100%|██████████| 261/261 [02:07<00:00,  2.05it/s]


Epoch 1: Best Val 0.393762781952071, Best Test 0.34439541834907744


Epoch 2/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0680 (scale=14.71)


Epoch 2/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 2: Best Val 0.4323052835814187, Best Test 0.3644869614724809


Epoch 3/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0677 (scale=14.76)


Epoch 3/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 3: Best Val 0.47113994411524546, Best Test 0.409037448778551


Epoch 4/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0673 (scale=14.86)


Epoch 4/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 4: Best Val 0.4728704237802366, Best Test 0.4466135786469297


Epoch 5/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0667 (scale=15.00)


Epoch 5/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 5: Best Val 0.48607750451348053, Best Test 0.4734472503352336


Epoch 6/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0658 (scale=15.19)


Epoch 6/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 6: Best Val 0.5029178434178098, Best Test 0.4968873839792043


Epoch 7/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0648 (scale=15.44)


Epoch 7/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 8/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0635 (scale=15.74)


Epoch 8/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 9/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0621 (scale=16.09)


Epoch 9/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 10/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0606 (scale=16.49)


Epoch 10/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 10: Best Val 0.5154357287866416, Best Test 0.5528021035441123


Epoch 11/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0590 (scale=16.95)


Epoch 11/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 12/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0573 (scale=17.46)


Epoch 12/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 12: Best Val 0.5183198615616268, Best Test 0.5520155218782072


Epoch 13/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0556 (scale=17.98)


Epoch 13/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 13: Best Val 0.5484347024848489, Best Test 0.5331075968806418


Epoch 14/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0541 (scale=18.49)


Epoch 14/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 15/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0526 (scale=19.01)


Epoch 15/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 16/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0512 (scale=19.53)


Epoch 16/500: 100%|██████████| 261/261 [02:06<00:00,  2.07it/s]


Epoch 16: Best Val 0.5624208736300369, Best Test 0.5900785832540509


Epoch 17/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0499 (scale=20.06)


Epoch 17/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 18/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0486 (scale=20.59)


Epoch 18/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 18: Best Val 0.5751110578399719, Best Test 0.6030159788446988


Epoch 19/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0473 (scale=21.12)


Epoch 19/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 19: Best Val 0.6122227299627685, Best Test 0.6382023986995183


Epoch 20/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0462 (scale=21.66)


Epoch 20/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 21/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0450 (scale=22.21)


Epoch 21/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 22/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0439 (scale=22.77)


Epoch 22/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 22: Best Val 0.6151293364996366, Best Test 0.6373783607638083


Epoch 23/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0429 (scale=23.33)


Epoch 23/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 24/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0418 (scale=23.90)


Epoch 24/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 24: Best Val 0.620777741986231, Best Test 0.6389290503337354


Epoch 25/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0408 (scale=24.49)


Epoch 25/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 25: Best Val 0.6365318490662152, Best Test 0.6471169909131089


Epoch 26/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0399 (scale=25.08)


Epoch 26/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 26: Best Val 0.6428694499172216, Best Test 0.6567207784911117


Epoch 27/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0389 (scale=25.68)


Epoch 27/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 28/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0380 (scale=26.30)


Epoch 28/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 28: Best Val 0.6998928750683577, Best Test 0.6966042145794784


Epoch 29/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0371 (scale=26.92)


Epoch 29/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 30/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0363 (scale=27.56)


Epoch 30/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 31/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0355 (scale=28.21)


Epoch 31/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 32/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0346 (scale=28.87)


Epoch 32/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 33/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0338 (scale=29.54)


Epoch 33/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 33: Best Val 0.7129426394684206, Best Test 0.7295207844841148


Epoch 34/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0331 (scale=30.23)


Epoch 34/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 34: Best Val 0.718321359812419, Best Test 0.7068522499981272


Epoch 35/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0323 (scale=30.93)


Epoch 35/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 35: Best Val 0.7278801998666556, Best Test 0.7219920742533092


Epoch 36/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0316 (scale=31.64)


Epoch 36/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 37/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0309 (scale=32.36)


Epoch 37/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 37: Best Val 0.7315658968154679, Best Test 0.7148379267205538


Epoch 38/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0302 (scale=33.10)


Epoch 38/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 38: Best Val 0.7387200443482235, Best Test 0.7149128392601638


Epoch 39/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0295 (scale=33.85)


Epoch 39/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 40/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0289 (scale=34.62)


Epoch 40/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 41/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0283 (scale=35.40)


Epoch 41/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 41: Best Val 0.7408475604731476, Best Test 0.7639730614507563


Epoch 42/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0276 (scale=36.19)


Epoch 42/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 42: Best Val 0.7658009274172404, Best Test 0.7548786791421016


Epoch 43/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0270 (scale=37.00)


Epoch 43/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 44/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0264 (scale=37.82)


Epoch 44/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 45/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0259 (scale=38.66)


Epoch 45/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 45: Best Val 0.7785210766430193, Best Test 0.7527137067473725


Epoch 46/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0253 (scale=39.51)


Epoch 46/500: 100%|██████████| 261/261 [02:06<00:00,  2.07it/s]
Epoch 47/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0248 (scale=40.37)


Epoch 47/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 47: Best Val 0.7864992621114848, Best Test 0.7773149847552981


Epoch 48/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0242 (scale=41.26)


Epoch 48/500: 100%|██████████| 261/261 [02:06<00:00,  2.07it/s]
Epoch 49/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0237 (scale=42.15)


Epoch 49/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 50/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0232 (scale=43.06)


Epoch 50/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 51/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0227 (scale=43.99)


Epoch 51/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 52/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0223 (scale=44.93)


Epoch 52/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 53/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0218 (scale=45.88)


Epoch 53/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 54/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0213 (scale=46.85)


Epoch 54/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 55/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0209 (scale=47.84)


Epoch 55/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 56/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0205 (scale=48.84)


Epoch 56/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 56: Best Val 0.7979234244020107, Best Test 0.8191011993497591


Epoch 57/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0201 (scale=49.86)


Epoch 57/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 58/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0197 (scale=50.89)


Epoch 58/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 59/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0193 (scale=51.93)


Epoch 59/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 60/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0189 (scale=52.99)


Epoch 60/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 61/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0185 (scale=54.06)


Epoch 61/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 61: Best Val 0.7993917101783667, Best Test 0.8234611091550614


Epoch 62/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0181 (scale=55.14)


Epoch 62/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 62: Best Val 0.8203297649993633, Best Test 0.84493853426125


Epoch 63/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0178 (scale=56.24)


Epoch 63/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 64/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0174 (scale=57.34)


Epoch 64/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 65/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0171 (scale=58.47)


Epoch 65/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 66/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0168 (scale=59.58)


Epoch 66/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 67/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0165 (scale=60.73)


Epoch 67/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 67: Best Val 0.8419495239308108, Best Test 0.7931514956288533


Epoch 68/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0162 (scale=61.88)


Epoch 68/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 68: Best Val 0.8608949051981811, Best Test 0.8320236124324851


Epoch 69/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0159 (scale=63.03)


Epoch 69/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 70/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0156 (scale=64.19)


Epoch 70/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 71/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0153 (scale=65.36)


Epoch 71/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 72/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0150 (scale=66.52)


Epoch 72/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 73/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0148 (scale=67.70)


Epoch 73/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 74/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0145 (scale=68.86)


Epoch 74/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 75/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0143 (scale=70.02)


Epoch 75/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 76/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0140 (scale=71.19)


Epoch 76/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 77/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0138 (scale=72.31)


Epoch 77/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 78/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0136 (scale=73.47)


Epoch 78/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 78: Best Val 0.8660264141614665, Best Test 0.8580482286930009


Epoch 79/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0134 (scale=74.59)


Epoch 79/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 80/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0132 (scale=75.70)


Epoch 80/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 81/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0130 (scale=76.79)


Epoch 81/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 82/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0128 (scale=77.85)


Epoch 82/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 83/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0127 (scale=78.87)


Epoch 83/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 84/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0125 (scale=79.85)


Epoch 84/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 85/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0124 (scale=80.80)


Epoch 85/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 86/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0122 (scale=81.70)


Epoch 86/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 87/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0121 (scale=82.58)


Epoch 87/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 87: Best Val 0.8663635205897116, Best Test 0.8521076643019275


Epoch 88/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0120 (scale=83.45)


Epoch 88/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 88: Best Val 0.8897961629797212, Best Test 0.8511637663028414


Epoch 89/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0119 (scale=84.19)


Epoch 89/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 90/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0118 (scale=84.90)


Epoch 90/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 91/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0117 (scale=85.65)


Epoch 91/500: 100%|██████████| 261/261 [02:06<00:00,  2.07it/s]
Epoch 92/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0116 (scale=86.23)


Epoch 92/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 93/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0115 (scale=86.81)


Epoch 93/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 94/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0114 (scale=87.45)


Epoch 94/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 95/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0114 (scale=87.93)


Epoch 95/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 96/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0113 (scale=88.47)


Epoch 96/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 97/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0112 (scale=89.00)


Epoch 97/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 98/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0112 (scale=89.44)


Epoch 98/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 98: Best Val 0.8919386616125673, Best Test 0.8462569949583861


Epoch 99/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0111 (scale=89.82)


Epoch 99/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 100/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0111 (scale=90.39)


Epoch 100/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 101/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0110 (scale=90.69)


Epoch 101/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 102/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0110 (scale=91.11)


Epoch 102/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 103/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0109 (scale=91.51)


Epoch 103/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 103: Best Val 0.8939088614043105, Best Test 0.858093176216767


Epoch 104/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0109 (scale=91.89)


Epoch 104/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 105/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0108 (scale=92.24)


Epoch 105/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 106/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0108 (scale=92.69)


Epoch 106/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 107/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0108 (scale=92.96)


Epoch 107/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 108/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0107 (scale=93.29)


Epoch 108/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 109/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0107 (scale=93.70)


Epoch 109/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 110/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0106 (scale=94.01)


Epoch 110/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 111/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0106 (scale=94.32)


Epoch 111/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 112/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0106 (scale=94.68)


Epoch 112/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 113/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0105 (scale=94.94)


Epoch 113/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 114/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0105 (scale=95.41)


Epoch 114/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 115/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0104 (scale=95.81)


Epoch 115/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 116/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0104 (scale=96.02)


Epoch 116/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 117/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0104 (scale=96.24)


Epoch 117/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 118/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0104 (scale=96.59)


Epoch 118/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 119/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0103 (scale=96.96)


Epoch 119/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 120/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0103 (scale=97.26)


Epoch 120/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 121/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0103 (scale=97.55)


Epoch 121/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 122/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0102 (scale=97.79)


Epoch 122/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 122: Best Val 0.8981189461303928, Best Test 0.8754878679142102


Epoch 123/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0102 (scale=98.04)


Epoch 123/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 124/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0102 (scale=98.40)


Epoch 124/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 125/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0101 (scale=98.64)


Epoch 125/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 126/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0101 (scale=98.90)


Epoch 126/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 127/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0101 (scale=99.18)


Epoch 127/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 128/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0101 (scale=99.44)


Epoch 128/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 129/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=99.68)


Epoch 129/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 130/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 130/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 131/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 131/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 132/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 132/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 133/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 133/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 134/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 134/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 135/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 135/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 136/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 136/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 137/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 137/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 138/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 138/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 139/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 139/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 140/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 140/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 141/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 141/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 142/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 142/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 143/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 143/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 144/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 144/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 145/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 145/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 146/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 146/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 147/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 147/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 148/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 148/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 149/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 149/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 150/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 150/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 151/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 151/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 152/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 152/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 153/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 153/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 154/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 154/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 155/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 155/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 156/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 156/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 157/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 157/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 158/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 158/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 158: Best Val 0.901257781540052, Best Test 0.902636172268876


Epoch 159/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 159/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 160/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 160/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 161/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 161/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 162/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 162/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 163/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 163/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 164/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 164/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 165/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 165/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 166/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 166/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 167/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 167/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 168/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 168/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 169/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 169/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 169: Best Val 0.9124721887196697, Best Test 0.9195064761890492


Epoch 170/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 170/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 171/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 171/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 172/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 172/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 173/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 173/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 174/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 174/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 175/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 175/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 176/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 176/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 177/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 177/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 178/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 178/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 179/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 179/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 180/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 180/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 181/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 181/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 182/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 182/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 183/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 183/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 184/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 184/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 185/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 185/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 186/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 186/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 187/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 187/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 188/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 188/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 189/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 189/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 190/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 190/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 191/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 191/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 192/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 192/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 193/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 193/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 194/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 194/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 195/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 195/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 196/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 196/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 197/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 197/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 198/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 198/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 199/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 199/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 200/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 200/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 201/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 201/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 202/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 202/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 203/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 203/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 204/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 204/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 205/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 205/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 206/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 206/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 207/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 207/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 208/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 208/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 209/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 209/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 210/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 210/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 211/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 211/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 212/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 212/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 213/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 213/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 214/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 214/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 215/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 215/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 216/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 216/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 217/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 217/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 218/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 218/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 219/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 219/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 220/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 220/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 221/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 221/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 222/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 222/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 223/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 223/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 224/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 224/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 225/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 225/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 226/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 226/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 227/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 227/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 228/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 228/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 229/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 229/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 230/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 230/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 231/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 231/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 232/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 232/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 233/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 233/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 234/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 234/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 235/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 235/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 236/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 236/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 237/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 237/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 238/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 238/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 239/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 239/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 239: Best Val 0.9128092951479148, Best Test 0.9289754211957539


Epoch 240/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 240/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 241/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 241/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 242/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 242/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 243/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 243/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 244/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 244/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 244: Best Val 0.914584722336672, Best Test 0.9084868416124176


Epoch 245/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 245/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 246/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 246/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 247/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 247/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 248/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 248/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 249/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 249/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 249: Best Val 0.9183827880948993, Best Test 0.929132737528935


Epoch 250/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 250/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 251/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 251/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 252/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 252/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 252: Best Val 0.9244207387874657, Best Test 0.9344290540793624


Epoch 253/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 253/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 254/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 254/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 255/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 255/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 256/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 256/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 257/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 257/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 258/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 258/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 259/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 259/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 260/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 260/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 261/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 261/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 262/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 262/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 263/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 263/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 264/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 264/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 265/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 265/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 266/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 266/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 267/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 267/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 268/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 268/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 269/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 269/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 270/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 270/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 271/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 271/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 272/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 272/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 273/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 273/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 274/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 274/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 275/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 275/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 276/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 276/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 277/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 277/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 278/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 278/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 279/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 279/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 280/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 280/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 281/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 281/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 282/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 282/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 283/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 283/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 284/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 284/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 285/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 285/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 286/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 286/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 287/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 287/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 288/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 288/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 289/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 289/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 290/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 290/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 291/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 291/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 292/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 292/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 293/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 293/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 294/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 294/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 295/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 295/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 296/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 296/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 297/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 297/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 298/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 298/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 299/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 299/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 300/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 300/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 301/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 301/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 302/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 302/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 303/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 303/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 304/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 304/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 305/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 305/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 306/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 306/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 307/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 307/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 308/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 308/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 309/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 309/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 310/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 310/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 311/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 311/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 312/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 312/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 313/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 313/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 314/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 314/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 315/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 315/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 316/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 316/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 317/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 317/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 318/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 318/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 319/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 319/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 320/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 320/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 321/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 321/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 322/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 322/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 323/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 323/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 324/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 324/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 325/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 325/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 326/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 326/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 327/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 327/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 328/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 328/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 329/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 329/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 330/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 330/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 331/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 331/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 332/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 332/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 333/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 333/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 334/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 334/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 335/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 335/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 336/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 336/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 337/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 337/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 338/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 338/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 339/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 339/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 340/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 340/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 341/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 341/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 342/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 342/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 343/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 343/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 344/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 344/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 345/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 345/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 346/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 346/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 347/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 347/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 348/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 348/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 349/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 349/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 350/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 350/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 351/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 351/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 352/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 352/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]


Epoch 352: Best Val 0.9301815130834751, Best Test 0.9332529272074852


Epoch 353/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 353/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 354/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 354/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 355/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 355/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 356/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 356/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 357/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 357/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 358/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 358/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 359/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 359/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 360/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 360/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 361/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 361/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 362/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 362/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 363/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 363/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 364/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 364/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 365/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 365/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 366/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 366/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 367/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 367/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 368/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 368/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 369/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 369/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 370/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 370/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 371/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 371/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 372/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 372/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 373/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 373/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 374/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 374/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 375/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 375/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 376/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 376/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 377/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 377/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 378/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 378/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 379/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 379/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 380/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 380/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 381/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 381/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 382/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 382/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 383/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 383/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 384/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 384/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 385/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 385/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 386/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 386/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 387/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 387/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 388/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 388/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 389/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 389/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 390/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 390/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 391/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 391/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 392/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 392/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 393/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 393/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 394/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 394/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 395/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 395/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 396/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 396/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 397/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 397/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 398/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 398/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 399/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 399/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 400/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 400/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 401/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 401/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 402/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 402/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 403/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 403/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 404/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 404/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 405/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 405/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 406/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 406/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 407/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 407/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 408/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 408/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 409/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 409/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 410/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 410/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 411/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 411/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 412/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 412/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 413/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 413/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 414/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 414/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 415/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 415/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 416/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 416/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 417/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 417/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 418/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 418/500: 100%|██████████| 261/261 [02:06<00:00,  2.06it/s]
Epoch 419/500:   0%|          | 0/261 [00:00<?, ?it/s]

  temp ≈ 0.0100 (scale=100.00)


Epoch 419/500:  43%|████▎     | 111/261 [00:54<01:13,  2.04it/s]


KeyboardInterrupt: 