## Session Setup

In [1]:
# Check to make sure there are multiple gpus available
import torch, os
!export CUDA_LAUNCH_BLOCKING=1

import gc, torch, os
torch.cuda.empty_cache()
gc.collect()

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
os.chdir("/mnt/home/amir/framingdecomp/framingDecomp")

print("Devices visible:", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("torch.cuda.device_count():", torch.cuda.device_count())


Devices visible: 4,5
torch.cuda.device_count(): 0




In [3]:
# ==== Cell: [Session setup] ====

import os, sys, logging, random, yaml, time, uuid, json
from pathlib import Path

import torch
import numpy as np

# ——— switches you may tune ——————————————————————————
USE_MULTIGPU     = True          # False → single-GPU
# VISIBLE_DEVICES  = "0,1,2,3"
# CFG_PATH         = "configs/decomposer3.yaml"
CFG_PATH         = "configs/decomposer_main.yaml"
# ————————————————————————————————————————————————


# device = "cuda" if torch.cuda.is_available() else "cpu"
# print(f"Device: {device}, GPUs: {torch.cuda.device_count()}")

# ---------- logging ----------
Path("logs").mkdir(exist_ok=True)
ts = time.strftime("%Y%m%d_%H%M%S")
log_path = Path(f"logs/decomposer_{ts}.log")

# root logger -> both console and file
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s — %(levelname)s — %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(log_path, mode="w")
    ]
)
logger = logging.getLogger("train_decomposer")
logger.info("Log file created at %s", log_path)


# --- config, seeds, logging -----------------------------------
with open(CFG_PATH, "r") as f:
    config = yaml.safe_load(f)

seed = config["experiment"]["seed"]
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

logger.info("Config loaded and seeds set.")

2025-07-29 23:49:40,990 — INFO — Log file created at logs/decomposer_20250729_234940.log
2025-07-29 23:49:40,994 — INFO — Config loaded and seeds set.


## Data Loading and Preprocessing

In [3]:
# ==== Cell: [Data loading & preprocessing] ====

import json
from typing import List, Dict

def load_jsonl(path: str) -> List[Dict]:
    with open(path, "r") as f:
        return [json.loads(l) for l in f if l.strip() and not l.strip().startswith("#")]

data_cfg = config["data"]

raw_F_harm  = load_jsonl(data_cfg["input_path_varyFraming"])
raw_G_harm  = load_jsonl(data_cfg["input_path_varyGoal"])
raw_F_ben   = load_jsonl(data_cfg["input_path_varyFraming_benign"])
raw_G_ben   = load_jsonl(data_cfg["input_path_varyGoal_benign"])

for e in raw_F_harm + raw_F_ben: e["split"] = "varyF"
for e in raw_G_harm + raw_G_ben: e["split"] = "varyG"

def _preprocess(entries: List[Dict], max_f_idx: int):
    processed = []
    for ent in entries:
        req = ["prompt","goal","goal_index","framing_index","split"]
        if not all(k in ent for k in req): 
            continue
        g, f = ent["goal_index"], ent["framing_index"]
        if ent["split"] == "varyF":                 # re-index framings
            f = g if f == 0 else max_f_idx + 1
            max_f_idx = max(max_f_idx, f)
        processed.append({
            "text":  ent["prompt"],
            "goal":  ent["goal"],
            "goal_index": g,
            "framing_index": f,
            "label": ent.get("jailbroken", False),
            "split": ent["split"],
        })
    return processed, max_f_idx

max_idx = max(e["framing_index"]
              for e in raw_F_harm + raw_G_harm + raw_F_ben + raw_G_ben)

P_F_harm, max_idx = _preprocess(raw_F_harm, max_idx)
P_G_harm, max_idx = _preprocess(raw_G_harm, max_idx)
P_F_ben , max_idx = _preprocess(raw_F_ben , max_idx)
P_G_ben , max_idx = _preprocess(raw_G_ben , max_idx)

all_samples = P_F_harm + P_G_harm + P_F_ben + P_G_ben
n_total = len(all_samples)
n_sample = int(n_total * config.get('sample_prop', 1.))
n_sample = max(n_sample, 500)
if n_sample < n_total:
    logger.info("Sampling %d out of %d total samples", n_sample, n_total)
    all_samples = random.sample(all_samples, n_sample)
logger.info("Total processed samples: %d", len(all_samples))

2025-07-21 20:48:58,730 — INFO — Total processed samples: 5286


## Dataloader

In [4]:
# ==== Cell: [Dataset & dataloader] ====

from collections import defaultdict
from typing import Tuple
from torch.utils.data import Dataset, DataLoader

class DualPairDataset(Dataset):
    """
    Returns (sample_a, sample_b, pair_type)
      pair_type = 0 → same-goal / diff-frame  (from varyF)
      pair_type = 1 → same-frame / diff-goal  (from varyG)
    """
    def __init__(self, samples, stratified_capping=True):
        self.samples = samples
        self.goal_pairs, self.frame_pairs = [], []

        by_goal_F  = defaultdict(list)
        by_frame_G = defaultdict(list)

        for idx, s in enumerate(samples):
            if s["split"] == "varyF":  by_goal_F [s["goal_index"]   ].append(idx)
            else:                      by_frame_G[s["framing_index"]].append(idx)

        for lst in by_goal_F.values():
            self.goal_pairs  += [(a,b,0) for a in lst for b in lst if a<b]
        for lst in by_frame_G.values():
            self.frame_pairs += [(a,b,1) for a in lst for b in lst if a<b]

        # --- stratified capping ---------------------------------
        # this improved the performance a bit
        if stratified_capping:
            cap = int(np.median([len(v) for v in by_goal_F.values()]))
            for g, lst in by_goal_F.items():
                if len(lst) > cap:               # down-sample heavy goals
                    by_goal_F[g] = random.sample(lst, cap)
        # --------------------------------------------------------------

        self.all_pairs = self.goal_pairs + self.frame_pairs

    def __len__(self): return len(self.all_pairs)
    def __getitem__(self, k): return self.all_pairs[k]

def collate_dual(batch) -> Tuple[list,str,str,torch.Tensor]:
    """
    batch → (texts, goal_ids, frame_ids, pair_types)
    """
    texts, gid, fid, ptype = [], [], [], []
    for a,b,t in batch:
        sa, sb = all_samples[a], all_samples[b]
        texts.extend([sa["text"], sb["text"]])
        gid.extend([sa["goal_cid"], sb["goal_cid"]])
        fid.extend([sa["framing_index"], sb["framing_index"]])
        ptype.append(t)
    return (texts,
            torch.tensor(gid),
            torch.tensor(fid),
            torch.tensor(ptype))

# contiguous goal ids
unique_goals = sorted({s["goal_index"] for s in all_samples})
goal2cid     = {g:i for i,g in enumerate(unique_goals)}
for s in all_samples: s["goal_cid"] = goal2cid[s["goal_index"]]

train_ds = DualPairDataset(all_samples)
logger.info("Goal pairs: %d   Frame pairs: %d   Total pairs: %d",
            len(train_ds.goal_pairs),
            len(train_ds.frame_pairs),
            len(train_ds))

2025-07-21 20:48:58,761 — INFO — Goal pairs: 86824   Frame pairs: 89419   Total pairs: 176243


## Training and Launcher

In [5]:
# ==== Cell: [Training worker & launch] ====

from accelerate import notebook_launcher
import torch.multiprocessing as mp
mp.set_start_method("spawn", force=True)
from utils.misc import seed_worker, set_seed


MODEL_JOB_PREFIXES = {
    "meta-llama/Llama-3-8B-Instruct": "ll3",
    "meta-llama/Llama-2-7b-chat-hf": "ll2", 
    "lmsys/vicuna-13b-v1.5": "vic13",
    "lmsys/vicuna-7b-v1.5": "vic7",
    "mistralai/Mistral-7B-v0.1": "mis",
    "deepseek-ai/deepseek-llm-7b-chat": "dsk",
    "gpt2-medium": "gpt2",
    "google/gemma-2-9b": "gem9",
    "google/gemma-2-2b": "gem2"
}

ts = None
run_id = None
global run_id, ts, MODEL_JOB_PREFIXES

def train_worker():
    import gc, yaml, torch, torch.distributed as dist
    from torch.optim import AdamW
    from torch.optim.lr_scheduler import CosineAnnealingLR
    from torch.nn.parallel import DistributedDataParallel as DDP
    from train_test.decomposer_training import train_decomposer
    from utils.model_utils import load_model_multiGPU
    from models.encoder import HFEncoder_notPooled
    from models.decomposer import NonlinearDecomposer, NonlinearDecomposer_tiny

    gc.collect(); torch.cuda.empty_cache()

    local_rank = int(os.environ["LOCAL_RANK"])
    world_size = int(os.environ.get("WORLD_SIZE", "1"))
    distributed = world_size > 1

    if distributed:
        dist.init_process_group(
            backend="nccl", init_method="env://",
            rank=local_rank, world_size=world_size,
        )

    torch.cuda.set_device(local_rank)
    device = torch.device("cuda", local_rank)

    # ——— load LLM once per rank ———
    model, tokenizer = load_model_multiGPU(
        model_name=config["model"]["name"],
        local_rank=local_rank,
        load_in_8bit=False,
        load_in_4bit=False,
    )
    logger.info("Loaded model %s ", config["model"]["name"])
    
    config["model"]["layers"] = 'all'
    if config["model"]["layers"] == 'all':
        num_layers = model.config.num_hidden_layers
        layers = list(range(num_layers))
    else:
        layers = config["model"]["layers"]
        if isinstance(layers, int): layers = [layers]
    
    # --- Set arguments # TODO: Delete and instead fix the config
    config['experiment']['use_sae'] = config['experiment'].get('use_sae', False)  # use Sparse Autoencoder
    config['training']['num_epochs'] = 3
    init_lambda_orth = config['lambda_orth']
    config['lambda_repulse'] = config.get('lambda_repulse', 6.0) 
    config['lambda_adv'] = config.get('lambda_adv', 2.0)  
    config['lambda_sparse'] = config.get('lambda_sparse', None)  
    config['lambda_recon'] = config.get('lambda_recon', 1.0)  
    config['lambda_Worth'] = config.get('lambda_Worth', 0.05)
    config['grad_accum_steps'] = config.get('grad_accum_steps', 8)
    config['model']['layers'] = [-1]#layers
    config['model']['layer_combine'] = config['model'].get('layer_combine', 'mean')
    config['model']['last_token'] = config['model'].get('last_token', False)
    config['training']['num_epochs']   = 3
    config['training']['batch_size']   = 8
    # ---
    
    for layer in layers:
        torch.manual_seed(config['experiment']['seed'])
        torch.cuda.manual_seed_all(config['experiment']['seed'])
        np.random.seed(config['experiment']['seed'])
        random.seed(config['experiment']['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    
        logger.info(f"Training decomposer for layer {layer}")
        encoder = HFEncoder_notPooled(
            model=model,
            tokenizer=tokenizer,
            device=device,
            layers=[layer],
            layer_combine=config["model"].get("layer_combine", "mean"),
            last_token=config["model"].get("last_token", True),
        )
        encoder.eval()

        # ——— decomposer ———
        enc_dim = model.config.hidden_size
        dec = NonlinearDecomposer(
            enc_dim=enc_dim,
            d_g=config["d_g"],
            d_f=config["d_f"],
            hidden_dim=config.get("hidden_dim", 1024),
            dropout=config.get("dropout", 0.1),
        ).to(device)
        d_f = dec.Wf(torch.randn(4096).to(device)).shape[0]
        dec = DDP(dec, device_ids=[local_rank])

        # ——— dataloader ———
        from torch.utils.data import DistributedSampler
        sampler = DistributedSampler(train_ds, rank=local_rank,
                                        num_replicas=world_size, shuffle=True, seed=config['experiment']['seed'])
        gen = torch.Generator()
        gen.manual_seed(config['experiment']['seed']) 
        train_loader = DataLoader(
            train_ds,
            batch_size=config["training"]["batch_size"],
            sampler=sampler,
            collate_fn=collate_dual,
            num_workers=8,
            pin_memory=True,
            shuffle=False,
            worker_init_fn=lambda wid: set_seed(config['experiment']['seed'] + wid),#seed_worker,
            generator=gen,
        )

        # ——— optim & sched ———
        opt = AdamW(dec.parameters(), lr=config["lr"])
        sched = CosineAnnealingLR(opt,
                                    T_max=len(train_ds)*config["training"]["num_epochs"])

        # adversarial classifier
        n_goals = len(unique_goals)
        adv_clf = torch.nn.Linear(d_f, n_goals).to(device)
        adv_clf = DDP(adv_clf, device_ids=[local_rank])
        adv_opt = AdamW(adv_clf.parameters(), lr=1e-4)

        # ——— train ———
        stats = train_decomposer(
            encoder     = encoder,
            decomposer  = dec,
            dataloader  = train_loader,
            optimizer   = opt,
            adv_clf     = adv_clf,
            adv_opt     = adv_opt,
            lambda_adv  = config.get('lambda_adv', 2.0),
            scheduler   = sched,
            device      = device,
            epochs      = config['training']['num_epochs'],
            lambda_g    = config['lambda_g'],
            lambda_f    = config['lambda_f'],
            lambda_repulse = config.get('lambda_repulse', 6.0),  # optional
            lambda_orth = config['lambda_orth']*10,
            lambda_recon = config.get('lambda_recon', 1.0),  # optional
            lambda_Worth = config.get('lambda_Worth', 0.25),
            grad_clip   = config['grad_clip'],
            grad_accum_steps = config["training"].get('grad_accum_steps', 4),
            log_every   = 50,
            info        = logger.info,
            layer_str= f"{layer}",
        )

        # ——— checkpoint (rank-0 only) ———
        if local_rank == 0:
            # ts = time.strftime("%Y%m%d_%H%M%S")
            # run_id = str(uuid.uuid4())
            global run_id, ts, MODEL_JOB_PREFIXES
            model_short = MODEL_JOB_PREFIXES.get(config["model"]["name"], "unknownModel")
            ckpt_dir = Path(f"checkpoints/decomposer_simple/{model_short}_decomposer_layer{layer}_{ts}_{run_id}")
            ckpt_dir.mkdir(parents=True, exist_ok=True)
            
            if layer == 0:
                config_filename = f"./output/config_{model_short}_{ts}_{run_id}.yaml"
                with open(config_filename, "w") as f:
                    yaml.safe_dump(config, f)

            torch.save({k: v.cpu() for k, v in dec.module.state_dict().items()},
                        ckpt_dir / "weights.pt")
            with open(ckpt_dir / "train_stats.json", "w") as f:
                json.dump(stats, f)
            logger.info("Checkpoint for layer %d saved to %s", layer, ckpt_dir)

    if distributed:
        dist.destroy_process_group()



In [None]:
# ——— launch ———
# from random import randint
# os.environ["MASTER_PORT"] = str(15000 + randint(0, 10000))

# num_proc = torch.cuda.device_count() if USE_MULTIGPU else 1

global run_id, ts
ts = time.strftime("%Y%m%d_%H%M%S")
run_id = str(uuid.uuid4())
num_proc=2
notebook_launcher(train_worker, num_processes=num_proc)

Launching training on 2 GPUs.


[W721 20:48:59.545498356 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W721 20:48:59.545846066 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W721 20:49:02.089468330 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W721 20:49:02.089938455 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W721 20:49:02.108150847 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W721 20:49:02.108386983 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


2025-07-21 20:49:10,121 — INFO — Training decomposer for layer 0
2025-07-21 20:49:10,172 — INFO — Training decomposer for layer 0


  scaler = torch.cuda.amp.GradScaler()
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
epoch 0, layer 0:   0%|▎                                                                         | 49/11016 [00:08<27:28,  6.65it/s, loss=19.9]

2025-07-21 20:49:20,892 — INFO — layer 0, [0:49], loss=19.8306
2025-07-21 20:49:20,892 — INFO — layer 0, [0:49], loss=19.5798


epoch 0, layer 0:   1%|▋                                                                         | 99/11016 [00:16<26:57,  6.75it/s, loss=18.7]

2025-07-21 20:49:28,954 — INFO — layer 0, [0:99], loss=18.9565
2025-07-21 20:49:28,954 — INFO — layer 0, [0:99], loss=18.3530


epoch 0, layer 0:   1%|▉                                                                        | 149/11016 [00:24<27:47,  6.52it/s, loss=19.4]

2025-07-21 20:49:36,625 — INFO — layer 0, [0:149], loss=17.3043
2025-07-21 20:49:36,625 — INFO — layer 0, [0:149], loss=17.3058


epoch 0, layer 0:   2%|█▎                                                                       | 199/11016 [00:32<28:26,  6.34it/s, loss=17.1]

2025-07-21 20:49:44,433 — INFO — layer 0, [0:199], loss=16.3521
2025-07-21 20:49:44,433 — INFO — layer 0, [0:199], loss=16.1582


epoch 0, layer 0:   2%|█▋                                                                       | 249/11016 [00:40<26:53,  6.67it/s, loss=15.3]

2025-07-21 20:49:52,064 — INFO — layer 0, [0:249], loss=14.7017
2025-07-21 20:49:52,065 — INFO — layer 0, [0:249], loss=14.9454


epoch 0, layer 0:   3%|█▉                                                                       | 299/11016 [00:47<26:06,  6.84it/s, loss=12.1]

2025-07-21 20:49:59,768 — INFO — layer 0, [0:299], loss=13.3207
2025-07-21 20:49:59,768 — INFO — layer 0, [0:299], loss=14.0869


epoch 0, layer 0:   3%|██▎                                                                      | 349/11016 [00:55<27:48,  6.39it/s, loss=13.5]

2025-07-21 20:50:07,354 — INFO — layer 0, [0:349], loss=11.8931
2025-07-21 20:50:07,354 — INFO — layer 0, [0:349], loss=12.9292


epoch 0, layer 0:   4%|██▋                                                                      | 399/11016 [01:02<26:55,  6.57it/s, loss=12.1]

2025-07-21 20:50:15,029 — INFO — layer 0, [0:399], loss=11.2510
2025-07-21 20:50:15,029 — INFO — layer 0, [0:399], loss=10.6345


epoch 0, layer 0:   4%|██▉                                                                      | 449/11016 [01:10<27:37,  6.37it/s, loss=10.3]

2025-07-21 20:50:22,609 — INFO — layer 0, [0:449], loss=9.1278
2025-07-21 20:50:22,609 — INFO — layer 0, [0:449], loss=12.1300


epoch 0, layer 0:   5%|███▎                                                                     | 499/11016 [01:18<25:31,  6.87it/s, loss=8.75]

2025-07-21 20:50:30,158 — INFO — layer 0, [0:499], loss=6.9560
2025-07-21 20:50:30,159 — INFO — layer 0, [0:499], loss=8.7204


epoch 0, layer 0:   5%|███▋                                                                     | 549/11016 [01:25<28:06,  6.21it/s, loss=9.49]

2025-07-21 20:50:37,727 — INFO — layer 0, [0:549], loss=6.5973
2025-07-21 20:50:37,727 — INFO — layer 0, [0:549], loss=9.6946


epoch 0, layer 0:   5%|███▉                                                                     | 599/11016 [01:33<26:10,  6.63it/s, loss=8.22]

2025-07-21 20:50:45,354 — INFO — layer 0, [0:599], loss=7.7508
2025-07-21 20:50:45,354 — INFO — layer 0, [0:599], loss=8.5765


epoch 0, layer 0:   6%|████▎                                                                    | 649/11016 [01:40<25:32,  6.76it/s, loss=6.84]

2025-07-21 20:50:52,883 — INFO — layer 0, [0:649], loss=5.6427
2025-07-21 20:50:52,883 — INFO — layer 0, [0:649], loss=6.1159


epoch 0, layer 0:   6%|████▋                                                                    | 699/11016 [01:48<26:05,  6.59it/s, loss=6.81]

2025-07-21 20:51:00,497 — INFO — layer 0, [0:699], loss=4.9298
2025-07-21 20:51:00,497 — INFO — layer 0, [0:699], loss=3.0619


epoch 0, layer 0:   7%|████▉                                                                    | 749/11016 [01:56<26:25,  6.47it/s, loss=4.99]

2025-07-21 20:51:08,063 — INFO — layer 0, [0:749], loss=5.5264
2025-07-21 20:51:08,063 — INFO — layer 0, [0:749], loss=4.3181


epoch 0, layer 0:   7%|█████▎                                                                   | 799/11016 [02:03<26:41,  6.38it/s, loss=2.96]

2025-07-21 20:51:15,773 — INFO — layer 0, [0:799], loss=5.1750
2025-07-21 20:51:15,774 — INFO — layer 0, [0:799], loss=-0.7314


epoch 0, layer 0:   8%|█████▋                                                                    | 849/11016 [02:11<26:47,  6.32it/s, loss=2.8]

2025-07-21 20:51:23,265 — INFO — layer 0, [0:849], loss=1.8336
2025-07-21 20:51:23,265 — INFO — layer 0, [0:849], loss=-0.7830


epoch 0, layer 0:   8%|█████▉                                                                   | 899/11016 [02:18<24:42,  6.82it/s, loss=9.36]

2025-07-21 20:51:30,685 — INFO — layer 0, [0:899], loss=-0.8969
2025-07-21 20:51:30,685 — INFO — layer 0, [0:899], loss=-1.6860


epoch 0, layer 0:   9%|██████▎                                                                  | 949/11016 [02:26<24:25,  6.87it/s, loss=1.97]

2025-07-21 20:51:38,317 — INFO — layer 0, [0:949], loss=1.6257
2025-07-21 20:51:38,317 — INFO — layer 0, [0:949], loss=2.1009


epoch 0, layer 0:   9%|██████▌                                                                 | 999/11016 [02:33<26:44,  6.24it/s, loss=-5.73]

2025-07-21 20:51:46,003 — INFO — layer 0, [0:999], loss=-2.6466
2025-07-21 20:51:46,003 — INFO — layer 0, [0:999], loss=8.3173


epoch 0, layer 0:  10%|██████▊                                                                | 1049/11016 [02:41<25:42,  6.46it/s, loss=-2.57]

2025-07-21 20:51:53,878 — INFO — layer 0, [0:1049], loss=-4.5971
2025-07-21 20:51:53,878 — INFO — layer 0, [0:1049], loss=-0.7649


epoch 0, layer 0:  10%|███████                                                                | 1099/11016 [02:49<25:37,  6.45it/s, loss=-5.87]

2025-07-21 20:52:01,529 — INFO — layer 0, [0:1099], loss=-6.8484
2025-07-21 20:52:01,529 — INFO — layer 0, [0:1099], loss=-7.3365


epoch 0, layer 0:  10%|███████▍                                                               | 1149/11016 [02:57<26:23,  6.23it/s, loss=-6.34]

2025-07-21 20:52:09,237 — INFO — layer 0, [0:1149], loss=7.6549
2025-07-21 20:52:09,237 — INFO — layer 0, [0:1149], loss=-8.6818


epoch 0, layer 0:  11%|███████▋                                                               | 1199/11016 [03:04<25:08,  6.51it/s, loss=0.589]

2025-07-21 20:52:16,697 — INFO — layer 0, [0:1199], loss=-5.8471
2025-07-21 20:52:16,697 — INFO — layer 0, [0:1199], loss=0.3137


epoch 0, layer 0:  11%|████████                                                               | 1249/11016 [03:12<24:50,  6.55it/s, loss=-3.45]

2025-07-21 20:52:24,248 — INFO — layer 0, [0:1249], loss=-1.3872
2025-07-21 20:52:24,248 — INFO — layer 0, [0:1249], loss=0.3187


epoch 0, layer 0:  12%|████████▎                                                              | 1299/11016 [03:19<25:05,  6.46it/s, loss=-1.15]

2025-07-21 20:52:31,932 — INFO — layer 0, [0:1299], loss=-0.1511
2025-07-21 20:52:31,932 — INFO — layer 0, [0:1299], loss=-9.9176


epoch 0, layer 0:  12%|████████▋                                                              | 1349/11016 [03:27<23:16,  6.92it/s, loss=-8.78]

2025-07-21 20:52:39,412 — INFO — layer 0, [0:1349], loss=0.6131
2025-07-21 20:52:39,412 — INFO — layer 0, [0:1349], loss=-0.7772


epoch 0, layer 0:  13%|█████████                                                              | 1399/11016 [03:34<23:55,  6.70it/s, loss=-9.73]

2025-07-21 20:52:46,990 — INFO — layer 0, [0:1399], loss=0.5577
2025-07-21 20:52:46,990 — INFO — layer 0, [0:1399], loss=-10.5549


epoch 0, layer 0:  13%|█████████▎                                                             | 1449/11016 [03:42<24:44,  6.44it/s, loss=-5.87]

2025-07-21 20:52:54,458 — INFO — layer 0, [0:1449], loss=-9.3817
2025-07-21 20:52:54,458 — INFO — layer 0, [0:1449], loss=-5.3399


epoch 0, layer 0:  14%|█████████▊                                                              | 1499/11016 [03:49<24:51,  6.38it/s, loss=3.88]

2025-07-21 20:53:01,898 — INFO — layer 0, [0:1499], loss=-7.3013
2025-07-21 20:53:01,898 — INFO — layer 0, [0:1499], loss=-7.9819


epoch 0, layer 0:  14%|█████████▉                                                             | 1549/11016 [03:57<23:59,  6.58it/s, loss=-9.95]

2025-07-21 20:53:09,627 — INFO — layer 0, [0:1549], loss=-2.7511
2025-07-21 20:53:09,627 — INFO — layer 0, [0:1549], loss=-6.1748


epoch 0, layer 0:  15%|██████████▏                                                           | 1599/11016 [04:05<22:32,  6.96it/s, loss=-0.734]

2025-07-21 20:53:17,086 — INFO — layer 0, [0:1599], loss=2.9781
2025-07-21 20:53:17,086 — INFO — layer 0, [0:1599], loss=-5.5024


epoch 0, layer 0:  15%|██████████▋                                                            | 1649/11016 [04:12<23:09,  6.74it/s, loss=-11.2]

2025-07-21 20:53:24,502 — INFO — layer 0, [0:1649], loss=-14.6796
2025-07-21 20:53:24,502 — INFO — layer 0, [0:1649], loss=-6.3522


epoch 0, layer 0:  15%|███████████                                                             | 1699/11016 [04:19<23:35,  6.58it/s, loss=2.55]

2025-07-21 20:53:31,977 — INFO — layer 0, [0:1699], loss=-2.3634
2025-07-21 20:53:31,977 — INFO — layer 0, [0:1699], loss=-4.2456


epoch 0, layer 0:  16%|███████████▎                                                           | 1749/11016 [04:27<23:04,  6.69it/s, loss=-5.91]

2025-07-21 20:53:39,740 — INFO — layer 0, [0:1749], loss=-7.8196
2025-07-21 20:53:39,740 — INFO — layer 0, [0:1749], loss=0.2831


epoch 0, layer 0:  16%|███████████▌                                                           | 1799/11016 [04:35<22:25,  6.85it/s, loss=0.762]

2025-07-21 20:53:47,181 — INFO — layer 0, [0:1799], loss=-10.1385
2025-07-21 20:53:47,181 — INFO — layer 0, [0:1799], loss=-8.7693


epoch 0, layer 0:  17%|███████████▉                                                           | 1849/11016 [04:42<23:05,  6.62it/s, loss=-12.9]

2025-07-21 20:53:54,829 — INFO — layer 0, [0:1849], loss=-6.6918
2025-07-21 20:53:54,829 — INFO — layer 0, [0:1849], loss=-13.2704


epoch 0, layer 0:  17%|████████████▏                                                          | 1899/11016 [04:50<23:33,  6.45it/s, loss=-14.8]

2025-07-21 20:54:02,459 — INFO — layer 0, [0:1899], loss=-8.8138
2025-07-21 20:54:02,459 — INFO — layer 0, [0:1899], loss=-6.3550


epoch 0, layer 0:  18%|████████████▌                                                          | 1949/11016 [04:58<22:07,  6.83it/s, loss=-10.8]

2025-07-21 20:54:10,043 — INFO — layer 0, [0:1949], loss=-3.8262
2025-07-21 20:54:10,043 — INFO — layer 0, [0:1949], loss=-6.9113


epoch 0, layer 0:  18%|████████████▉                                                          | 1999/11016 [05:05<22:52,  6.57it/s, loss=-3.58]

2025-07-21 20:54:17,460 — INFO — layer 0, [0:1999], loss=-6.8039
2025-07-21 20:54:17,460 — INFO — layer 0, [0:1999], loss=-9.7112


epoch 0, layer 0:  19%|█████████████▏                                                         | 2049/11016 [05:12<21:48,  6.85it/s, loss=-9.96]

2025-07-21 20:54:25,009 — INFO — layer 0, [0:2049], loss=-6.6437
2025-07-21 20:54:25,009 — INFO — layer 0, [0:2049], loss=-6.5602


epoch 0, layer 0:  19%|█████████████▌                                                         | 2099/11016 [05:20<22:16,  6.67it/s, loss=-10.9]

2025-07-21 20:54:32,628 — INFO — layer 0, [0:2099], loss=-16.0970
2025-07-21 20:54:32,628 — INFO — layer 0, [0:2099], loss=-0.4321


epoch 0, layer 0:  20%|██████████████                                                          | 2149/11016 [05:28<22:15,  6.64it/s, loss=3.39]

2025-07-21 20:54:40,268 — INFO — layer 0, [0:2149], loss=-6.7196
2025-07-21 20:54:40,268 — INFO — layer 0, [0:2149], loss=1.1045


epoch 0, layer 0:  20%|██████████████▏                                                        | 2199/11016 [05:35<24:16,  6.06it/s, loss=-6.12]

2025-07-21 20:54:47,850 — INFO — layer 0, [0:2199], loss=-6.0584
2025-07-21 20:54:47,850 — INFO — layer 0, [0:2199], loss=-17.8661


epoch 0, layer 0:  20%|██████████████▍                                                        | 2249/11016 [05:43<21:51,  6.68it/s, loss=-9.77]

2025-07-21 20:54:55,472 — INFO — layer 0, [0:2249], loss=-17.5540
2025-07-21 20:54:55,472 — INFO — layer 0, [0:2249], loss=-5.5022


epoch 0, layer 0:  21%|██████████████▊                                                        | 2299/11016 [05:50<21:29,  6.76it/s, loss=-2.46]

2025-07-21 20:55:02,897 — INFO — layer 0, [0:2299], loss=-16.6526
2025-07-21 20:55:02,897 — INFO — layer 0, [0:2299], loss=-8.2010


epoch 0, layer 0:  21%|███████████████▏                                                       | 2349/11016 [05:58<21:36,  6.68it/s, loss=-12.6]

2025-07-21 20:55:10,543 — INFO — layer 0, [0:2349], loss=-8.1952
2025-07-21 20:55:10,543 — INFO — layer 0, [0:2349], loss=-7.2753


epoch 0, layer 0:  22%|███████████████▍                                                       | 2399/11016 [06:06<24:03,  5.97it/s, loss=-12.8]

2025-07-21 20:55:18,390 — INFO — layer 0, [0:2399], loss=-16.4192
2025-07-21 20:55:18,390 — INFO — layer 0, [0:2399], loss=-10.5175


epoch 0, layer 0:  22%|███████████████▊                                                       | 2449/11016 [06:13<22:29,  6.35it/s, loss=-8.09]

2025-07-21 20:55:26,013 — INFO — layer 0, [0:2449], loss=-8.3084
2025-07-21 20:55:26,013 — INFO — layer 0, [0:2449], loss=-15.3395


epoch 0, layer 0:  23%|████████████████▌                                                        | 2499/11016 [06:21<21:00,  6.76it/s, loss=-17]

2025-07-21 20:55:33,513 — INFO — layer 0, [0:2499], loss=-12.0482
2025-07-21 20:55:33,513 — INFO — layer 0, [0:2499], loss=-5.6306


epoch 0, layer 0:  23%|████████████████▍                                                      | 2549/11016 [06:28<20:51,  6.77it/s, loss=-18.3]

2025-07-21 20:55:41,014 — INFO — layer 0, [0:2549], loss=-10.6103
2025-07-21 20:55:41,014 — INFO — layer 0, [0:2549], loss=-12.2643


epoch 0, layer 0:  24%|████████████████▊                                                      | 2599/11016 [06:36<21:37,  6.48it/s, loss=-20.6]

2025-07-21 20:55:48,690 — INFO — layer 0, [0:2599], loss=-4.7185
2025-07-21 20:55:48,690 — INFO — layer 0, [0:2599], loss=-1.9885


epoch 0, layer 0:  24%|████████████████▊                                                     | 2649/11016 [06:44<21:43,  6.42it/s, loss=-0.634]

2025-07-21 20:55:56,390 — INFO — layer 0, [0:2649], loss=-15.9511
2025-07-21 20:55:56,390 — INFO — layer 0, [0:2649], loss=-18.4784


epoch 0, layer 0:  25%|█████████████████▍                                                     | 2699/11016 [06:51<20:03,  6.91it/s, loss=-8.29]

2025-07-21 20:56:04,026 — INFO — layer 0, [0:2699], loss=-13.5160
2025-07-21 20:56:04,026 — INFO — layer 0, [0:2699], loss=-21.3226


epoch 0, layer 0:  25%|█████████████████▋                                                     | 2749/11016 [06:59<20:25,  6.75it/s, loss=-2.71]

2025-07-21 20:56:11,753 — INFO — layer 0, [0:2749], loss=-3.3812
2025-07-21 20:56:11,753 — INFO — layer 0, [0:2749], loss=-8.0996


epoch 0, layer 0:  25%|██████████████████▌                                                      | 2799/11016 [07:07<19:48,  6.91it/s, loss=-14]

2025-07-21 20:56:19,222 — INFO — layer 0, [0:2799], loss=-11.7489
2025-07-21 20:56:19,222 — INFO — layer 0, [0:2799], loss=-19.7750


epoch 0, layer 0:  26%|██████████████████▎                                                    | 2849/11016 [07:14<20:16,  6.71it/s, loss=-7.71]

2025-07-21 20:56:26,795 — INFO — layer 0, [0:2849], loss=-15.3040
2025-07-21 20:56:26,795 — INFO — layer 0, [0:2849], loss=-11.0955


epoch 0, layer 0:  26%|███████████████████▏                                                     | 2899/11016 [07:22<20:30,  6.60it/s, loss=-18]

2025-07-21 20:56:34,287 — INFO — layer 0, [0:2899], loss=-13.7934
2025-07-21 20:56:34,287 — INFO — layer 0, [0:2899], loss=-9.2546


epoch 0, layer 0:  27%|███████████████████                                                    | 2949/11016 [07:29<21:12,  6.34it/s, loss=-16.9]

2025-07-21 20:56:42,020 — INFO — layer 0, [0:2949], loss=-4.1486
2025-07-21 20:56:42,021 — INFO — layer 0, [0:2949], loss=-9.5815


epoch 0, layer 0:  27%|███████████████████▎                                                   | 2999/11016 [07:37<20:29,  6.52it/s, loss=-7.75]

2025-07-21 20:56:49,834 — INFO — layer 0, [0:2999], loss=-15.4969
2025-07-21 20:56:49,834 — INFO — layer 0, [0:2999], loss=-16.3791


epoch 0, layer 0:  28%|███████████████████▋                                                   | 3049/11016 [07:45<20:10,  6.58it/s, loss=-15.8]

2025-07-21 20:56:57,525 — INFO — layer 0, [0:3049], loss=-2.9985
2025-07-21 20:56:57,525 — INFO — layer 0, [0:3049], loss=-18.2044


epoch 0, layer 0:  28%|███████████████████▉                                                   | 3099/11016 [07:53<20:59,  6.29it/s, loss=-5.95]

2025-07-21 20:57:05,164 — INFO — layer 0, [0:3099], loss=-8.6071
2025-07-21 20:57:05,164 — INFO — layer 0, [0:3099], loss=-22.2515


epoch 0, layer 0:  29%|████████████████████▎                                                  | 3149/11016 [08:00<18:43,  7.00it/s, loss=-2.37]

2025-07-21 20:57:12,672 — INFO — layer 0, [0:3149], loss=-2.0897
2025-07-21 20:57:12,672 — INFO — layer 0, [0:3149], loss=-10.9489


epoch 0, layer 0:  29%|████████████████████▌                                                  | 3199/11016 [08:08<20:18,  6.42it/s, loss=-14.1]

2025-07-21 20:57:20,331 — INFO — layer 0, [0:3199], loss=-13.6267
2025-07-21 20:57:20,331 — INFO — layer 0, [0:3199], loss=-11.4744


epoch 0, layer 0:  29%|████████████████████▉                                                  | 3249/11016 [08:15<18:04,  7.16it/s, loss=-12.6]

2025-07-21 20:57:27,970 — INFO — layer 0, [0:3249], loss=-7.5069
2025-07-21 20:57:27,970 — INFO — layer 0, [0:3249], loss=0.1295


epoch 0, layer 0:  30%|█████████████████████▊                                                   | 3299/11016 [08:23<18:59,  6.77it/s, loss=-16]

2025-07-21 20:57:35,225 — INFO — layer 0, [0:3299], loss=-14.2251
2025-07-21 20:57:35,225 — INFO — layer 0, [0:3299], loss=-19.9261


epoch 0, layer 0:  30%|█████████████████████▌                                                 | 3349/11016 [08:30<19:50,  6.44it/s, loss=-2.97]

2025-07-21 20:57:42,860 — INFO — layer 0, [0:3349], loss=-15.4075
2025-07-21 20:57:42,860 — INFO — layer 0, [0:3349], loss=-7.8524


epoch 0, layer 0:  31%|█████████████████████▉                                                 | 3399/11016 [08:38<18:34,  6.84it/s, loss=-2.79]

2025-07-21 20:57:50,423 — INFO — layer 0, [0:3399], loss=-12.4479
2025-07-21 20:57:50,423 — INFO — layer 0, [0:3399], loss=1.8278


epoch 0, layer 0:  31%|██████████████████████▏                                                | 3449/11016 [08:45<19:21,  6.51it/s, loss=-9.18]

2025-07-21 20:57:57,996 — INFO — layer 0, [0:3449], loss=-12.5939
2025-07-21 20:57:57,996 — INFO — layer 0, [0:3449], loss=-22.3177


epoch 0, layer 0:  32%|██████████████████████▌                                                | 3499/11016 [08:53<18:30,  6.77it/s, loss=-14.3]

2025-07-21 20:58:05,535 — INFO — layer 0, [0:3499], loss=-10.9150
2025-07-21 20:58:05,535 — INFO — layer 0, [0:3499], loss=-12.2100


epoch 0, layer 0:  32%|██████████████████████▊                                                | 3549/11016 [09:01<17:52,  6.96it/s, loss=-15.6]

2025-07-21 20:58:13,114 — INFO — layer 0, [0:3549], loss=-12.7778
2025-07-21 20:58:13,114 — INFO — layer 0, [0:3549], loss=-21.3458


epoch 0, layer 0:  33%|███████████████████████▏                                               | 3599/11016 [09:08<20:36,  6.00it/s, loss=-7.95]

2025-07-21 20:58:20,850 — INFO — layer 0, [0:3599], loss=-17.4790
2025-07-21 20:58:20,851 — INFO — layer 0, [0:3599], loss=-11.2513


epoch 0, layer 0:  33%|███████████████████████▌                                               | 3649/11016 [09:16<18:59,  6.47it/s, loss=-6.66]

2025-07-21 20:58:28,426 — INFO — layer 0, [0:3649], loss=-10.5222
2025-07-21 20:58:28,426 — INFO — layer 0, [0:3649], loss=-4.0217


epoch 0, layer 0:  34%|████████████████████████▌                                                | 3699/11016 [09:24<18:34,  6.56it/s, loss=-19]

2025-07-21 20:58:36,110 — INFO — layer 0, [0:3699], loss=-5.7031
2025-07-21 20:58:36,110 — INFO — layer 0, [0:3699], loss=-12.4091


epoch 0, layer 0:  34%|████████████████████████▏                                              | 3749/11016 [09:31<17:13,  7.03it/s, loss=-15.2]

2025-07-21 20:58:43,740 — INFO — layer 0, [0:3749], loss=-6.6158
2025-07-21 20:58:43,740 — INFO — layer 0, [0:3749], loss=-9.1349


epoch 0, layer 0:  34%|████████████████████████▍                                              | 3799/11016 [09:39<17:17,  6.96it/s, loss=-17.4]

2025-07-21 20:58:51,390 — INFO — layer 0, [0:3799], loss=-12.6042
2025-07-21 20:58:51,390 — INFO — layer 0, [0:3799], loss=-7.3105


epoch 0, layer 0:  35%|█████████████████████████▌                                               | 3849/11016 [09:46<18:15,  6.54it/s, loss=-17]

2025-07-21 20:58:58,983 — INFO — layer 0, [0:3849], loss=-10.0005
2025-07-21 20:58:58,983 — INFO — layer 0, [0:3849], loss=-16.0614


epoch 0, layer 0:  35%|█████████████████████████▏                                             | 3899/11016 [09:54<18:39,  6.36it/s, loss=-19.5]

2025-07-21 20:59:06,603 — INFO — layer 0, [0:3899], loss=-2.7650
2025-07-21 20:59:06,603 — INFO — layer 0, [0:3899], loss=-1.3662


epoch 0, layer 0:  36%|█████████████████████████▍                                             | 3949/11016 [10:02<18:04,  6.51it/s, loss=-22.3]

2025-07-21 20:59:14,244 — INFO — layer 0, [0:3949], loss=-13.4746
2025-07-21 20:59:14,244 — INFO — layer 0, [0:3949], loss=-19.4654


epoch 0, layer 0:  36%|█████████████████████████▊                                             | 3999/11016 [10:09<18:05,  6.47it/s, loss=-11.5]

2025-07-21 20:59:21,713 — INFO — layer 0, [0:3999], loss=-9.7844
2025-07-21 20:59:21,713 — INFO — layer 0, [0:3999], loss=-14.5092


epoch 0, layer 0:  37%|██████████████████████████                                             | 4049/11016 [10:17<17:08,  6.78it/s, loss=-11.9]

2025-07-21 20:59:29,287 — INFO — layer 0, [0:4049], loss=-15.9238
2025-07-21 20:59:29,287 — INFO — layer 0, [0:4049], loss=-15.2160


epoch 0, layer 0:  37%|██████████████████████████▍                                            | 4099/11016 [10:24<17:12,  6.70it/s, loss=-18.3]

2025-07-21 20:59:37,022 — INFO — layer 0, [0:4099], loss=-13.7633
2025-07-21 20:59:37,022 — INFO — layer 0, [0:4099], loss=-12.5566


epoch 0, layer 0:  38%|██████████████████████████▋                                            | 4149/11016 [10:32<17:32,  6.53it/s, loss=-5.55]

2025-07-21 20:59:44,860 — INFO — layer 0, [0:4149], loss=-5.8500
2025-07-21 20:59:44,860 — INFO — layer 0, [0:4149], loss=-14.0743


epoch 0, layer 0:  38%|███████████████████████████                                            | 4199/11016 [10:40<17:12,  6.60it/s, loss=-9.24]

2025-07-21 20:59:52,549 — INFO — layer 0, [0:4199], loss=-13.9087
2025-07-21 20:59:52,549 — INFO — layer 0, [0:4199], loss=-3.0192


epoch 0, layer 0:  39%|███████████████████████████▍                                           | 4249/11016 [10:48<16:35,  6.80it/s, loss=-9.22]

2025-07-21 21:00:00,226 — INFO — layer 0, [0:4249], loss=-9.4635
2025-07-21 21:00:00,226 — INFO — layer 0, [0:4249], loss=-19.4267


epoch 0, layer 0:  39%|███████████████████████████▋                                           | 4299/11016 [10:55<17:00,  6.58it/s, loss=-17.3]

2025-07-21 21:00:07,821 — INFO — layer 0, [0:4299], loss=-12.9812
2025-07-21 21:00:07,821 — INFO — layer 0, [0:4299], loss=-12.0852


epoch 0, layer 0:  39%|████████████████████████████                                           | 4349/11016 [11:03<16:04,  6.91it/s, loss=-19.8]

2025-07-21 21:00:15,453 — INFO — layer 0, [0:4349], loss=-20.5671
2025-07-21 21:00:15,453 — INFO — layer 0, [0:4349], loss=-15.0027


epoch 0, layer 0:  40%|████████████████████████████▎                                          | 4399/11016 [11:11<17:43,  6.22it/s, loss=-17.1]

2025-07-21 21:00:23,234 — INFO — layer 0, [0:4399], loss=-10.8964
2025-07-21 21:00:23,234 — INFO — layer 0, [0:4399], loss=-9.4736


epoch 0, layer 0:  40%|████████████████████████████▋                                          | 4449/11016 [11:18<16:15,  6.73it/s, loss=-10.2]

2025-07-21 21:00:30,910 — INFO — layer 0, [0:4449], loss=-12.6622
2025-07-21 21:00:30,910 — INFO — layer 0, [0:4449], loss=-12.2711


epoch 0, layer 0:  41%|████████████████████████████▉                                          | 4499/11016 [11:26<16:40,  6.51it/s, loss=-14.9]

2025-07-21 21:00:38,644 — INFO — layer 0, [0:4499], loss=-6.2669
2025-07-21 21:00:38,644 — INFO — layer 0, [0:4499], loss=-19.9318


epoch 0, layer 0:  41%|█████████████████████████████▎                                         | 4549/11016 [11:34<16:23,  6.58it/s, loss=-9.33]

2025-07-21 21:00:46,195 — INFO — layer 0, [0:4549], loss=-15.4605
2025-07-21 21:00:46,195 — INFO — layer 0, [0:4549], loss=-12.6579


epoch 0, layer 0:  42%|█████████████████████████████▋                                         | 4599/11016 [11:41<16:26,  6.50it/s, loss=-13.2]

2025-07-21 21:00:53,945 — INFO — layer 0, [0:4599], loss=-15.9155
2025-07-21 21:00:53,945 — INFO — layer 0, [0:4599], loss=-15.0845


epoch 0, layer 0:  42%|█████████████████████████████▉                                         | 4649/11016 [11:49<15:54,  6.67it/s, loss=-10.2]

2025-07-21 21:01:01,408 — INFO — layer 0, [0:4649], loss=-8.0988
2025-07-21 21:01:01,408 — INFO — layer 0, [0:4649], loss=-17.1070


epoch 0, layer 0:  43%|██████████████████████████████▎                                        | 4699/11016 [11:56<16:35,  6.35it/s, loss=-13.5]

2025-07-21 21:01:09,032 — INFO — layer 0, [0:4699], loss=-9.4913
2025-07-21 21:01:09,032 — INFO — layer 0, [0:4699], loss=-6.1224


epoch 0, layer 0:  43%|██████████████████████████████▌                                        | 4749/11016 [12:04<15:10,  6.88it/s, loss=-13.8]

2025-07-21 21:01:16,468 — INFO — layer 0, [0:4749], loss=-19.1602
2025-07-21 21:01:16,468 — INFO — layer 0, [0:4749], loss=-12.1822


epoch 0, layer 0:  44%|██████████████████████████████▉                                        | 4799/11016 [12:11<16:06,  6.43it/s, loss=-20.7]

2025-07-21 21:01:24,042 — INFO — layer 0, [0:4799], loss=-12.9506
2025-07-21 21:01:24,042 — INFO — layer 0, [0:4799], loss=-10.1203


epoch 0, layer 0:  44%|███████████████████████████████▎                                       | 4849/11016 [12:19<15:15,  6.73it/s, loss=-7.68]

2025-07-21 21:01:31,560 — INFO — layer 0, [0:4849], loss=-10.1882
2025-07-21 21:01:31,560 — INFO — layer 0, [0:4849], loss=-21.0461


epoch 0, layer 0:  44%|███████████████████████████████▌                                       | 4899/11016 [12:27<15:23,  6.62it/s, loss=-19.8]

2025-07-21 21:01:39,077 — INFO — layer 0, [0:4899], loss=-14.9353
2025-07-21 21:01:39,077 — INFO — layer 0, [0:4899], loss=-22.3890


epoch 0, layer 0:  45%|███████████████████████████████▉                                       | 4949/11016 [12:34<15:58,  6.33it/s, loss=-12.8]

2025-07-21 21:01:46,765 — INFO — layer 0, [0:4949], loss=-14.4588
2025-07-21 21:01:46,766 — INFO — layer 0, [0:4949], loss=-20.7803


epoch 0, layer 0:  45%|████████████████████████████████▏                                      | 4999/11016 [12:42<15:24,  6.51it/s, loss=-13.9]

2025-07-21 21:01:54,416 — INFO — layer 0, [0:4999], loss=-9.5919
2025-07-21 21:01:54,416 — INFO — layer 0, [0:4999], loss=-5.3757


epoch 0, layer 0:  46%|████████████████████████████████▌                                      | 5049/11016 [12:49<15:08,  6.57it/s, loss=-10.3]

2025-07-21 21:02:01,950 — INFO — layer 0, [0:5049], loss=-1.7678
2025-07-21 21:02:01,950 — INFO — layer 0, [0:5049], loss=-13.1594


epoch 0, layer 0:  46%|████████████████████████████████▊                                      | 5099/11016 [12:57<14:25,  6.83it/s, loss=-12.8]

2025-07-21 21:02:09,511 — INFO — layer 0, [0:5099], loss=-17.5151
2025-07-21 21:02:09,512 — INFO — layer 0, [0:5099], loss=-15.6546


epoch 0, layer 0:  47%|█████████████████████████████████▏                                     | 5149/11016 [13:05<14:47,  6.61it/s, loss=-6.85]

2025-07-21 21:02:17,094 — INFO — layer 0, [0:5149], loss=-14.1211
2025-07-21 21:02:17,094 — INFO — layer 0, [0:5149], loss=-9.3100


epoch 0, layer 0:  47%|█████████████████████████████████▌                                     | 5199/11016 [13:12<14:17,  6.78it/s, loss=-17.8]

2025-07-21 21:02:24,751 — INFO — layer 0, [0:5199], loss=-14.4026
2025-07-21 21:02:24,751 — INFO — layer 0, [0:5199], loss=-2.6302


epoch 0, layer 0:  48%|█████████████████████████████████▊                                     | 5249/11016 [13:20<14:24,  6.67it/s, loss=-12.3]

2025-07-21 21:02:32,394 — INFO — layer 0, [0:5249], loss=-19.0563
2025-07-21 21:02:32,394 — INFO — layer 0, [0:5249], loss=-6.5274


epoch 0, layer 0:  48%|██████████████████████████████████▏                                    | 5299/11016 [13:27<14:15,  6.68it/s, loss=-16.8]

2025-07-21 21:02:39,945 — INFO — layer 0, [0:5299], loss=-11.5960
2025-07-21 21:02:39,945 — INFO — layer 0, [0:5299], loss=-20.4267


epoch 0, layer 0:  49%|██████████████████████████████████▍                                    | 5349/11016 [13:35<13:36,  6.94it/s, loss=-14.6]

2025-07-21 21:02:47,579 — INFO — layer 0, [0:5349], loss=-10.2725
2025-07-21 21:02:47,579 — INFO — layer 0, [0:5349], loss=-6.8759


epoch 0, layer 0:  49%|██████████████████████████████████▊                                    | 5399/11016 [13:43<13:42,  6.83it/s, loss=-18.1]

2025-07-21 21:02:55,113 — INFO — layer 0, [0:5399], loss=-13.4966
2025-07-21 21:02:55,113 — INFO — layer 0, [0:5399], loss=-10.8013


epoch 0, layer 0:  49%|███████████████████████████████████                                    | 5449/11016 [13:50<14:19,  6.48it/s, loss=-9.69]

2025-07-21 21:03:02,716 — INFO — layer 0, [0:5449], loss=-9.6845
2025-07-21 21:03:02,716 — INFO — layer 0, [0:5449], loss=-17.7750


epoch 0, layer 0:  50%|███████████████████████████████████▍                                   | 5499/11016 [13:58<13:50,  6.64it/s, loss=-6.15]

2025-07-21 21:03:10,253 — INFO — layer 0, [0:5499], loss=-23.2048
2025-07-21 21:03:10,253 — INFO — layer 0, [0:5499], loss=-28.4997


epoch 0, layer 0:  50%|███████████████████████████████████▊                                   | 5549/11016 [14:05<13:37,  6.69it/s, loss=-4.58]

2025-07-21 21:03:17,941 — INFO — layer 0, [0:5549], loss=-15.2343
2025-07-21 21:03:17,941 — INFO — layer 0, [0:5549], loss=-15.7336


epoch 0, layer 0:  51%|████████████████████████████████████                                   | 5599/11016 [14:13<13:42,  6.59it/s, loss=-12.5]

2025-07-21 21:03:25,447 — INFO — layer 0, [0:5599], loss=-6.9528
2025-07-21 21:03:25,447 — INFO — layer 0, [0:5599], loss=-10.4583


epoch 0, layer 0:  51%|████████████████████████████████████▍                                  | 5649/11016 [14:20<13:36,  6.57it/s, loss=-10.1]

2025-07-21 21:03:32,966 — INFO — layer 0, [0:5649], loss=-5.8493
2025-07-21 21:03:32,966 — INFO — layer 0, [0:5649], loss=-9.9684


epoch 0, layer 0:  52%|█████████████████████████████████████▏                                  | 5699/11016 [14:28<15:20,  5.78it/s, loss=-7.5]

2025-07-21 21:03:40,593 — INFO — layer 0, [0:5699], loss=-9.2680
2025-07-21 21:03:40,593 — INFO — layer 0, [0:5699], loss=-17.1676


epoch 0, layer 0:  52%|█████████████████████████████████████                                  | 5749/11016 [14:36<12:21,  7.10it/s, loss=-13.6]

2025-07-21 21:03:48,078 — INFO — layer 0, [0:5749], loss=-13.7308
2025-07-21 21:03:48,078 — INFO — layer 0, [0:5749], loss=-8.7830


epoch 0, layer 0:  53%|█████████████████████████████████████▍                                 | 5799/11016 [14:43<13:43,  6.33it/s, loss=-5.66]

2025-07-21 21:03:55,964 — INFO — layer 0, [0:5799], loss=-7.4259
2025-07-21 21:03:55,964 — INFO — layer 0, [0:5799], loss=-8.8163


epoch 0, layer 0:  53%|█████████████████████████████████████▋                                 | 5849/11016 [14:51<13:13,  6.51it/s, loss=-1.79]

2025-07-21 21:04:03,563 — INFO — layer 0, [0:5849], loss=-14.7411
2025-07-21 21:04:03,563 — INFO — layer 0, [0:5849], loss=-2.3650


epoch 0, layer 0:  54%|██████████████████████████████████████                                 | 5899/11016 [14:59<13:09,  6.48it/s, loss=-7.18]

2025-07-21 21:04:11,213 — INFO — layer 0, [0:5899], loss=-5.0186
2025-07-21 21:04:11,213 — INFO — layer 0, [0:5899], loss=-14.5544


epoch 0, layer 0:  54%|██████████████████████████████████████▎                                | 5949/11016 [15:06<12:38,  6.68it/s, loss=-14.5]

2025-07-21 21:04:18,625 — INFO — layer 0, [0:5949], loss=-24.3937
2025-07-21 21:04:18,625 — INFO — layer 0, [0:5949], loss=-9.9066


epoch 0, layer 0:  54%|██████████████████████████████████████▋                                | 5999/11016 [15:14<12:08,  6.89it/s, loss=-18.7]

2025-07-21 21:04:26,335 — INFO — layer 0, [0:5999], loss=-6.4586
2025-07-21 21:04:26,335 — INFO — layer 0, [0:5999], loss=-15.0461


epoch 0, layer 0:  55%|██████████████████████████████████████▉                                | 6049/11016 [15:21<12:28,  6.64it/s, loss=-13.3]

2025-07-21 21:04:33,905 — INFO — layer 0, [0:6049], loss=-13.6451
2025-07-21 21:04:33,905 — INFO — layer 0, [0:6049], loss=-2.2312


epoch 0, layer 0:  55%|███████████████████████████████████████▎                               | 6099/11016 [15:29<11:30,  7.12it/s, loss=-16.5]

2025-07-21 21:04:41,470 — INFO — layer 0, [0:6099], loss=-9.5608
2025-07-21 21:04:41,470 — INFO — layer 0, [0:6099], loss=-2.1598


epoch 0, layer 0:  56%|███████████████████████████████████████▋                               | 6149/11016 [15:37<12:29,  6.49it/s, loss=-10.4]

2025-07-21 21:04:49,117 — INFO — layer 0, [0:6149], loss=-23.4197
2025-07-21 21:04:49,117 — INFO — layer 0, [0:6149], loss=-6.1954


epoch 0, layer 0:  56%|███████████████████████████████████████▉                               | 6199/11016 [15:44<12:22,  6.49it/s, loss=-14.8]

2025-07-21 21:04:56,629 — INFO — layer 0, [0:6199], loss=-16.0898
2025-07-21 21:04:56,629 — INFO — layer 0, [0:6199], loss=-8.0539


epoch 0, layer 0:  57%|████████████████████████████████████████▎                              | 6249/11016 [15:52<11:50,  6.70it/s, loss=-8.61]

2025-07-21 21:05:04,389 — INFO — layer 0, [0:6249], loss=-16.2080
2025-07-21 21:05:04,389 — INFO — layer 0, [0:6249], loss=-13.3312


epoch 0, layer 0:  57%|████████████████████████████████████████▌                              | 6299/11016 [15:59<11:28,  6.85it/s, loss=-2.63]

2025-07-21 21:05:11,930 — INFO — layer 0, [0:6299], loss=-17.8794
2025-07-21 21:05:11,930 — INFO — layer 0, [0:6299], loss=-25.7899


epoch 0, layer 0:  58%|████████████████████████████████████████▉                              | 6349/11016 [16:07<11:16,  6.89it/s, loss=-19.3]

2025-07-21 21:05:19,525 — INFO — layer 0, [0:6349], loss=-8.4253
2025-07-21 21:05:19,525 — INFO — layer 0, [0:6349], loss=-16.2157


epoch 0, layer 0:  58%|█████████████████████████████████████████▏                             | 6399/11016 [16:15<12:03,  6.38it/s, loss=-14.9]

2025-07-21 21:05:27,305 — INFO — layer 0, [0:6399], loss=-14.3421
2025-07-21 21:05:27,305 — INFO — layer 0, [0:6399], loss=-24.7496


epoch 0, layer 0:  59%|█████████████████████████████████████████▌                             | 6449/11016 [16:22<11:17,  6.74it/s, loss=-7.76]

2025-07-21 21:05:34,889 — INFO — layer 0, [0:6449], loss=-17.1794
2025-07-21 21:05:34,889 — INFO — layer 0, [0:6449], loss=-10.5296


epoch 0, layer 0:  59%|█████████████████████████████████████████▉                             | 6499/11016 [16:30<12:05,  6.23it/s, loss=-12.3]

2025-07-21 21:05:42,489 — INFO — layer 0, [0:6499], loss=-9.8385
2025-07-21 21:05:42,489 — INFO — layer 0, [0:6499], loss=-8.7199


epoch 0, layer 0:  59%|██████████████████████████████████████████▏                            | 6549/11016 [16:38<11:17,  6.59it/s, loss=-18.4]

2025-07-21 21:05:50,142 — INFO — layer 0, [0:6549], loss=-10.2951
2025-07-21 21:05:50,142 — INFO — layer 0, [0:6549], loss=-2.9689


epoch 0, layer 0:  60%|██████████████████████████████████████████▌                            | 6599/11016 [16:45<11:03,  6.66it/s, loss=-6.55]

2025-07-21 21:05:57,749 — INFO — layer 0, [0:6599], loss=-11.7948
2025-07-21 21:05:57,749 — INFO — layer 0, [0:6599], loss=-11.1341


epoch 0, layer 0:  60%|██████████████████████████████████████████▊                            | 6649/11016 [16:53<11:00,  6.62it/s, loss=-15.4]

2025-07-21 21:06:05,318 — INFO — layer 0, [0:6649], loss=-4.8696
2025-07-21 21:06:05,318 — INFO — layer 0, [0:6649], loss=-17.3523


epoch 0, layer 0:  61%|███████████████████████████████████████████▏                           | 6699/11016 [17:00<11:20,  6.35it/s, loss=-15.6]

2025-07-21 21:06:12,962 — INFO — layer 0, [0:6699], loss=-25.0303
2025-07-21 21:06:12,962 — INFO — layer 0, [0:6699], loss=-11.6429


epoch 0, layer 0:  61%|███████████████████████████████████████████▍                           | 6749/11016 [17:08<10:54,  6.52it/s, loss=-10.8]

2025-07-21 21:06:20,504 — INFO — layer 0, [0:6749], loss=-7.1678
2025-07-21 21:06:20,504 — INFO — layer 0, [0:6749], loss=-23.0017


epoch 0, layer 0:  62%|███████████████████████████████████████████▊                           | 6799/11016 [17:15<10:15,  6.85it/s, loss=-7.86]

2025-07-21 21:06:27,970 — INFO — layer 0, [0:6799], loss=-10.3041
2025-07-21 21:06:27,970 — INFO — layer 0, [0:6799], loss=-16.6643


epoch 0, layer 0:  62%|████████████████████████████████████████████▏                          | 6849/11016 [17:23<10:22,  6.69it/s, loss=-13.7]

2025-07-21 21:06:35,564 — INFO — layer 0, [0:6849], loss=-16.6550
2025-07-21 21:06:35,564 — INFO — layer 0, [0:6849], loss=-13.2295


epoch 0, layer 0:  63%|████████████████████████████████████████████▍                          | 6899/11016 [17:31<10:31,  6.52it/s, loss=-20.8]

2025-07-21 21:06:43,223 — INFO — layer 0, [0:6899], loss=-13.8490
2025-07-21 21:06:43,223 — INFO — layer 0, [0:6899], loss=-16.9751


epoch 0, layer 0:  63%|████████████████████████████████████████████▊                          | 6949/11016 [17:38<10:11,  6.65it/s, loss=-8.56]

2025-07-21 21:06:50,818 — INFO — layer 0, [0:6949], loss=-13.7873
2025-07-21 21:06:50,818 — INFO — layer 0, [0:6949], loss=-16.8297


epoch 0, layer 0:  64%|█████████████████████████████████████████████                          | 6999/11016 [17:46<10:37,  6.30it/s, loss=-23.9]

2025-07-21 21:06:58,413 — INFO — layer 0, [0:6999], loss=-15.4472
2025-07-21 21:06:58,413 — INFO — layer 0, [0:6999], loss=-1.4895


epoch 0, layer 0:  64%|█████████████████████████████████████████████▍                         | 7049/11016 [17:54<10:47,  6.12it/s, loss=-18.7]

2025-07-21 21:07:06,061 — INFO — layer 0, [0:7049], loss=-11.7563
2025-07-21 21:07:06,061 — INFO — layer 0, [0:7049], loss=-7.5177


epoch 0, layer 0:  64%|█████████████████████████████████████████████▊                         | 7099/11016 [18:01<09:37,  6.78it/s, loss=-4.61]

2025-07-21 21:07:13,736 — INFO — layer 0, [0:7099], loss=-16.5380
2025-07-21 21:07:13,736 — INFO — layer 0, [0:7099], loss=-17.5756


epoch 0, layer 0:  65%|██████████████████████████████████████████████                         | 7149/11016 [18:09<10:15,  6.28it/s, loss=-22.6]

2025-07-21 21:07:21,373 — INFO — layer 0, [0:7149], loss=-9.1658
2025-07-21 21:07:21,373 — INFO — layer 0, [0:7149], loss=-15.2090


epoch 0, layer 0:  65%|██████████████████████████████████████████████▍                        | 7199/11016 [18:16<09:08,  6.95it/s, loss=-11.4]

2025-07-21 21:07:28,844 — INFO — layer 0, [0:7199], loss=-4.9776
2025-07-21 21:07:28,844 — INFO — layer 0, [0:7199], loss=-12.2081


epoch 0, layer 0:  66%|██████████████████████████████████████████████▋                        | 7249/11016 [18:24<09:01,  6.96it/s, loss=-9.34]

2025-07-21 21:07:36,479 — INFO — layer 0, [0:7249], loss=-3.6092
2025-07-21 21:07:36,479 — INFO — layer 0, [0:7249], loss=-11.3220


epoch 0, layer 0:  66%|███████████████████████████████████████████████                        | 7299/11016 [18:32<09:40,  6.40it/s, loss=-19.6]

2025-07-21 21:07:44,157 — INFO — layer 0, [0:7299], loss=-6.2500
2025-07-21 21:07:44,157 — INFO — layer 0, [0:7299], loss=-7.2886


epoch 0, layer 0:  67%|███████████████████████████████████████████████▎                       | 7349/11016 [18:39<09:06,  6.71it/s, loss=-7.69]

2025-07-21 21:07:51,665 — INFO — layer 0, [0:7349], loss=-13.8495
2025-07-21 21:07:51,665 — INFO — layer 0, [0:7349], loss=-19.1316


epoch 0, layer 0:  67%|███████████████████████████████████████████████▋                       | 7399/11016 [18:47<08:39,  6.97it/s, loss=-19.6]

2025-07-21 21:07:59,349 — INFO — layer 0, [0:7399], loss=-21.7071
2025-07-21 21:07:59,349 — INFO — layer 0, [0:7399], loss=-2.5774


epoch 0, layer 0:  68%|████████████████████████████████████████████████                       | 7449/11016 [18:54<08:52,  6.70it/s, loss=-7.77]

2025-07-21 21:08:06,934 — INFO — layer 0, [0:7449], loss=-6.5100
2025-07-21 21:08:06,934 — INFO — layer 0, [0:7449], loss=-10.9707


epoch 0, layer 0:  68%|█████████████████████████████████████████████████                       | 7499/11016 [19:02<09:04,  6.46it/s, loss=-9.8]

2025-07-21 21:08:14,615 — INFO — layer 0, [0:7499], loss=-10.8481
2025-07-21 21:08:14,615 — INFO — layer 0, [0:7499], loss=-17.9060


epoch 0, layer 0:  69%|████████████████████████████████████████████████▋                      | 7549/11016 [19:10<08:37,  6.69it/s, loss=-17.1]

2025-07-21 21:08:22,185 — INFO — layer 0, [0:7549], loss=-6.8668
2025-07-21 21:08:22,185 — INFO — layer 0, [0:7549], loss=-4.7546


epoch 0, layer 0:  69%|████████████████████████████████████████████████▉                      | 7599/11016 [19:17<08:54,  6.39it/s, loss=-21.6]

2025-07-21 21:08:29,613 — INFO — layer 0, [0:7599], loss=-14.3113
2025-07-21 21:08:29,614 — INFO — layer 0, [0:7599], loss=-18.1773


epoch 0, layer 0:  69%|█████████████████████████████████████████████████▎                     | 7649/11016 [19:25<08:46,  6.40it/s, loss=-10.3]

2025-07-21 21:08:37,351 — INFO — layer 0, [0:7649], loss=-14.8477
2025-07-21 21:08:37,351 — INFO — layer 0, [0:7649], loss=-14.7391


epoch 0, layer 0:  70%|█████████████████████████████████████████████████▌                     | 7699/11016 [19:32<08:32,  6.48it/s, loss=-16.8]

2025-07-21 21:08:45,036 — INFO — layer 0, [0:7699], loss=-23.9552
2025-07-21 21:08:45,036 — INFO — layer 0, [0:7699], loss=-14.2948


epoch 0, layer 0:  70%|█████████████████████████████████████████████████▉                     | 7749/11016 [19:40<08:35,  6.33it/s, loss=-10.2]

2025-07-21 21:08:52,708 — INFO — layer 0, [0:7749], loss=-12.0046
2025-07-21 21:08:52,708 — INFO — layer 0, [0:7749], loss=-10.2457


epoch 0, layer 0:  71%|██████████████████████████████████████████████████▎                    | 7799/11016 [19:48<08:13,  6.51it/s, loss=-19.9]

2025-07-21 21:09:00,405 — INFO — layer 0, [0:7799], loss=-14.8594
2025-07-21 21:09:00,406 — INFO — layer 0, [0:7799], loss=-15.8016


epoch 0, layer 0:  71%|██████████████████████████████████████████████████▌                    | 7849/11016 [19:55<07:36,  6.93it/s, loss=-9.69]

2025-07-21 21:09:07,936 — INFO — layer 0, [0:7849], loss=-16.1186
2025-07-21 21:09:07,936 — INFO — layer 0, [0:7849], loss=-13.5504


epoch 0, layer 0:  72%|██████████████████████████████████████████████████▉                    | 7899/11016 [20:03<07:51,  6.61it/s, loss=-4.12]

2025-07-21 21:09:15,475 — INFO — layer 0, [0:7899], loss=-19.7375
2025-07-21 21:09:15,475 — INFO — layer 0, [0:7899], loss=-19.5049


epoch 0, layer 0:  72%|███████████████████████████████████████████████████▏                   | 7949/11016 [20:11<07:51,  6.51it/s, loss=-25.9]

2025-07-21 21:09:23,107 — INFO — layer 0, [0:7949], loss=-10.2069
2025-07-21 21:09:23,107 — INFO — layer 0, [0:7949], loss=-7.9635


epoch 0, layer 0:  73%|███████████████████████████████████████████████████▌                   | 7999/11016 [20:18<07:45,  6.48it/s, loss=-7.24]

2025-07-21 21:09:30,769 — INFO — layer 0, [0:7999], loss=-16.4529
2025-07-21 21:09:30,770 — INFO — layer 0, [0:7999], loss=-14.9904


epoch 0, layer 0:  73%|███████████████████████████████████████████████████▉                   | 8049/11016 [20:26<07:17,  6.78it/s, loss=-4.07]

2025-07-21 21:09:38,425 — INFO — layer 0, [0:8049], loss=-17.1480
2025-07-21 21:09:38,425 — INFO — layer 0, [0:8049], loss=-21.2536


epoch 0, layer 0:  74%|████████████████████████████████████████████████████▏                  | 8099/11016 [20:33<07:12,  6.74it/s, loss=-9.38]

2025-07-21 21:09:46,028 — INFO — layer 0, [0:8099], loss=-12.7194
2025-07-21 21:09:46,028 — INFO — layer 0, [0:8099], loss=-27.7869


epoch 0, layer 0:  74%|████████████████████████████████████████████████████▌                  | 8149/11016 [20:41<07:36,  6.28it/s, loss=-14.4]

2025-07-21 21:09:53,658 — INFO — layer 0, [0:8149], loss=-16.7985
2025-07-21 21:09:53,658 — INFO — layer 0, [0:8149], loss=-17.0518


epoch 0, layer 0:  74%|████████████████████████████████████████████████████▊                  | 8199/11016 [20:49<07:02,  6.67it/s, loss=-10.3]

2025-07-21 21:10:01,242 — INFO — layer 0, [0:8199], loss=-11.9319
2025-07-21 21:10:01,243 — INFO — layer 0, [0:8199], loss=-8.5397


epoch 0, layer 0:  75%|█████████████████████████████████████████████████████▏                 | 8249/11016 [20:56<07:01,  6.57it/s, loss=-10.8]

2025-07-21 21:10:08,958 — INFO — layer 0, [0:8249], loss=-12.6081
2025-07-21 21:10:08,958 — INFO — layer 0, [0:8249], loss=-7.8873


epoch 0, layer 0:  75%|█████████████████████████████████████████████████████▍                 | 8299/11016 [21:04<06:56,  6.52it/s, loss=-20.3]

2025-07-21 21:10:16,496 — INFO — layer 0, [0:8299], loss=-13.4397
2025-07-21 21:10:16,496 — INFO — layer 0, [0:8299], loss=-3.5651


epoch 0, layer 0:  76%|█████████████████████████████████████████████████████▊                 | 8349/11016 [21:12<07:04,  6.29it/s, loss=-14.9]

2025-07-21 21:10:24,157 — INFO — layer 0, [0:8349], loss=-11.2643
2025-07-21 21:10:24,157 — INFO — layer 0, [0:8349], loss=-17.5308


epoch 0, layer 0:  76%|██████████████████████████████████████████████████████▏                | 8399/11016 [21:19<06:47,  6.42it/s, loss=-11.5]

2025-07-21 21:10:31,607 — INFO — layer 0, [0:8399], loss=-13.1623
2025-07-21 21:10:31,607 — INFO — layer 0, [0:8399], loss=-20.8823


epoch 0, layer 0:  77%|██████████████████████████████████████████████████████▍                | 8449/11016 [21:27<06:57,  6.16it/s, loss=-10.1]

2025-07-21 21:10:39,139 — INFO — layer 0, [0:8449], loss=-13.6303
2025-07-21 21:10:39,139 — INFO — layer 0, [0:8449], loss=-12.9303


epoch 0, layer 0:  77%|██████████████████████████████████████████████████████▊                | 8499/11016 [21:34<06:48,  6.16it/s, loss=-11.2]

2025-07-21 21:10:46,772 — INFO — layer 0, [0:8499], loss=-13.8104
2025-07-21 21:10:46,772 — INFO — layer 0, [0:8499], loss=-7.1897


epoch 0, layer 0:  78%|███████████████████████████████████████████████████████                | 8549/11016 [21:42<06:21,  6.47it/s, loss=-15.4]

2025-07-21 21:10:54,276 — INFO — layer 0, [0:8549], loss=-7.5615
2025-07-21 21:10:54,277 — INFO — layer 0, [0:8549], loss=-16.2370


epoch 0, layer 0:  78%|███████████████████████████████████████████████████████▍               | 8599/11016 [21:49<06:07,  6.58it/s, loss=-13.1]

2025-07-21 21:11:01,871 — INFO — layer 0, [0:8599], loss=-9.9802
2025-07-21 21:11:01,871 — INFO — layer 0, [0:8599], loss=-11.2910


epoch 0, layer 0:  79%|███████████████████████████████████████████████████████▋               | 8649/11016 [21:57<05:42,  6.92it/s, loss=-13.6]

2025-07-21 21:11:09,461 — INFO — layer 0, [0:8649], loss=-11.4570
2025-07-21 21:11:09,461 — INFO — layer 0, [0:8649], loss=-9.3421


epoch 0, layer 0:  79%|████████████████████████████████████████████████████████               | 8699/11016 [22:05<06:23,  6.04it/s, loss=-18.5]

2025-07-21 21:11:17,098 — INFO — layer 0, [0:8699], loss=-7.8271
2025-07-21 21:11:17,098 — INFO — layer 0, [0:8699], loss=-3.9270


epoch 0, layer 0:  79%|█████████████████████████████████████████████████████████▉               | 8749/11016 [22:12<05:46,  6.54it/s, loss=-14]

2025-07-21 21:11:24,754 — INFO — layer 0, [0:8749], loss=-15.9534
2025-07-21 21:11:24,754 — INFO — layer 0, [0:8749], loss=-17.2270


epoch 0, layer 0:  80%|████████████████████████████████████████████████████████▋              | 8799/11016 [22:20<05:34,  6.63it/s, loss=0.197]

2025-07-21 21:11:32,308 — INFO — layer 0, [0:8799], loss=-7.5677
2025-07-21 21:11:32,309 — INFO — layer 0, [0:8799], loss=-13.5502


epoch 0, layer 0:  80%|█████████████████████████████████████████████████████████              | 8849/11016 [22:27<05:13,  6.92it/s, loss=-14.1]

2025-07-21 21:11:39,830 — INFO — layer 0, [0:8849], loss=-11.1386
2025-07-21 21:11:39,830 — INFO — layer 0, [0:8849], loss=-11.0811


epoch 0, layer 0:  81%|█████████████████████████████████████████████████████████▎             | 8899/11016 [22:35<05:21,  6.59it/s, loss=-15.3]

2025-07-21 21:11:47,549 — INFO — layer 0, [0:8899], loss=-9.3188
2025-07-21 21:11:47,549 — INFO — layer 0, [0:8899], loss=-11.8530


epoch 0, layer 0:  81%|█████████████████████████████████████████████████████████▋             | 8949/11016 [22:42<05:08,  6.71it/s, loss=-14.4]

2025-07-21 21:11:54,948 — INFO — layer 0, [0:8949], loss=-13.9477
2025-07-21 21:11:54,948 — INFO — layer 0, [0:8949], loss=-4.0761


epoch 0, layer 0:  82%|██████████████████████████████████████████████████████████             | 8999/11016 [22:50<05:23,  6.23it/s, loss=-22.3]

2025-07-21 21:12:02,515 — INFO — layer 0, [0:8999], loss=-17.6593
2025-07-21 21:12:02,515 — INFO — layer 0, [0:8999], loss=-23.2509


epoch 0, layer 0:  82%|██████████████████████████████████████████████████████████▎            | 9049/11016 [22:58<04:59,  6.57it/s, loss=-6.15]

2025-07-21 21:12:10,203 — INFO — layer 0, [0:9049], loss=-13.1929
2025-07-21 21:12:10,203 — INFO — layer 0, [0:9049], loss=-24.4051


epoch 0, layer 0:  83%|██████████████████████████████████████████████████████████▋            | 9099/11016 [23:05<05:40,  5.63it/s, loss=-15.2]

2025-07-21 21:12:17,758 — INFO — layer 0, [0:9099], loss=-16.1241
2025-07-21 21:12:17,758 — INFO — layer 0, [0:9099], loss=-7.1454


epoch 0, layer 0:  83%|██████████████████████████████████████████████████████████▉            | 9149/11016 [23:13<04:41,  6.64it/s, loss=-13.7]

2025-07-21 21:12:25,280 — INFO — layer 0, [0:9149], loss=-8.8044
2025-07-21 21:12:25,280 — INFO — layer 0, [0:9149], loss=-21.2036


epoch 0, layer 0:  84%|███████████████████████████████████████████████████████████▎           | 9199/11016 [23:20<04:37,  6.55it/s, loss=-4.82]

2025-07-21 21:12:32,874 — INFO — layer 0, [0:9199], loss=-3.7267
2025-07-21 21:12:32,874 — INFO — layer 0, [0:9199], loss=-14.9570


epoch 0, layer 0:  84%|███████████████████████████████████████████████████████████▌           | 9249/11016 [23:28<04:28,  6.58it/s, loss=-11.2]

2025-07-21 21:12:40,488 — INFO — layer 0, [0:9249], loss=-20.9073
2025-07-21 21:12:40,488 — INFO — layer 0, [0:9249], loss=-14.2436


epoch 0, layer 0:  84%|███████████████████████████████████████████████████████████▉           | 9299/11016 [23:36<04:21,  6.56it/s, loss=-15.6]

2025-07-21 21:12:48,215 — INFO — layer 0, [0:9299], loss=-18.9307
2025-07-21 21:12:48,215 — INFO — layer 0, [0:9299], loss=-13.2480


epoch 0, layer 0:  85%|████████████████████████████████████████████████████████████▎          | 9349/11016 [23:43<04:13,  6.58it/s, loss=-17.1]

2025-07-21 21:12:55,853 — INFO — layer 0, [0:9349], loss=-13.6882
2025-07-21 21:12:55,853 — INFO — layer 0, [0:9349], loss=-20.0222


epoch 0, layer 0:  85%|████████████████████████████████████████████████████████████▌          | 9399/11016 [23:51<04:08,  6.49it/s, loss=-17.6]

2025-07-21 21:13:03,399 — INFO — layer 0, [0:9399], loss=-6.3562
2025-07-21 21:13:03,399 — INFO — layer 0, [0:9399], loss=-9.1875


epoch 0, layer 0:  86%|████████████████████████████████████████████████████████████▉          | 9449/11016 [23:59<04:09,  6.28it/s, loss=-15.9]

2025-07-21 21:13:11,245 — INFO — layer 0, [0:9449], loss=-7.7774
2025-07-21 21:13:11,245 — INFO — layer 0, [0:9449], loss=-9.4402


epoch 0, layer 0:  86%|█████████████████████████████████████████████████████████████▏         | 9499/11016 [24:06<04:00,  6.30it/s, loss=-12.3]

2025-07-21 21:13:18,791 — INFO — layer 0, [0:9499], loss=-12.7681
2025-07-21 21:13:18,791 — INFO — layer 0, [0:9499], loss=-13.8806


epoch 0, layer 0:  87%|█████████████████████████████████████████████████████████████▌         | 9549/11016 [24:14<03:38,  6.70it/s, loss=-7.84]

2025-07-21 21:13:26,319 — INFO — layer 0, [0:9549], loss=-18.1024
2025-07-21 21:13:26,319 — INFO — layer 0, [0:9549], loss=-14.3138


epoch 0, layer 0:  87%|█████████████████████████████████████████████████████████████▊         | 9599/11016 [24:21<03:27,  6.83it/s, loss=-7.33]

2025-07-21 21:13:33,981 — INFO — layer 0, [0:9599], loss=-18.2141
2025-07-21 21:13:33,981 — INFO — layer 0, [0:9599], loss=-12.0918


epoch 0, layer 0:  88%|███████████████████████████████████████████████████████████████▉         | 9649/11016 [24:29<03:27,  6.57it/s, loss=-10]

2025-07-21 21:13:41,590 — INFO — layer 0, [0:9649], loss=-10.1118
2025-07-21 21:13:41,590 — INFO — layer 0, [0:9649], loss=-14.4402


epoch 0, layer 0:  88%|██████████████████████████████████████████████████████████████▌        | 9699/11016 [24:37<03:29,  6.30it/s, loss=-14.7]

2025-07-21 21:13:49,150 — INFO — layer 0, [0:9699], loss=-12.7885
2025-07-21 21:13:49,150 — INFO — layer 0, [0:9699], loss=-17.4863


epoch 0, layer 0:  88%|██████████████████████████████████████████████████████████████▊        | 9749/11016 [24:44<03:14,  6.50it/s, loss=-14.1]

2025-07-21 21:13:56,593 — INFO — layer 0, [0:9749], loss=-8.1020
2025-07-21 21:13:56,593 — INFO — layer 0, [0:9749], loss=-21.8811


epoch 0, layer 0:  89%|████████████████████████████████████████████████████████████████▉        | 9799/11016 [24:52<03:05,  6.54it/s, loss=-12]

2025-07-21 21:14:04,310 — INFO — layer 0, [0:9799], loss=-16.5026
2025-07-21 21:14:04,310 — INFO — layer 0, [0:9799], loss=-15.1242


epoch 0, layer 0:  89%|███████████████████████████████████████████████████████████████▍       | 9849/11016 [24:59<03:02,  6.39it/s, loss=-8.39]

2025-07-21 21:14:11,886 — INFO — layer 0, [0:9849], loss=-5.7136
2025-07-21 21:14:11,886 — INFO — layer 0, [0:9849], loss=-13.6751


epoch 0, layer 0:  90%|███████████████████████████████████████████████████████████████▊       | 9899/11016 [25:07<02:45,  6.73it/s, loss=-14.4]

2025-07-21 21:14:19,338 — INFO — layer 0, [0:9899], loss=-15.2099
2025-07-21 21:14:19,338 — INFO — layer 0, [0:9899], loss=-13.7862


epoch 0, layer 0:  90%|████████████████████████████████████████████████████████████████       | 9949/11016 [25:14<02:46,  6.41it/s, loss=-16.3]

2025-07-21 21:14:26,837 — INFO — layer 0, [0:9949], loss=-9.7675
2025-07-21 21:14:26,837 — INFO — layer 0, [0:9949], loss=-13.1037


epoch 0, layer 0:  91%|████████████████████████████████████████████████████████████████▍      | 9999/11016 [25:22<02:29,  6.83it/s, loss=-6.39]

2025-07-21 21:14:34,360 — INFO — layer 0, [0:9999], loss=-11.8440
2025-07-21 21:14:34,360 — INFO — layer 0, [0:9999], loss=-12.3159


epoch 0, layer 0:  91%|█████████████████████████████████████████████████████████████████▋      | 10049/11016 [25:29<02:22,  6.79it/s, loss=-16]

2025-07-21 21:14:41,961 — INFO — layer 0, [0:10049], loss=-13.5169
2025-07-21 21:14:41,961 — INFO — layer 0, [0:10049], loss=-17.9762


epoch 0, layer 0:  92%|████████████████████████████████████████████████████████████████▏     | 10099/11016 [25:37<02:21,  6.48it/s, loss=-18.3]

2025-07-21 21:14:49,503 — INFO — layer 0, [0:10099], loss=-17.3512
2025-07-21 21:14:49,503 — INFO — layer 0, [0:10099], loss=-10.1412


epoch 0, layer 0:  92%|████████████████████████████████████████████████████████████████▍     | 10149/11016 [25:44<02:15,  6.42it/s, loss=-15.6]

2025-07-21 21:14:57,030 — INFO — layer 0, [0:10149], loss=-17.5405
2025-07-21 21:14:57,030 — INFO — layer 0, [0:10149], loss=-15.3588


epoch 0, layer 0:  93%|████████████████████████████████████████████████████████████████▊     | 10199/11016 [25:52<02:07,  6.39it/s, loss=-5.83]

2025-07-21 21:15:04,738 — INFO — layer 0, [0:10199], loss=-10.5187
2025-07-21 21:15:04,738 — INFO — layer 0, [0:10199], loss=-14.0795


epoch 0, layer 0:  93%|█████████████████████████████████████████████████████████████████▏    | 10249/11016 [26:00<01:55,  6.65it/s, loss=-10.6]

2025-07-21 21:15:12,430 — INFO — layer 0, [0:10249], loss=-7.3257
2025-07-21 21:15:12,430 — INFO — layer 0, [0:10249], loss=-11.7704


epoch 0, layer 0:  93%|█████████████████████████████████████████████████████████████████▍    | 10299/11016 [26:08<01:49,  6.55it/s, loss=-19.6]

2025-07-21 21:15:20,184 — INFO — layer 0, [0:10299], loss=-7.5525
2025-07-21 21:15:20,184 — INFO — layer 0, [0:10299], loss=-19.9114


epoch 0, layer 0:  94%|█████████████████████████████████████████████████████████████████▊    | 10349/11016 [26:15<01:46,  6.28it/s, loss=-11.3]

2025-07-21 21:15:27,800 — INFO — layer 0, [0:10349], loss=-18.3179
2025-07-21 21:15:27,800 — INFO — layer 0, [0:10349], loss=-14.9706


epoch 0, layer 0:  94%|██████████████████████████████████████████████████████████████████    | 10399/11016 [26:23<01:36,  6.41it/s, loss=-7.72]

2025-07-21 21:15:35,390 — INFO — layer 0, [0:10399], loss=-21.8407
2025-07-21 21:15:35,390 — INFO — layer 0, [0:10399], loss=-15.0694


epoch 0, layer 0:  95%|██████████████████████████████████████████████████████████████████▍   | 10449/11016 [26:30<01:23,  6.82it/s, loss=-12.8]

2025-07-21 21:15:43,022 — INFO — layer 0, [0:10449], loss=-21.1764
2025-07-21 21:15:43,022 — INFO — layer 0, [0:10449], loss=-7.1987


epoch 0, layer 0:  95%|██████████████████████████████████████████████████████████████████▋   | 10499/11016 [26:38<01:23,  6.22it/s, loss=-8.61]

2025-07-21 21:15:50,725 — INFO — layer 0, [0:10499], loss=-20.9179
2025-07-21 21:15:50,725 — INFO — layer 0, [0:10499], loss=-20.6800


epoch 0, layer 0:  96%|███████████████████████████████████████████████████████████████████   | 10549/11016 [26:46<01:09,  6.76it/s, loss=-21.2]

2025-07-21 21:15:58,322 — INFO — layer 0, [0:10549], loss=-4.6079
2025-07-21 21:15:58,322 — INFO — layer 0, [0:10549], loss=-6.4063


epoch 0, layer 0:  96%|███████████████████████████████████████████████████████████████████▎  | 10599/11016 [26:54<01:05,  6.35it/s, loss=-11.2]

2025-07-21 21:16:06,126 — INFO — layer 0, [0:10599], loss=-8.0863
2025-07-21 21:16:06,126 — INFO — layer 0, [0:10599], loss=-19.2859


epoch 0, layer 0:  97%|███████████████████████████████████████████████████████████████████▋  | 10649/11016 [27:01<00:54,  6.73it/s, loss=-13.3]

2025-07-21 21:16:13,723 — INFO — layer 0, [0:10649], loss=0.4617
2025-07-21 21:16:13,723 — INFO — layer 0, [0:10649], loss=-10.9753


epoch 0, layer 0:  97%|███████████████████████████████████████████████████████████████████▉  | 10699/11016 [27:09<00:47,  6.67it/s, loss=-13.7]

2025-07-21 21:16:21,493 — INFO — layer 0, [0:10699], loss=-18.2198
2025-07-21 21:16:21,493 — INFO — layer 0, [0:10699], loss=-3.2414


epoch 0, layer 0:  98%|█████████████████████████████████████████████████████████████████████▎ | 10749/11016 [27:16<00:40,  6.53it/s, loss=-5.2]

2025-07-21 21:16:29,042 — INFO — layer 0, [0:10749], loss=-11.2822
2025-07-21 21:16:29,042 — INFO — layer 0, [0:10749], loss=-10.5570


epoch 0, layer 0:  98%|████████████████████████████████████████████████████████████████████▌ | 10799/11016 [27:24<00:32,  6.65it/s, loss=-15.8]

2025-07-21 21:16:36,482 — INFO — layer 0, [0:10799], loss=-17.0467
2025-07-21 21:16:36,482 — INFO — layer 0, [0:10799], loss=-11.9005


epoch 0, layer 0:  98%|████████████████████████████████████████████████████████████████████▉ | 10849/11016 [27:31<00:25,  6.46it/s, loss=-14.3]

2025-07-21 21:16:43,964 — INFO — layer 0, [0:10849], loss=-18.1735
2025-07-21 21:16:43,964 — INFO — layer 0, [0:10849], loss=-21.1916


epoch 0, layer 0:  99%|█████████████████████████████████████████████████████████████████████▎| 10899/11016 [27:39<00:17,  6.82it/s, loss=-5.74]

2025-07-21 21:16:51,449 — INFO — layer 0, [0:10899], loss=-14.3771
2025-07-21 21:16:51,449 — INFO — layer 0, [0:10899], loss=-25.6521


epoch 0, layer 0:  99%|█████████████████████████████████████████████████████████████████████▌| 10949/11016 [27:47<00:10,  6.52it/s, loss=-15.7]

2025-07-21 21:16:59,210 — INFO — layer 0, [0:10949], loss=-12.6039
2025-07-21 21:16:59,210 — INFO — layer 0, [0:10949], loss=-22.0839


epoch 0, layer 0: 100%|█████████████████████████████████████████████████████████████████████▉| 10999/11016 [27:54<00:02,  6.81it/s, loss=-20.8]

2025-07-21 21:17:06,825 — INFO — layer 0, [0:10999], loss=-5.3987
2025-07-21 21:17:06,825 — INFO — layer 0, [0:10999], loss=-18.2786


epoch 1, layer 0:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 21:17:14,888 — INFO — layer 0, [1:33], loss=-14.7787
2025-07-21 21:17:14,888 — INFO — layer 0, [1:33], loss=-16.5030


epoch 1, layer 0:   1%|▌                                                                        | 83/11016 [00:12<26:50,  6.79it/s, loss=-15.1]

2025-07-21 21:17:22,398 — INFO — layer 0, [1:83], loss=-14.0995
2025-07-21 21:17:22,398 — INFO — layer 0, [1:83], loss=-2.9718


epoch 1, layer 0:   1%|▊                                                                       | 133/11016 [00:20<27:10,  6.67it/s, loss=-16.5]

2025-07-21 21:17:30,002 — INFO — layer 0, [1:133], loss=-22.7325
2025-07-21 21:17:30,002 — INFO — layer 0, [1:133], loss=-17.5627


epoch 1, layer 0:   2%|█▏                                                                      | 183/11016 [00:28<27:32,  6.56it/s, loss=-14.5]

2025-07-21 21:17:37,550 — INFO — layer 0, [1:183], loss=-13.6722
2025-07-21 21:17:37,550 — INFO — layer 0, [1:183], loss=-10.9560


epoch 1, layer 0:   2%|█▌                                                                      | 233/11016 [00:35<28:06,  6.39it/s, loss=-7.49]

2025-07-21 21:17:45,180 — INFO — layer 0, [1:233], loss=-6.9802
2025-07-21 21:17:45,180 — INFO — layer 0, [1:233], loss=-10.2983


epoch 1, layer 0:   3%|█▊                                                                      | 283/11016 [00:43<27:28,  6.51it/s, loss=-10.1]

2025-07-21 21:17:52,802 — INFO — layer 0, [1:283], loss=-15.6666
2025-07-21 21:17:52,802 — INFO — layer 0, [1:283], loss=-20.9730


epoch 1, layer 0:   3%|██▏                                                                     | 333/11016 [00:50<26:35,  6.70it/s, loss=-18.8]

2025-07-21 21:18:00,368 — INFO — layer 0, [1:333], loss=-4.6518
2025-07-21 21:18:00,368 — INFO — layer 0, [1:333], loss=-11.5006


epoch 1, layer 0:   3%|██▌                                                                     | 383/11016 [00:58<26:19,  6.73it/s, loss=-18.3]

2025-07-21 21:18:07,906 — INFO — layer 0, [1:383], loss=-19.8590
2025-07-21 21:18:07,906 — INFO — layer 0, [1:383], loss=-6.5899


epoch 1, layer 0:   4%|██▊                                                                     | 433/11016 [01:05<26:54,  6.55it/s, loss=-12.7]

2025-07-21 21:18:15,499 — INFO — layer 0, [1:433], loss=-15.3528
2025-07-21 21:18:15,499 — INFO — layer 0, [1:433], loss=-3.6233


epoch 1, layer 0:   4%|███▏                                                                     | 483/11016 [01:13<28:04,  6.25it/s, loss=-8.8]

2025-07-21 21:18:23,027 — INFO — layer 0, [1:483], loss=-13.2448
2025-07-21 21:18:23,027 — INFO — layer 0, [1:483], loss=-17.4371


epoch 1, layer 0:   5%|███▍                                                                    | 533/11016 [01:21<25:54,  6.74it/s, loss=-10.5]

2025-07-21 21:18:30,569 — INFO — layer 0, [1:533], loss=-19.2887
2025-07-21 21:18:30,569 — INFO — layer 0, [1:533], loss=-11.8296


epoch 1, layer 0:   5%|███▊                                                                    | 583/11016 [01:28<25:58,  6.69it/s, loss=-8.88]

2025-07-21 21:18:38,174 — INFO — layer 0, [1:583], loss=-13.6485
2025-07-21 21:18:38,174 — INFO — layer 0, [1:583], loss=-21.4904


epoch 1, layer 0:   6%|████▏                                                                   | 633/11016 [01:36<26:29,  6.53it/s, loss=-9.03]

2025-07-21 21:18:45,924 — INFO — layer 0, [1:633], loss=-13.5982
2025-07-21 21:18:45,924 — INFO — layer 0, [1:633], loss=-2.3022


epoch 1, layer 0:   6%|████▌                                                                    | 683/11016 [01:43<26:12,  6.57it/s, loss=-9.6]

2025-07-21 21:18:53,489 — INFO — layer 0, [1:683], loss=-7.4960
2025-07-21 21:18:53,489 — INFO — layer 0, [1:683], loss=-18.4009


epoch 1, layer 0:   7%|████▊                                                                    | 733/11016 [01:51<26:18,  6.51it/s, loss=-6.8]

2025-07-21 21:19:00,876 — INFO — layer 0, [1:733], loss=-17.3881
2025-07-21 21:19:00,876 — INFO — layer 0, [1:733], loss=-14.2503


epoch 1, layer 0:   7%|█████                                                                   | 783/11016 [01:58<24:58,  6.83it/s, loss=-17.4]

2025-07-21 21:19:08,385 — INFO — layer 0, [1:783], loss=-11.7870
2025-07-21 21:19:08,385 — INFO — layer 0, [1:783], loss=-10.4898


epoch 1, layer 0:   8%|█████▍                                                                  | 833/11016 [02:06<25:27,  6.67it/s, loss=-13.4]

2025-07-21 21:19:16,006 — INFO — layer 0, [1:833], loss=-8.9337
2025-07-21 21:19:16,006 — INFO — layer 0, [1:833], loss=-9.3211


epoch 1, layer 0:   8%|█████▊                                                                  | 883/11016 [02:13<23:44,  7.12it/s, loss=-18.1]

2025-07-21 21:19:23,530 — INFO — layer 0, [1:883], loss=-17.9911
2025-07-21 21:19:23,530 — INFO — layer 0, [1:883], loss=-17.3238


epoch 1, layer 0:   8%|██████                                                                  | 933/11016 [02:21<26:30,  6.34it/s, loss=-7.05]

2025-07-21 21:19:31,285 — INFO — layer 0, [1:933], loss=-11.0364
2025-07-21 21:19:31,285 — INFO — layer 0, [1:933], loss=-19.3539


epoch 1, layer 0:   9%|██████▍                                                                 | 983/11016 [02:29<27:14,  6.14it/s, loss=-17.5]

2025-07-21 21:19:39,164 — INFO — layer 0, [1:983], loss=-17.7832
2025-07-21 21:19:39,164 — INFO — layer 0, [1:983], loss=-14.5961


epoch 1, layer 0:   9%|██████▋                                                                | 1033/11016 [02:37<24:02,  6.92it/s, loss=-23.3]

2025-07-21 21:19:46,753 — INFO — layer 0, [1:1033], loss=-13.5257
2025-07-21 21:19:46,753 — INFO — layer 0, [1:1033], loss=-22.7761


epoch 1, layer 0:  10%|██████▉                                                                | 1083/11016 [02:44<24:04,  6.88it/s, loss=-16.6]

2025-07-21 21:19:54,395 — INFO — layer 0, [1:1083], loss=-24.6330
2025-07-21 21:19:54,395 — INFO — layer 0, [1:1083], loss=-17.6339


epoch 1, layer 0:  10%|███████▎                                                               | 1133/11016 [02:52<25:02,  6.58it/s, loss=-24.1]

2025-07-21 21:20:01,992 — INFO — layer 0, [1:1133], loss=-18.4207
2025-07-21 21:20:01,992 — INFO — layer 0, [1:1133], loss=-16.6070


epoch 1, layer 0:  11%|███████▋                                                                | 1183/11016 [03:00<25:25,  6.44it/s, loss=-8.6]

2025-07-21 21:20:09,741 — INFO — layer 0, [1:1183], loss=-13.9306
2025-07-21 21:20:09,742 — INFO — layer 0, [1:1183], loss=-12.8556


epoch 1, layer 0:  11%|███████▉                                                               | 1233/11016 [03:07<24:27,  6.67it/s, loss=-14.7]

2025-07-21 21:20:17,367 — INFO — layer 0, [1:1233], loss=-2.7868
2025-07-21 21:20:17,367 — INFO — layer 0, [1:1233], loss=-2.5464


epoch 1, layer 0:  12%|████████▎                                                              | 1283/11016 [03:15<24:17,  6.68it/s, loss=-7.24]

2025-07-21 21:20:24,865 — INFO — layer 0, [1:1283], loss=0.1843
2025-07-21 21:20:24,865 — INFO — layer 0, [1:1283], loss=-13.6963


epoch 1, layer 0:  12%|████████▌                                                              | 1333/11016 [03:22<24:35,  6.56it/s, loss=-24.1]

2025-07-21 21:20:32,392 — INFO — layer 0, [1:1333], loss=-15.7945
2025-07-21 21:20:32,392 — INFO — layer 0, [1:1333], loss=-18.8917


epoch 1, layer 0:  13%|████████▉                                                              | 1383/11016 [03:30<23:40,  6.78it/s, loss=-23.9]

2025-07-21 21:20:39,920 — INFO — layer 0, [1:1383], loss=-15.6687
2025-07-21 21:20:39,920 — INFO — layer 0, [1:1383], loss=-23.5657


epoch 1, layer 0:  13%|█████████▏                                                             | 1433/11016 [03:37<23:24,  6.82it/s, loss=-8.91]

2025-07-21 21:20:47,455 — INFO — layer 0, [1:1433], loss=-12.0229
2025-07-21 21:20:47,455 — INFO — layer 0, [1:1433], loss=-14.2942


epoch 1, layer 0:  13%|█████████▌                                                             | 1483/11016 [03:45<24:52,  6.39it/s, loss=-13.8]

2025-07-21 21:20:55,223 — INFO — layer 0, [1:1483], loss=-11.3063
2025-07-21 21:20:55,223 — INFO — layer 0, [1:1483], loss=-10.9463


epoch 1, layer 0:  14%|█████████▉                                                             | 1533/11016 [03:53<23:57,  6.60it/s, loss=-9.87]

2025-07-21 21:21:02,907 — INFO — layer 0, [1:1533], loss=-11.1845
2025-07-21 21:21:02,907 — INFO — layer 0, [1:1533], loss=-8.9053


epoch 1, layer 0:  14%|██████████▏                                                            | 1583/11016 [04:01<24:07,  6.52it/s, loss=-18.1]

2025-07-21 21:21:10,619 — INFO — layer 0, [1:1583], loss=-12.2253
2025-07-21 21:21:10,619 — INFO — layer 0, [1:1583], loss=-15.0478


epoch 1, layer 0:  15%|██████████▌                                                            | 1633/11016 [04:08<22:57,  6.81it/s, loss=-11.5]

2025-07-21 21:21:18,214 — INFO — layer 0, [1:1633], loss=-24.5913
2025-07-21 21:21:18,214 — INFO — layer 0, [1:1633], loss=-11.8166


epoch 1, layer 0:  15%|██████████▊                                                            | 1683/11016 [04:16<24:59,  6.23it/s, loss=-8.14]

2025-07-21 21:21:25,857 — INFO — layer 0, [1:1683], loss=-12.4724
2025-07-21 21:21:25,857 — INFO — layer 0, [1:1683], loss=-7.1364


epoch 1, layer 0:  16%|███████████▏                                                           | 1733/11016 [04:23<23:10,  6.68it/s, loss=-18.3]

2025-07-21 21:21:33,352 — INFO — layer 0, [1:1733], loss=-15.9997
2025-07-21 21:21:33,352 — INFO — layer 0, [1:1733], loss=-4.6523


epoch 1, layer 0:  16%|███████████▍                                                           | 1783/11016 [04:31<22:32,  6.82it/s, loss=-16.1]

2025-07-21 21:21:40,872 — INFO — layer 0, [1:1783], loss=-10.8636
2025-07-21 21:21:40,872 — INFO — layer 0, [1:1783], loss=-14.0258


epoch 1, layer 0:  17%|███████████▊                                                           | 1833/11016 [04:38<22:40,  6.75it/s, loss=-11.1]

2025-07-21 21:21:48,477 — INFO — layer 0, [1:1833], loss=-18.0744
2025-07-21 21:21:48,477 — INFO — layer 0, [1:1833], loss=-17.2188


epoch 1, layer 0:  17%|████████████▏                                                          | 1883/11016 [04:46<22:20,  6.81it/s, loss=-6.07]

2025-07-21 21:21:55,979 — INFO — layer 0, [1:1883], loss=-14.7947
2025-07-21 21:21:55,979 — INFO — layer 0, [1:1883], loss=-10.3728


epoch 1, layer 0:  18%|████████████▍                                                          | 1933/11016 [04:54<22:30,  6.72it/s, loss=-3.92]

2025-07-21 21:22:03,812 — INFO — layer 0, [1:1933], loss=-23.2053
2025-07-21 21:22:03,812 — INFO — layer 0, [1:1933], loss=-6.2627


epoch 1, layer 0:  18%|█████████████▏                                                           | 1983/11016 [05:01<22:03,  6.83it/s, loss=-14]

2025-07-21 21:22:11,525 — INFO — layer 0, [1:1983], loss=-6.4973
2025-07-21 21:22:11,525 — INFO — layer 0, [1:1983], loss=-13.9001


epoch 1, layer 0:  18%|█████████████                                                          | 2033/11016 [05:09<23:44,  6.31it/s, loss=-11.1]

2025-07-21 21:22:19,332 — INFO — layer 0, [1:2033], loss=-14.7354
2025-07-21 21:22:19,332 — INFO — layer 0, [1:2033], loss=-8.5828


epoch 1, layer 0:  19%|█████████████▍                                                         | 2083/11016 [05:17<21:36,  6.89it/s, loss=-16.8]

2025-07-21 21:22:26,843 — INFO — layer 0, [1:2083], loss=-12.1342
2025-07-21 21:22:26,843 — INFO — layer 0, [1:2083], loss=-17.4096


epoch 1, layer 0:  19%|█████████████▋                                                         | 2133/11016 [05:24<22:23,  6.61it/s, loss=-15.8]

2025-07-21 21:22:34,396 — INFO — layer 0, [1:2133], loss=-21.4767
2025-07-21 21:22:34,396 — INFO — layer 0, [1:2133], loss=-14.9172


epoch 1, layer 0:  20%|██████████████                                                         | 2183/11016 [05:32<21:35,  6.82it/s, loss=-4.85]

2025-07-21 21:22:41,981 — INFO — layer 0, [1:2183], loss=-9.0986
2025-07-21 21:22:41,981 — INFO — layer 0, [1:2183], loss=-25.2479


epoch 1, layer 0:  20%|██████████████▍                                                        | 2233/11016 [05:40<22:03,  6.64it/s, loss=-14.8]

2025-07-21 21:22:49,604 — INFO — layer 0, [1:2233], loss=-9.7310
2025-07-21 21:22:49,604 — INFO — layer 0, [1:2233], loss=-8.2776


epoch 1, layer 0:  21%|██████████████▋                                                        | 2283/11016 [05:47<21:08,  6.88it/s, loss=-3.31]

2025-07-21 21:22:57,087 — INFO — layer 0, [1:2283], loss=-12.0247
2025-07-21 21:22:57,087 — INFO — layer 0, [1:2283], loss=-23.0050


epoch 1, layer 0:  21%|███████████████                                                        | 2333/11016 [05:55<22:29,  6.44it/s, loss=-10.8]

2025-07-21 21:23:04,754 — INFO — layer 0, [1:2333], loss=-20.9846
2025-07-21 21:23:04,754 — INFO — layer 0, [1:2333], loss=-18.1975


epoch 1, layer 0:  22%|███████████████▎                                                       | 2383/11016 [06:02<21:17,  6.76it/s, loss=-17.9]

2025-07-21 21:23:12,350 — INFO — layer 0, [1:2383], loss=-7.4461
2025-07-21 21:23:12,350 — INFO — layer 0, [1:2383], loss=-18.2631


epoch 1, layer 0:  22%|███████████████▋                                                       | 2433/11016 [06:10<22:00,  6.50it/s, loss=-10.6]

2025-07-21 21:23:20,084 — INFO — layer 0, [1:2433], loss=-13.8583
2025-07-21 21:23:20,084 — INFO — layer 0, [1:2433], loss=-9.9863


epoch 1, layer 0:  23%|████████████████                                                       | 2483/11016 [06:18<21:15,  6.69it/s, loss=-7.42]

2025-07-21 21:23:27,699 — INFO — layer 0, [1:2483], loss=-12.3705
2025-07-21 21:23:27,699 — INFO — layer 0, [1:2483], loss=-24.6223


epoch 1, layer 0:  23%|████████████████▊                                                        | 2533/11016 [06:25<22:30,  6.28it/s, loss=-14]

2025-07-21 21:23:35,160 — INFO — layer 0, [1:2533], loss=-3.8849
2025-07-21 21:23:35,160 — INFO — layer 0, [1:2533], loss=-11.3204


epoch 1, layer 0:  23%|████████████████▋                                                      | 2583/11016 [06:33<21:17,  6.60it/s, loss=-19.3]

2025-07-21 21:23:42,856 — INFO — layer 0, [1:2583], loss=-13.1742
2025-07-21 21:23:42,856 — INFO — layer 0, [1:2583], loss=-10.4850


epoch 1, layer 0:  24%|████████████████▉                                                      | 2633/11016 [06:40<21:13,  6.58it/s, loss=-23.6]

2025-07-21 21:23:50,331 — INFO — layer 0, [1:2633], loss=-7.6435
2025-07-21 21:23:50,331 — INFO — layer 0, [1:2633], loss=-12.5950


epoch 1, layer 0:  24%|█████████████████▎                                                     | 2683/11016 [06:48<20:26,  6.80it/s, loss=-19.1]

2025-07-21 21:23:58,014 — INFO — layer 0, [1:2683], loss=-20.5005
2025-07-21 21:23:58,014 — INFO — layer 0, [1:2683], loss=-19.9447


epoch 1, layer 0:  25%|█████████████████▌                                                     | 2733/11016 [06:56<22:03,  6.26it/s, loss=-10.5]

2025-07-21 21:24:05,650 — INFO — layer 0, [1:2733], loss=-13.4366
2025-07-21 21:24:05,650 — INFO — layer 0, [1:2733], loss=-15.9056


epoch 1, layer 0:  25%|█████████████████▉                                                     | 2783/11016 [07:03<20:20,  6.74it/s, loss=-8.74]

2025-07-21 21:24:13,119 — INFO — layer 0, [1:2783], loss=-4.0997
2025-07-21 21:24:13,119 — INFO — layer 0, [1:2783], loss=-16.6614


epoch 1, layer 0:  26%|██████████████████▊                                                      | 2833/11016 [07:11<19:53,  6.86it/s, loss=-16]

2025-07-21 21:24:20,793 — INFO — layer 0, [1:2833], loss=-15.2693
2025-07-21 21:24:20,793 — INFO — layer 0, [1:2833], loss=-17.3585


epoch 1, layer 0:  26%|██████████████████▌                                                    | 2883/11016 [07:18<21:30,  6.30it/s, loss=-9.93]

2025-07-21 21:24:28,428 — INFO — layer 0, [1:2883], loss=-0.1403
2025-07-21 21:24:28,428 — INFO — layer 0, [1:2883], loss=-12.3527


epoch 1, layer 0:  27%|██████████████████▉                                                    | 2933/11016 [07:26<20:19,  6.63it/s, loss=-7.18]

2025-07-21 21:24:36,224 — INFO — layer 0, [1:2933], loss=-6.3034
2025-07-21 21:24:36,224 — INFO — layer 0, [1:2933], loss=-9.9237


epoch 1, layer 0:  27%|███████████████████▏                                                   | 2983/11016 [07:34<19:21,  6.91it/s, loss=-17.1]

2025-07-21 21:24:43,835 — INFO — layer 0, [1:2983], loss=-4.3803
2025-07-21 21:24:43,835 — INFO — layer 0, [1:2983], loss=-6.2856


epoch 1, layer 0:  28%|███████████████████▌                                                   | 3033/11016 [07:41<20:15,  6.57it/s, loss=-5.09]

2025-07-21 21:24:51,344 — INFO — layer 0, [1:3033], loss=-7.0811
2025-07-21 21:24:51,344 — INFO — layer 0, [1:3033], loss=-18.8356


epoch 1, layer 0:  28%|███████████████████▊                                                   | 3083/11016 [07:49<20:05,  6.58it/s, loss=-8.11]

2025-07-21 21:24:58,971 — INFO — layer 0, [1:3083], loss=-12.0168
2025-07-21 21:24:58,971 — INFO — layer 0, [1:3083], loss=-5.3619


epoch 1, layer 0:  28%|████████████████████▏                                                  | 3133/11016 [07:56<19:10,  6.85it/s, loss=-10.5]

2025-07-21 21:25:06,536 — INFO — layer 0, [1:3133], loss=-9.9944
2025-07-21 21:25:06,536 — INFO — layer 0, [1:3133], loss=-16.3158


epoch 1, layer 0:  29%|████████████████████▌                                                  | 3183/11016 [08:04<19:42,  6.62it/s, loss=-18.8]

2025-07-21 21:25:14,212 — INFO — layer 0, [1:3183], loss=-24.3526
2025-07-21 21:25:14,212 — INFO — layer 0, [1:3183], loss=-24.4184


epoch 1, layer 0:  29%|████████████████████▊                                                  | 3233/11016 [08:12<19:31,  6.64it/s, loss=-14.2]

2025-07-21 21:25:21,698 — INFO — layer 0, [1:3233], loss=-20.9308
2025-07-21 21:25:21,698 — INFO — layer 0, [1:3233], loss=-5.4145


epoch 1, layer 0:  30%|█████████████████████▏                                                 | 3283/11016 [08:19<20:05,  6.41it/s, loss=-10.1]

2025-07-21 21:25:29,244 — INFO — layer 0, [1:3283], loss=-18.5924
2025-07-21 21:25:29,244 — INFO — layer 0, [1:3283], loss=-15.7640


epoch 1, layer 0:  30%|█████████████████████▍                                                 | 3333/11016 [08:27<19:21,  6.61it/s, loss=-13.8]

2025-07-21 21:25:36,658 — INFO — layer 0, [1:3333], loss=-9.8117
2025-07-21 21:25:36,658 — INFO — layer 0, [1:3333], loss=-16.3689


epoch 1, layer 0:  31%|█████████████████████▊                                                 | 3383/11016 [08:34<19:02,  6.68it/s, loss=-16.6]

2025-07-21 21:25:44,257 — INFO — layer 0, [1:3383], loss=-6.4639
2025-07-21 21:25:44,257 — INFO — layer 0, [1:3383], loss=-11.8309


epoch 1, layer 0:  31%|██████████████████████▏                                                | 3433/11016 [08:42<18:15,  6.92it/s, loss=-18.7]

2025-07-21 21:25:51,773 — INFO — layer 0, [1:3433], loss=-10.5517
2025-07-21 21:25:51,773 — INFO — layer 0, [1:3433], loss=-25.0159


epoch 1, layer 0:  32%|██████████████████████▍                                                | 3483/11016 [08:49<18:49,  6.67it/s, loss=-7.34]

2025-07-21 21:25:59,409 — INFO — layer 0, [1:3483], loss=-14.8070
2025-07-21 21:25:59,409 — INFO — layer 0, [1:3483], loss=-23.7682


epoch 1, layer 0:  32%|██████████████████████▊                                                | 3533/11016 [08:57<18:36,  6.70it/s, loss=-11.6]

2025-07-21 21:26:07,098 — INFO — layer 0, [1:3533], loss=-26.4365
2025-07-21 21:26:07,098 — INFO — layer 0, [1:3533], loss=-26.1911


epoch 1, layer 0:  33%|███████████████████████▋                                                 | 3583/11016 [09:05<18:14,  6.79it/s, loss=-25]

2025-07-21 21:26:14,783 — INFO — layer 0, [1:3583], loss=-16.9963
2025-07-21 21:26:14,783 — INFO — layer 0, [1:3583], loss=-9.7691


epoch 1, layer 0:  33%|███████████████████████▍                                               | 3633/11016 [09:12<18:42,  6.58it/s, loss=-17.5]

2025-07-21 21:26:22,473 — INFO — layer 0, [1:3633], loss=-13.5336
2025-07-21 21:26:22,473 — INFO — layer 0, [1:3633], loss=-3.5910


epoch 1, layer 0:  33%|███████████████████████▋                                               | 3683/11016 [09:20<18:16,  6.69it/s, loss=-15.9]

2025-07-21 21:26:29,976 — INFO — layer 0, [1:3683], loss=-3.6996
2025-07-21 21:26:29,976 — INFO — layer 0, [1:3683], loss=-7.1750


epoch 1, layer 0:  34%|████████████████████████                                               | 3733/11016 [09:27<18:12,  6.67it/s, loss=-17.3]

2025-07-21 21:26:37,538 — INFO — layer 0, [1:3733], loss=-18.7637
2025-07-21 21:26:37,538 — INFO — layer 0, [1:3733], loss=-9.2416


epoch 1, layer 0:  34%|████████████████████████▍                                              | 3783/11016 [09:35<18:35,  6.49it/s, loss=-14.2]

2025-07-21 21:26:44,978 — INFO — layer 0, [1:3783], loss=-26.1909
2025-07-21 21:26:44,978 — INFO — layer 0, [1:3783], loss=-4.9429


epoch 1, layer 0:  35%|████████████████████████▋                                              | 3833/11016 [09:43<18:37,  6.43it/s, loss=-4.81]

2025-07-21 21:26:52,583 — INFO — layer 0, [1:3833], loss=-7.9118
2025-07-21 21:26:52,583 — INFO — layer 0, [1:3833], loss=-18.9993


epoch 1, layer 0:  35%|█████████████████████████                                              | 3883/11016 [09:50<17:49,  6.67it/s, loss=-7.88]

2025-07-21 21:27:00,086 — INFO — layer 0, [1:3883], loss=-22.5714
2025-07-21 21:27:00,086 — INFO — layer 0, [1:3883], loss=-19.5849


epoch 1, layer 0:  36%|█████████████████████████▎                                             | 3933/11016 [09:58<18:32,  6.36it/s, loss=-2.49]

2025-07-21 21:27:07,969 — INFO — layer 0, [1:3933], loss=-4.8710
2025-07-21 21:27:07,969 — INFO — layer 0, [1:3933], loss=-17.3092


epoch 1, layer 0:  36%|██████████████████████████▍                                              | 3983/11016 [10:05<17:49,  6.58it/s, loss=-18]

2025-07-21 21:27:15,533 — INFO — layer 0, [1:3983], loss=-7.9829
2025-07-21 21:27:15,533 — INFO — layer 0, [1:3983], loss=-11.8721


epoch 1, layer 0:  37%|█████████████████████████▉                                             | 4033/11016 [10:13<18:57,  6.14it/s, loss=-4.68]

2025-07-21 21:27:23,096 — INFO — layer 0, [1:4033], loss=-9.8204
2025-07-21 21:27:23,096 — INFO — layer 0, [1:4033], loss=-3.2194


epoch 1, layer 0:  37%|██████████████████████████▎                                            | 4083/11016 [10:21<17:22,  6.65it/s, loss=-5.92]

2025-07-21 21:27:30,654 — INFO — layer 0, [1:4083], loss=-19.1131
2025-07-21 21:27:30,654 — INFO — layer 0, [1:4083], loss=-22.2000


epoch 1, layer 0:  38%|██████████████████████████▋                                            | 4133/11016 [10:28<16:53,  6.79it/s, loss=-4.58]

2025-07-21 21:27:38,203 — INFO — layer 0, [1:4133], loss=-23.0563
2025-07-21 21:27:38,203 — INFO — layer 0, [1:4133], loss=-13.5194


epoch 1, layer 0:  38%|██████████████████████████▉                                            | 4183/11016 [10:36<17:40,  6.44it/s, loss=-18.2]

2025-07-21 21:27:45,728 — INFO — layer 0, [1:4183], loss=-14.8579
2025-07-21 21:27:45,728 — INFO — layer 0, [1:4183], loss=-25.0741


epoch 1, layer 0:  38%|████████████████████████████                                             | 4233/11016 [10:43<16:16,  6.94it/s, loss=-19]

2025-07-21 21:27:53,359 — INFO — layer 0, [1:4233], loss=-18.9460
2025-07-21 21:27:53,359 — INFO — layer 0, [1:4233], loss=-20.8503


epoch 1, layer 0:  39%|███████████████████████████▌                                           | 4283/11016 [10:51<16:29,  6.81it/s, loss=-14.6]

2025-07-21 21:28:00,854 — INFO — layer 0, [1:4283], loss=-7.2611
2025-07-21 21:28:00,854 — INFO — layer 0, [1:4283], loss=-25.2588


epoch 1, layer 0:  39%|███████████████████████████▉                                           | 4333/11016 [10:58<17:23,  6.40it/s, loss=-10.3]

2025-07-21 21:28:08,435 — INFO — layer 0, [1:4333], loss=-12.2166
2025-07-21 21:28:08,435 — INFO — layer 0, [1:4333], loss=-11.9135


epoch 1, layer 0:  40%|████████████████████████████▏                                          | 4383/11016 [11:06<16:54,  6.54it/s, loss=-9.54]

2025-07-21 21:28:16,096 — INFO — layer 0, [1:4383], loss=-10.4898
2025-07-21 21:28:16,096 — INFO — layer 0, [1:4383], loss=-20.8619


epoch 1, layer 0:  40%|████████████████████████████▌                                          | 4433/11016 [11:14<16:52,  6.50it/s, loss=-10.3]

2025-07-21 21:28:23,668 — INFO — layer 0, [1:4433], loss=-15.3057
2025-07-21 21:28:23,668 — INFO — layer 0, [1:4433], loss=-5.6195


epoch 1, layer 0:  41%|████████████████████████████▉                                          | 4483/11016 [11:21<16:09,  6.74it/s, loss=-9.74]

2025-07-21 21:28:31,245 — INFO — layer 0, [1:4483], loss=-8.8802
2025-07-21 21:28:31,245 — INFO — layer 0, [1:4483], loss=-12.4544


epoch 1, layer 0:  41%|█████████████████████████████▋                                          | 4533/11016 [11:29<16:23,  6.59it/s, loss=-7.9]

2025-07-21 21:28:38,851 — INFO — layer 0, [1:4533], loss=-19.1428
2025-07-21 21:28:38,851 — INFO — layer 0, [1:4533], loss=-10.0447


epoch 1, layer 0:  42%|█████████████████████████████▌                                         | 4583/11016 [11:36<15:54,  6.74it/s, loss=-11.2]

2025-07-21 21:28:46,322 — INFO — layer 0, [1:4583], loss=-2.4096
2025-07-21 21:28:46,322 — INFO — layer 0, [1:4583], loss=-8.7383


epoch 1, layer 0:  42%|█████████████████████████████▊                                         | 4633/11016 [11:44<16:47,  6.33it/s, loss=-13.9]

2025-07-21 21:28:53,907 — INFO — layer 0, [1:4633], loss=-6.4208
2025-07-21 21:28:53,907 — INFO — layer 0, [1:4633], loss=-16.5205


epoch 1, layer 0:  43%|██████████████████████████████▏                                        | 4683/11016 [11:51<16:28,  6.41it/s, loss=-8.76]

2025-07-21 21:29:01,437 — INFO — layer 0, [1:4683], loss=-18.8765
2025-07-21 21:29:01,437 — INFO — layer 0, [1:4683], loss=-10.1838


epoch 1, layer 0:  43%|███████████████████████████████▎                                         | 4733/11016 [11:59<15:19,  6.83it/s, loss=-15]

2025-07-21 21:29:09,093 — INFO — layer 0, [1:4733], loss=-14.0722
2025-07-21 21:29:09,093 — INFO — layer 0, [1:4733], loss=-16.8782


epoch 1, layer 0:  43%|██████████████████████████████▊                                        | 4783/11016 [12:07<15:48,  6.57it/s, loss=-18.2]

2025-07-21 21:29:16,604 — INFO — layer 0, [1:4783], loss=-19.9671
2025-07-21 21:29:16,604 — INFO — layer 0, [1:4783], loss=-22.9320


epoch 1, layer 0:  44%|███████████████████████████████▏                                       | 4833/11016 [12:14<15:25,  6.68it/s, loss=-28.9]

2025-07-21 21:29:24,235 — INFO — layer 0, [1:4833], loss=-24.7240
2025-07-21 21:29:24,235 — INFO — layer 0, [1:4833], loss=-23.6658


epoch 1, layer 0:  44%|███████████████████████████████▉                                        | 4883/11016 [12:22<15:28,  6.61it/s, loss=-4.7]

2025-07-21 21:29:31,783 — INFO — layer 0, [1:4883], loss=-8.3885
2025-07-21 21:29:31,783 — INFO — layer 0, [1:4883], loss=-18.4956


epoch 1, layer 0:  45%|███████████████████████████████▊                                       | 4933/11016 [12:29<14:57,  6.78it/s, loss=-22.5]

2025-07-21 21:29:39,354 — INFO — layer 0, [1:4933], loss=-5.5702
2025-07-21 21:29:39,354 — INFO — layer 0, [1:4933], loss=-26.2615


epoch 1, layer 0:  45%|████████████████████████████████                                       | 4983/11016 [12:37<14:47,  6.79it/s, loss=-6.47]

2025-07-21 21:29:47,227 — INFO — layer 0, [1:4983], loss=-7.9904
2025-07-21 21:29:47,227 — INFO — layer 0, [1:4983], loss=-11.6629


epoch 1, layer 0:  46%|████████████████████████████████▍                                      | 5033/11016 [12:45<15:13,  6.55it/s, loss=-22.8]

2025-07-21 21:29:54,876 — INFO — layer 0, [1:5033], loss=-10.5559
2025-07-21 21:29:54,876 — INFO — layer 0, [1:5033], loss=-23.6248


epoch 1, layer 0:  46%|████████████████████████████████▊                                      | 5083/11016 [12:52<14:08,  7.00it/s, loss=-11.8]

2025-07-21 21:30:02,435 — INFO — layer 0, [1:5083], loss=-14.3542
2025-07-21 21:30:02,435 — INFO — layer 0, [1:5083], loss=-14.4103


epoch 1, layer 0:  47%|█████████████████████████████████                                      | 5133/11016 [13:00<16:27,  5.96it/s, loss=-11.4]

2025-07-21 21:30:10,246 — INFO — layer 0, [1:5133], loss=-18.4998
2025-07-21 21:30:10,247 — INFO — layer 0, [1:5133], loss=-12.3575


epoch 1, layer 0:  47%|█████████████████████████████████▍                                     | 5183/11016 [13:08<14:28,  6.72it/s, loss=-16.5]

2025-07-21 21:30:17,756 — INFO — layer 0, [1:5183], loss=-14.2332
2025-07-21 21:30:17,756 — INFO — layer 0, [1:5183], loss=-23.0672


epoch 1, layer 0:  48%|█████████████████████████████████▋                                     | 5233/11016 [13:15<16:54,  5.70it/s, loss=-15.7]

2025-07-21 21:30:25,408 — INFO — layer 0, [1:5233], loss=-18.7663
2025-07-21 21:30:25,408 — INFO — layer 0, [1:5233], loss=-14.0194


epoch 1, layer 0:  48%|██████████████████████████████████                                     | 5283/11016 [13:23<14:28,  6.60it/s, loss=-21.8]

2025-07-21 21:30:32,986 — INFO — layer 0, [1:5283], loss=-13.5915
2025-07-21 21:30:32,986 — INFO — layer 0, [1:5283], loss=-14.2841


epoch 1, layer 0:  48%|██████████████████████████████████▎                                    | 5333/11016 [13:31<13:15,  7.14it/s, loss=-18.7]

2025-07-21 21:30:40,533 — INFO — layer 0, [1:5333], loss=-11.4584
2025-07-21 21:30:40,533 — INFO — layer 0, [1:5333], loss=-15.1393


epoch 1, layer 0:  49%|███████████████████████████████████▋                                     | 5383/11016 [13:38<14:52,  6.31it/s, loss=-15]

2025-07-21 21:30:48,219 — INFO — layer 0, [1:5383], loss=-8.6162
2025-07-21 21:30:48,219 — INFO — layer 0, [1:5383], loss=-13.4500


epoch 1, layer 0:  49%|███████████████████████████████████                                    | 5433/11016 [13:46<13:38,  6.82it/s, loss=-12.2]

2025-07-21 21:30:55,763 — INFO — layer 0, [1:5433], loss=-19.2321
2025-07-21 21:30:55,763 — INFO — layer 0, [1:5433], loss=-19.1757


epoch 1, layer 0:  50%|███████████████████████████████████▎                                   | 5483/11016 [13:53<14:26,  6.39it/s, loss=-21.8]

2025-07-21 21:31:03,420 — INFO — layer 0, [1:5483], loss=-6.1557
2025-07-21 21:31:03,420 — INFO — layer 0, [1:5483], loss=-17.7122


epoch 1, layer 0:  50%|████████████████████████████████████▋                                    | 5533/11016 [14:01<13:18,  6.87it/s, loss=-21]

2025-07-21 21:31:11,093 — INFO — layer 0, [1:5533], loss=-11.3521
2025-07-21 21:31:11,093 — INFO — layer 0, [1:5533], loss=-21.3990


epoch 1, layer 0:  51%|███████████████████████████████████▉                                   | 5583/11016 [14:09<13:37,  6.64it/s, loss=-12.5]

2025-07-21 21:31:18,685 — INFO — layer 0, [1:5583], loss=-15.9257
2025-07-21 21:31:18,685 — INFO — layer 0, [1:5583], loss=-10.6826


epoch 1, layer 0:  51%|████████████████████████████████████▎                                  | 5633/11016 [14:16<14:17,  6.28it/s, loss=-14.2]

2025-07-21 21:31:26,259 — INFO — layer 0, [1:5633], loss=-7.7807
2025-07-21 21:31:26,259 — INFO — layer 0, [1:5633], loss=-18.5324


epoch 1, layer 0:  52%|████████████████████████████████████▋                                  | 5683/11016 [14:24<13:12,  6.73it/s, loss=-23.5]

2025-07-21 21:31:33,873 — INFO — layer 0, [1:5683], loss=-5.7794
2025-07-21 21:31:33,873 — INFO — layer 0, [1:5683], loss=-18.3939


epoch 1, layer 0:  52%|████████████████████████████████████▉                                  | 5733/11016 [14:31<12:39,  6.96it/s, loss=-23.4]

2025-07-21 21:31:41,351 — INFO — layer 0, [1:5733], loss=-19.5639
2025-07-21 21:31:41,351 — INFO — layer 0, [1:5733], loss=-17.9523


epoch 1, layer 0:  52%|█████████████████████████████████████▎                                 | 5783/11016 [14:39<13:05,  6.66it/s, loss=-24.6]

2025-07-21 21:31:48,932 — INFO — layer 0, [1:5783], loss=-11.0175
2025-07-21 21:31:48,932 — INFO — layer 0, [1:5783], loss=-9.7171


epoch 1, layer 0:  53%|█████████████████████████████████████▌                                 | 5833/11016 [14:47<12:58,  6.65it/s, loss=-17.3]

2025-07-21 21:31:56,521 — INFO — layer 0, [1:5833], loss=-24.4268
2025-07-21 21:31:56,521 — INFO — layer 0, [1:5833], loss=-10.6939


epoch 1, layer 0:  53%|█████████████████████████████████████▉                                 | 5883/11016 [14:54<13:00,  6.58it/s, loss=-16.6]

2025-07-21 21:32:04,083 — INFO — layer 0, [1:5883], loss=-20.3499
2025-07-21 21:32:04,083 — INFO — layer 0, [1:5883], loss=-7.4139


epoch 1, layer 0:  54%|███████████████████████████████████████▎                                 | 5933/11016 [15:02<12:37,  6.71it/s, loss=-17]

2025-07-21 21:32:11,761 — INFO — layer 0, [1:5933], loss=-11.5946
2025-07-21 21:32:11,761 — INFO — layer 0, [1:5933], loss=-3.8171


epoch 1, layer 0:  54%|██████████████████████████████████████▌                                | 5983/11016 [15:09<13:12,  6.35it/s, loss=-18.8]

2025-07-21 21:32:19,441 — INFO — layer 0, [1:5983], loss=-8.7932
2025-07-21 21:32:19,441 — INFO — layer 0, [1:5983], loss=-14.8635


epoch 1, layer 0:  55%|███████████████████████████████████████▍                                | 6033/11016 [15:17<12:50,  6.46it/s, loss=-4.8]

2025-07-21 21:32:27,165 — INFO — layer 0, [1:6033], loss=-18.8658
2025-07-21 21:32:27,165 — INFO — layer 0, [1:6033], loss=-10.7819


epoch 1, layer 0:  55%|███████████████████████████████████████▏                               | 6083/11016 [15:25<12:03,  6.82it/s, loss=-14.6]

2025-07-21 21:32:34,683 — INFO — layer 0, [1:6083], loss=-15.2416
2025-07-21 21:32:34,683 — INFO — layer 0, [1:6083], loss=-5.5399


epoch 1, layer 0:  56%|███████████████████████████████████████▌                               | 6133/11016 [15:32<12:47,  6.36it/s, loss=-24.5]

2025-07-21 21:32:42,302 — INFO — layer 0, [1:6133], loss=-6.0931
2025-07-21 21:32:42,302 — INFO — layer 0, [1:6133], loss=-22.1088


epoch 1, layer 0:  56%|███████████████████████████████████████▊                               | 6183/11016 [15:40<13:27,  5.99it/s, loss=-14.5]

2025-07-21 21:32:49,798 — INFO — layer 0, [1:6183], loss=-12.8028
2025-07-21 21:32:49,798 — INFO — layer 0, [1:6183], loss=-11.0718


epoch 1, layer 0:  57%|█████████████████████████████████████████▎                               | 6233/11016 [15:47<11:35,  6.87it/s, loss=-14]

2025-07-21 21:32:57,260 — INFO — layer 0, [1:6233], loss=-21.4014
2025-07-21 21:32:57,260 — INFO — layer 0, [1:6233], loss=-17.8054


epoch 1, layer 0:  57%|████████████████████████████████████████▍                              | 6283/11016 [15:55<12:06,  6.52it/s, loss=-7.07]

2025-07-21 21:33:04,851 — INFO — layer 0, [1:6283], loss=-18.2341
2025-07-21 21:33:04,851 — INFO — layer 0, [1:6283], loss=-20.4985


epoch 1, layer 0:  57%|████████████████████████████████████████▊                              | 6333/11016 [16:03<11:57,  6.53it/s, loss=-5.54]

2025-07-21 21:33:12,601 — INFO — layer 0, [1:6333], loss=-5.8088
2025-07-21 21:33:12,601 — INFO — layer 0, [1:6333], loss=-13.5752


epoch 1, layer 0:  58%|██████████████████████████████████████████▎                              | 6383/11016 [16:10<11:28,  6.73it/s, loss=-11]

2025-07-21 21:33:20,160 — INFO — layer 0, [1:6383], loss=-8.7810
2025-07-21 21:33:20,160 — INFO — layer 0, [1:6383], loss=-15.6166


epoch 1, layer 0:  58%|█████████████████████████████████████████▍                             | 6433/11016 [16:18<11:37,  6.57it/s, loss=-18.8]

2025-07-21 21:33:27,956 — INFO — layer 0, [1:6433], loss=-14.0146
2025-07-21 21:33:27,956 — INFO — layer 0, [1:6433], loss=-23.6406


epoch 1, layer 0:  59%|█████████████████████████████████████████▊                             | 6483/11016 [16:26<11:15,  6.71it/s, loss=-23.8]

2025-07-21 21:33:35,584 — INFO — layer 0, [1:6483], loss=-21.6478
2025-07-21 21:33:35,584 — INFO — layer 0, [1:6483], loss=-10.5733


epoch 1, layer 0:  59%|██████████████████████████████████████████                             | 6533/11016 [16:33<12:34,  5.94it/s, loss=-5.65]

2025-07-21 21:33:43,236 — INFO — layer 0, [1:6533], loss=-21.3465
2025-07-21 21:33:43,236 — INFO — layer 0, [1:6533], loss=-7.8496


epoch 1, layer 0:  60%|██████████████████████████████████████████▍                            | 6583/11016 [16:41<11:50,  6.24it/s, loss=-15.8]

2025-07-21 21:33:50,765 — INFO — layer 0, [1:6583], loss=-21.6692
2025-07-21 21:33:50,765 — INFO — layer 0, [1:6583], loss=-18.4987


epoch 1, layer 0:  60%|██████████████████████████████████████████▊                            | 6633/11016 [16:48<10:29,  6.96it/s, loss=-16.8]

2025-07-21 21:33:58,324 — INFO — layer 0, [1:6633], loss=-17.5567
2025-07-21 21:33:58,324 — INFO — layer 0, [1:6633], loss=-8.9937


epoch 1, layer 0:  61%|███████████████████████████████████████████                            | 6683/11016 [16:56<10:54,  6.62it/s, loss=-7.79]

2025-07-21 21:34:06,129 — INFO — layer 0, [1:6683], loss=-13.8344
2025-07-21 21:34:06,129 — INFO — layer 0, [1:6683], loss=-17.3259


epoch 1, layer 0:  61%|███████████████████████████████████████████▍                           | 6733/11016 [17:04<11:07,  6.42it/s, loss=-11.3]

2025-07-21 21:34:13,847 — INFO — layer 0, [1:6733], loss=-19.4227
2025-07-21 21:34:13,847 — INFO — layer 0, [1:6733], loss=-10.7759


epoch 1, layer 0:  62%|███████████████████████████████████████████▋                           | 6783/11016 [17:11<10:25,  6.77it/s, loss=-9.95]

2025-07-21 21:34:21,513 — INFO — layer 0, [1:6783], loss=-20.1004
2025-07-21 21:34:21,513 — INFO — layer 0, [1:6783], loss=-22.4098


epoch 1, layer 0:  62%|████████████████████████████████████████████                           | 6833/11016 [17:19<10:18,  6.77it/s, loss=-16.8]

2025-07-21 21:34:29,203 — INFO — layer 0, [1:6833], loss=-8.8100
2025-07-21 21:34:29,203 — INFO — layer 0, [1:6833], loss=-6.2088


epoch 1, layer 0:  62%|████████████████████████████████████████████▎                          | 6883/11016 [17:27<09:59,  6.90it/s, loss=-10.1]

2025-07-21 21:34:36,848 — INFO — layer 0, [1:6883], loss=-10.5761
2025-07-21 21:34:36,848 — INFO — layer 0, [1:6883], loss=-12.7360


epoch 1, layer 0:  63%|████████████████████████████████████████████▋                          | 6933/11016 [17:35<10:22,  6.56it/s, loss=-14.7]

2025-07-21 21:34:44,534 — INFO — layer 0, [1:6933], loss=-19.0038
2025-07-21 21:34:44,534 — INFO — layer 0, [1:6933], loss=-18.1211


epoch 1, layer 0:  63%|█████████████████████████████████████████████                          | 6983/11016 [17:42<09:37,  6.98it/s, loss=-7.37]

2025-07-21 21:34:52,189 — INFO — layer 0, [1:6983], loss=-20.2430
2025-07-21 21:34:52,189 — INFO — layer 0, [1:6983], loss=-17.8624


epoch 1, layer 0:  64%|█████████████████████████████████████████████▎                         | 7033/11016 [17:50<10:03,  6.60it/s, loss=-3.65]

2025-07-21 21:34:59,703 — INFO — layer 0, [1:7033], loss=-11.5530
2025-07-21 21:34:59,703 — INFO — layer 0, [1:7033], loss=-11.7114


epoch 1, layer 0:  64%|█████████████████████████████████████████████▋                         | 7083/11016 [17:57<09:41,  6.76it/s, loss=-16.9]

2025-07-21 21:35:07,250 — INFO — layer 0, [1:7083], loss=-8.3673
2025-07-21 21:35:07,250 — INFO — layer 0, [1:7083], loss=-5.2980


epoch 1, layer 0:  65%|█████████████████████████████████████████████▉                         | 7133/11016 [18:05<10:16,  6.30it/s, loss=-19.5]

2025-07-21 21:35:14,904 — INFO — layer 0, [1:7133], loss=-20.6457
2025-07-21 21:35:14,904 — INFO — layer 0, [1:7133], loss=-7.4721


epoch 1, layer 0:  65%|██████████████████████████████████████████████▎                        | 7183/11016 [18:13<10:46,  5.93it/s, loss=-22.2]

2025-07-21 21:35:22,562 — INFO — layer 0, [1:7183], loss=-17.0690
2025-07-21 21:35:22,562 — INFO — layer 0, [1:7183], loss=-11.1469


epoch 1, layer 0:  66%|██████████████████████████████████████████████▌                        | 7233/11016 [18:20<09:46,  6.44it/s, loss=-26.1]

2025-07-21 21:35:30,081 — INFO — layer 0, [1:7233], loss=-16.3012
2025-07-21 21:35:30,081 — INFO — layer 0, [1:7233], loss=-11.3428


epoch 1, layer 0:  66%|██████████████████████████████████████████████▉                        | 7283/11016 [18:28<09:07,  6.82it/s, loss=-8.74]

2025-07-21 21:35:37,814 — INFO — layer 0, [1:7283], loss=-18.3477
2025-07-21 21:35:37,814 — INFO — layer 0, [1:7283], loss=-8.6128


epoch 1, layer 0:  67%|███████████████████████████████████████████████▎                       | 7333/11016 [18:35<09:18,  6.60it/s, loss=-15.2]

2025-07-21 21:35:45,435 — INFO — layer 0, [1:7333], loss=-16.8428
2025-07-21 21:35:45,435 — INFO — layer 0, [1:7333], loss=-13.7940


epoch 1, layer 0:  67%|███████████████████████████████████████████████▌                       | 7383/11016 [18:43<09:16,  6.52it/s, loss=-6.92]

2025-07-21 21:35:53,114 — INFO — layer 0, [1:7383], loss=-13.8493
2025-07-21 21:35:53,114 — INFO — layer 0, [1:7383], loss=-15.3373


epoch 1, layer 0:  67%|███████████████████████████████████████████████▉                       | 7433/11016 [18:51<09:15,  6.45it/s, loss=-17.5]

2025-07-21 21:36:00,696 — INFO — layer 0, [1:7433], loss=-5.5113
2025-07-21 21:36:00,696 — INFO — layer 0, [1:7433], loss=-21.6564


epoch 1, layer 0:  68%|████████████████████████████████████████████████▏                      | 7483/11016 [18:58<09:21,  6.29it/s, loss=-8.24]

2025-07-21 21:36:08,434 — INFO — layer 0, [1:7483], loss=-7.1144
2025-07-21 21:36:08,434 — INFO — layer 0, [1:7483], loss=-8.1199


epoch 1, layer 0:  68%|████████████████████████████████████████████████▌                      | 7533/11016 [19:06<08:34,  6.77it/s, loss=-23.3]

2025-07-21 21:36:16,072 — INFO — layer 0, [1:7533], loss=-7.5418
2025-07-21 21:36:16,072 — INFO — layer 0, [1:7533], loss=-5.9093


epoch 1, layer 0:  69%|████████████████████████████████████████████████▊                      | 7583/11016 [19:14<08:17,  6.90it/s, loss=-19.6]

2025-07-21 21:36:23,665 — INFO — layer 0, [1:7583], loss=-6.9375
2025-07-21 21:36:23,665 — INFO — layer 0, [1:7583], loss=-23.7045


epoch 1, layer 0:  69%|█████████████████████████████████████████████████▏                     | 7633/11016 [19:21<08:24,  6.71it/s, loss=-19.6]

2025-07-21 21:36:31,265 — INFO — layer 0, [1:7633], loss=-18.9537
2025-07-21 21:36:31,265 — INFO — layer 0, [1:7633], loss=-7.2125


epoch 1, layer 0:  70%|█████████████████████████████████████████████████▌                     | 7683/11016 [19:29<08:25,  6.60it/s, loss=-19.3]

2025-07-21 21:36:38,815 — INFO — layer 0, [1:7683], loss=-24.2976
2025-07-21 21:36:38,815 — INFO — layer 0, [1:7683], loss=-16.7673


epoch 1, layer 0:  70%|█████████████████████████████████████████████████▊                     | 7733/11016 [19:37<08:34,  6.38it/s, loss=-10.4]

2025-07-21 21:36:46,555 — INFO — layer 0, [1:7733], loss=-17.7953
2025-07-21 21:36:46,555 — INFO — layer 0, [1:7733], loss=-12.0067


epoch 1, layer 0:  71%|██████████████████████████████████████████████████▏                    | 7783/11016 [19:44<08:19,  6.47it/s, loss=-10.4]

2025-07-21 21:36:54,214 — INFO — layer 0, [1:7783], loss=-12.3814
2025-07-21 21:36:54,214 — INFO — layer 0, [1:7783], loss=-4.5100


epoch 1, layer 0:  71%|██████████████████████████████████████████████████▍                    | 7833/11016 [19:52<08:09,  6.50it/s, loss=-14.2]

2025-07-21 21:37:01,743 — INFO — layer 0, [1:7833], loss=-11.1327
2025-07-21 21:37:01,743 — INFO — layer 0, [1:7833], loss=-8.2679


epoch 1, layer 0:  72%|██████████████████████████████████████████████████▊                    | 7883/11016 [19:59<08:20,  6.26it/s, loss=-21.9]

2025-07-21 21:37:09,384 — INFO — layer 0, [1:7883], loss=-6.9497
2025-07-21 21:37:09,384 — INFO — layer 0, [1:7883], loss=-9.8471


epoch 1, layer 0:  72%|███████████████████████████████████████████████████▏                   | 7933/11016 [20:07<08:46,  5.86it/s, loss=-16.4]

2025-07-21 21:37:16,810 — INFO — layer 0, [1:7933], loss=-4.9836
2025-07-21 21:37:16,810 — INFO — layer 0, [1:7933], loss=-14.5308


epoch 1, layer 0:  72%|███████████████████████████████████████████████████▍                   | 7983/11016 [20:14<07:17,  6.94it/s, loss=-9.96]

2025-07-21 21:37:24,349 — INFO — layer 0, [1:7983], loss=-19.3546
2025-07-21 21:37:24,349 — INFO — layer 0, [1:7983], loss=-8.4750


epoch 1, layer 0:  73%|████████████████████████████████████████████████████▌                   | 8033/11016 [20:22<07:30,  6.63it/s, loss=-2.7]

2025-07-21 21:37:31,858 — INFO — layer 0, [1:8033], loss=-14.2985
2025-07-21 21:37:31,858 — INFO — layer 0, [1:8033], loss=-10.3216


epoch 1, layer 0:  73%|████████████████████████████████████████████████████                   | 8083/11016 [20:29<07:47,  6.28it/s, loss=-11.7]

2025-07-21 21:37:39,476 — INFO — layer 0, [1:8083], loss=-22.4241
2025-07-21 21:37:39,476 — INFO — layer 0, [1:8083], loss=-6.0704


epoch 1, layer 0:  74%|████████████████████████████████████████████████████▍                  | 8133/11016 [20:37<07:34,  6.34it/s, loss=-7.75]

2025-07-21 21:37:47,041 — INFO — layer 0, [1:8133], loss=-19.1353
2025-07-21 21:37:47,041 — INFO — layer 0, [1:8133], loss=-11.0613


epoch 1, layer 0:  74%|████████████████████████████████████████████████████▋                  | 8183/11016 [20:45<07:06,  6.64it/s, loss=-4.84]

2025-07-21 21:37:54,783 — INFO — layer 0, [1:8183], loss=-6.9157
2025-07-21 21:37:54,783 — INFO — layer 0, [1:8183], loss=-10.4969


epoch 1, layer 0:  75%|█████████████████████████████████████████████████████                  | 8233/11016 [20:52<06:30,  7.13it/s, loss=-14.4]

2025-07-21 21:38:02,391 — INFO — layer 0, [1:8233], loss=-23.1622
2025-07-21 21:38:02,391 — INFO — layer 0, [1:8233], loss=-21.7061


epoch 1, layer 0:  75%|█████████████████████████████████████████████████████▍                 | 8283/11016 [21:00<06:48,  6.68it/s, loss=-18.5]

2025-07-21 21:38:09,900 — INFO — layer 0, [1:8283], loss=-16.5159
2025-07-21 21:38:09,900 — INFO — layer 0, [1:8283], loss=-2.6888


epoch 1, layer 0:  76%|███████████████████████████████████████████████████████▏                 | 8333/11016 [21:07<06:45,  6.61it/s, loss=-24]

2025-07-21 21:38:17,460 — INFO — layer 0, [1:8333], loss=-7.8643
2025-07-21 21:38:17,460 — INFO — layer 0, [1:8333], loss=-15.4834


epoch 1, layer 0:  76%|██████████████████████████████████████████████████████                 | 8383/11016 [21:15<06:21,  6.90it/s, loss=-9.39]

2025-07-21 21:38:25,047 — INFO — layer 0, [1:8383], loss=-10.2535
2025-07-21 21:38:25,047 — INFO — layer 0, [1:8383], loss=-3.2572


epoch 1, layer 0:  77%|██████████████████████████████████████████████████████▎                | 8433/11016 [21:23<06:22,  6.76it/s, loss=-14.5]

2025-07-21 21:38:32,610 — INFO — layer 0, [1:8433], loss=-14.9752
2025-07-21 21:38:32,610 — INFO — layer 0, [1:8433], loss=-5.7497


epoch 1, layer 0:  77%|██████████████████████████████████████████████████████▋                | 8483/11016 [21:30<06:46,  6.23it/s, loss=-11.8]

2025-07-21 21:38:40,163 — INFO — layer 0, [1:8483], loss=-14.8730
2025-07-21 21:38:40,163 — INFO — layer 0, [1:8483], loss=-16.5979


epoch 1, layer 0:  77%|██████████████████████████████████████████████████████▉                | 8533/11016 [21:38<06:24,  6.45it/s, loss=0.119]

2025-07-21 21:38:47,758 — INFO — layer 0, [1:8533], loss=-9.3255
2025-07-21 21:38:47,758 — INFO — layer 0, [1:8533], loss=-10.1118


epoch 1, layer 0:  78%|███████████████████████████████████████████████████████▎               | 8583/11016 [21:45<06:11,  6.54it/s, loss=-14.4]

2025-07-21 21:38:55,439 — INFO — layer 0, [1:8583], loss=-12.2118
2025-07-21 21:38:55,439 — INFO — layer 0, [1:8583], loss=-8.8012


epoch 1, layer 0:  78%|███████████████████████████████████████████████████████▋               | 8633/11016 [21:53<05:51,  6.78it/s, loss=-13.4]

2025-07-21 21:39:03,125 — INFO — layer 0, [1:8633], loss=-9.4876
2025-07-21 21:39:03,126 — INFO — layer 0, [1:8633], loss=-23.7893


epoch 1, layer 0:  79%|███████████████████████████████████████████████████████▉               | 8683/11016 [22:01<05:58,  6.51it/s, loss=-6.39]

2025-07-21 21:39:10,606 — INFO — layer 0, [1:8683], loss=-15.7246
2025-07-21 21:39:10,606 — INFO — layer 0, [1:8683], loss=-10.5933


epoch 1, layer 0:  79%|████████████████████████████████████████████████████████▎              | 8733/11016 [22:08<05:37,  6.77it/s, loss=-21.6]

2025-07-21 21:39:18,126 — INFO — layer 0, [1:8733], loss=-17.1307
2025-07-21 21:39:18,126 — INFO — layer 0, [1:8733], loss=-15.0976


epoch 1, layer 0:  80%|██████████████████████████████████████████████████████████▏              | 8783/11016 [22:16<05:32,  6.71it/s, loss=-14]

2025-07-21 21:39:25,719 — INFO — layer 0, [1:8783], loss=-16.3359
2025-07-21 21:39:25,719 — INFO — layer 0, [1:8783], loss=-10.5199


epoch 1, layer 0:  80%|████████████████████████████████████████████████████████▉              | 8833/11016 [22:23<05:20,  6.82it/s, loss=-8.03]

2025-07-21 21:39:33,129 — INFO — layer 0, [1:8833], loss=-18.5949
2025-07-21 21:39:33,129 — INFO — layer 0, [1:8833], loss=-22.5800


epoch 1, layer 0:  81%|█████████████████████████████████████████████████████████▎             | 8883/11016 [22:31<05:14,  6.78it/s, loss=-14.8]

2025-07-21 21:39:40,790 — INFO — layer 0, [1:8883], loss=-18.9822
2025-07-21 21:39:40,790 — INFO — layer 0, [1:8883], loss=-14.7787


epoch 1, layer 0:  81%|█████████████████████████████████████████████████████████▌             | 8933/11016 [22:38<05:25,  6.40it/s, loss=-18.7]

2025-07-21 21:39:48,427 — INFO — layer 0, [1:8933], loss=-10.5425
2025-07-21 21:39:48,427 — INFO — layer 0, [1:8933], loss=-23.3305


epoch 1, layer 0:  82%|█████████████████████████████████████████████████████████▉             | 8983/11016 [22:46<05:01,  6.73it/s, loss=-22.5]

2025-07-21 21:39:55,802 — INFO — layer 0, [1:8983], loss=-15.0750
2025-07-21 21:39:55,802 — INFO — layer 0, [1:8983], loss=-9.1076


epoch 1, layer 0:  82%|██████████████████████████████████████████████████████████▏            | 9033/11016 [22:53<05:03,  6.54it/s, loss=-24.4]

2025-07-21 21:40:03,246 — INFO — layer 0, [1:9033], loss=-16.5754
2025-07-21 21:40:03,246 — INFO — layer 0, [1:9033], loss=-22.2805


epoch 1, layer 0:  82%|██████████████████████████████████████████████████████████▌            | 9083/11016 [23:01<04:59,  6.46it/s, loss=-16.8]

2025-07-21 21:40:10,930 — INFO — layer 0, [1:9083], loss=-12.7878
2025-07-21 21:40:10,930 — INFO — layer 0, [1:9083], loss=-14.3659


epoch 1, layer 0:  83%|██████████████████████████████████████████████████████████▊            | 9133/11016 [23:08<04:52,  6.44it/s, loss=-18.4]

2025-07-21 21:40:18,483 — INFO — layer 0, [1:9133], loss=-13.2438
2025-07-21 21:40:18,483 — INFO — layer 0, [1:9133], loss=-12.9801


epoch 1, layer 0:  83%|███████████████████████████████████████████████████████████▏           | 9183/11016 [23:16<05:38,  5.41it/s, loss=-11.4]

2025-07-21 21:40:26,295 — INFO — layer 0, [1:9183], loss=-11.8700
2025-07-21 21:40:26,295 — INFO — layer 0, [1:9183], loss=-5.9470


epoch 1, layer 0:  84%|███████████████████████████████████████████████████████████▌           | 9233/11016 [23:24<04:25,  6.72it/s, loss=-11.1]

2025-07-21 21:40:33,762 — INFO — layer 0, [1:9233], loss=-19.1515
2025-07-21 21:40:33,762 — INFO — layer 0, [1:9233], loss=-8.2082


epoch 1, layer 0:  84%|███████████████████████████████████████████████████████████▊           | 9283/11016 [23:31<04:32,  6.36it/s, loss=-16.8]

2025-07-21 21:40:41,416 — INFO — layer 0, [1:9283], loss=-10.0858
2025-07-21 21:40:41,416 — INFO — layer 0, [1:9283], loss=-19.0636


epoch 1, layer 0:  85%|████████████████████████████████████████████████████████████▏          | 9333/11016 [23:39<04:07,  6.81it/s, loss=-13.7]

2025-07-21 21:40:48,963 — INFO — layer 0, [1:9333], loss=-21.8113
2025-07-21 21:40:48,963 — INFO — layer 0, [1:9333], loss=-17.4377


epoch 1, layer 0:  85%|████████████████████████████████████████████████████████████▍          | 9383/11016 [23:47<04:22,  6.23it/s, loss=-22.7]

2025-07-21 21:40:56,530 — INFO — layer 0, [1:9383], loss=-7.7787
2025-07-21 21:40:56,531 — INFO — layer 0, [1:9383], loss=-18.7763


epoch 1, layer 0:  86%|████████████████████████████████████████████████████████████▊          | 9433/11016 [23:54<04:03,  6.50it/s, loss=-10.8]

2025-07-21 21:41:04,176 — INFO — layer 0, [1:9433], loss=-8.0449
2025-07-21 21:41:04,176 — INFO — layer 0, [1:9433], loss=-15.5595


epoch 1, layer 0:  86%|█████████████████████████████████████████████████████████████          | 9483/11016 [24:02<03:47,  6.74it/s, loss=-18.1]

2025-07-21 21:41:11,790 — INFO — layer 0, [1:9483], loss=-15.2091
2025-07-21 21:41:11,790 — INFO — layer 0, [1:9483], loss=-7.2793


epoch 1, layer 0:  87%|█████████████████████████████████████████████████████████████▍         | 9533/11016 [24:09<03:49,  6.47it/s, loss=-17.6]

2025-07-21 21:41:19,414 — INFO — layer 0, [1:9533], loss=-14.8459
2025-07-21 21:41:19,414 — INFO — layer 0, [1:9533], loss=-17.4205


epoch 1, layer 0:  87%|█████████████████████████████████████████████████████████████▊         | 9583/11016 [24:17<03:31,  6.78it/s, loss=-21.7]

2025-07-21 21:41:27,047 — INFO — layer 0, [1:9583], loss=-18.6548
2025-07-21 21:41:27,047 — INFO — layer 0, [1:9583], loss=-13.5779


epoch 1, layer 0:  87%|██████████████████████████████████████████████████████████████         | 9633/11016 [24:25<03:40,  6.28it/s, loss=-12.9]

2025-07-21 21:41:34,738 — INFO — layer 0, [1:9633], loss=-20.7488
2025-07-21 21:41:34,738 — INFO — layer 0, [1:9633], loss=-18.1742


epoch 1, layer 0:  88%|██████████████████████████████████████████████████████████████▍        | 9683/11016 [24:32<03:13,  6.90it/s, loss=-14.2]

2025-07-21 21:41:42,258 — INFO — layer 0, [1:9683], loss=-8.1592
2025-07-21 21:41:42,258 — INFO — layer 0, [1:9683], loss=-14.8686


epoch 1, layer 0:  88%|████████████████████████████████████████████████████████████████▍        | 9733/11016 [24:40<03:14,  6.60it/s, loss=-14]

2025-07-21 21:41:49,839 — INFO — layer 0, [1:9733], loss=-6.0642
2025-07-21 21:41:49,839 — INFO — layer 0, [1:9733], loss=-17.6912


epoch 1, layer 0:  89%|███████████████████████████████████████████████████████████████        | 9783/11016 [24:47<03:12,  6.41it/s, loss=-13.8]

2025-07-21 21:41:57,473 — INFO — layer 0, [1:9783], loss=-16.3933
2025-07-21 21:41:57,473 — INFO — layer 0, [1:9783], loss=-14.3017


epoch 1, layer 0:  89%|███████████████████████████████████████████████████████████████▍       | 9833/11016 [24:55<03:01,  6.53it/s, loss=-9.64]

2025-07-21 21:42:05,023 — INFO — layer 0, [1:9833], loss=-22.8392
2025-07-21 21:42:05,023 — INFO — layer 0, [1:9833], loss=-16.3459


epoch 1, layer 0:  90%|███████████████████████████████████████████████████████████████▋       | 9883/11016 [25:03<02:49,  6.68it/s, loss=-27.1]

2025-07-21 21:42:12,566 — INFO — layer 0, [1:9883], loss=-13.6256
2025-07-21 21:42:12,566 — INFO — layer 0, [1:9883], loss=-8.3582


epoch 1, layer 0:  90%|████████████████████████████████████████████████████████████████       | 9933/11016 [25:10<02:43,  6.61it/s, loss=-12.9]

2025-07-21 21:42:20,192 — INFO — layer 0, [1:9933], loss=-18.1810
2025-07-21 21:42:20,192 — INFO — layer 0, [1:9933], loss=-11.2361


epoch 1, layer 0:  91%|████████████████████████████████████████████████████████████████▎      | 9983/11016 [25:18<02:28,  6.97it/s, loss=-18.8]

2025-07-21 21:42:27,774 — INFO — layer 0, [1:9983], loss=-21.1927
2025-07-21 21:42:27,774 — INFO — layer 0, [1:9983], loss=-15.2531


epoch 1, layer 0:  91%|███████████████████████████████████████████████████████████████▊      | 10033/11016 [25:25<02:34,  6.35it/s, loss=-15.1]

2025-07-21 21:42:35,312 — INFO — layer 0, [1:10033], loss=-7.7365
2025-07-21 21:42:35,312 — INFO — layer 0, [1:10033], loss=-6.6712


epoch 1, layer 0:  92%|████████████████████████████████████████████████████████████████      | 10083/11016 [25:33<02:17,  6.81it/s, loss=-14.2]

2025-07-21 21:42:43,036 — INFO — layer 0, [1:10083], loss=-11.9552
2025-07-21 21:42:43,036 — INFO — layer 0, [1:10083], loss=-15.5797


epoch 1, layer 0:  92%|████████████████████████████████████████████████████████████████▍     | 10133/11016 [25:41<02:15,  6.50it/s, loss=-26.6]

2025-07-21 21:42:50,566 — INFO — layer 0, [1:10133], loss=-26.0911
2025-07-21 21:42:50,566 — INFO — layer 0, [1:10133], loss=-5.4670


epoch 1, layer 0:  92%|████████████████████████████████████████████████████████████████▋     | 10183/11016 [25:48<02:01,  6.86it/s, loss=-14.8]

2025-07-21 21:42:58,078 — INFO — layer 0, [1:10183], loss=-7.0935
2025-07-21 21:42:58,078 — INFO — layer 0, [1:10183], loss=-8.4341


epoch 1, layer 0:  93%|█████████████████████████████████████████████████████████████████     | 10233/11016 [25:56<01:50,  7.06it/s, loss=-9.94]

2025-07-21 21:43:05,642 — INFO — layer 0, [1:10233], loss=-18.3138
2025-07-21 21:43:05,642 — INFO — layer 0, [1:10233], loss=-6.3297


epoch 1, layer 0:  93%|█████████████████████████████████████████████████████████████████▎    | 10283/11016 [26:03<01:49,  6.72it/s, loss=-18.8]

2025-07-21 21:43:13,301 — INFO — layer 0, [1:10283], loss=-16.2447
2025-07-21 21:43:13,301 — INFO — layer 0, [1:10283], loss=-15.8739


epoch 1, layer 0:  94%|█████████████████████████████████████████████████████████████████▋    | 10333/11016 [26:11<01:44,  6.52it/s, loss=-8.31]

2025-07-21 21:43:20,844 — INFO — layer 0, [1:10333], loss=-19.9893
2025-07-21 21:43:20,845 — INFO — layer 0, [1:10333], loss=-8.1593


epoch 1, layer 0:  94%|█████████████████████████████████████████████████████████████████▉    | 10383/11016 [26:18<01:38,  6.44it/s, loss=-18.6]

2025-07-21 21:43:28,484 — INFO — layer 0, [1:10383], loss=-0.1469
2025-07-21 21:43:28,484 — INFO — layer 0, [1:10383], loss=-12.7457


epoch 1, layer 0:  95%|██████████████████████████████████████████████████████████████████▎   | 10433/11016 [26:26<01:29,  6.50it/s, loss=-22.8]

2025-07-21 21:43:36,204 — INFO — layer 0, [1:10433], loss=-8.5450
2025-07-21 21:43:36,204 — INFO — layer 0, [1:10433], loss=-21.2685


epoch 1, layer 0:  95%|██████████████████████████████████████████████████████████████████▌   | 10483/11016 [26:34<01:19,  6.69it/s, loss=-5.46]

2025-07-21 21:43:43,731 — INFO — layer 0, [1:10483], loss=-19.1289
2025-07-21 21:43:43,731 — INFO — layer 0, [1:10483], loss=-19.9414


epoch 1, layer 0:  96%|██████████████████████████████████████████████████████████████████▉   | 10533/11016 [26:41<01:12,  6.70it/s, loss=-11.7]

2025-07-21 21:43:51,212 — INFO — layer 0, [1:10533], loss=-10.9963
2025-07-21 21:43:51,212 — INFO — layer 0, [1:10533], loss=-9.9211


epoch 1, layer 0:  96%|███████████████████████████████████████████████████████████████████▏  | 10583/11016 [26:49<01:03,  6.80it/s, loss=-14.6]

2025-07-21 21:43:58,723 — INFO — layer 0, [1:10583], loss=-10.7563
2025-07-21 21:43:58,723 — INFO — layer 0, [1:10583], loss=-11.4001


epoch 1, layer 0:  97%|███████████████████████████████████████████████████████████████████▌  | 10633/11016 [26:56<00:56,  6.74it/s, loss=-21.7]

2025-07-21 21:44:06,169 — INFO — layer 0, [1:10633], loss=-22.4342
2025-07-21 21:44:06,169 — INFO — layer 0, [1:10633], loss=-16.1589


epoch 1, layer 0:  97%|███████████████████████████████████████████████████████████████████▉  | 10683/11016 [27:04<00:49,  6.75it/s, loss=-12.6]

2025-07-21 21:44:13,745 — INFO — layer 0, [1:10683], loss=-7.7506
2025-07-21 21:44:13,745 — INFO — layer 0, [1:10683], loss=-10.3721


epoch 1, layer 0:  97%|████████████████████████████████████████████████████████████████████▏ | 10733/11016 [27:11<00:41,  6.79it/s, loss=-6.16]

2025-07-21 21:44:21,481 — INFO — layer 0, [1:10733], loss=-10.0468
2025-07-21 21:44:21,481 — INFO — layer 0, [1:10733], loss=-11.6645


epoch 1, layer 0:  98%|████████████████████████████████████████████████████████████████████▌ | 10783/11016 [27:19<00:34,  6.72it/s, loss=-15.4]

2025-07-21 21:44:29,004 — INFO — layer 0, [1:10783], loss=-14.3506
2025-07-21 21:44:29,004 — INFO — layer 0, [1:10783], loss=-14.6429


epoch 1, layer 0:  98%|████████████████████████████████████████████████████████████████████▊ | 10833/11016 [27:27<00:28,  6.51it/s, loss=-23.4]

2025-07-21 21:44:36,687 — INFO — layer 0, [1:10833], loss=-20.6774
2025-07-21 21:44:36,687 — INFO — layer 0, [1:10833], loss=-19.9736


epoch 1, layer 0:  99%|█████████████████████████████████████████████████████████████████████▏| 10883/11016 [27:34<00:20,  6.58it/s, loss=-11.6]

2025-07-21 21:44:44,243 — INFO — layer 0, [1:10883], loss=-18.4578
2025-07-21 21:44:44,243 — INFO — layer 0, [1:10883], loss=-17.2672


epoch 1, layer 0:  99%|█████████████████████████████████████████████████████████████████████▍| 10933/11016 [27:42<00:13,  6.23it/s, loss=-19.2]

2025-07-21 21:44:52,053 — INFO — layer 0, [1:10933], loss=-1.2834
2025-07-21 21:44:52,053 — INFO — layer 0, [1:10933], loss=-20.1140


epoch 1, layer 0: 100%|█████████████████████████████████████████████████████████████████████▊| 10983/11016 [27:50<00:05,  6.35it/s, loss=-11.6]

2025-07-21 21:44:59,780 — INFO — layer 0, [1:10983], loss=-21.0440
2025-07-21 21:44:59,780 — INFO — layer 0, [1:10983], loss=-23.6394


epoch 2, layer 0:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 21:45:07,781 — INFO — layer 0, [2:17], loss=-8.8869
2025-07-21 21:45:07,781 — INFO — layer 0, [2:17], loss=-7.8134


epoch 2, layer 0:   1%|▍                                                                        | 67/11016 [00:10<28:21,  6.43it/s, loss=-16.5]

2025-07-21 21:45:15,305 — INFO — layer 0, [2:67], loss=-6.3735
2025-07-21 21:45:15,305 — INFO — layer 0, [2:67], loss=-15.2077


epoch 2, layer 0:   1%|▊                                                                       | 117/11016 [00:17<28:51,  6.29it/s, loss=-12.4]

2025-07-21 21:45:22,846 — INFO — layer 0, [2:117], loss=-12.6759
2025-07-21 21:45:22,846 — INFO — layer 0, [2:117], loss=-16.7328


epoch 2, layer 0:   2%|█                                                                       | 167/11016 [00:25<27:15,  6.63it/s, loss=-20.3]

2025-07-21 21:45:30,405 — INFO — layer 0, [2:167], loss=-24.2539
2025-07-21 21:45:30,405 — INFO — layer 0, [2:167], loss=-16.9133


epoch 2, layer 0:   2%|█▍                                                                      | 217/11016 [00:33<28:47,  6.25it/s, loss=-16.2]

2025-07-21 21:45:37,974 — INFO — layer 0, [2:217], loss=-18.4443
2025-07-21 21:45:37,974 — INFO — layer 0, [2:217], loss=-15.3471


epoch 2, layer 0:   2%|█▋                                                                      | 267/11016 [00:40<26:36,  6.73it/s, loss=-26.2]

2025-07-21 21:45:45,812 — INFO — layer 0, [2:267], loss=-8.0659
2025-07-21 21:45:45,812 — INFO — layer 0, [2:267], loss=-26.1248


epoch 2, layer 0:   3%|██                                                                      | 317/11016 [00:48<25:22,  7.03it/s, loss=-7.25]

2025-07-21 21:45:53,425 — INFO — layer 0, [2:317], loss=-9.7275
2025-07-21 21:45:53,425 — INFO — layer 0, [2:317], loss=-23.5596


epoch 2, layer 0:   3%|██▍                                                                     | 367/11016 [00:55<28:54,  6.14it/s, loss=-25.9]

2025-07-21 21:46:00,946 — INFO — layer 0, [2:367], loss=-10.6651
2025-07-21 21:46:00,947 — INFO — layer 0, [2:367], loss=-10.9257


epoch 2, layer 0:   4%|██▋                                                                     | 417/11016 [01:03<25:05,  7.04it/s, loss=-7.09]

2025-07-21 21:46:08,548 — INFO — layer 0, [2:417], loss=-4.1269
2025-07-21 21:46:08,548 — INFO — layer 0, [2:417], loss=-11.6565


epoch 2, layer 0:   4%|███                                                                     | 467/11016 [01:11<26:37,  6.60it/s, loss=-22.7]

2025-07-21 21:46:16,191 — INFO — layer 0, [2:467], loss=-22.1369
2025-07-21 21:46:16,191 — INFO — layer 0, [2:467], loss=-8.5085


epoch 2, layer 0:   5%|███▍                                                                    | 517/11016 [01:18<26:46,  6.54it/s, loss=-18.1]

2025-07-21 21:46:23,939 — INFO — layer 0, [2:517], loss=-17.4049
2025-07-21 21:46:23,939 — INFO — layer 0, [2:517], loss=-13.3277


epoch 2, layer 0:   5%|███▋                                                                    | 567/11016 [01:26<27:05,  6.43it/s, loss=-12.3]

2025-07-21 21:46:31,450 — INFO — layer 0, [2:567], loss=-18.8417
2025-07-21 21:46:31,450 — INFO — layer 0, [2:567], loss=-10.1507


epoch 2, layer 0:   6%|████                                                                    | 617/11016 [01:34<25:45,  6.73it/s, loss=-15.7]

2025-07-21 21:46:39,141 — INFO — layer 0, [2:617], loss=-10.8162
2025-07-21 21:46:39,141 — INFO — layer 0, [2:617], loss=-15.0865


epoch 2, layer 0:   6%|████▎                                                                   | 667/11016 [01:41<26:33,  6.49it/s, loss=-12.9]

2025-07-21 21:46:46,701 — INFO — layer 0, [2:667], loss=-17.2105
2025-07-21 21:46:46,701 — INFO — layer 0, [2:667], loss=-16.0554


epoch 2, layer 0:   7%|████▋                                                                   | 717/11016 [01:49<28:00,  6.13it/s, loss=-20.3]

2025-07-21 21:46:54,420 — INFO — layer 0, [2:717], loss=-13.9294
2025-07-21 21:46:54,420 — INFO — layer 0, [2:717], loss=-10.6236


epoch 2, layer 0:   7%|█████                                                                   | 767/11016 [01:57<26:04,  6.55it/s, loss=-21.4]

2025-07-21 21:47:01,975 — INFO — layer 0, [2:767], loss=-22.1302
2025-07-21 21:47:01,975 — INFO — layer 0, [2:767], loss=-21.6990


epoch 2, layer 0:   7%|█████▎                                                                  | 817/11016 [02:04<25:44,  6.60it/s, loss=-12.6]

2025-07-21 21:47:09,489 — INFO — layer 0, [2:817], loss=-25.0898
2025-07-21 21:47:09,489 — INFO — layer 0, [2:817], loss=-22.1183


epoch 2, layer 0:   8%|█████▋                                                                  | 867/11016 [02:12<24:29,  6.91it/s, loss=-5.83]

2025-07-21 21:47:17,055 — INFO — layer 0, [2:867], loss=-11.9504
2025-07-21 21:47:17,055 — INFO — layer 0, [2:867], loss=-16.4975


epoch 2, layer 0:   8%|█████▉                                                                  | 917/11016 [02:19<25:53,  6.50it/s, loss=-12.9]

2025-07-21 21:47:24,688 — INFO — layer 0, [2:917], loss=-5.2586
2025-07-21 21:47:24,688 — INFO — layer 0, [2:917], loss=-13.8229


epoch 2, layer 0:   9%|██████▎                                                                 | 967/11016 [02:27<26:53,  6.23it/s, loss=-10.6]

2025-07-21 21:47:32,299 — INFO — layer 0, [2:967], loss=-16.9840
2025-07-21 21:47:32,299 — INFO — layer 0, [2:967], loss=-15.0747


epoch 2, layer 0:   9%|██████▌                                                                | 1017/11016 [02:34<24:13,  6.88it/s, loss=-15.9]

2025-07-21 21:47:39,743 — INFO — layer 0, [2:1017], loss=-2.1723
2025-07-21 21:47:39,743 — INFO — layer 0, [2:1017], loss=-24.8650


epoch 2, layer 0:  10%|██████▉                                                                | 1067/11016 [02:42<26:06,  6.35it/s, loss=-22.2]

2025-07-21 21:47:47,591 — INFO — layer 0, [2:1067], loss=-12.0342
2025-07-21 21:47:47,591 — INFO — layer 0, [2:1067], loss=-13.6470


epoch 2, layer 0:  10%|███████▏                                                               | 1117/11016 [02:50<25:33,  6.46it/s, loss=-19.2]

2025-07-21 21:47:55,497 — INFO — layer 0, [2:1117], loss=-10.0126
2025-07-21 21:47:55,497 — INFO — layer 0, [2:1117], loss=-7.9865


epoch 2, layer 0:  11%|███████▌                                                               | 1167/11016 [02:58<25:14,  6.51it/s, loss=-19.2]

2025-07-21 21:48:03,111 — INFO — layer 0, [2:1167], loss=-11.8063
2025-07-21 21:48:03,111 — INFO — layer 0, [2:1167], loss=-10.1234


epoch 2, layer 0:  11%|███████▊                                                               | 1217/11016 [03:05<24:04,  6.78it/s, loss=-6.17]

2025-07-21 21:48:10,765 — INFO — layer 0, [2:1217], loss=-7.9876
2025-07-21 21:48:10,765 — INFO — layer 0, [2:1217], loss=-9.3313


epoch 2, layer 0:  12%|████████▏                                                              | 1267/11016 [03:13<23:43,  6.85it/s, loss=-19.1]

2025-07-21 21:48:18,324 — INFO — layer 0, [2:1267], loss=-15.7510
2025-07-21 21:48:18,324 — INFO — layer 0, [2:1267], loss=-14.4897


epoch 2, layer 0:  12%|████████▍                                                              | 1317/11016 [03:20<24:47,  6.52it/s, loss=-18.1]

2025-07-21 21:48:25,833 — INFO — layer 0, [2:1317], loss=-13.2245
2025-07-21 21:48:25,833 — INFO — layer 0, [2:1317], loss=-18.9382


epoch 2, layer 0:  12%|█████████                                                                | 1367/11016 [03:28<24:30,  6.56it/s, loss=-20]

2025-07-21 21:48:33,435 — INFO — layer 0, [2:1367], loss=-12.2070
2025-07-21 21:48:33,435 — INFO — layer 0, [2:1367], loss=-21.3059


epoch 2, layer 0:  13%|█████████▏                                                             | 1417/11016 [03:36<25:08,  6.36it/s, loss=-13.3]

2025-07-21 21:48:41,233 — INFO — layer 0, [2:1417], loss=-10.4295
2025-07-21 21:48:41,233 — INFO — layer 0, [2:1417], loss=-2.8442


epoch 2, layer 0:  13%|█████████▍                                                             | 1467/11016 [03:43<23:20,  6.82it/s, loss=-14.6]

2025-07-21 21:48:48,763 — INFO — layer 0, [2:1467], loss=-7.5180
2025-07-21 21:48:48,763 — INFO — layer 0, [2:1467], loss=-22.4955


epoch 2, layer 0:  14%|█████████▊                                                             | 1517/11016 [03:51<23:55,  6.62it/s, loss=-12.2]

2025-07-21 21:48:56,266 — INFO — layer 0, [2:1517], loss=-15.4947
2025-07-21 21:48:56,266 — INFO — layer 0, [2:1517], loss=-8.1927


epoch 2, layer 0:  14%|██████████▍                                                              | 1567/11016 [03:58<24:23,  6.46it/s, loss=-11]

2025-07-21 21:49:03,819 — INFO — layer 0, [2:1567], loss=-19.9208
2025-07-21 21:49:03,819 — INFO — layer 0, [2:1567], loss=-21.9462


epoch 2, layer 0:  15%|██████████▍                                                            | 1617/11016 [04:06<24:50,  6.31it/s, loss=-13.3]

2025-07-21 21:49:11,565 — INFO — layer 0, [2:1617], loss=-2.2160
2025-07-21 21:49:11,565 — INFO — layer 0, [2:1617], loss=-11.2321


epoch 2, layer 0:  15%|██████████▋                                                            | 1667/11016 [04:14<23:56,  6.51it/s, loss=-10.5]

2025-07-21 21:49:19,157 — INFO — layer 0, [2:1667], loss=-10.5356
2025-07-21 21:49:19,157 — INFO — layer 0, [2:1667], loss=-19.2477


epoch 2, layer 0:  16%|███████████                                                            | 1717/11016 [04:21<22:31,  6.88it/s, loss=-5.35]

2025-07-21 21:49:26,671 — INFO — layer 0, [2:1717], loss=-7.7782
2025-07-21 21:49:26,671 — INFO — layer 0, [2:1717], loss=-18.6312


epoch 2, layer 0:  16%|███████████▍                                                           | 1767/11016 [04:29<22:44,  6.78it/s, loss=-23.3]

2025-07-21 21:49:34,313 — INFO — layer 0, [2:1767], loss=-7.5583
2025-07-21 21:49:34,313 — INFO — layer 0, [2:1767], loss=-25.3857


epoch 2, layer 0:  16%|███████████▋                                                           | 1817/11016 [04:36<23:46,  6.45it/s, loss=-22.2]

2025-07-21 21:49:41,866 — INFO — layer 0, [2:1817], loss=-20.4179
2025-07-21 21:49:41,866 — INFO — layer 0, [2:1817], loss=-22.6128


epoch 2, layer 0:  17%|████████████                                                           | 1867/11016 [04:44<23:04,  6.61it/s, loss=-18.5]

2025-07-21 21:49:49,362 — INFO — layer 0, [2:1867], loss=-21.2427
2025-07-21 21:49:49,362 — INFO — layer 0, [2:1867], loss=-8.2800


epoch 2, layer 0:  17%|████████████▎                                                          | 1917/11016 [04:51<21:42,  6.99it/s, loss=-11.8]

2025-07-21 21:49:56,766 — INFO — layer 0, [2:1917], loss=-14.9351
2025-07-21 21:49:56,766 — INFO — layer 0, [2:1917], loss=-24.6915


epoch 2, layer 0:  18%|████████████▋                                                          | 1967/11016 [04:59<22:38,  6.66it/s, loss=-15.9]

2025-07-21 21:50:04,422 — INFO — layer 0, [2:1967], loss=-14.5274
2025-07-21 21:50:04,422 — INFO — layer 0, [2:1967], loss=-21.2896


epoch 2, layer 0:  18%|████████████▉                                                          | 2017/11016 [05:07<23:17,  6.44it/s, loss=-5.24]

2025-07-21 21:50:12,060 — INFO — layer 0, [2:2017], loss=-4.2984
2025-07-21 21:50:12,060 — INFO — layer 0, [2:2017], loss=-9.9498


epoch 2, layer 0:  19%|█████████████▎                                                         | 2067/11016 [05:14<23:38,  6.31it/s, loss=-20.7]

2025-07-21 21:50:19,625 — INFO — layer 0, [2:2067], loss=-18.4440
2025-07-21 21:50:19,625 — INFO — layer 0, [2:2067], loss=-14.7085


epoch 2, layer 0:  19%|█████████████▋                                                         | 2117/11016 [05:22<23:31,  6.30it/s, loss=-9.25]

2025-07-21 21:50:27,368 — INFO — layer 0, [2:2117], loss=-19.6284
2025-07-21 21:50:27,368 — INFO — layer 0, [2:2117], loss=-12.6965


epoch 2, layer 0:  20%|█████████████▉                                                         | 2167/11016 [05:29<23:44,  6.21it/s, loss=-15.5]

2025-07-21 21:50:34,960 — INFO — layer 0, [2:2167], loss=-12.4012
2025-07-21 21:50:34,961 — INFO — layer 0, [2:2167], loss=-12.1993


epoch 2, layer 0:  20%|██████████████▋                                                          | 2217/11016 [05:37<23:42,  6.19it/s, loss=-11]

2025-07-21 21:50:42,533 — INFO — layer 0, [2:2217], loss=-17.1388
2025-07-21 21:50:42,533 — INFO — layer 0, [2:2217], loss=-16.2850


epoch 2, layer 0:  21%|██████████████▌                                                        | 2267/11016 [05:45<21:10,  6.89it/s, loss=-14.8]

2025-07-21 21:50:50,073 — INFO — layer 0, [2:2267], loss=-20.2669
2025-07-21 21:50:50,073 — INFO — layer 0, [2:2267], loss=-18.0494


epoch 2, layer 0:  21%|██████████████▉                                                        | 2317/11016 [05:52<24:14,  5.98it/s, loss=-8.87]

2025-07-21 21:50:57,797 — INFO — layer 0, [2:2317], loss=-14.0949
2025-07-21 21:50:57,797 — INFO — layer 0, [2:2317], loss=-12.4943


epoch 2, layer 0:  21%|███████████████▎                                                       | 2367/11016 [06:00<22:32,  6.40it/s, loss=-17.9]

2025-07-21 21:51:05,641 — INFO — layer 0, [2:2367], loss=0.1412
2025-07-21 21:51:05,641 — INFO — layer 0, [2:2367], loss=-15.7834


epoch 2, layer 0:  22%|███████████████▌                                                       | 2417/11016 [06:08<22:50,  6.27it/s, loss=-15.3]

2025-07-21 21:51:13,159 — INFO — layer 0, [2:2417], loss=-15.2532
2025-07-21 21:51:13,159 — INFO — layer 0, [2:2417], loss=-8.0112


epoch 2, layer 0:  22%|███████████████▉                                                       | 2467/11016 [06:15<21:30,  6.63it/s, loss=-13.9]

2025-07-21 21:51:20,941 — INFO — layer 0, [2:2467], loss=-30.7174
2025-07-21 21:51:20,941 — INFO — layer 0, [2:2467], loss=-13.7873


epoch 2, layer 0:  23%|████████████████▏                                                      | 2517/11016 [06:23<21:10,  6.69it/s, loss=-11.5]

2025-07-21 21:51:28,575 — INFO — layer 0, [2:2517], loss=-27.4570
2025-07-21 21:51:28,575 — INFO — layer 0, [2:2517], loss=-2.3051


epoch 2, layer 0:  23%|████████████████▌                                                      | 2567/11016 [06:31<21:48,  6.46it/s, loss=-14.8]

2025-07-21 21:51:36,251 — INFO — layer 0, [2:2567], loss=-8.5767
2025-07-21 21:51:36,251 — INFO — layer 0, [2:2567], loss=-15.0341


epoch 2, layer 0:  24%|████████████████▊                                                      | 2617/11016 [06:38<20:26,  6.85it/s, loss=-7.11]

2025-07-21 21:51:43,773 — INFO — layer 0, [2:2617], loss=-12.1955
2025-07-21 21:51:43,773 — INFO — layer 0, [2:2617], loss=-19.3191


epoch 2, layer 0:  24%|█████████████████▏                                                     | 2667/11016 [06:46<22:00,  6.32it/s, loss=-12.3]

2025-07-21 21:51:51,436 — INFO — layer 0, [2:2667], loss=-7.8192
2025-07-21 21:51:51,436 — INFO — layer 0, [2:2667], loss=-20.2923


epoch 2, layer 0:  25%|█████████████████▌                                                     | 2717/11016 [06:53<23:37,  5.85it/s, loss=-18.5]

2025-07-21 21:51:58,942 — INFO — layer 0, [2:2717], loss=-22.3025
2025-07-21 21:51:58,942 — INFO — layer 0, [2:2717], loss=-18.0005


epoch 2, layer 0:  25%|█████████████████▊                                                     | 2767/11016 [07:01<21:30,  6.39it/s, loss=-15.8]

2025-07-21 21:52:06,638 — INFO — layer 0, [2:2767], loss=-12.2284
2025-07-21 21:52:06,639 — INFO — layer 0, [2:2767], loss=-6.1903


epoch 2, layer 0:  26%|██████████████████▏                                                    | 2817/11016 [07:09<21:37,  6.32it/s, loss=-7.99]

2025-07-21 21:52:14,226 — INFO — layer 0, [2:2817], loss=-7.9127
2025-07-21 21:52:14,226 — INFO — layer 0, [2:2817], loss=-18.1072


epoch 2, layer 0:  26%|██████████████████▍                                                    | 2867/11016 [07:16<21:56,  6.19it/s, loss=-3.76]

2025-07-21 21:52:21,949 — INFO — layer 0, [2:2867], loss=-20.0579
2025-07-21 21:52:21,949 — INFO — layer 0, [2:2867], loss=-6.1789


epoch 2, layer 0:  26%|██████████████████▊                                                    | 2917/11016 [07:24<20:29,  6.58it/s, loss=-25.5]

2025-07-21 21:52:29,430 — INFO — layer 0, [2:2917], loss=-14.2351
2025-07-21 21:52:29,430 — INFO — layer 0, [2:2917], loss=-9.6669


epoch 2, layer 0:  27%|███████████████████                                                    | 2967/11016 [07:32<20:11,  6.64it/s, loss=-24.8]

2025-07-21 21:52:37,038 — INFO — layer 0, [2:2967], loss=-17.2580
2025-07-21 21:52:37,038 — INFO — layer 0, [2:2967], loss=-13.9785


epoch 2, layer 0:  27%|███████████████████▉                                                     | 3017/11016 [07:39<21:09,  6.30it/s, loss=-21]

2025-07-21 21:52:44,740 — INFO — layer 0, [2:3017], loss=-14.5026
2025-07-21 21:52:44,740 — INFO — layer 0, [2:3017], loss=-12.8551


epoch 2, layer 0:  28%|███████████████████▊                                                   | 3067/11016 [07:47<19:26,  6.81it/s, loss=-27.5]

2025-07-21 21:52:52,344 — INFO — layer 0, [2:3067], loss=-8.7540
2025-07-21 21:52:52,344 — INFO — layer 0, [2:3067], loss=-5.4338


epoch 2, layer 0:  28%|████████████████████▋                                                    | 3117/11016 [07:54<19:10,  6.87it/s, loss=-27]

2025-07-21 21:52:59,781 — INFO — layer 0, [2:3117], loss=-22.3093
2025-07-21 21:52:59,781 — INFO — layer 0, [2:3117], loss=-14.9336


epoch 2, layer 0:  29%|████████████████████▍                                                  | 3167/11016 [08:02<19:43,  6.63it/s, loss=-28.4]

2025-07-21 21:53:07,271 — INFO — layer 0, [2:3167], loss=-18.8215
2025-07-21 21:53:07,271 — INFO — layer 0, [2:3167], loss=-10.6477


epoch 2, layer 0:  29%|█████████████████████▎                                                   | 3217/11016 [08:10<20:55,  6.21it/s, loss=-13]

2025-07-21 21:53:15,049 — INFO — layer 0, [2:3217], loss=-9.0386
2025-07-21 21:53:15,049 — INFO — layer 0, [2:3217], loss=-18.7471


epoch 2, layer 0:  30%|█████████████████████                                                  | 3267/11016 [08:17<20:14,  6.38it/s, loss=-13.8]

2025-07-21 21:53:22,542 — INFO — layer 0, [2:3267], loss=-9.2278
2025-07-21 21:53:22,542 — INFO — layer 0, [2:3267], loss=-8.0979


epoch 2, layer 0:  30%|█████████████████████                                                 | 3317/11016 [08:25<19:12,  6.68it/s, loss=-0.526]

2025-07-21 21:53:30,163 — INFO — layer 0, [2:3317], loss=-24.8786
2025-07-21 21:53:30,163 — INFO — layer 0, [2:3317], loss=-14.3030


epoch 2, layer 0:  31%|█████████████████████▋                                                 | 3367/11016 [08:32<19:53,  6.41it/s, loss=-14.4]

2025-07-21 21:53:37,713 — INFO — layer 0, [2:3367], loss=-17.5112
2025-07-21 21:53:37,713 — INFO — layer 0, [2:3367], loss=-8.6561


epoch 2, layer 0:  31%|██████████████████████                                                 | 3417/11016 [08:40<20:17,  6.24it/s, loss=-12.2]

2025-07-21 21:53:45,318 — INFO — layer 0, [2:3417], loss=-13.0097
2025-07-21 21:53:45,318 — INFO — layer 0, [2:3417], loss=-18.0868


epoch 2, layer 0:  31%|██████████████████████▎                                                | 3467/11016 [08:48<18:58,  6.63it/s, loss=-1.65]

2025-07-21 21:53:53,059 — INFO — layer 0, [2:3467], loss=-22.1319
2025-07-21 21:53:53,059 — INFO — layer 0, [2:3467], loss=-25.5774


epoch 2, layer 0:  32%|██████████████████████▋                                                | 3517/11016 [08:55<19:10,  6.52it/s, loss=-19.6]

2025-07-21 21:54:00,647 — INFO — layer 0, [2:3517], loss=-16.9989
2025-07-21 21:54:00,647 — INFO — layer 0, [2:3517], loss=-22.0953


epoch 2, layer 0:  32%|██████████████████████▉                                                | 3567/11016 [09:03<20:34,  6.03it/s, loss=-9.67]

2025-07-21 21:54:08,533 — INFO — layer 0, [2:3567], loss=-7.6643
2025-07-21 21:54:08,533 — INFO — layer 0, [2:3567], loss=-12.7560


epoch 2, layer 0:  33%|███████████████████████▎                                               | 3617/11016 [09:11<17:57,  6.87it/s, loss=-8.94]

2025-07-21 21:54:16,074 — INFO — layer 0, [2:3617], loss=-22.2051
2025-07-21 21:54:16,074 — INFO — layer 0, [2:3617], loss=-24.9225


epoch 2, layer 0:  33%|███████████████████████▋                                               | 3667/11016 [09:18<19:27,  6.30it/s, loss=-13.8]

2025-07-21 21:54:23,735 — INFO — layer 0, [2:3667], loss=-5.7252
2025-07-21 21:54:23,735 — INFO — layer 0, [2:3667], loss=-12.2546


epoch 2, layer 0:  34%|███████████████████████▉                                               | 3717/11016 [09:26<17:53,  6.80it/s, loss=-22.4]

2025-07-21 21:54:31,431 — INFO — layer 0, [2:3717], loss=-12.2527
2025-07-21 21:54:31,431 — INFO — layer 0, [2:3717], loss=-19.6631


epoch 2, layer 0:  34%|████████████████████████▎                                              | 3767/11016 [09:34<18:43,  6.45it/s, loss=-10.3]

2025-07-21 21:54:39,011 — INFO — layer 0, [2:3767], loss=-8.9480
2025-07-21 21:54:39,011 — INFO — layer 0, [2:3767], loss=-13.5519


epoch 2, layer 0:  35%|████████████████████████▌                                              | 3817/11016 [09:41<18:57,  6.33it/s, loss=-15.8]

2025-07-21 21:54:46,625 — INFO — layer 0, [2:3817], loss=-14.9749
2025-07-21 21:54:46,625 — INFO — layer 0, [2:3817], loss=-3.3766


epoch 2, layer 0:  35%|████████████████████████▉                                              | 3867/11016 [09:49<19:13,  6.20it/s, loss=-11.8]

2025-07-21 21:54:54,252 — INFO — layer 0, [2:3867], loss=-23.9319
2025-07-21 21:54:54,252 — INFO — layer 0, [2:3867], loss=-8.6183


epoch 2, layer 0:  36%|█████████████████████████▏                                             | 3917/11016 [09:56<18:00,  6.57it/s, loss=-11.7]

2025-07-21 21:55:01,704 — INFO — layer 0, [2:3917], loss=-18.9551
2025-07-21 21:55:01,704 — INFO — layer 0, [2:3917], loss=-12.0287


epoch 2, layer 0:  36%|█████████████████████████▌                                             | 3967/11016 [10:04<17:29,  6.72it/s, loss=-5.26]

2025-07-21 21:55:09,359 — INFO — layer 0, [2:3967], loss=-15.6369
2025-07-21 21:55:09,359 — INFO — layer 0, [2:3967], loss=-19.1217


epoch 2, layer 0:  36%|█████████████████████████▉                                             | 4017/11016 [10:11<18:06,  6.44it/s, loss=-5.26]

2025-07-21 21:55:16,930 — INFO — layer 0, [2:4017], loss=-17.3342
2025-07-21 21:55:16,930 — INFO — layer 0, [2:4017], loss=-13.4204


epoch 2, layer 0:  37%|██████████████████████████▏                                            | 4067/11016 [10:19<17:01,  6.80it/s, loss=-17.5]

2025-07-21 21:55:24,525 — INFO — layer 0, [2:4067], loss=-6.5192
2025-07-21 21:55:24,525 — INFO — layer 0, [2:4067], loss=-9.4413


epoch 2, layer 0:  37%|██████████████████████████▌                                            | 4117/11016 [10:27<17:07,  6.71it/s, loss=-7.81]

2025-07-21 21:55:32,144 — INFO — layer 0, [2:4117], loss=-11.1043
2025-07-21 21:55:32,144 — INFO — layer 0, [2:4117], loss=-14.5140


epoch 2, layer 0:  38%|██████████████████████████▊                                            | 4167/11016 [10:34<16:39,  6.85it/s, loss=-14.6]

2025-07-21 21:55:39,645 — INFO — layer 0, [2:4167], loss=-6.1249
2025-07-21 21:55:39,645 — INFO — layer 0, [2:4167], loss=-18.2023


epoch 2, layer 0:  38%|███████████████████████████▏                                           | 4217/11016 [10:42<16:13,  6.98it/s, loss=-9.18]

2025-07-21 21:55:47,098 — INFO — layer 0, [2:4217], loss=-8.0393
2025-07-21 21:55:47,098 — INFO — layer 0, [2:4217], loss=-10.8145


epoch 2, layer 0:  39%|███████████████████████████▌                                           | 4267/11016 [10:49<17:28,  6.44it/s, loss=-8.61]

2025-07-21 21:55:54,796 — INFO — layer 0, [2:4267], loss=-6.6936
2025-07-21 21:55:54,796 — INFO — layer 0, [2:4267], loss=-11.6199


epoch 2, layer 0:  39%|███████████████████████████▊                                           | 4317/11016 [10:57<16:52,  6.62it/s, loss=-14.2]

2025-07-21 21:56:02,395 — INFO — layer 0, [2:4317], loss=-11.6296
2025-07-21 21:56:02,395 — INFO — layer 0, [2:4317], loss=-11.8045


epoch 2, layer 0:  40%|████████████████████████████▏                                          | 4367/11016 [11:05<16:41,  6.64it/s, loss=-8.21]

2025-07-21 21:56:10,043 — INFO — layer 0, [2:4367], loss=-22.6885
2025-07-21 21:56:10,043 — INFO — layer 0, [2:4367], loss=-15.8857


epoch 2, layer 0:  40%|████████████████████████████▍                                          | 4417/11016 [11:12<17:07,  6.42it/s, loss=-22.7]

2025-07-21 21:56:17,560 — INFO — layer 0, [2:4417], loss=-23.6911
2025-07-21 21:56:17,560 — INFO — layer 0, [2:4417], loss=-4.7080


epoch 2, layer 0:  41%|████████████████████████████▊                                          | 4467/11016 [11:20<16:42,  6.53it/s, loss=-19.4]

2025-07-21 21:56:25,167 — INFO — layer 0, [2:4467], loss=-18.7302
2025-07-21 21:56:25,167 — INFO — layer 0, [2:4467], loss=-12.2101


epoch 2, layer 0:  41%|█████████████████████████████                                          | 4517/11016 [11:27<16:19,  6.64it/s, loss=-5.18]

2025-07-21 21:56:32,862 — INFO — layer 0, [2:4517], loss=-11.0845
2025-07-21 21:56:32,862 — INFO — layer 0, [2:4517], loss=-7.1081


epoch 2, layer 0:  41%|█████████████████████████████▍                                         | 4567/11016 [11:35<16:35,  6.48it/s, loss=-9.84]

2025-07-21 21:56:40,338 — INFO — layer 0, [2:4567], loss=-6.3560
2025-07-21 21:56:40,339 — INFO — layer 0, [2:4567], loss=-12.6967


epoch 2, layer 0:  42%|█████████████████████████████▊                                         | 4617/11016 [11:43<16:29,  6.47it/s, loss=-15.9]

2025-07-21 21:56:47,966 — INFO — layer 0, [2:4617], loss=-21.5449
2025-07-21 21:56:47,966 — INFO — layer 0, [2:4617], loss=-15.0651


epoch 2, layer 0:  42%|██████████████████████████████                                         | 4667/11016 [11:50<15:56,  6.64it/s, loss=-15.3]

2025-07-21 21:56:55,381 — INFO — layer 0, [2:4667], loss=-15.7306
2025-07-21 21:56:55,381 — INFO — layer 0, [2:4667], loss=-7.4813


epoch 2, layer 0:  43%|██████████████████████████████▍                                        | 4717/11016 [11:57<16:17,  6.44it/s, loss=-7.83]

2025-07-21 21:57:02,796 — INFO — layer 0, [2:4717], loss=-8.8126
2025-07-21 21:57:02,796 — INFO — layer 0, [2:4717], loss=-8.6917


epoch 2, layer 0:  43%|██████████████████████████████▋                                        | 4767/11016 [12:05<15:26,  6.74it/s, loss=-15.7]

2025-07-21 21:57:10,456 — INFO — layer 0, [2:4767], loss=-23.3643
2025-07-21 21:57:10,456 — INFO — layer 0, [2:4767], loss=-9.9611


epoch 2, layer 0:  44%|███████████████████████████████                                        | 4817/11016 [12:13<16:06,  6.41it/s, loss=-2.15]

2025-07-21 21:57:18,226 — INFO — layer 0, [2:4817], loss=-14.5505
2025-07-21 21:57:18,226 — INFO — layer 0, [2:4817], loss=-15.0481


epoch 2, layer 0:  44%|███████████████████████████████▎                                       | 4867/11016 [12:21<16:53,  6.07it/s, loss=-11.8]

2025-07-21 21:57:26,054 — INFO — layer 0, [2:4867], loss=-10.8326
2025-07-21 21:57:26,054 — INFO — layer 0, [2:4867], loss=-22.8988


epoch 2, layer 0:  45%|███████████████████████████████▋                                       | 4917/11016 [12:28<15:08,  6.71it/s, loss=-15.4]

2025-07-21 21:57:33,637 — INFO — layer 0, [2:4917], loss=-14.4615
2025-07-21 21:57:33,637 — INFO — layer 0, [2:4917], loss=-8.1234


epoch 2, layer 0:  45%|████████████████████████████████                                       | 4967/11016 [12:36<14:49,  6.80it/s, loss=-4.84]

2025-07-21 21:57:41,194 — INFO — layer 0, [2:4967], loss=-10.7703
2025-07-21 21:57:41,194 — INFO — layer 0, [2:4967], loss=-4.1727


epoch 2, layer 0:  46%|████████████████████████████████▎                                      | 5017/11016 [12:43<15:21,  6.51it/s, loss=-9.51]

2025-07-21 21:57:48,779 — INFO — layer 0, [2:5017], loss=-12.6776
2025-07-21 21:57:48,779 — INFO — layer 0, [2:5017], loss=-22.8209


epoch 2, layer 0:  46%|████████████████████████████████▋                                      | 5067/11016 [12:51<15:14,  6.51it/s, loss=-24.5]

2025-07-21 21:57:56,394 — INFO — layer 0, [2:5067], loss=-19.5007
2025-07-21 21:57:56,394 — INFO — layer 0, [2:5067], loss=-13.3647


epoch 2, layer 0:  46%|████████████████████████████████▉                                      | 5117/11016 [12:59<15:38,  6.29it/s, loss=-16.9]

2025-07-21 21:58:04,176 — INFO — layer 0, [2:5117], loss=-17.9025
2025-07-21 21:58:04,176 — INFO — layer 0, [2:5117], loss=-22.5785


epoch 2, layer 0:  47%|█████████████████████████████████▎                                     | 5167/11016 [13:06<14:55,  6.53it/s, loss=-3.19]

2025-07-21 21:58:11,694 — INFO — layer 0, [2:5167], loss=-8.3411
2025-07-21 21:58:11,694 — INFO — layer 0, [2:5167], loss=-5.8149


epoch 2, layer 0:  47%|██████████████████████████████████▌                                      | 5217/11016 [13:14<14:32,  6.65it/s, loss=-18]

2025-07-21 21:58:19,159 — INFO — layer 0, [2:5217], loss=-13.6006
2025-07-21 21:58:19,159 — INFO — layer 0, [2:5217], loss=-11.2349


epoch 2, layer 0:  48%|█████████████████████████████████▉                                     | 5267/11016 [13:21<15:42,  6.10it/s, loss=-21.5]

2025-07-21 21:58:26,828 — INFO — layer 0, [2:5267], loss=-11.4212
2025-07-21 21:58:26,828 — INFO — layer 0, [2:5267], loss=-13.1065


epoch 2, layer 0:  48%|██████████████████████████████████▎                                    | 5317/11016 [13:29<15:32,  6.11it/s, loss=-20.9]

2025-07-21 21:58:34,760 — INFO — layer 0, [2:5317], loss=-14.8959
2025-07-21 21:58:34,760 — INFO — layer 0, [2:5317], loss=-3.7984


epoch 2, layer 0:  49%|███████████████████████████████████▌                                     | 5367/11016 [13:37<14:44,  6.39it/s, loss=-14]

2025-07-21 21:58:42,640 — INFO — layer 0, [2:5367], loss=-19.6870
2025-07-21 21:58:42,640 — INFO — layer 0, [2:5367], loss=-7.1204


epoch 2, layer 0:  49%|██████████████████████████████████▉                                    | 5417/11016 [13:45<14:02,  6.65it/s, loss=-22.5]

2025-07-21 21:58:50,272 — INFO — layer 0, [2:5417], loss=-17.8994
2025-07-21 21:58:50,272 — INFO — layer 0, [2:5417], loss=-15.0626


epoch 2, layer 0:  50%|███████████████████████████████████▏                                   | 5467/11016 [13:52<14:17,  6.47it/s, loss=-5.04]

2025-07-21 21:58:57,881 — INFO — layer 0, [2:5467], loss=-17.9701
2025-07-21 21:58:57,881 — INFO — layer 0, [2:5467], loss=-14.3183


epoch 2, layer 0:  50%|███████████████████████████████████▌                                   | 5517/11016 [14:00<13:37,  6.72it/s, loss=-21.4]

2025-07-21 21:59:05,406 — INFO — layer 0, [2:5517], loss=-17.8593
2025-07-21 21:59:05,406 — INFO — layer 0, [2:5517], loss=-21.2025


epoch 2, layer 0:  51%|███████████████████████████████████▉                                   | 5567/11016 [14:08<13:30,  6.72it/s, loss=-10.7]

2025-07-21 21:59:13,128 — INFO — layer 0, [2:5567], loss=-25.0969
2025-07-21 21:59:13,128 — INFO — layer 0, [2:5567], loss=-19.7632


epoch 2, layer 0:  51%|█████████████████████████████████████▏                                   | 5617/11016 [14:15<13:40,  6.58it/s, loss=-13]

2025-07-21 21:59:20,862 — INFO — layer 0, [2:5617], loss=-14.9990
2025-07-21 21:59:20,863 — INFO — layer 0, [2:5617], loss=-12.2852


epoch 2, layer 0:  51%|████████████████████████████████████▌                                  | 5667/11016 [14:23<13:18,  6.70it/s, loss=-4.88]

2025-07-21 21:59:28,449 — INFO — layer 0, [2:5667], loss=-17.3503
2025-07-21 21:59:28,449 — INFO — layer 0, [2:5667], loss=-28.7167


epoch 2, layer 0:  52%|████████████████████████████████████▊                                  | 5717/11016 [14:31<13:20,  6.62it/s, loss=-10.6]

2025-07-21 21:59:36,088 — INFO — layer 0, [2:5717], loss=-13.1677
2025-07-21 21:59:36,088 — INFO — layer 0, [2:5717], loss=-26.1178


epoch 2, layer 0:  52%|█████████████████████████████████████▏                                 | 5767/11016 [14:38<12:41,  6.89it/s, loss=-15.4]

2025-07-21 21:59:43,615 — INFO — layer 0, [2:5767], loss=-20.0990
2025-07-21 21:59:43,615 — INFO — layer 0, [2:5767], loss=-14.4931


epoch 2, layer 0:  53%|█████████████████████████████████████▍                                 | 5817/11016 [14:46<12:42,  6.82it/s, loss=-13.2]

2025-07-21 21:59:51,336 — INFO — layer 0, [2:5817], loss=-22.3739
2025-07-21 21:59:51,336 — INFO — layer 0, [2:5817], loss=-15.7544


epoch 2, layer 0:  53%|█████████████████████████████████████▊                                 | 5867/11016 [14:54<14:52,  5.77it/s, loss=-14.8]

2025-07-21 21:59:59,267 — INFO — layer 0, [2:5867], loss=-10.7539
2025-07-21 21:59:59,267 — INFO — layer 0, [2:5867], loss=-6.3979


epoch 2, layer 0:  54%|███████████████████████████████████████▏                                 | 5917/11016 [15:01<12:34,  6.75it/s, loss=-20]

2025-07-21 22:00:06,943 — INFO — layer 0, [2:5917], loss=-15.0522
2025-07-21 22:00:06,943 — INFO — layer 0, [2:5917], loss=-16.4806


epoch 2, layer 0:  54%|██████████████████████████████████████▍                                | 5967/11016 [15:09<12:30,  6.73it/s, loss=-8.69]

2025-07-21 22:00:14,586 — INFO — layer 0, [2:5967], loss=-14.9138
2025-07-21 22:00:14,586 — INFO — layer 0, [2:5967], loss=-17.1127


epoch 2, layer 0:  55%|██████████████████████████████████████▊                                | 6017/11016 [15:17<12:30,  6.66it/s, loss=-10.5]

2025-07-21 22:00:22,243 — INFO — layer 0, [2:6017], loss=-10.2374
2025-07-21 22:00:22,243 — INFO — layer 0, [2:6017], loss=-10.3977


epoch 2, layer 0:  55%|███████████████████████████████████████                                | 6067/11016 [15:24<12:22,  6.67it/s, loss=-15.4]

2025-07-21 22:00:29,904 — INFO — layer 0, [2:6067], loss=-12.4104
2025-07-21 22:00:29,904 — INFO — layer 0, [2:6067], loss=-18.2204


epoch 2, layer 0:  56%|███████████████████████████████████████▍                               | 6117/11016 [15:32<12:02,  6.78it/s, loss=-25.6]

2025-07-21 22:00:37,558 — INFO — layer 0, [2:6117], loss=-7.8761
2025-07-21 22:00:37,558 — INFO — layer 0, [2:6117], loss=-13.4528


epoch 2, layer 0:  56%|███████████████████████████████████████▋                               | 6167/11016 [15:40<11:49,  6.84it/s, loss=-22.1]

2025-07-21 22:00:45,110 — INFO — layer 0, [2:6167], loss=-8.0952
2025-07-21 22:00:45,110 — INFO — layer 0, [2:6167], loss=-21.4542


epoch 2, layer 0:  56%|████████████████████████████████████████                               | 6217/11016 [15:47<12:14,  6.54it/s, loss=-3.59]

2025-07-21 22:00:52,742 — INFO — layer 0, [2:6217], loss=-19.3318
2025-07-21 22:00:52,743 — INFO — layer 0, [2:6217], loss=-3.8462


epoch 2, layer 0:  57%|████████████████████████████████████████▍                              | 6267/11016 [15:55<11:59,  6.60it/s, loss=-24.9]

2025-07-21 22:01:00,351 — INFO — layer 0, [2:6267], loss=-28.4465
2025-07-21 22:01:00,351 — INFO — layer 0, [2:6267], loss=-16.1250


epoch 2, layer 0:  57%|████████████████████████████████████████▋                              | 6317/11016 [16:03<11:32,  6.78it/s, loss=-14.8]

2025-07-21 22:01:08,018 — INFO — layer 0, [2:6317], loss=-11.4115
2025-07-21 22:01:08,018 — INFO — layer 0, [2:6317], loss=-8.0501


epoch 2, layer 0:  58%|██████████████████████████████████████████▏                              | 6367/11016 [16:10<11:29,  6.75it/s, loss=-21]

2025-07-21 22:01:15,565 — INFO — layer 0, [2:6367], loss=-22.5726
2025-07-21 22:01:15,565 — INFO — layer 0, [2:6367], loss=-4.9783


epoch 2, layer 0:  58%|█████████████████████████████████████████▎                             | 6417/11016 [16:18<11:26,  6.70it/s, loss=-14.6]

2025-07-21 22:01:23,101 — INFO — layer 0, [2:6417], loss=-20.1560
2025-07-21 22:01:23,101 — INFO — layer 0, [2:6417], loss=-13.9650


epoch 2, layer 0:  59%|█████████████████████████████████████████▋                             | 6467/11016 [16:25<11:36,  6.53it/s, loss=-16.3]

2025-07-21 22:01:30,738 — INFO — layer 0, [2:6467], loss=-8.1360
2025-07-21 22:01:30,738 — INFO — layer 0, [2:6467], loss=-5.3931


epoch 2, layer 0:  59%|██████████████████████████████████████████                             | 6517/11016 [16:33<11:10,  6.71it/s, loss=-3.38]

2025-07-21 22:01:38,465 — INFO — layer 0, [2:6517], loss=-13.7161
2025-07-21 22:01:38,465 — INFO — layer 0, [2:6517], loss=-20.4134


epoch 2, layer 0:  60%|██████████████████████████████████████████▎                            | 6567/11016 [16:41<11:23,  6.51it/s, loss=-14.6]

2025-07-21 22:01:46,136 — INFO — layer 0, [2:6567], loss=-14.3190
2025-07-21 22:01:46,136 — INFO — layer 0, [2:6567], loss=-8.4177


epoch 2, layer 0:  60%|██████████████████████████████████████████▋                            | 6617/11016 [16:48<10:36,  6.91it/s, loss=-18.5]

2025-07-21 22:01:53,688 — INFO — layer 0, [2:6617], loss=-18.5566
2025-07-21 22:01:53,688 — INFO — layer 0, [2:6617], loss=-15.1582


epoch 2, layer 0:  61%|██████████████████████████████████████████▉                            | 6667/11016 [16:56<11:25,  6.35it/s, loss=-21.5]

2025-07-21 22:02:01,387 — INFO — layer 0, [2:6667], loss=-7.9195
2025-07-21 22:02:01,387 — INFO — layer 0, [2:6667], loss=-15.4644


epoch 2, layer 0:  61%|███████████████████████████████████████████▎                           | 6717/11016 [17:04<10:25,  6.87it/s, loss=-15.8]

2025-07-21 22:02:08,979 — INFO — layer 0, [2:6717], loss=-8.0366
2025-07-21 22:02:08,979 — INFO — layer 0, [2:6717], loss=-6.2919


epoch 2, layer 0:  61%|███████████████████████████████████████████▌                           | 6767/11016 [17:11<10:50,  6.53it/s, loss=-14.3]

2025-07-21 22:02:16,525 — INFO — layer 0, [2:6767], loss=-18.5384
2025-07-21 22:02:16,525 — INFO — layer 0, [2:6767], loss=-18.6552


epoch 2, layer 0:  62%|███████████████████████████████████████████▉                           | 6817/11016 [17:19<10:23,  6.73it/s, loss=-17.3]

2025-07-21 22:02:24,196 — INFO — layer 0, [2:6817], loss=-12.4738
2025-07-21 22:02:24,196 — INFO — layer 0, [2:6817], loss=-11.3613


epoch 2, layer 0:  62%|████████████████████████████████████████████▎                          | 6867/11016 [17:26<10:27,  6.61it/s, loss=-15.1]

2025-07-21 22:02:31,798 — INFO — layer 0, [2:6867], loss=-10.9667
2025-07-21 22:02:31,798 — INFO — layer 0, [2:6867], loss=-11.4096


epoch 2, layer 0:  63%|█████████████████████████████████████████████▊                           | 6917/11016 [17:34<10:56,  6.25it/s, loss=-14]

2025-07-21 22:02:39,431 — INFO — layer 0, [2:6917], loss=-21.2508
2025-07-21 22:02:39,431 — INFO — layer 0, [2:6917], loss=-6.3469


epoch 2, layer 0:  63%|████████████████████████████████████████████▉                          | 6967/11016 [17:42<10:35,  6.38it/s, loss=-22.1]

2025-07-21 22:02:47,137 — INFO — layer 0, [2:6967], loss=-15.3256
2025-07-21 22:02:47,137 — INFO — layer 0, [2:6967], loss=-0.1482


epoch 2, layer 0:  64%|█████████████████████████████████████████████▏                         | 7017/11016 [17:49<09:54,  6.73it/s, loss=-22.8]

2025-07-21 22:02:54,646 — INFO — layer 0, [2:7017], loss=-10.7967
2025-07-21 22:02:54,646 — INFO — layer 0, [2:7017], loss=-26.1028


epoch 2, layer 0:  64%|█████████████████████████████████████████████▌                         | 7067/11016 [17:57<09:46,  6.74it/s, loss=-11.3]

2025-07-21 22:03:02,206 — INFO — layer 0, [2:7067], loss=-5.4648
2025-07-21 22:03:02,206 — INFO — layer 0, [2:7067], loss=-7.3417


epoch 2, layer 0:  65%|█████████████████████████████████████████████▊                         | 7117/11016 [18:04<09:45,  6.66it/s, loss=-7.49]

2025-07-21 22:03:09,750 — INFO — layer 0, [2:7117], loss=-15.1613
2025-07-21 22:03:09,750 — INFO — layer 0, [2:7117], loss=-15.4279


epoch 2, layer 0:  65%|██████████████████████████████████████████████▏                        | 7167/11016 [18:12<10:05,  6.35it/s, loss=-13.5]

2025-07-21 22:03:17,424 — INFO — layer 0, [2:7167], loss=-12.0804
2025-07-21 22:03:17,424 — INFO — layer 0, [2:7167], loss=-22.2812


epoch 2, layer 0:  66%|██████████████████████████████████████████████▌                        | 7217/11016 [18:20<10:01,  6.31it/s, loss=-19.2]

2025-07-21 22:03:25,105 — INFO — layer 0, [2:7217], loss=-12.0542
2025-07-21 22:03:25,105 — INFO — layer 0, [2:7217], loss=-12.8347


epoch 2, layer 0:  66%|██████████████████████████████████████████████▊                        | 7267/11016 [18:27<09:38,  6.48it/s, loss=-10.4]

2025-07-21 22:03:32,846 — INFO — layer 0, [2:7267], loss=-20.1818
2025-07-21 22:03:32,846 — INFO — layer 0, [2:7267], loss=-15.6025


epoch 2, layer 0:  66%|███████████████████████████████████████████████▏                       | 7317/11016 [18:35<10:31,  5.86it/s, loss=-17.4]

2025-07-21 22:03:40,607 — INFO — layer 0, [2:7317], loss=-8.0798
2025-07-21 22:03:40,607 — INFO — layer 0, [2:7317], loss=-8.7775


epoch 2, layer 0:  67%|███████████████████████████████████████████████▍                       | 7367/11016 [18:43<09:23,  6.48it/s, loss=-4.42]

2025-07-21 22:03:48,201 — INFO — layer 0, [2:7367], loss=-20.6423
2025-07-21 22:03:48,201 — INFO — layer 0, [2:7367], loss=-6.1126


epoch 2, layer 0:  67%|███████████████████████████████████████████████▊                       | 7417/11016 [18:50<09:09,  6.55it/s, loss=-25.8]

2025-07-21 22:03:55,697 — INFO — layer 0, [2:7417], loss=-8.5925
2025-07-21 22:03:55,697 — INFO — layer 0, [2:7417], loss=-11.3986


epoch 2, layer 0:  68%|████████████████████████████████████████████████▏                      | 7467/11016 [18:58<09:01,  6.56it/s, loss=-12.4]

2025-07-21 22:04:03,471 — INFO — layer 0, [2:7467], loss=-4.6963
2025-07-21 22:04:03,471 — INFO — layer 0, [2:7467], loss=-6.5119


epoch 2, layer 0:  68%|█████████████████████████████████████████████████▊                       | 7517/11016 [19:06<08:52,  6.57it/s, loss=-12]

2025-07-21 22:04:11,076 — INFO — layer 0, [2:7517], loss=-10.5572
2025-07-21 22:04:11,076 — INFO — layer 0, [2:7517], loss=-11.1542


epoch 2, layer 0:  69%|████████████████████████████████████████████████▊                      | 7567/11016 [19:13<08:20,  6.89it/s, loss=-7.65]

2025-07-21 22:04:18,489 — INFO — layer 0, [2:7567], loss=-10.5165
2025-07-21 22:04:18,490 — INFO — layer 0, [2:7567], loss=-8.3116


epoch 2, layer 0:  69%|█████████████████████████████████████████████████                      | 7617/11016 [19:21<08:42,  6.51it/s, loss=-22.4]

2025-07-21 22:04:26,120 — INFO — layer 0, [2:7617], loss=-20.3315
2025-07-21 22:04:26,120 — INFO — layer 0, [2:7617], loss=-16.7576


epoch 2, layer 0:  70%|██████████████████████████████████████████████████▊                      | 7667/11016 [19:28<08:32,  6.53it/s, loss=-12]

2025-07-21 22:04:33,636 — INFO — layer 0, [2:7667], loss=-6.4343
2025-07-21 22:04:33,636 — INFO — layer 0, [2:7667], loss=-12.4433


epoch 2, layer 0:  70%|█████████████████████████████████████████████████▋                     | 7717/11016 [19:36<08:14,  6.68it/s, loss=-26.7]

2025-07-21 22:04:41,370 — INFO — layer 0, [2:7717], loss=-13.0857
2025-07-21 22:04:41,370 — INFO — layer 0, [2:7717], loss=-13.6289


epoch 2, layer 0:  71%|██████████████████████████████████████████████████                     | 7767/11016 [19:43<07:53,  6.85it/s, loss=-14.3]

2025-07-21 22:04:48,880 — INFO — layer 0, [2:7767], loss=-18.6665
2025-07-21 22:04:48,880 — INFO — layer 0, [2:7767], loss=-22.2983


epoch 2, layer 0:  71%|██████████████████████████████████████████████████▍                    | 7817/11016 [19:51<07:36,  7.01it/s, loss=-19.3]

2025-07-21 22:04:56,468 — INFO — layer 0, [2:7817], loss=-12.6911
2025-07-21 22:04:56,468 — INFO — layer 0, [2:7817], loss=-10.6875


epoch 2, layer 0:  71%|██████████████████████████████████████████████████▋                    | 7867/11016 [19:59<08:02,  6.53it/s, loss=-6.43]

2025-07-21 22:05:04,207 — INFO — layer 0, [2:7867], loss=-18.6183
2025-07-21 22:05:04,207 — INFO — layer 0, [2:7867], loss=-19.1351


epoch 2, layer 0:  72%|███████████████████████████████████████████████████                    | 7917/11016 [20:06<07:40,  6.73it/s, loss=-18.3]

2025-07-21 22:05:11,686 — INFO — layer 0, [2:7917], loss=-15.8458
2025-07-21 22:05:11,686 — INFO — layer 0, [2:7917], loss=-14.5569


epoch 2, layer 0:  72%|███████████████████████████████████████████████████▎                   | 7967/11016 [20:14<07:51,  6.47it/s, loss=-11.1]

2025-07-21 22:05:19,379 — INFO — layer 0, [2:7967], loss=-14.3267
2025-07-21 22:05:19,379 — INFO — layer 0, [2:7967], loss=-18.7807


epoch 2, layer 0:  73%|███████████████████████████████████████████████████▋                   | 8017/11016 [20:22<08:51,  5.64it/s, loss=-11.1]

2025-07-21 22:05:26,995 — INFO — layer 0, [2:8017], loss=-19.2320
2025-07-21 22:05:26,995 — INFO — layer 0, [2:8017], loss=-22.1625


epoch 2, layer 0:  73%|███████████████████████████████████████████████████▉                   | 8067/11016 [20:29<07:15,  6.78it/s, loss=-11.5]

2025-07-21 22:05:34,669 — INFO — layer 0, [2:8067], loss=-11.6825
2025-07-21 22:05:34,669 — INFO — layer 0, [2:8067], loss=-15.7810


epoch 2, layer 0:  74%|█████████████████████████████████████████████████████▊                   | 8117/11016 [20:37<07:25,  6.50it/s, loss=-20]

2025-07-21 22:05:42,274 — INFO — layer 0, [2:8117], loss=-14.6405
2025-07-21 22:05:42,274 — INFO — layer 0, [2:8117], loss=-10.5884


epoch 2, layer 0:  74%|████████████████████████████████████████████████████▋                  | 8167/11016 [20:45<07:23,  6.43it/s, loss=-14.4]

2025-07-21 22:05:49,995 — INFO — layer 0, [2:8167], loss=-16.3954
2025-07-21 22:05:49,995 — INFO — layer 0, [2:8167], loss=-11.3699


epoch 2, layer 0:  75%|████████████████████████████████████████████████████▉                  | 8217/11016 [20:52<06:51,  6.80it/s, loss=-21.5]

2025-07-21 22:05:57,480 — INFO — layer 0, [2:8217], loss=-20.0976
2025-07-21 22:05:57,480 — INFO — layer 0, [2:8217], loss=-13.8300


epoch 2, layer 0:  75%|█████████████████████████████████████████████████████▎                 | 8267/11016 [21:00<06:36,  6.93it/s, loss=-6.93]

2025-07-21 22:06:04,952 — INFO — layer 0, [2:8267], loss=-15.7426
2025-07-21 22:06:04,952 — INFO — layer 0, [2:8267], loss=-14.7478


epoch 2, layer 0:  75%|█████████████████████████████████████████████████████▌                 | 8317/11016 [21:07<06:24,  7.03it/s, loss=-16.6]

2025-07-21 22:06:12,407 — INFO — layer 0, [2:8317], loss=-12.4108
2025-07-21 22:06:12,407 — INFO — layer 0, [2:8317], loss=-11.0701


epoch 2, layer 0:  76%|█████████████████████████████████████████████████████▉                 | 8367/11016 [21:15<06:43,  6.57it/s, loss=-3.52]

2025-07-21 22:06:20,012 — INFO — layer 0, [2:8367], loss=-19.7201
2025-07-21 22:06:20,012 — INFO — layer 0, [2:8367], loss=-14.6118


epoch 2, layer 0:  76%|██████████████████████████████████████████████████████▏                | 8417/11016 [21:22<06:46,  6.39it/s, loss=-7.82]

2025-07-21 22:06:27,484 — INFO — layer 0, [2:8417], loss=-19.2907
2025-07-21 22:06:27,484 — INFO — layer 0, [2:8417], loss=-29.8126


epoch 2, layer 0:  77%|██████████████████████████████████████████████████████▌                | 8467/11016 [21:30<06:12,  6.84it/s, loss=-13.5]

2025-07-21 22:06:34,990 — INFO — layer 0, [2:8467], loss=-23.1927
2025-07-21 22:06:34,990 — INFO — layer 0, [2:8467], loss=-13.9263


epoch 2, layer 0:  77%|████████████████████████████████████████████████████████▍                | 8517/11016 [21:37<06:44,  6.17it/s, loss=-15]

2025-07-21 22:06:42,879 — INFO — layer 0, [2:8517], loss=-16.9473
2025-07-21 22:06:42,879 — INFO — layer 0, [2:8517], loss=-7.3553


epoch 2, layer 0:  78%|███████████████████████████████████████████████████████▏               | 8567/11016 [21:45<06:01,  6.78it/s, loss=-13.3]

2025-07-21 22:06:50,475 — INFO — layer 0, [2:8567], loss=-22.7444
2025-07-21 22:06:50,475 — INFO — layer 0, [2:8567], loss=-8.7363


epoch 2, layer 0:  78%|███████████████████████████████████████████████████████▌               | 8617/11016 [21:52<05:47,  6.91it/s, loss=-10.7]

2025-07-21 22:06:57,867 — INFO — layer 0, [2:8617], loss=-20.2830
2025-07-21 22:06:57,867 — INFO — layer 0, [2:8617], loss=-25.0591


epoch 2, layer 0:  79%|███████████████████████████████████████████████████████▊               | 8667/11016 [22:00<05:46,  6.78it/s, loss=-19.1]

2025-07-21 22:07:05,313 — INFO — layer 0, [2:8667], loss=-2.7906
2025-07-21 22:07:05,313 — INFO — layer 0, [2:8667], loss=-19.6713


epoch 2, layer 0:  79%|████████████████████████████████████████████████████████▏              | 8717/11016 [22:08<05:50,  6.55it/s, loss=-18.8]

2025-07-21 22:07:13,029 — INFO — layer 0, [2:8717], loss=-15.7727
2025-07-21 22:07:13,029 — INFO — layer 0, [2:8717], loss=-14.5967


epoch 2, layer 0:  80%|████████████████████████████████████████████████████████▌              | 8767/11016 [22:15<05:37,  6.66it/s, loss=-16.6]

2025-07-21 22:07:20,634 — INFO — layer 0, [2:8767], loss=-11.4597
2025-07-21 22:07:20,634 — INFO — layer 0, [2:8767], loss=-5.6612


epoch 2, layer 0:  80%|████████████████████████████████████████████████████████▊              | 8817/11016 [22:23<05:34,  6.58it/s, loss=-8.47]

2025-07-21 22:07:28,195 — INFO — layer 0, [2:8817], loss=-11.0332
2025-07-21 22:07:28,195 — INFO — layer 0, [2:8817], loss=-18.5053


epoch 2, layer 0:  80%|█████████████████████████████████████████████████████████▏             | 8867/11016 [22:30<05:40,  6.32it/s, loss=-4.26]

2025-07-21 22:07:35,674 — INFO — layer 0, [2:8867], loss=-18.7296
2025-07-21 22:07:35,674 — INFO — layer 0, [2:8867], loss=-14.3486


epoch 2, layer 0:  81%|█████████████████████████████████████████████████████████▍             | 8917/11016 [22:38<05:21,  6.53it/s, loss=-13.9]

2025-07-21 22:07:43,335 — INFO — layer 0, [2:8917], loss=-13.8979
2025-07-21 22:07:43,335 — INFO — layer 0, [2:8917], loss=-19.7260


epoch 2, layer 0:  81%|█████████████████████████████████████████████████████████▊             | 8967/11016 [22:46<05:12,  6.57it/s, loss=-18.8]

2025-07-21 22:07:51,137 — INFO — layer 0, [2:8967], loss=-5.7365
2025-07-21 22:07:51,137 — INFO — layer 0, [2:8967], loss=-14.8779


epoch 2, layer 0:  82%|██████████████████████████████████████████████████████████             | 9017/11016 [22:53<05:06,  6.52it/s, loss=-12.4]

2025-07-21 22:07:58,743 — INFO — layer 0, [2:9017], loss=-10.9643
2025-07-21 22:07:58,743 — INFO — layer 0, [2:9017], loss=-16.0506


epoch 2, layer 0:  82%|██████████████████████████████████████████████████████████▍            | 9067/11016 [23:01<05:00,  6.49it/s, loss=-11.1]

2025-07-21 22:08:06,373 — INFO — layer 0, [2:9067], loss=-18.6733
2025-07-21 22:08:06,373 — INFO — layer 0, [2:9067], loss=-22.5377


epoch 2, layer 0:  83%|██████████████████████████████████████████████████████████▊            | 9117/11016 [23:09<04:45,  6.66it/s, loss=-26.5]

2025-07-21 22:08:14,071 — INFO — layer 0, [2:9117], loss=-22.0094


epoch 2, layer 0:  83%|████████████████████████████████████████████████████████████▍            | 9118/11016 [23:09<05:15,  6.01it/s, loss=-22]

2025-07-21 22:08:14,071 — INFO — layer 0, [2:9117], loss=-18.7413


epoch 2, layer 0:  83%|████████████████████████████████████████████████████████████▋            | 9167/11016 [23:17<04:50,  6.36it/s, loss=-18]

2025-07-21 22:08:22,004 — INFO — layer 0, [2:9167], loss=-17.2236
2025-07-21 22:08:22,004 — INFO — layer 0, [2:9167], loss=-14.6673


epoch 2, layer 0:  84%|███████████████████████████████████████████████████████████▍           | 9217/11016 [23:24<04:38,  6.45it/s, loss=-10.5]

2025-07-21 22:08:29,641 — INFO — layer 0, [2:9217], loss=-6.2538
2025-07-21 22:08:29,641 — INFO — layer 0, [2:9217], loss=-18.8559


epoch 2, layer 0:  84%|███████████████████████████████████████████████████████████▋           | 9267/11016 [23:32<04:23,  6.63it/s, loss=-10.3]

2025-07-21 22:08:37,200 — INFO — layer 0, [2:9267], loss=-26.6555
2025-07-21 22:08:37,200 — INFO — layer 0, [2:9267], loss=-14.8075


epoch 2, layer 0:  85%|████████████████████████████████████████████████████████████           | 9317/11016 [23:39<04:25,  6.39it/s, loss=-4.35]

2025-07-21 22:08:44,843 — INFO — layer 0, [2:9317], loss=-12.1322
2025-07-21 22:08:44,843 — INFO — layer 0, [2:9317], loss=-17.0614


epoch 2, layer 0:  85%|████████████████████████████████████████████████████████████▎          | 9367/11016 [23:47<04:18,  6.38it/s, loss=-15.1]

2025-07-21 22:08:52,407 — INFO — layer 0, [2:9367], loss=-17.5975
2025-07-21 22:08:52,407 — INFO — layer 0, [2:9367], loss=-12.1184


epoch 2, layer 0:  85%|████████████████████████████████████████████████████████████▋          | 9417/11016 [23:55<03:52,  6.88it/s, loss=-11.7]

2025-07-21 22:09:00,023 — INFO — layer 0, [2:9417], loss=-14.3501
2025-07-21 22:09:00,023 — INFO — layer 0, [2:9417], loss=-5.8698


epoch 2, layer 0:  86%|█████████████████████████████████████████████████████████████          | 9467/11016 [24:02<04:08,  6.23it/s, loss=-10.1]

2025-07-21 22:09:07,721 — INFO — layer 0, [2:9467], loss=-11.6783
2025-07-21 22:09:07,721 — INFO — layer 0, [2:9467], loss=-14.6322


epoch 2, layer 0:  86%|██████████████████████████████████████████████████████████████▏         | 9517/11016 [24:10<03:48,  6.55it/s, loss=-7.8]

2025-07-21 22:09:15,313 — INFO — layer 0, [2:9517], loss=-19.9893
2025-07-21 22:09:15,313 — INFO — layer 0, [2:9517], loss=-7.6568


epoch 2, layer 0:  87%|█████████████████████████████████████████████████████████████▋         | 9567/11016 [24:17<03:47,  6.37it/s, loss=-4.33]

2025-07-21 22:09:22,963 — INFO — layer 0, [2:9567], loss=-17.2701
2025-07-21 22:09:22,963 — INFO — layer 0, [2:9567], loss=-22.3252


epoch 2, layer 0:  87%|█████████████████████████████████████████████████████████████▉         | 9617/11016 [24:25<03:28,  6.70it/s, loss=-15.6]

2025-07-21 22:09:30,525 — INFO — layer 0, [2:9617], loss=-6.3503
2025-07-21 22:09:30,525 — INFO — layer 0, [2:9617], loss=-22.7785


epoch 2, layer 0:  88%|██████████████████████████████████████████████████████████████▎        | 9667/11016 [24:33<03:19,  6.76it/s, loss=-23.2]

2025-07-21 22:09:38,184 — INFO — layer 0, [2:9667], loss=-15.4044
2025-07-21 22:09:38,184 — INFO — layer 0, [2:9667], loss=-25.4536


epoch 2, layer 0:  88%|██████████████████████████████████████████████████████████████▋        | 9717/11016 [24:41<03:20,  6.47it/s, loss=-14.2]

2025-07-21 22:09:46,054 — INFO — layer 0, [2:9717], loss=-10.4091
2025-07-21 22:09:46,054 — INFO — layer 0, [2:9717], loss=-21.7903


epoch 2, layer 0:  89%|██████████████████████████████████████████████████████████████▉        | 9767/11016 [24:48<03:04,  6.76it/s, loss=-6.52]

2025-07-21 22:09:53,421 — INFO — layer 0, [2:9767], loss=-10.6986
2025-07-21 22:09:53,421 — INFO — layer 0, [2:9767], loss=-15.4436


epoch 2, layer 0:  89%|███████████████████████████████████████████████████████████████▎       | 9817/11016 [24:56<03:00,  6.63it/s, loss=-14.9]

2025-07-21 22:10:00,955 — INFO — layer 0, [2:9817], loss=-5.6302
2025-07-21 22:10:00,955 — INFO — layer 0, [2:9817], loss=-10.4878


epoch 2, layer 0:  90%|███████████████████████████████████████████████████████████████▌       | 9867/11016 [25:03<02:58,  6.45it/s, loss=-22.9]

2025-07-21 22:10:08,451 — INFO — layer 0, [2:9867], loss=-23.6637
2025-07-21 22:10:08,451 — INFO — layer 0, [2:9867], loss=-14.0260


epoch 2, layer 0:  90%|███████████████████████████████████████████████████████████████▉       | 9917/11016 [25:11<02:44,  6.68it/s, loss=-21.3]

2025-07-21 22:10:16,062 — INFO — layer 0, [2:9917], loss=-9.3407
2025-07-21 22:10:16,062 — INFO — layer 0, [2:9917], loss=-4.3218


epoch 2, layer 0:  90%|████████████████████████████████████████████████████████████████▏      | 9967/11016 [25:18<02:32,  6.87it/s, loss=-12.2]

2025-07-21 22:10:23,894 — INFO — layer 0, [2:9967], loss=-22.4771
2025-07-21 22:10:23,894 — INFO — layer 0, [2:9967], loss=-11.3067


epoch 2, layer 0:  91%|███████████████████████████████████████████████████████████████▋      | 10017/11016 [25:26<02:33,  6.52it/s, loss=-23.7]

2025-07-21 22:10:31,648 — INFO — layer 0, [2:10017], loss=-14.6489
2025-07-21 22:10:31,648 — INFO — layer 0, [2:10017], loss=-18.4891


epoch 2, layer 0:  91%|███████████████████████████████████████████████████████████████▉      | 10067/11016 [25:34<02:19,  6.81it/s, loss=-9.95]

2025-07-21 22:10:39,120 — INFO — layer 0, [2:10067], loss=-11.1587
2025-07-21 22:10:39,120 — INFO — layer 0, [2:10067], loss=-16.3870


epoch 2, layer 0:  92%|████████████████████████████████████████████████████████████████▎     | 10117/11016 [25:41<02:11,  6.81it/s, loss=-13.3]

2025-07-21 22:10:46,745 — INFO — layer 0, [2:10117], loss=-15.0584
2025-07-21 22:10:46,746 — INFO — layer 0, [2:10117], loss=-4.2880


epoch 2, layer 0:  92%|████████████████████████████████████████████████████████████████▌     | 10167/11016 [25:49<02:13,  6.34it/s, loss=-10.8]

2025-07-21 22:10:54,354 — INFO — layer 0, [2:10167], loss=-19.8363
2025-07-21 22:10:54,354 — INFO — layer 0, [2:10167], loss=-25.0334


epoch 2, layer 0:  93%|████████████████████████████████████████████████████████████████▉     | 10217/11016 [25:57<01:58,  6.72it/s, loss=-11.4]

2025-07-21 22:11:01,987 — INFO — layer 0, [2:10217], loss=-3.7585
2025-07-21 22:11:01,987 — INFO — layer 0, [2:10217], loss=-11.3411


epoch 2, layer 0:  93%|█████████████████████████████████████████████████████████████████▏    | 10267/11016 [26:04<01:47,  6.97it/s, loss=-8.15]

2025-07-21 22:11:09,502 — INFO — layer 0, [2:10267], loss=-13.3275
2025-07-21 22:11:09,502 — INFO — layer 0, [2:10267], loss=-14.1255


epoch 2, layer 0:  94%|█████████████████████████████████████████████████████████████████▌    | 10317/11016 [26:12<01:43,  6.78it/s, loss=-12.6]

2025-07-21 22:11:17,012 — INFO — layer 0, [2:10317], loss=-2.4472
2025-07-21 22:11:17,012 — INFO — layer 0, [2:10317], loss=-14.3127


epoch 2, layer 0:  94%|█████████████████████████████████████████████████████████████████▉    | 10367/11016 [26:19<01:36,  6.73it/s, loss=-4.39]

2025-07-21 22:11:24,636 — INFO — layer 0, [2:10367], loss=-10.8267
2025-07-21 22:11:24,636 — INFO — layer 0, [2:10367], loss=-22.6557


epoch 2, layer 0:  95%|██████████████████████████████████████████████████████████████████▏   | 10417/11016 [26:27<01:27,  6.83it/s, loss=-24.9]

2025-07-21 22:11:32,135 — INFO — layer 0, [2:10417], loss=-13.7225
2025-07-21 22:11:32,135 — INFO — layer 0, [2:10417], loss=-6.8031


epoch 2, layer 0:  95%|██████████████████████████████████████████████████████████████████▌   | 10467/11016 [26:34<01:24,  6.46it/s, loss=-26.1]

2025-07-21 22:11:39,867 — INFO — layer 0, [2:10467], loss=-29.7311
2025-07-21 22:11:39,867 — INFO — layer 0, [2:10467], loss=-17.2510


epoch 2, layer 0:  95%|██████████████████████████████████████████████████████████████████▊   | 10517/11016 [26:42<01:13,  6.76it/s, loss=-11.5]

2025-07-21 22:11:47,598 — INFO — layer 0, [2:10517], loss=-11.5734
2025-07-21 22:11:47,598 — INFO — layer 0, [2:10517], loss=-21.8053


epoch 2, layer 0:  96%|███████████████████████████████████████████████████████████████████▏  | 10567/11016 [26:50<01:11,  6.25it/s, loss=-6.38]

2025-07-21 22:11:55,313 — INFO — layer 0, [2:10567], loss=-14.1880
2025-07-21 22:11:55,313 — INFO — layer 0, [2:10567], loss=-24.0487


epoch 2, layer 0:  96%|███████████████████████████████████████████████████████████████████▍  | 10617/11016 [26:57<00:57,  6.93it/s, loss=-14.7]

2025-07-21 22:12:02,791 — INFO — layer 0, [2:10617], loss=-6.1857
2025-07-21 22:12:02,791 — INFO — layer 0, [2:10617], loss=-26.1939


epoch 2, layer 0:  97%|███████████████████████████████████████████████████████████████████▊  | 10667/11016 [27:05<00:51,  6.79it/s, loss=-22.6]

2025-07-21 22:12:10,252 — INFO — layer 0, [2:10667], loss=-8.0523
2025-07-21 22:12:10,252 — INFO — layer 0, [2:10667], loss=-13.0301


epoch 2, layer 0:  97%|████████████████████████████████████████████████████████████████████  | 10717/11016 [27:12<00:45,  6.56it/s, loss=-2.51]

2025-07-21 22:12:17,883 — INFO — layer 0, [2:10717], loss=-13.5245
2025-07-21 22:12:17,883 — INFO — layer 0, [2:10717], loss=-14.5888


epoch 2, layer 0:  98%|████████████████████████████████████████████████████████████████████▍ | 10767/11016 [27:20<00:36,  6.75it/s, loss=-8.26]

2025-07-21 22:12:25,491 — INFO — layer 0, [2:10767], loss=-15.8835
2025-07-21 22:12:25,491 — INFO — layer 0, [2:10767], loss=-22.5683


epoch 2, layer 0:  98%|████████████████████████████████████████████████████████████████████▋ | 10817/11016 [27:28<00:27,  7.14it/s, loss=-17.2]

2025-07-21 22:12:33,139 — INFO — layer 0, [2:10817], loss=-4.4226
2025-07-21 22:12:33,139 — INFO — layer 0, [2:10817], loss=-20.2020


epoch 2, layer 0:  99%|███████████████████████████████████████████████████████████████████████ | 10867/11016 [27:35<00:22,  6.55it/s, loss=-12]

2025-07-21 22:12:40,643 — INFO — layer 0, [2:10867], loss=-18.8873
2025-07-21 22:12:40,643 — INFO — layer 0, [2:10867], loss=-15.1437


epoch 2, layer 0:  99%|█████████████████████████████████████████████████████████████████████▎| 10917/11016 [27:43<00:15,  6.35it/s, loss=-7.78]

2025-07-21 22:12:48,249 — INFO — layer 0, [2:10917], loss=-21.5967
2025-07-21 22:12:48,249 — INFO — layer 0, [2:10917], loss=-7.8243


epoch 2, layer 0: 100%|█████████████████████████████████████████████████████████████████████▋| 10967/11016 [27:51<00:07,  6.77it/s, loss=-13.7]

2025-07-21 22:12:56,000 — INFO — layer 0, [2:10967], loss=-11.1292
2025-07-21 22:12:56,000 — INFO — layer 0, [2:10967], loss=-23.4974


                                                                                                                                               

2025-07-21 22:13:03,412 — INFO — Training decomposer for layer 1
2025-07-21 22:13:03,503 — INFO — Checkpoint for layer 0 saved to checkpoints/decomposer_simple/decomposer_layer0_20250721_204859_8666ba39-8c81-4293-8a3f-714efcc5f6f9
2025-07-21 22:13:03,505 — INFO — Training decomposer for layer 1


epoch 0, layer 1:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 22:13:11,516 — INFO — layer 1, [0:49], loss=18.7416
2025-07-21 22:13:11,516 — INFO — layer 1, [0:49], loss=18.1663


epoch 0, layer 1:   1%|▋                                                                         | 99/11016 [00:15<28:18,  6.43it/s, loss=17.6]

2025-07-21 22:13:19,193 — INFO — layer 1, [0:99], loss=15.7219
2025-07-21 22:13:19,193 — INFO — layer 1, [0:99], loss=15.7000


epoch 0, layer 1:   1%|▉                                                                        | 149/11016 [00:23<30:31,  5.93it/s, loss=16.4]

2025-07-21 22:13:26,936 — INFO — layer 1, [0:149], loss=12.7501
2025-07-21 22:13:26,936 — INFO — layer 1, [0:149], loss=12.2752


epoch 0, layer 1:   2%|█▎                                                                        | 199/11016 [00:30<26:51,  6.71it/s, loss=5.7]

2025-07-21 22:13:34,592 — INFO — layer 1, [0:199], loss=14.6453
2025-07-21 22:13:34,592 — INFO — layer 1, [0:199], loss=11.4866


epoch 0, layer 1:   2%|█▋                                                                       | 249/11016 [00:38<27:10,  6.60it/s, loss=13.4]

2025-07-21 22:13:42,261 — INFO — layer 1, [0:249], loss=9.5743
2025-07-21 22:13:42,261 — INFO — layer 1, [0:249], loss=9.3018


epoch 0, layer 1:   3%|█▉                                                                       | 299/11016 [00:46<28:03,  6.37it/s, loss=11.3]

2025-07-21 22:13:49,909 — INFO — layer 1, [0:299], loss=9.7705
2025-07-21 22:13:49,909 — INFO — layer 1, [0:299], loss=10.5445


epoch 0, layer 1:   3%|██▎                                                                       | 349/11016 [00:53<26:11,  6.79it/s, loss=5.9]

2025-07-21 22:13:57,516 — INFO — layer 1, [0:349], loss=8.8799
2025-07-21 22:13:57,516 — INFO — layer 1, [0:349], loss=8.9292


epoch 0, layer 1:   4%|██▋                                                                      | 399/11016 [01:01<27:16,  6.49it/s, loss=10.3]

2025-07-21 22:14:05,133 — INFO — layer 1, [0:399], loss=11.1546
2025-07-21 22:14:05,133 — INFO — layer 1, [0:399], loss=11.5997


epoch 0, layer 1:   4%|██▉                                                                      | 449/11016 [01:08<27:10,  6.48it/s, loss=4.12]

2025-07-21 22:14:12,606 — INFO — layer 1, [0:449], loss=10.8159
2025-07-21 22:14:12,606 — INFO — layer 1, [0:449], loss=13.6250


epoch 0, layer 1:   5%|███▎                                                                     | 499/11016 [01:16<25:33,  6.86it/s, loss=12.6]

2025-07-21 22:14:20,245 — INFO — layer 1, [0:499], loss=11.2569
2025-07-21 22:14:20,245 — INFO — layer 1, [0:499], loss=11.2028


epoch 0, layer 1:   5%|███▋                                                                     | 549/11016 [01:24<27:50,  6.27it/s, loss=6.33]

2025-07-21 22:14:28,043 — INFO — layer 1, [0:549], loss=7.3370
2025-07-21 22:14:28,043 — INFO — layer 1, [0:549], loss=1.6059


epoch 0, layer 1:   5%|███▉                                                                     | 599/11016 [01:31<26:25,  6.57it/s, loss=6.05]

2025-07-21 22:14:35,596 — INFO — layer 1, [0:599], loss=8.1427
2025-07-21 22:14:35,596 — INFO — layer 1, [0:599], loss=8.4082


epoch 0, layer 1:   6%|████▎                                                                    | 649/11016 [01:39<25:57,  6.65it/s, loss=7.37]

2025-07-21 22:14:43,245 — INFO — layer 1, [0:649], loss=12.9186
2025-07-21 22:14:43,245 — INFO — layer 1, [0:649], loss=10.4060


epoch 0, layer 1:   6%|████▊                                                                      | 699/11016 [01:47<25:38,  6.71it/s, loss=12]

2025-07-21 22:14:50,859 — INFO — layer 1, [0:699], loss=5.4449
2025-07-21 22:14:50,859 — INFO — layer 1, [0:699], loss=11.2544


epoch 0, layer 1:   7%|█████                                                                      | 749/11016 [01:54<28:43,  5.96it/s, loss=12]

2025-07-21 22:14:58,576 — INFO — layer 1, [0:749], loss=10.9179
2025-07-21 22:14:58,576 — INFO — layer 1, [0:749], loss=11.3974


epoch 0, layer 1:   7%|█████▎                                                                   | 799/11016 [02:02<27:15,  6.25it/s, loss=4.52]

2025-07-21 22:15:06,138 — INFO — layer 1, [0:799], loss=7.7350
2025-07-21 22:15:06,138 — INFO — layer 1, [0:799], loss=11.6587


epoch 0, layer 1:   8%|█████▋                                                                   | 849/11016 [02:09<26:08,  6.48it/s, loss=4.08]

2025-07-21 22:15:13,766 — INFO — layer 1, [0:849], loss=5.1289
2025-07-21 22:15:13,766 — INFO — layer 1, [0:849], loss=3.9130


epoch 0, layer 1:   8%|█████▉                                                                   | 899/11016 [02:17<25:55,  6.50it/s, loss=1.84]

2025-07-21 22:15:21,413 — INFO — layer 1, [0:899], loss=4.8285
2025-07-21 22:15:21,413 — INFO — layer 1, [0:899], loss=1.1724


epoch 0, layer 1:   9%|██████▎                                                                  | 949/11016 [02:25<26:32,  6.32it/s, loss=10.9]

2025-07-21 22:15:29,037 — INFO — layer 1, [0:949], loss=9.8209
2025-07-21 22:15:29,037 — INFO — layer 1, [0:949], loss=6.4557


epoch 0, layer 1:   9%|██████▌                                                                  | 999/11016 [02:32<26:05,  6.40it/s, loss=1.47]

2025-07-21 22:15:36,759 — INFO — layer 1, [0:999], loss=2.1704
2025-07-21 22:15:36,759 — INFO — layer 1, [0:999], loss=9.9508


epoch 0, layer 1:  10%|██████▉                                                                  | 1049/11016 [02:40<25:38,  6.48it/s, loss=9.2]

2025-07-21 22:15:44,306 — INFO — layer 1, [0:1049], loss=6.4471
2025-07-21 22:15:44,306 — INFO — layer 1, [0:1049], loss=5.9985


epoch 0, layer 1:  10%|███████▏                                                                | 1099/11016 [02:48<25:21,  6.52it/s, loss=8.21]

2025-07-21 22:15:51,853 — INFO — layer 1, [0:1099], loss=7.8003
2025-07-21 22:15:51,853 — INFO — layer 1, [0:1099], loss=9.6364


epoch 0, layer 1:  10%|███████▌                                                                | 1149/11016 [02:55<24:38,  6.68it/s, loss=3.06]

2025-07-21 22:15:59,723 — INFO — layer 1, [0:1149], loss=-2.2660
2025-07-21 22:15:59,723 — INFO — layer 1, [0:1149], loss=9.3637


epoch 0, layer 1:  11%|███████▊                                                                | 1199/11016 [03:03<25:54,  6.31it/s, loss=7.86]

2025-07-21 22:16:07,456 — INFO — layer 1, [0:1199], loss=5.2560
2025-07-21 22:16:07,456 — INFO — layer 1, [0:1199], loss=8.7229


epoch 0, layer 1:  11%|████████▏                                                               | 1249/11016 [03:11<24:06,  6.75it/s, loss=4.55]

2025-07-21 22:16:15,003 — INFO — layer 1, [0:1249], loss=-4.0137
2025-07-21 22:16:15,003 — INFO — layer 1, [0:1249], loss=9.0206


epoch 0, layer 1:  12%|████████▍                                                               | 1299/11016 [03:18<23:53,  6.78it/s, loss=2.35]

2025-07-21 22:16:22,585 — INFO — layer 1, [0:1299], loss=1.6162
2025-07-21 22:16:22,585 — INFO — layer 1, [0:1299], loss=4.7840


epoch 0, layer 1:  12%|████████▊                                                               | 1349/11016 [03:26<24:23,  6.60it/s, loss=7.13]

2025-07-21 22:16:30,166 — INFO — layer 1, [0:1349], loss=6.3473
2025-07-21 22:16:30,166 — INFO — layer 1, [0:1349], loss=0.7678


epoch 0, layer 1:  13%|█████████▏                                                              | 1399/11016 [03:34<27:18,  5.87it/s, loss=8.26]

2025-07-21 22:16:37,861 — INFO — layer 1, [0:1399], loss=6.8266
2025-07-21 22:16:37,861 — INFO — layer 1, [0:1399], loss=6.2098


epoch 0, layer 1:  13%|█████████▍                                                              | 1449/11016 [03:41<24:11,  6.59it/s, loss=5.27]

2025-07-21 22:16:45,495 — INFO — layer 1, [0:1449], loss=0.7858
2025-07-21 22:16:45,495 — INFO — layer 1, [0:1449], loss=3.4259


epoch 0, layer 1:  14%|█████████▊                                                              | 1499/11016 [03:49<24:16,  6.53it/s, loss=1.33]

2025-07-21 22:16:52,985 — INFO — layer 1, [0:1499], loss=2.0753
2025-07-21 22:16:52,985 — INFO — layer 1, [0:1499], loss=-3.1207


epoch 0, layer 1:  14%|██████████                                                              | 1549/11016 [03:56<25:16,  6.24it/s, loss=4.39]

2025-07-21 22:17:00,585 — INFO — layer 1, [0:1549], loss=2.3797
2025-07-21 22:17:00,585 — INFO — layer 1, [0:1549], loss=-0.5489


epoch 0, layer 1:  15%|██████████▎                                                            | 1599/11016 [04:04<22:35,  6.94it/s, loss=0.851]

2025-07-21 22:17:08,128 — INFO — layer 1, [0:1599], loss=4.5299
2025-07-21 22:17:08,129 — INFO — layer 1, [0:1599], loss=-4.1100


epoch 0, layer 1:  15%|██████████▋                                                            | 1649/11016 [04:12<23:53,  6.53it/s, loss=0.621]

2025-07-21 22:17:15,841 — INFO — layer 1, [0:1649], loss=6.8572
2025-07-21 22:17:15,841 — INFO — layer 1, [0:1649], loss=0.1687


epoch 0, layer 1:  15%|███████████                                                             | 1699/11016 [04:19<24:38,  6.30it/s, loss=1.31]

2025-07-21 22:17:23,378 — INFO — layer 1, [0:1699], loss=5.5180
2025-07-21 22:17:23,378 — INFO — layer 1, [0:1699], loss=-5.6098


epoch 0, layer 1:  16%|███████████▎                                                           | 1749/11016 [04:27<24:48,  6.23it/s, loss=0.873]

2025-07-21 22:17:31,003 — INFO — layer 1, [0:1749], loss=0.4730
2025-07-21 22:17:31,003 — INFO — layer 1, [0:1749], loss=8.5058


epoch 0, layer 1:  16%|███████████▊                                                            | 1799/11016 [04:34<23:55,  6.42it/s, loss=7.14]

2025-07-21 22:17:38,601 — INFO — layer 1, [0:1799], loss=0.9473
2025-07-21 22:17:38,601 — INFO — layer 1, [0:1799], loss=-2.0071


epoch 0, layer 1:  17%|████████████                                                            | 1849/11016 [04:42<22:37,  6.75it/s, loss=1.43]

2025-07-21 22:17:46,108 — INFO — layer 1, [0:1849], loss=0.0829
2025-07-21 22:17:46,108 — INFO — layer 1, [0:1849], loss=4.5375


epoch 0, layer 1:  17%|████████████                                                          | 1899/11016 [04:49<24:03,  6.32it/s, loss=-0.827]

2025-07-21 22:17:53,749 — INFO — layer 1, [0:1899], loss=5.1442
2025-07-21 22:17:53,749 — INFO — layer 1, [0:1899], loss=0.3892


epoch 0, layer 1:  18%|████████████▋                                                           | 1949/11016 [04:57<23:50,  6.34it/s, loss=8.01]

2025-07-21 22:18:01,362 — INFO — layer 1, [0:1949], loss=-0.3117
2025-07-21 22:18:01,362 — INFO — layer 1, [0:1949], loss=-6.0722


epoch 0, layer 1:  18%|████████████▉                                                          | 1999/11016 [05:05<22:13,  6.76it/s, loss=-9.31]

2025-07-21 22:18:08,929 — INFO — layer 1, [0:1999], loss=3.1804
2025-07-21 22:18:08,929 — INFO — layer 1, [0:1999], loss=8.2048


epoch 0, layer 1:  19%|█████████████▍                                                          | 2049/11016 [05:12<21:37,  6.91it/s, loss=8.72]

2025-07-21 22:18:16,385 — INFO — layer 1, [0:2049], loss=-0.2989
2025-07-21 22:18:16,385 — INFO — layer 1, [0:2049], loss=1.4312


epoch 0, layer 1:  19%|█████████████▌                                                         | 2099/11016 [05:20<22:18,  6.66it/s, loss=-2.59]

2025-07-21 22:18:23,978 — INFO — layer 1, [0:2099], loss=-3.8713
2025-07-21 22:18:23,978 — INFO — layer 1, [0:2099], loss=3.3517


epoch 0, layer 1:  20%|██████████████                                                          | 2149/11016 [05:27<23:54,  6.18it/s, loss=2.61]

2025-07-21 22:18:31,604 — INFO — layer 1, [0:2149], loss=4.3961
2025-07-21 22:18:31,604 — INFO — layer 1, [0:2149], loss=0.0345


epoch 0, layer 1:  20%|██████████████▎                                                         | 2199/11016 [05:35<22:15,  6.60it/s, loss=3.24]

2025-07-21 22:18:39,210 — INFO — layer 1, [0:2199], loss=-6.1302


epoch 0, layer 1:  20%|██████████████▏                                                        | 2200/11016 [05:35<21:50,  6.73it/s, loss=-6.13]

2025-07-21 22:18:39,212 — INFO — layer 1, [0:2199], loss=-3.9824


epoch 0, layer 1:  20%|██████████████▋                                                         | 2249/11016 [05:43<21:55,  6.66it/s, loss=0.13]

2025-07-21 22:18:46,824 — INFO — layer 1, [0:2249], loss=4.7749
2025-07-21 22:18:46,824 — INFO — layer 1, [0:2249], loss=-4.7162


epoch 0, layer 1:  21%|██████████████▊                                                        | 2299/11016 [05:50<22:31,  6.45it/s, loss=-7.82]

2025-07-21 22:18:54,396 — INFO — layer 1, [0:2299], loss=0.7030
2025-07-21 22:18:54,396 — INFO — layer 1, [0:2299], loss=-1.6864


epoch 0, layer 1:  21%|██████████████▉                                                       | 2349/11016 [05:58<21:55,  6.59it/s, loss=-0.804]

2025-07-21 22:19:02,108 — INFO — layer 1, [0:2349], loss=-8.2223
2025-07-21 22:19:02,108 — INFO — layer 1, [0:2349], loss=3.0610


epoch 0, layer 1:  22%|███████████████▋                                                        | 2399/11016 [06:05<20:24,  7.04it/s, loss=2.53]

2025-07-21 22:19:09,631 — INFO — layer 1, [0:2399], loss=-4.4597
2025-07-21 22:19:09,631 — INFO — layer 1, [0:2399], loss=3.4007


epoch 0, layer 1:  22%|███████████████▊                                                       | 2449/11016 [06:13<21:50,  6.54it/s, loss=-5.52]

2025-07-21 22:19:17,352 — INFO — layer 1, [0:2449], loss=8.5405
2025-07-21 22:19:17,352 — INFO — layer 1, [0:2449], loss=1.5850


epoch 0, layer 1:  23%|████████████████                                                       | 2499/11016 [06:21<20:49,  6.82it/s, loss=-12.3]

2025-07-21 22:19:24,977 — INFO — layer 1, [0:2499], loss=1.2851
2025-07-21 22:19:24,977 — INFO — layer 1, [0:2499], loss=1.3032


epoch 0, layer 1:  23%|████████████████▋                                                       | 2549/11016 [06:28<21:39,  6.52it/s, loss=6.77]

2025-07-21 22:19:32,636 — INFO — layer 1, [0:2549], loss=0.8691
2025-07-21 22:19:32,636 — INFO — layer 1, [0:2549], loss=-8.6733


epoch 0, layer 1:  24%|████████████████▉                                                       | 2599/11016 [06:36<20:45,  6.76it/s, loss=-3.2]

2025-07-21 22:19:40,387 — INFO — layer 1, [0:2599], loss=2.9917
2025-07-21 22:19:40,387 — INFO — layer 1, [0:2599], loss=-11.6194


epoch 0, layer 1:  24%|█████████████████                                                      | 2649/11016 [06:44<20:55,  6.66it/s, loss=-4.46]

2025-07-21 22:19:47,920 — INFO — layer 1, [0:2649], loss=-2.1647
2025-07-21 22:19:47,920 — INFO — layer 1, [0:2649], loss=-0.3826


epoch 0, layer 1:  25%|█████████████████▏                                                    | 2699/11016 [06:51<20:59,  6.60it/s, loss=-0.345]

2025-07-21 22:19:55,658 — INFO — layer 1, [0:2699], loss=0.6460
2025-07-21 22:19:55,659 — INFO — layer 1, [0:2699], loss=-6.4621


epoch 0, layer 1:  25%|█████████████████▋                                                     | 2749/11016 [06:59<20:52,  6.60it/s, loss=-11.8]

2025-07-21 22:20:03,427 — INFO — layer 1, [0:2749], loss=-1.2194
2025-07-21 22:20:03,427 — INFO — layer 1, [0:2749], loss=6.8406


epoch 0, layer 1:  25%|█████████████████▊                                                    | 2799/11016 [07:07<20:46,  6.59it/s, loss=0.0703]

2025-07-21 22:20:11,012 — INFO — layer 1, [0:2799], loss=4.2163
2025-07-21 22:20:11,012 — INFO — layer 1, [0:2799], loss=2.8183


epoch 0, layer 1:  26%|██████████████████▎                                                    | 2849/11016 [07:14<20:20,  6.69it/s, loss=-1.43]

2025-07-21 22:20:18,682 — INFO — layer 1, [0:2849], loss=2.8676
2025-07-21 22:20:18,682 — INFO — layer 1, [0:2849], loss=0.3158


epoch 0, layer 1:  26%|██████████████████▋                                                    | 2899/11016 [07:22<19:23,  6.98it/s, loss=-5.82]

2025-07-21 22:20:26,212 — INFO — layer 1, [0:2899], loss=1.1248
2025-07-21 22:20:26,212 — INFO — layer 1, [0:2899], loss=4.3083


epoch 0, layer 1:  27%|███████████████████                                                    | 2949/11016 [07:30<20:59,  6.40it/s, loss=-2.88]

2025-07-21 22:20:34,049 — INFO — layer 1, [0:2949], loss=4.6698
2025-07-21 22:20:34,049 — INFO — layer 1, [0:2949], loss=-0.1321


epoch 0, layer 1:  27%|███████████████████▎                                                   | 2999/11016 [07:37<22:26,  5.95it/s, loss=0.983]

2025-07-21 22:20:41,750 — INFO — layer 1, [0:2999], loss=-4.7731
2025-07-21 22:20:41,750 — INFO — layer 1, [0:2999], loss=-3.7894


epoch 0, layer 1:  28%|███████████████████▉                                                    | 3049/11016 [07:45<20:10,  6.58it/s, loss=4.09]

2025-07-21 22:20:49,424 — INFO — layer 1, [0:3049], loss=0.8469
2025-07-21 22:20:49,424 — INFO — layer 1, [0:3049], loss=-8.2545


epoch 0, layer 1:  28%|████████████████████▎                                                   | 3099/11016 [07:53<19:38,  6.72it/s, loss=1.44]

2025-07-21 22:20:57,178 — INFO — layer 1, [0:3099], loss=-12.1258
2025-07-21 22:20:57,178 — INFO — layer 1, [0:3099], loss=-3.2597


epoch 0, layer 1:  29%|████████████████████▌                                                   | 3149/11016 [08:00<20:09,  6.51it/s, loss=3.63]

2025-07-21 22:21:04,579 — INFO — layer 1, [0:3149], loss=-5.5529
2025-07-21 22:21:04,579 — INFO — layer 1, [0:3149], loss=1.4094


epoch 0, layer 1:  29%|████████████████████▉                                                   | 3199/11016 [08:08<19:42,  6.61it/s, loss=0.19]

2025-07-21 22:21:12,228 — INFO — layer 1, [0:3199], loss=3.1416
2025-07-21 22:21:12,228 — INFO — layer 1, [0:3199], loss=-6.3059


epoch 0, layer 1:  29%|████████████████████▉                                                  | 3249/11016 [08:16<18:44,  6.91it/s, loss=-2.09]

2025-07-21 22:21:19,828 — INFO — layer 1, [0:3249], loss=-1.9541
2025-07-21 22:21:19,828 — INFO — layer 1, [0:3249], loss=-7.1142


epoch 0, layer 1:  30%|█████████████████████▌                                                  | 3299/11016 [08:23<19:19,  6.66it/s, loss=1.13]

2025-07-21 22:21:27,323 — INFO — layer 1, [0:3299], loss=-1.9259
2025-07-21 22:21:27,323 — INFO — layer 1, [0:3299], loss=-3.3459


epoch 0, layer 1:  30%|█████████████████████▌                                                 | 3349/11016 [08:31<19:16,  6.63it/s, loss=-9.56]

2025-07-21 22:21:35,015 — INFO — layer 1, [0:3349], loss=0.0972
2025-07-21 22:21:35,015 — INFO — layer 1, [0:3349], loss=-1.4947


epoch 0, layer 1:  31%|█████████████████████▉                                                 | 3399/11016 [08:38<19:06,  6.65it/s, loss=-3.95]

2025-07-21 22:21:42,581 — INFO — layer 1, [0:3399], loss=-4.9876
2025-07-21 22:21:42,581 — INFO — layer 1, [0:3399], loss=-4.4797


epoch 0, layer 1:  31%|██████████████████████▌                                                 | 3449/11016 [08:46<19:20,  6.52it/s, loss=2.26]

2025-07-21 22:21:50,165 — INFO — layer 1, [0:3449], loss=-6.8771
2025-07-21 22:21:50,165 — INFO — layer 1, [0:3449], loss=-2.5885


epoch 0, layer 1:  32%|███████████████████████▊                                                   | 3499/11016 [08:53<18:07,  6.91it/s, loss=2]

2025-07-21 22:21:57,729 — INFO — layer 1, [0:3499], loss=-7.3666
2025-07-21 22:21:57,729 — INFO — layer 1, [0:3499], loss=1.3990


epoch 0, layer 1:  32%|██████████████████████▊                                                | 3549/11016 [09:01<18:34,  6.70it/s, loss=-9.09]

2025-07-21 22:22:05,415 — INFO — layer 1, [0:3549], loss=0.9032
2025-07-21 22:22:05,415 — INFO — layer 1, [0:3549], loss=-13.4641


epoch 0, layer 1:  33%|███████████████████████▏                                               | 3599/11016 [09:09<18:41,  6.62it/s, loss=-13.1]

2025-07-21 22:22:13,017 — INFO — layer 1, [0:3599], loss=-10.5454
2025-07-21 22:22:13,017 — INFO — layer 1, [0:3599], loss=1.0267


epoch 0, layer 1:  33%|███████████████████████▌                                               | 3649/11016 [09:16<18:39,  6.58it/s, loss=-3.85]

2025-07-21 22:22:20,628 — INFO — layer 1, [0:3649], loss=-1.9541
2025-07-21 22:22:20,628 — INFO — layer 1, [0:3649], loss=-0.9520


epoch 0, layer 1:  34%|████████████████████████▌                                                | 3699/11016 [09:24<21:22,  5.71it/s, loss=1.3]

2025-07-21 22:22:28,346 — INFO — layer 1, [0:3699], loss=-0.0896
2025-07-21 22:22:28,346 — INFO — layer 1, [0:3699], loss=3.4811


epoch 0, layer 1:  34%|████████████████████████▌                                               | 3749/11016 [09:32<19:28,  6.22it/s, loss=3.09]

2025-07-21 22:22:35,904 — INFO — layer 1, [0:3749], loss=-7.4897
2025-07-21 22:22:35,904 — INFO — layer 1, [0:3749], loss=0.5949


epoch 0, layer 1:  34%|████████████████████████▍                                              | 3799/11016 [09:39<17:21,  6.93it/s, loss=-1.78]

2025-07-21 22:22:43,412 — INFO — layer 1, [0:3799], loss=-0.4459
2025-07-21 22:22:43,412 — INFO — layer 1, [0:3799], loss=-0.0755


epoch 0, layer 1:  35%|████████████████████████▊                                              | 3849/11016 [09:47<18:02,  6.62it/s, loss=-6.21]

2025-07-21 22:22:51,129 — INFO — layer 1, [0:3849], loss=0.5332
2025-07-21 22:22:51,129 — INFO — layer 1, [0:3849], loss=-7.0509


epoch 0, layer 1:  35%|█████████████████████████▏                                             | 3899/11016 [09:54<18:29,  6.42it/s, loss=-5.62]

2025-07-21 22:22:58,621 — INFO — layer 1, [0:3899], loss=-1.1676
2025-07-21 22:22:58,621 — INFO — layer 1, [0:3899], loss=-13.5449


epoch 0, layer 1:  36%|█████████████████████████                                             | 3949/11016 [10:02<18:01,  6.54it/s, loss=-0.357]

2025-07-21 22:23:06,148 — INFO — layer 1, [0:3949], loss=7.1321
2025-07-21 22:23:06,148 — INFO — layer 1, [0:3949], loss=-0.3133


epoch 0, layer 1:  36%|█████████████████████████▊                                             | 3999/11016 [10:09<16:50,  6.95it/s, loss=-10.9]

2025-07-21 22:23:13,731 — INFO — layer 1, [0:3999], loss=-8.8632
2025-07-21 22:23:13,731 — INFO — layer 1, [0:3999], loss=-16.1852


epoch 0, layer 1:  37%|██████████████████████████                                             | 4049/11016 [10:17<19:30,  5.95it/s, loss=-2.82]

2025-07-21 22:23:21,609 — INFO — layer 1, [0:4049], loss=-6.2031
2025-07-21 22:23:21,609 — INFO — layer 1, [0:4049], loss=-4.0358


epoch 0, layer 1:  37%|██████████████████████████▍                                            | 4099/11016 [10:25<16:04,  7.17it/s, loss=-6.12]

2025-07-21 22:23:29,088 — INFO — layer 1, [0:4099], loss=0.6195
2025-07-21 22:23:29,088 — INFO — layer 1, [0:4099], loss=-1.3503


epoch 0, layer 1:  38%|██████████████████████████▋                                            | 4149/11016 [10:32<16:32,  6.92it/s, loss=0.121]

2025-07-21 22:23:36,766 — INFO — layer 1, [0:4149], loss=-0.0828
2025-07-21 22:23:36,766 — INFO — layer 1, [0:4149], loss=2.7474


epoch 0, layer 1:  38%|███████████████████████████                                            | 4199/11016 [10:40<17:33,  6.47it/s, loss=-10.8]

2025-07-21 22:23:44,542 — INFO — layer 1, [0:4199], loss=-9.4895
2025-07-21 22:23:44,542 — INFO — layer 1, [0:4199], loss=-2.3843


epoch 0, layer 1:  39%|███████████████████████████▍                                           | 4249/11016 [10:48<17:17,  6.52it/s, loss=-10.7]

2025-07-21 22:23:52,205 — INFO — layer 1, [0:4249], loss=-4.9505
2025-07-21 22:23:52,204 — INFO — layer 1, [0:4249], loss=-7.5152


epoch 0, layer 1:  39%|███████████████████████████▋                                           | 4299/11016 [10:56<17:11,  6.51it/s, loss=-7.85]

2025-07-21 22:23:59,896 — INFO — layer 1, [0:4299], loss=-5.7943
2025-07-21 22:23:59,896 — INFO — layer 1, [0:4299], loss=2.1680


epoch 0, layer 1:  39%|████████████████████████████▍                                           | 4349/11016 [11:03<16:58,  6.54it/s, loss=-3.9]

2025-07-21 22:24:07,460 — INFO — layer 1, [0:4349], loss=-7.2978
2025-07-21 22:24:07,460 — INFO — layer 1, [0:4349], loss=-3.9599


epoch 0, layer 1:  40%|████████████████████████████▎                                          | 4399/11016 [11:11<17:11,  6.42it/s, loss=-7.72]

2025-07-21 22:24:15,257 — INFO — layer 1, [0:4399], loss=-5.2175
2025-07-21 22:24:15,257 — INFO — layer 1, [0:4399], loss=3.1247


epoch 0, layer 1:  40%|████████████████████████████▋                                          | 4449/11016 [11:19<16:07,  6.79it/s, loss=-13.9]

2025-07-21 22:24:22,881 — INFO — layer 1, [0:4449], loss=0.8165
2025-07-21 22:24:22,881 — INFO — layer 1, [0:4449], loss=-0.2898


epoch 0, layer 1:  41%|████████████████████████████▉                                          | 4499/11016 [11:26<16:05,  6.75it/s, loss=-4.78]

2025-07-21 22:24:30,506 — INFO — layer 1, [0:4499], loss=3.4730
2025-07-21 22:24:30,506 — INFO — layer 1, [0:4499], loss=7.0771


epoch 0, layer 1:  41%|█████████████████████████████▎                                         | 4549/11016 [11:34<15:21,  7.02it/s, loss=-6.73]

2025-07-21 22:24:37,954 — INFO — layer 1, [0:4549], loss=-16.8834
2025-07-21 22:24:37,954 — INFO — layer 1, [0:4549], loss=-2.9058


epoch 0, layer 1:  42%|██████████████████████████████                                          | 4599/11016 [11:41<16:06,  6.64it/s, loss=5.66]

2025-07-21 22:24:45,574 — INFO — layer 1, [0:4599], loss=-2.3652
2025-07-21 22:24:45,574 — INFO — layer 1, [0:4599], loss=-3.6800


epoch 0, layer 1:  42%|██████████████████████████████▍                                         | 4649/11016 [11:49<16:14,  6.53it/s, loss=-1.1]

2025-07-21 22:24:53,370 — INFO — layer 1, [0:4649], loss=-8.8497
2025-07-21 22:24:53,370 — INFO — layer 1, [0:4649], loss=-6.9547


epoch 0, layer 1:  43%|██████████████████████████████▋                                         | 4699/11016 [11:57<15:30,  6.79it/s, loss=1.31]

2025-07-21 22:25:00,908 — INFO — layer 1, [0:4699], loss=-10.0090
2025-07-21 22:25:00,908 — INFO — layer 1, [0:4699], loss=-11.6246


epoch 0, layer 1:  43%|██████████████████████████████▌                                        | 4749/11016 [12:04<15:25,  6.77it/s, loss=0.723]

2025-07-21 22:25:08,456 — INFO — layer 1, [0:4749], loss=-6.1274
2025-07-21 22:25:08,456 — INFO — layer 1, [0:4749], loss=-4.7528


epoch 0, layer 1:  44%|███████████████████████████████▎                                        | 4799/11016 [12:12<15:58,  6.49it/s, loss=-4.5]

2025-07-21 22:25:16,284 — INFO — layer 1, [0:4799], loss=0.8708
2025-07-21 22:25:16,284 — INFO — layer 1, [0:4799], loss=-0.4683


epoch 0, layer 1:  44%|████████████████████████████████▌                                         | 4849/11016 [12:20<15:24,  6.67it/s, loss=-3]

2025-07-21 22:25:23,925 — INFO — layer 1, [0:4849], loss=-12.4232
2025-07-21 22:25:23,925 — INFO — layer 1, [0:4849], loss=-4.2052


epoch 0, layer 1:  44%|███████████████████████████████▌                                       | 4899/11016 [12:27<15:46,  6.46it/s, loss=-2.13]

2025-07-21 22:25:31,567 — INFO — layer 1, [0:4899], loss=4.1803
2025-07-21 22:25:31,567 — INFO — layer 1, [0:4899], loss=-9.5130


epoch 0, layer 1:  45%|███████████████████████████████▉                                       | 4949/11016 [12:35<15:31,  6.51it/s, loss=-1.98]

2025-07-21 22:25:39,112 — INFO — layer 1, [0:4949], loss=-3.0645
2025-07-21 22:25:39,112 — INFO — layer 1, [0:4949], loss=-1.2612


epoch 0, layer 1:  45%|████████████████████████████████▏                                      | 4999/11016 [12:42<15:26,  6.50it/s, loss=0.928]

2025-07-21 22:25:46,796 — INFO — layer 1, [0:4999], loss=-4.8989
2025-07-21 22:25:46,796 — INFO — layer 1, [0:4999], loss=3.2517


epoch 0, layer 1:  46%|████████████████████████████████▌                                      | 5049/11016 [12:50<14:36,  6.81it/s, loss=-2.79]

2025-07-21 22:25:54,432 — INFO — layer 1, [0:5049], loss=-14.5304
2025-07-21 22:25:54,432 — INFO — layer 1, [0:5049], loss=-0.7093


epoch 0, layer 1:  46%|████████████████████████████████▊                                      | 5099/11016 [12:58<15:05,  6.53it/s, loss=-3.18]

2025-07-21 22:26:02,099 — INFO — layer 1, [0:5099], loss=-4.2674
2025-07-21 22:26:02,099 — INFO — layer 1, [0:5099], loss=-5.9689


epoch 0, layer 1:  47%|█████████████████████████████████▋                                      | 5149/11016 [13:05<15:25,  6.34it/s, loss=2.81]

2025-07-21 22:26:09,732 — INFO — layer 1, [0:5149], loss=-6.5101
2025-07-21 22:26:09,732 — INFO — layer 1, [0:5149], loss=-1.3854


epoch 0, layer 1:  47%|█████████████████████████████████▌                                     | 5199/11016 [13:13<15:36,  6.21it/s, loss=-12.4]

2025-07-21 22:26:17,444 — INFO — layer 1, [0:5199], loss=-3.1246
2025-07-21 22:26:17,444 — INFO — layer 1, [0:5199], loss=-11.3796


epoch 0, layer 1:  48%|█████████████████████████████████▊                                     | 5249/11016 [13:21<14:20,  6.70it/s, loss=-4.21]

2025-07-21 22:26:25,019 — INFO — layer 1, [0:5249], loss=0.3970
2025-07-21 22:26:25,019 — INFO — layer 1, [0:5249], loss=-4.7913


epoch 0, layer 1:  48%|█████████████████████████████████▋                                    | 5299/11016 [13:28<15:33,  6.12it/s, loss=-0.534]

2025-07-21 22:26:32,706 — INFO — layer 1, [0:5299], loss=0.2097
2025-07-21 22:26:32,706 — INFO — layer 1, [0:5299], loss=-5.1365


epoch 0, layer 1:  49%|██████████████████████████████████▉                                     | 5349/11016 [13:36<14:30,  6.51it/s, loss=-5.2]

2025-07-21 22:26:40,277 — INFO — layer 1, [0:5349], loss=-9.2162
2025-07-21 22:26:40,277 — INFO — layer 1, [0:5349], loss=-2.8104


epoch 0, layer 1:  49%|██████████████████████████████████▊                                    | 5399/11016 [13:44<14:18,  6.55it/s, loss=-7.14]

2025-07-21 22:26:48,090 — INFO — layer 1, [0:5399], loss=-3.0518
2025-07-21 22:26:48,090 — INFO — layer 1, [0:5399], loss=-2.0820


epoch 0, layer 1:  49%|███████████████████████████████████▌                                    | 5449/11016 [13:51<14:49,  6.26it/s, loss=2.49]

2025-07-21 22:26:55,702 — INFO — layer 1, [0:5449], loss=-9.1628
2025-07-21 22:26:55,702 — INFO — layer 1, [0:5449], loss=-3.6344


epoch 0, layer 1:  50%|███████████████████████████████████▍                                   | 5499/11016 [13:59<14:17,  6.43it/s, loss=-5.37]

2025-07-21 22:27:03,233 — INFO — layer 1, [0:5499], loss=-2.3400
2025-07-21 22:27:03,233 — INFO — layer 1, [0:5499], loss=-2.5430


epoch 0, layer 1:  50%|███████████████████████████████████▊                                   | 5549/11016 [14:06<13:50,  6.59it/s, loss=-4.74]

2025-07-21 22:27:10,781 — INFO — layer 1, [0:5549], loss=-2.1109
2025-07-21 22:27:10,781 — INFO — layer 1, [0:5549], loss=2.4587


epoch 0, layer 1:  51%|████████████████████████████████████                                   | 5599/11016 [14:14<13:51,  6.51it/s, loss=-1.69]

2025-07-21 22:27:18,499 — INFO — layer 1, [0:5599], loss=-16.3513
2025-07-21 22:27:18,499 — INFO — layer 1, [0:5599], loss=1.6287


epoch 0, layer 1:  51%|████████████████████████████████████▍                                  | 5649/11016 [14:22<13:50,  6.46it/s, loss=-2.71]

2025-07-21 22:27:26,048 — INFO — layer 1, [0:5649], loss=-4.6392
2025-07-21 22:27:26,049 — INFO — layer 1, [0:5649], loss=-10.4457


epoch 0, layer 1:  52%|████████████████████████████████████▋                                  | 5699/11016 [14:29<13:58,  6.34it/s, loss=-5.91]

2025-07-21 22:27:33,690 — INFO — layer 1, [0:5699], loss=-8.3534
2025-07-21 22:27:33,690 — INFO — layer 1, [0:5699], loss=-5.5795


epoch 0, layer 1:  52%|████████████████████████████████████▌                                 | 5749/11016 [14:37<14:01,  6.26it/s, loss=-0.777]

2025-07-21 22:27:41,213 — INFO — layer 1, [0:5749], loss=-1.0461
2025-07-21 22:27:41,213 — INFO — layer 1, [0:5749], loss=-1.6896


epoch 0, layer 1:  53%|█████████████████████████████████████▉                                  | 5799/11016 [14:45<13:15,  6.56it/s, loss=2.12]

2025-07-21 22:27:48,875 — INFO — layer 1, [0:5799], loss=-15.1603
2025-07-21 22:27:48,875 — INFO — layer 1, [0:5799], loss=0.9202


epoch 0, layer 1:  53%|█████████████████████████████████████▋                                 | 5849/11016 [14:52<13:28,  6.39it/s, loss=0.075]

2025-07-21 22:27:56,487 — INFO — layer 1, [0:5849], loss=1.0840
2025-07-21 22:27:56,487 — INFO — layer 1, [0:5849], loss=-10.3465


epoch 0, layer 1:  54%|██████████████████████████████████████                                 | 5899/11016 [15:00<13:02,  6.54it/s, loss=-3.82]

2025-07-21 22:28:04,114 — INFO — layer 1, [0:5899], loss=-11.7374
2025-07-21 22:28:04,114 — INFO — layer 1, [0:5899], loss=-3.3032


epoch 0, layer 1:  54%|██████████████████████████████████████▎                                | 5949/11016 [15:07<12:09,  6.94it/s, loss=-2.27]

2025-07-21 22:28:11,688 — INFO — layer 1, [0:5949], loss=-2.1041
2025-07-21 22:28:11,688 — INFO — layer 1, [0:5949], loss=-3.8074


epoch 0, layer 1:  54%|██████████████████████████████████████▋                                | 5999/11016 [15:15<12:57,  6.45it/s, loss=-5.56]

2025-07-21 22:28:19,176 — INFO — layer 1, [0:5999], loss=0.4812
2025-07-21 22:28:19,176 — INFO — layer 1, [0:5999], loss=-4.3875


epoch 0, layer 1:  55%|██████████████████████████████████████▉                                | 6049/11016 [15:23<13:19,  6.21it/s, loss=-8.82]

2025-07-21 22:28:26,830 — INFO — layer 1, [0:6049], loss=-6.2444
2025-07-21 22:28:26,830 — INFO — layer 1, [0:6049], loss=-3.1378


epoch 0, layer 1:  55%|███████████████████████████████████████▎                               | 6099/11016 [15:30<12:06,  6.77it/s, loss=-2.82]

2025-07-21 22:28:34,420 — INFO — layer 1, [0:6099], loss=1.7218
2025-07-21 22:28:34,420 — INFO — layer 1, [0:6099], loss=0.3554


epoch 0, layer 1:  56%|███████████████████████████████████████▋                               | 6149/11016 [15:38<12:19,  6.58it/s, loss=0.576]

2025-07-21 22:28:42,050 — INFO — layer 1, [0:6149], loss=-6.7766
2025-07-21 22:28:42,050 — INFO — layer 1, [0:6149], loss=-0.5268


epoch 0, layer 1:  56%|███████████████████████████████████████▉                               | 6199/11016 [15:45<12:15,  6.55it/s, loss=-8.71]

2025-07-21 22:28:49,763 — INFO — layer 1, [0:6199], loss=-16.0858
2025-07-21 22:28:49,763 — INFO — layer 1, [0:6199], loss=-3.1287


epoch 0, layer 1:  57%|████████████████████████████████████████▎                              | 6249/11016 [15:53<12:19,  6.44it/s, loss=-12.4]

2025-07-21 22:28:57,483 — INFO — layer 1, [0:6249], loss=-14.2334
2025-07-21 22:28:57,483 — INFO — layer 1, [0:6249], loss=-5.9523


epoch 0, layer 1:  57%|████████████████████████████████████████                              | 6299/11016 [16:01<11:45,  6.69it/s, loss=-0.481]

2025-07-21 22:29:05,260 — INFO — layer 1, [0:6299], loss=-4.4060
2025-07-21 22:29:05,260 — INFO — layer 1, [0:6299], loss=-10.4893


epoch 0, layer 1:  58%|████████████████████████████████████████▉                              | 6349/11016 [16:09<12:11,  6.38it/s, loss=-6.76]

2025-07-21 22:29:13,025 — INFO — layer 1, [0:6349], loss=-9.4787
2025-07-21 22:29:13,025 — INFO — layer 1, [0:6349], loss=-6.3866


epoch 0, layer 1:  58%|█████████████████████████████████████████▏                             | 6399/11016 [16:16<10:57,  7.02it/s, loss=-4.81]

2025-07-21 22:29:20,489 — INFO — layer 1, [0:6399], loss=1.8429
2025-07-21 22:29:20,489 — INFO — layer 1, [0:6399], loss=-4.4355


epoch 0, layer 1:  59%|█████████████████████████████████████████▌                             | 6449/11016 [16:24<11:35,  6.56it/s, loss=-10.7]

2025-07-21 22:29:28,104 — INFO — layer 1, [0:6449], loss=-2.5364
2025-07-21 22:29:28,104 — INFO — layer 1, [0:6449], loss=-8.5059


epoch 0, layer 1:  59%|█████████████████████████████████████████▉                             | 6499/11016 [16:31<11:46,  6.40it/s, loss=-11.2]

2025-07-21 22:29:35,744 — INFO — layer 1, [0:6499], loss=-0.6242
2025-07-21 22:29:35,744 — INFO — layer 1, [0:6499], loss=-4.3069


epoch 0, layer 1:  59%|██████████████████████████████████████████▏                            | 6549/11016 [16:39<11:11,  6.66it/s, loss=-2.58]

2025-07-21 22:29:43,376 — INFO — layer 1, [0:6549], loss=-8.6906
2025-07-21 22:29:43,376 — INFO — layer 1, [0:6549], loss=-8.0046


epoch 0, layer 1:  60%|██████████████████████████████████████████▌                            | 6599/11016 [16:47<10:30,  7.01it/s, loss=-17.6]

2025-07-21 22:29:50,971 — INFO — layer 1, [0:6599], loss=-12.0231
2025-07-21 22:29:50,971 — INFO — layer 1, [0:6599], loss=-3.6771


epoch 0, layer 1:  60%|███████████████████████████████████████████▍                            | 6649/11016 [16:54<11:59,  6.07it/s, loss=2.33]

2025-07-21 22:29:58,551 — INFO — layer 1, [0:6649], loss=-3.3948
2025-07-21 22:29:58,551 — INFO — layer 1, [0:6649], loss=-20.7023


epoch 0, layer 1:  61%|███████████████████████████████████████████▏                           | 6699/11016 [17:02<10:57,  6.56it/s, loss=-13.5]

2025-07-21 22:30:06,348 — INFO — layer 1, [0:6699], loss=-10.0784
2025-07-21 22:30:06,348 — INFO — layer 1, [0:6699], loss=-6.2958


epoch 0, layer 1:  61%|███████████████████████████████████████████▍                           | 6749/11016 [17:10<10:19,  6.89it/s, loss=-3.29]

2025-07-21 22:30:13,962 — INFO — layer 1, [0:6749], loss=0.2251
2025-07-21 22:30:13,962 — INFO — layer 1, [0:6749], loss=-2.6917


epoch 0, layer 1:  62%|███████████████████████████████████████████▊                           | 6799/11016 [17:17<10:15,  6.85it/s, loss=-5.93]

2025-07-21 22:30:21,712 — INFO — layer 1, [0:6799], loss=-5.8609
2025-07-21 22:30:21,712 — INFO — layer 1, [0:6799], loss=-7.9066


epoch 0, layer 1:  62%|████████████████████████████████████████████▏                          | 6849/11016 [17:25<10:18,  6.74it/s, loss=-2.53]

2025-07-21 22:30:29,449 — INFO — layer 1, [0:6849], loss=-7.7003
2025-07-21 22:30:29,449 — INFO — layer 1, [0:6849], loss=-14.3269


epoch 0, layer 1:  63%|████████████████████████████████████████████▍                          | 6899/11016 [17:33<10:02,  6.83it/s, loss=-2.35]

2025-07-21 22:30:36,916 — INFO — layer 1, [0:6899], loss=-3.7903
2025-07-21 22:30:36,916 — INFO — layer 1, [0:6899], loss=-8.9615


epoch 0, layer 1:  63%|████████████████████████████████████████████▊                          | 6949/11016 [17:40<10:09,  6.67it/s, loss=-9.32]

2025-07-21 22:30:44,465 — INFO — layer 1, [0:6949], loss=-14.4838
2025-07-21 22:30:44,465 — INFO — layer 1, [0:6949], loss=-5.8647


epoch 0, layer 1:  64%|█████████████████████████████████████████████                          | 6999/11016 [17:48<09:45,  6.86it/s, loss=-8.28]

2025-07-21 22:30:52,144 — INFO — layer 1, [0:6999], loss=-18.2699
2025-07-21 22:30:52,144 — INFO — layer 1, [0:6999], loss=-3.6310


epoch 0, layer 1:  64%|█████████████████████████████████████████████▍                         | 7049/11016 [17:56<11:03,  5.98it/s, loss=-11.7]

2025-07-21 22:30:59,902 — INFO — layer 1, [0:7049], loss=-3.4058
2025-07-21 22:30:59,902 — INFO — layer 1, [0:7049], loss=-4.8452


epoch 0, layer 1:  64%|██████████████████████████████████████████████▍                         | 7099/11016 [18:03<10:34,  6.18it/s, loss=1.86]

2025-07-21 22:31:07,518 — INFO — layer 1, [0:7099], loss=-11.6678
2025-07-21 22:31:07,518 — INFO — layer 1, [0:7099], loss=-12.4216


epoch 0, layer 1:  65%|██████████████████████████████████████████████▋                         | 7149/11016 [18:11<09:19,  6.91it/s, loss=2.77]

2025-07-21 22:31:15,071 — INFO — layer 1, [0:7149], loss=-4.3872
2025-07-21 22:31:15,071 — INFO — layer 1, [0:7149], loss=-4.3045


epoch 0, layer 1:  65%|██████████████████████████████████████████████▍                        | 7199/11016 [18:18<09:51,  6.45it/s, loss=-1.43]

2025-07-21 22:31:22,717 — INFO — layer 1, [0:7199], loss=-2.4503
2025-07-21 22:31:22,717 — INFO — layer 1, [0:7199], loss=-9.2236


epoch 0, layer 1:  66%|██████████████████████████████████████████████▋                        | 7249/11016 [18:26<09:45,  6.43it/s, loss=-12.7]

2025-07-21 22:31:30,569 — INFO — layer 1, [0:7249], loss=-3.3272
2025-07-21 22:31:30,569 — INFO — layer 1, [0:7249], loss=0.7951


epoch 0, layer 1:  66%|███████████████████████████████████████████████                        | 7299/11016 [18:34<09:12,  6.72it/s, loss=-10.8]

2025-07-21 22:31:38,256 — INFO — layer 1, [0:7299], loss=-15.1802
2025-07-21 22:31:38,256 — INFO — layer 1, [0:7299], loss=-3.7266


epoch 0, layer 1:  67%|███████████████████████████████████████████████▎                       | 7349/11016 [18:42<09:39,  6.33it/s, loss=-10.1]

2025-07-21 22:31:46,038 — INFO — layer 1, [0:7349], loss=-6.0504
2025-07-21 22:31:46,038 — INFO — layer 1, [0:7349], loss=-1.1054


epoch 0, layer 1:  67%|███████████████████████████████████████████████▋                       | 7399/11016 [18:49<08:46,  6.86it/s, loss=-5.62]

2025-07-21 22:31:53,675 — INFO — layer 1, [0:7399], loss=-1.2591
2025-07-21 22:31:53,675 — INFO — layer 1, [0:7399], loss=-10.7243


epoch 0, layer 1:  68%|████████████████████████████████████████████████                       | 7449/11016 [18:57<08:50,  6.73it/s, loss=-7.08]

2025-07-21 22:32:01,389 — INFO — layer 1, [0:7449], loss=-8.5743
2025-07-21 22:32:01,389 — INFO — layer 1, [0:7449], loss=-9.4476


epoch 0, layer 1:  68%|██████████████████████████████████████████████▉                      | 7499/11016 [19:05<09:09,  6.40it/s, loss=-0.0356]

2025-07-21 22:32:09,077 — INFO — layer 1, [0:7499], loss=-7.9205
2025-07-21 22:32:09,077 — INFO — layer 1, [0:7499], loss=-4.6163


epoch 0, layer 1:  69%|████████████████████████████████████████████████▋                      | 7549/11016 [19:12<09:02,  6.39it/s, loss=-2.65]

2025-07-21 22:32:16,656 — INFO — layer 1, [0:7549], loss=-3.2029
2025-07-21 22:32:16,656 — INFO — layer 1, [0:7549], loss=-5.3610


epoch 0, layer 1:  69%|█████████████████████████████████████████████████▋                      | 7599/11016 [19:20<08:36,  6.61it/s, loss=-7.1]

2025-07-21 22:32:24,310 — INFO — layer 1, [0:7599], loss=-7.2620
2025-07-21 22:32:24,310 — INFO — layer 1, [0:7599], loss=-16.1866


epoch 0, layer 1:  69%|█████████████████████████████████████████████████▎                     | 7649/11016 [19:28<08:53,  6.31it/s, loss=-12.8]

2025-07-21 22:32:31,944 — INFO — layer 1, [0:7649], loss=-11.5766
2025-07-21 22:32:31,944 — INFO — layer 1, [0:7649], loss=-5.6477


epoch 0, layer 1:  70%|█████████████████████████████████████████████████▌                     | 7699/11016 [19:35<08:28,  6.52it/s, loss=-19.5]

2025-07-21 22:32:39,546 — INFO — layer 1, [0:7699], loss=-12.2905
2025-07-21 22:32:39,546 — INFO — layer 1, [0:7699], loss=-2.6168


epoch 0, layer 1:  70%|█████████████████████████████████████████████████▉                     | 7749/11016 [19:43<08:32,  6.37it/s, loss=-10.8]

2025-07-21 22:32:47,128 — INFO — layer 1, [0:7749], loss=-12.6467
2025-07-21 22:32:47,128 — INFO — layer 1, [0:7749], loss=-6.7961


epoch 0, layer 1:  71%|██████████████████████████████████████████████████▎                    | 7799/11016 [19:50<08:30,  6.30it/s, loss=-8.51]

2025-07-21 22:32:54,773 — INFO — layer 1, [0:7799], loss=-13.2561
2025-07-21 22:32:54,773 — INFO — layer 1, [0:7799], loss=-9.4913


epoch 0, layer 1:  71%|██████████████████████████████████████████████████▌                    | 7849/11016 [19:58<08:55,  5.92it/s, loss=-5.14]

2025-07-21 22:33:02,457 — INFO — layer 1, [0:7849], loss=-7.8526
2025-07-21 22:33:02,457 — INFO — layer 1, [0:7849], loss=-8.7429


epoch 0, layer 1:  72%|██████████████████████████████████████████████████▉                    | 7899/11016 [20:06<07:40,  6.76it/s, loss=-9.97]

2025-07-21 22:33:10,045 — INFO — layer 1, [0:7899], loss=-10.7063
2025-07-21 22:33:10,046 — INFO — layer 1, [0:7899], loss=-16.3203


epoch 0, layer 1:  72%|███████████████████████████████████████████████████▏                   | 7949/11016 [20:13<07:32,  6.78it/s, loss=-6.27]

2025-07-21 22:33:17,572 — INFO — layer 1, [0:7949], loss=-14.8038
2025-07-21 22:33:17,572 — INFO — layer 1, [0:7949], loss=-13.6636


epoch 0, layer 1:  73%|███████████████████████████████████████████████████▌                   | 7999/11016 [20:21<08:13,  6.11it/s, loss=-16.6]

2025-07-21 22:33:25,271 — INFO — layer 1, [0:7999], loss=-9.8098
2025-07-21 22:33:25,272 — INFO — layer 1, [0:7999], loss=-4.5555


epoch 0, layer 1:  73%|███████████████████████████████████████████████████▉                   | 8049/11016 [20:28<07:15,  6.81it/s, loss=-6.01]

2025-07-21 22:33:32,755 — INFO — layer 1, [0:8049], loss=2.0710
2025-07-21 22:33:32,755 — INFO — layer 1, [0:8049], loss=-10.5225


epoch 0, layer 1:  74%|████████████████████████████████████████████████████▏                  | 8099/11016 [20:36<07:53,  6.16it/s, loss=-7.67]

2025-07-21 22:33:40,610 — INFO — layer 1, [0:8099], loss=-18.8006
2025-07-21 22:33:40,610 — INFO — layer 1, [0:8099], loss=-11.9059


epoch 0, layer 1:  74%|████████████████████████████████████████████████████▌                  | 8149/11016 [20:44<07:12,  6.63it/s, loss=-9.99]

2025-07-21 22:33:48,182 — INFO — layer 1, [0:8149], loss=-20.2434
2025-07-21 22:33:48,182 — INFO — layer 1, [0:8149], loss=-3.8662


epoch 0, layer 1:  74%|██████████████████████████████████████████████████████▎                  | 8199/11016 [20:51<07:03,  6.65it/s, loss=-10]

2025-07-21 22:33:55,759 — INFO — layer 1, [0:8199], loss=-3.9021
2025-07-21 22:33:55,759 — INFO — layer 1, [0:8199], loss=-0.8043


epoch 0, layer 1:  75%|█████████████████████████████████████████████████████▏                 | 8249/11016 [20:59<07:17,  6.32it/s, loss=-15.2]

2025-07-21 22:34:03,464 — INFO — layer 1, [0:8249], loss=-9.0044
2025-07-21 22:34:03,464 — INFO — layer 1, [0:8249], loss=-3.4118


epoch 0, layer 1:  75%|██████████████████████████████████████████████████████▏                 | 8299/11016 [21:07<06:25,  7.05it/s, loss=2.33]

2025-07-21 22:34:10,930 — INFO — layer 1, [0:8299], loss=1.4955
2025-07-21 22:34:10,930 — INFO — layer 1, [0:8299], loss=-16.2292


epoch 0, layer 1:  76%|█████████████████████████████████████████████████████▊                 | 8349/11016 [21:14<06:20,  7.01it/s, loss=-7.59]

2025-07-21 22:34:18,366 — INFO — layer 1, [0:8349], loss=-14.9123
2025-07-21 22:34:18,366 — INFO — layer 1, [0:8349], loss=-15.7417


epoch 0, layer 1:  76%|██████████████████████████████████████████████████████▏                | 8399/11016 [21:22<07:01,  6.21it/s, loss=-6.35]

2025-07-21 22:34:25,970 — INFO — layer 1, [0:8399], loss=-7.1441
2025-07-21 22:34:25,970 — INFO — layer 1, [0:8399], loss=-14.2259


epoch 0, layer 1:  77%|██████████████████████████████████████████████████████▍                | 8449/11016 [21:29<06:35,  6.50it/s, loss=-4.09]

2025-07-21 22:34:33,702 — INFO — layer 1, [0:8449], loss=-5.2342
2025-07-21 22:34:33,702 — INFO — layer 1, [0:8449], loss=-14.8798


epoch 0, layer 1:  77%|██████████████████████████████████████████████████████▊                | 8499/11016 [21:37<06:22,  6.58it/s, loss=-8.55]

2025-07-21 22:34:41,368 — INFO — layer 1, [0:8499], loss=-6.5099
2025-07-21 22:34:41,368 — INFO — layer 1, [0:8499], loss=-13.3513


epoch 0, layer 1:  78%|███████████████████████████████████████████████████████                | 8549/11016 [21:45<06:29,  6.34it/s, loss=-7.74]

2025-07-21 22:34:48,974 — INFO — layer 1, [0:8549], loss=-4.9070
2025-07-21 22:34:48,974 — INFO — layer 1, [0:8549], loss=2.1789


epoch 0, layer 1:  78%|███████████████████████████████████████████████████████▍               | 8599/11016 [21:52<05:50,  6.90it/s, loss=-7.45]

2025-07-21 22:34:56,471 — INFO — layer 1, [0:8599], loss=-9.9896
2025-07-21 22:34:56,471 — INFO — layer 1, [0:8599], loss=-5.1124


epoch 0, layer 1:  79%|███████████████████████████████████████████████████████▋               | 8649/11016 [22:00<05:30,  7.16it/s, loss=-11.1]

2025-07-21 22:35:04,051 — INFO — layer 1, [0:8649], loss=-4.7435
2025-07-21 22:35:04,051 — INFO — layer 1, [0:8649], loss=-1.7022


epoch 0, layer 1:  79%|████████████████████████████████████████████████████████               | 8699/11016 [22:07<05:50,  6.62it/s, loss=-1.97]

2025-07-21 22:35:11,686 — INFO — layer 1, [0:8699], loss=-6.8521
2025-07-21 22:35:11,686 — INFO — layer 1, [0:8699], loss=-5.2272


epoch 0, layer 1:  79%|████████████████████████████████████████████████████████▍              | 8749/11016 [22:15<05:46,  6.53it/s, loss=-6.12]

2025-07-21 22:35:19,598 — INFO — layer 1, [0:8749], loss=-11.2832
2025-07-21 22:35:19,598 — INFO — layer 1, [0:8749], loss=-5.3594


epoch 0, layer 1:  80%|██████████████████████████████████████████████████████████▎              | 8799/11016 [22:23<05:30,  6.70it/s, loss=0.6]

2025-07-21 22:35:27,448 — INFO — layer 1, [0:8799], loss=-5.0936
2025-07-21 22:35:27,448 — INFO — layer 1, [0:8799], loss=-11.9841


epoch 0, layer 1:  80%|█████████████████████████████████████████████████████████              | 8849/11016 [22:31<05:25,  6.66it/s, loss=-11.6]

2025-07-21 22:35:35,092 — INFO — layer 1, [0:8849], loss=-9.5138
2025-07-21 22:35:35,092 — INFO — layer 1, [0:8849], loss=-10.8254


epoch 0, layer 1:  81%|█████████████████████████████████████████████████████████▎             | 8899/11016 [22:38<05:17,  6.67it/s, loss=-8.65]

2025-07-21 22:35:42,712 — INFO — layer 1, [0:8899], loss=-10.7842
2025-07-21 22:35:42,712 — INFO — layer 1, [0:8899], loss=3.5592


epoch 0, layer 1:  81%|█████████████████████████████████████████████████████████▋             | 8949/11016 [22:46<05:11,  6.63it/s, loss=-6.66]

2025-07-21 22:35:50,364 — INFO — layer 1, [0:8949], loss=-15.2100
2025-07-21 22:35:50,364 — INFO — layer 1, [0:8949], loss=-10.4792


epoch 0, layer 1:  82%|██████████████████████████████████████████████████████████             | 8999/11016 [22:54<05:03,  6.65it/s, loss=-6.07]

2025-07-21 22:35:57,948 — INFO — layer 1, [0:8999], loss=-9.6660
2025-07-21 22:35:57,948 — INFO — layer 1, [0:8999], loss=-10.7478


epoch 0, layer 1:  82%|██████████████████████████████████████████████████████████▎            | 9049/11016 [23:01<05:01,  6.53it/s, loss=-12.9]

2025-07-21 22:36:05,424 — INFO — layer 1, [0:9049], loss=-0.4760
2025-07-21 22:36:05,424 — INFO — layer 1, [0:9049], loss=-7.6631


epoch 0, layer 1:  83%|██████████████████████████████████████████████████████████▋            | 9099/11016 [23:09<05:03,  6.31it/s, loss=-2.33]

2025-07-21 22:36:12,950 — INFO — layer 1, [0:9099], loss=-8.3192
2025-07-21 22:36:12,950 — INFO — layer 1, [0:9099], loss=-9.1462


epoch 0, layer 1:  83%|██████████████████████████████████████████████████████████▉            | 9149/11016 [23:16<04:44,  6.55it/s, loss=-13.5]

2025-07-21 22:36:20,547 — INFO — layer 1, [0:9149], loss=-15.7060
2025-07-21 22:36:20,547 — INFO — layer 1, [0:9149], loss=-17.9867


epoch 0, layer 1:  84%|███████████████████████████████████████████████████████████▎           | 9199/11016 [23:24<04:34,  6.63it/s, loss=-4.62]

2025-07-21 22:36:28,009 — INFO — layer 1, [0:9199], loss=-14.5006
2025-07-21 22:36:28,009 — INFO — layer 1, [0:9199], loss=-5.1966


epoch 0, layer 1:  84%|███████████████████████████████████████████████████████████▌           | 9249/11016 [23:31<04:35,  6.41it/s, loss=-15.1]

2025-07-21 22:36:35,792 — INFO — layer 1, [0:9249], loss=-5.9334
2025-07-21 22:36:35,792 — INFO — layer 1, [0:9249], loss=-13.1036


epoch 0, layer 1:  84%|███████████████████████████████████████████████████████████▉           | 9299/11016 [23:39<04:20,  6.59it/s, loss=-3.41]

2025-07-21 22:36:43,498 — INFO — layer 1, [0:9299], loss=-4.6007
2025-07-21 22:36:43,498 — INFO — layer 1, [0:9299], loss=-0.7382


epoch 0, layer 1:  85%|████████████████████████████████████████████████████████████▎          | 9349/11016 [23:47<04:11,  6.64it/s, loss=-5.35]

2025-07-21 22:36:51,113 — INFO — layer 1, [0:9349], loss=-2.2053
2025-07-21 22:36:51,113 — INFO — layer 1, [0:9349], loss=-1.3892


epoch 0, layer 1:  85%|██████████████████████████████████████████████████████████████▎          | 9399/11016 [23:54<03:51,  6.98it/s, loss=-13]

2025-07-21 22:36:58,630 — INFO — layer 1, [0:9399], loss=-15.8330
2025-07-21 22:36:58,630 — INFO — layer 1, [0:9399], loss=-3.6084


epoch 0, layer 1:  86%|████████████████████████████████████████████████████████████▉          | 9449/11016 [24:02<04:05,  6.39it/s, loss=-12.5]

2025-07-21 22:37:06,131 — INFO — layer 1, [0:9449], loss=-16.9992
2025-07-21 22:37:06,131 — INFO — layer 1, [0:9449], loss=-14.4885


epoch 0, layer 1:  86%|█████████████████████████████████████████████████████████████▏         | 9499/11016 [24:09<03:39,  6.92it/s, loss=-17.3]

2025-07-21 22:37:13,589 — INFO — layer 1, [0:9499], loss=-7.9419
2025-07-21 22:37:13,589 — INFO — layer 1, [0:9499], loss=-7.6775


epoch 0, layer 1:  87%|██████████████████████████████████████████████████████████████▍         | 9549/11016 [24:17<03:45,  6.51it/s, loss=1.82]

2025-07-21 22:37:21,293 — INFO — layer 1, [0:9549], loss=-1.8011
2025-07-21 22:37:21,293 — INFO — layer 1, [0:9549], loss=-11.3403


epoch 0, layer 1:  87%|█████████████████████████████████████████████████████████████▊         | 9599/11016 [24:25<03:38,  6.48it/s, loss=-5.97]

2025-07-21 22:37:28,818 — INFO — layer 1, [0:9599], loss=-4.7112
2025-07-21 22:37:28,818 — INFO — layer 1, [0:9599], loss=-2.7692


epoch 0, layer 1:  88%|██████████████████████████████████████████████████████████████▏        | 9649/11016 [24:32<03:48,  5.99it/s, loss=-16.8]

2025-07-21 22:37:36,586 — INFO — layer 1, [0:9649], loss=-4.6908
2025-07-21 22:37:36,586 — INFO — layer 1, [0:9649], loss=-9.9057


epoch 0, layer 1:  88%|██████████████████████████████████████████████████████████████▌        | 9699/11016 [24:40<03:21,  6.53it/s, loss=-6.82]

2025-07-21 22:37:44,161 — INFO — layer 1, [0:9699], loss=0.3337
2025-07-21 22:37:44,161 — INFO — layer 1, [0:9699], loss=-3.1972


epoch 0, layer 1:  88%|█████████████████████████████████████████████████████████████▉        | 9749/11016 [24:47<03:09,  6.68it/s, loss=-0.521]

2025-07-21 22:37:51,644 — INFO — layer 1, [0:9749], loss=-8.9025
2025-07-21 22:37:51,644 — INFO — layer 1, [0:9749], loss=-11.2976


epoch 0, layer 1:  89%|███████████████████████████████████████████████████████████████▏       | 9799/11016 [24:55<02:56,  6.88it/s, loss=-16.4]

2025-07-21 22:37:59,333 — INFO — layer 1, [0:9799], loss=-12.2564
2025-07-21 22:37:59,333 — INFO — layer 1, [0:9799], loss=-9.3847


epoch 0, layer 1:  89%|███████████████████████████████████████████████████████████████▍       | 9849/11016 [25:03<02:53,  6.74it/s, loss=-5.41]

2025-07-21 22:38:06,864 — INFO — layer 1, [0:9849], loss=1.7556
2025-07-21 22:38:06,864 — INFO — layer 1, [0:9849], loss=-6.8019


epoch 0, layer 1:  90%|███████████████████████████████████████████████████████████████▊       | 9899/11016 [25:10<02:51,  6.51it/s, loss=0.147]

2025-07-21 22:38:14,527 — INFO — layer 1, [0:9899], loss=-8.8100
2025-07-21 22:38:14,527 — INFO — layer 1, [0:9899], loss=-7.2762


epoch 0, layer 1:  90%|████████████████████████████████████████████████████████████████       | 9949/11016 [25:18<02:40,  6.66it/s, loss=-14.4]

2025-07-21 22:38:22,045 — INFO — layer 1, [0:9949], loss=-2.1437
2025-07-21 22:38:22,045 — INFO — layer 1, [0:9949], loss=-13.6863


epoch 0, layer 1:  91%|████████████████████████████████████████████████████████████████▍      | 9999/11016 [25:25<02:28,  6.86it/s, loss=-3.78]

2025-07-21 22:38:29,587 — INFO — layer 1, [0:9999], loss=-16.1401
2025-07-21 22:38:29,587 — INFO — layer 1, [0:9999], loss=-13.5272


epoch 0, layer 1:  91%|███████████████████████████████████████████████████████████████▊      | 10049/11016 [25:33<02:22,  6.79it/s, loss=-2.06]

2025-07-21 22:38:37,332 — INFO — layer 1, [0:10049], loss=1.0515
2025-07-21 22:38:37,333 — INFO — layer 1, [0:10049], loss=-11.6679


epoch 0, layer 1:  92%|██████████████████████████████████████████████████████████████████      | 10099/11016 [25:41<02:16,  6.70it/s, loss=-13]

2025-07-21 22:38:44,962 — INFO — layer 1, [0:10099], loss=-6.2222
2025-07-21 22:38:44,962 — INFO — layer 1, [0:10099], loss=-16.6702


epoch 0, layer 1:  92%|████████████████████████████████████████████████████████████████▍     | 10149/11016 [25:48<02:09,  6.68it/s, loss=-6.61]

2025-07-21 22:38:52,569 — INFO — layer 1, [0:10149], loss=-13.5971
2025-07-21 22:38:52,569 — INFO — layer 1, [0:10149], loss=-5.3720


epoch 0, layer 1:  93%|████████████████████████████████████████████████████████████████▊     | 10199/11016 [25:56<02:04,  6.57it/s, loss=-5.51]

2025-07-21 22:39:00,105 — INFO — layer 1, [0:10199], loss=-11.5079
2025-07-21 22:39:00,105 — INFO — layer 1, [0:10199], loss=-11.5450


epoch 0, layer 1:  93%|█████████████████████████████████████████████████████████████████▏    | 10249/11016 [26:03<02:01,  6.31it/s, loss=-15.6]

2025-07-21 22:39:07,767 — INFO — layer 1, [0:10249], loss=-18.9568
2025-07-21 22:39:07,767 — INFO — layer 1, [0:10249], loss=-8.5962


epoch 0, layer 1:  93%|█████████████████████████████████████████████████████████████████▍    | 10299/11016 [26:11<01:46,  6.70it/s, loss=-7.53]

2025-07-21 22:39:15,441 — INFO — layer 1, [0:10299], loss=-17.0892
2025-07-21 22:39:15,441 — INFO — layer 1, [0:10299], loss=-5.6945


epoch 0, layer 1:  94%|█████████████████████████████████████████████████████████████████▊    | 10349/11016 [26:19<01:39,  6.71it/s, loss=-16.2]

2025-07-21 22:39:23,090 — INFO — layer 1, [0:10349], loss=-5.6248
2025-07-21 22:39:23,090 — INFO — layer 1, [0:10349], loss=-14.3487


epoch 0, layer 1:  94%|██████████████████████████████████████████████████████████████████    | 10399/11016 [26:26<01:29,  6.90it/s, loss=-2.72]

2025-07-21 22:39:30,687 — INFO — layer 1, [0:10399], loss=-14.4489
2025-07-21 22:39:30,688 — INFO — layer 1, [0:10399], loss=0.5268


epoch 0, layer 1:  95%|██████████████████████████████████████████████████████████████████▍   | 10449/11016 [26:34<01:23,  6.80it/s, loss=-6.91]

2025-07-21 22:39:38,283 — INFO — layer 1, [0:10449], loss=-9.6591
2025-07-21 22:39:38,283 — INFO — layer 1, [0:10449], loss=-4.8745


epoch 0, layer 1:  95%|██████████████████████████████████████████████████████████████████▋   | 10499/11016 [26:42<01:16,  6.80it/s, loss=-10.1]

2025-07-21 22:39:45,993 — INFO — layer 1, [0:10499], loss=-13.8345
2025-07-21 22:39:45,994 — INFO — layer 1, [0:10499], loss=-14.0847


epoch 0, layer 1:  96%|███████████████████████████████████████████████████████████████████   | 10549/11016 [26:49<01:09,  6.73it/s, loss=-9.13]

2025-07-21 22:39:53,440 — INFO — layer 1, [0:10549], loss=-5.6987
2025-07-21 22:39:53,440 — INFO — layer 1, [0:10549], loss=-13.5608


epoch 0, layer 1:  96%|███████████████████████████████████████████████████████████████████▎  | 10599/11016 [26:57<01:01,  6.80it/s, loss=-5.62]

2025-07-21 22:40:00,827 — INFO — layer 1, [0:10599], loss=-7.8091
2025-07-21 22:40:00,827 — INFO — layer 1, [0:10599], loss=-3.3129


epoch 0, layer 1:  97%|███████████████████████████████████████████████████████████████████▋  | 10649/11016 [27:04<00:55,  6.63it/s, loss=-5.46]

2025-07-21 22:40:08,467 — INFO — layer 1, [0:10649], loss=-11.5872
2025-07-21 22:40:08,467 — INFO — layer 1, [0:10649], loss=-15.3101


epoch 0, layer 1:  97%|███████████████████████████████████████████████████████████████████▉  | 10699/11016 [27:12<00:49,  6.38it/s, loss=-4.57]

2025-07-21 22:40:16,148 — INFO — layer 1, [0:10699], loss=-12.9254
2025-07-21 22:40:16,148 — INFO — layer 1, [0:10699], loss=-9.3169


epoch 0, layer 1:  98%|████████████████████████████████████████████████████████████████████▎ | 10749/11016 [27:19<00:40,  6.59it/s, loss=-8.64]

2025-07-21 22:40:23,690 — INFO — layer 1, [0:10749], loss=-15.4148
2025-07-21 22:40:23,690 — INFO — layer 1, [0:10749], loss=-10.7362


epoch 0, layer 1:  98%|████████████████████████████████████████████████████████████████████▌ | 10799/11016 [27:27<00:32,  6.74it/s, loss=-5.84]

2025-07-21 22:40:31,480 — INFO — layer 1, [0:10799], loss=-12.3696
2025-07-21 22:40:31,480 — INFO — layer 1, [0:10799], loss=-10.4768


epoch 0, layer 1:  98%|██████████████████████████████████████████████████████████████████████▉ | 10849/11016 [27:35<00:26,  6.42it/s, loss=-21]

2025-07-21 22:40:39,023 — INFO — layer 1, [0:10849], loss=-15.8916
2025-07-21 22:40:39,023 — INFO — layer 1, [0:10849], loss=-19.5015


epoch 0, layer 1:  99%|█████████████████████████████████████████████████████████████████████▎| 10899/11016 [27:42<00:17,  6.87it/s, loss=-16.8]

2025-07-21 22:40:46,611 — INFO — layer 1, [0:10899], loss=-16.1995
2025-07-21 22:40:46,611 — INFO — layer 1, [0:10899], loss=-14.6788


epoch 0, layer 1:  99%|█████████████████████████████████████████████████████████████████████▌| 10949/11016 [27:50<00:10,  6.43it/s, loss=-12.6]

2025-07-21 22:40:54,257 — INFO — layer 1, [0:10949], loss=-10.9518
2025-07-21 22:40:54,257 — INFO — layer 1, [0:10949], loss=-0.2541


epoch 0, layer 1: 100%|█████████████████████████████████████████████████████████████████████▉| 10999/11016 [27:58<00:02,  6.56it/s, loss=-14.5]

2025-07-21 22:41:01,912 — INFO — layer 1, [0:10999], loss=-16.7637
2025-07-21 22:41:01,912 — INFO — layer 1, [0:10999], loss=-9.1832


epoch 1, layer 1:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 22:41:09,871 — INFO — layer 1, [1:33], loss=2.5327
2025-07-21 22:41:09,871 — INFO — layer 1, [1:33], loss=-4.4483


epoch 1, layer 1:   1%|▌                                                                        | 83/11016 [00:12<27:39,  6.59it/s, loss=-11.3]

2025-07-21 22:41:17,398 — INFO — layer 1, [1:83], loss=-11.0233
2025-07-21 22:41:17,398 — INFO — layer 1, [1:83], loss=-5.1213


epoch 1, layer 1:   1%|▉                                                                        | 133/11016 [00:20<29:10,  6.22it/s, loss=-5.2]

2025-07-21 22:41:24,859 — INFO — layer 1, [1:133], loss=-21.1289
2025-07-21 22:41:24,859 — INFO — layer 1, [1:133], loss=-11.8545


epoch 1, layer 1:   2%|█▏                                                                      | 183/11016 [00:27<28:02,  6.44it/s, loss=-5.88]

2025-07-21 22:41:32,549 — INFO — layer 1, [1:183], loss=-16.7051
2025-07-21 22:41:32,549 — INFO — layer 1, [1:183], loss=-15.3880


epoch 1, layer 1:   2%|█▌                                                                      | 233/11016 [00:35<27:20,  6.57it/s, loss=0.273]

2025-07-21 22:41:39,996 — INFO — layer 1, [1:233], loss=-12.0667
2025-07-21 22:41:39,996 — INFO — layer 1, [1:233], loss=-8.5946


epoch 1, layer 1:   3%|█▊                                                                      | 283/11016 [00:43<26:17,  6.80it/s, loss=-6.81]

2025-07-21 22:41:47,709 — INFO — layer 1, [1:283], loss=-7.2487
2025-07-21 22:41:47,709 — INFO — layer 1, [1:283], loss=-3.5507


epoch 1, layer 1:   3%|██▏                                                                     | 333/11016 [00:50<27:40,  6.43it/s, loss=-9.37]

2025-07-21 22:41:55,358 — INFO — layer 1, [1:333], loss=-14.9528
2025-07-21 22:41:55,358 — INFO — layer 1, [1:333], loss=-10.0000


epoch 1, layer 1:   3%|██▌                                                                      | 383/11016 [00:58<26:08,  6.78it/s, loss=-2.8]

2025-07-21 22:42:02,950 — INFO — layer 1, [1:383], loss=-5.4960
2025-07-21 22:42:02,950 — INFO — layer 1, [1:383], loss=-11.2618


epoch 1, layer 1:   4%|██▊                                                                     | 433/11016 [01:06<26:59,  6.54it/s, loss=-18.2]

2025-07-21 22:42:10,637 — INFO — layer 1, [1:433], loss=-9.4175
2025-07-21 22:42:10,637 — INFO — layer 1, [1:433], loss=-13.1808


epoch 1, layer 1:   4%|███▏                                                                     | 483/11016 [01:13<25:35,  6.86it/s, loss=-6.9]

2025-07-21 22:42:18,209 — INFO — layer 1, [1:483], loss=-6.2248
2025-07-21 22:42:18,209 — INFO — layer 1, [1:483], loss=-12.7552


epoch 1, layer 1:   5%|███▍                                                                    | 533/11016 [01:21<25:01,  6.98it/s, loss=-4.14]

2025-07-21 22:42:25,914 — INFO — layer 1, [1:533], loss=-16.9464
2025-07-21 22:42:25,914 — INFO — layer 1, [1:533], loss=-2.8432


epoch 1, layer 1:   5%|███▊                                                                    | 583/11016 [01:29<26:15,  6.62it/s, loss=-11.2]

2025-07-21 22:42:33,567 — INFO — layer 1, [1:583], loss=-9.6951
2025-07-21 22:42:33,568 — INFO — layer 1, [1:583], loss=-12.7001


epoch 1, layer 1:   6%|████▏                                                                   | 633/11016 [01:36<26:14,  6.59it/s, loss=-15.9]

2025-07-21 22:42:41,007 — INFO — layer 1, [1:633], loss=-11.0393
2025-07-21 22:42:41,007 — INFO — layer 1, [1:633], loss=-14.6325


epoch 1, layer 1:   6%|████▍                                                                   | 683/11016 [01:44<26:11,  6.57it/s, loss=-25.4]

2025-07-21 22:42:48,674 — INFO — layer 1, [1:683], loss=-9.0635
2025-07-21 22:42:48,674 — INFO — layer 1, [1:683], loss=-10.3953


epoch 1, layer 1:   7%|████▊                                                                    | 733/11016 [01:51<27:09,  6.31it/s, loss=-4.8]

2025-07-21 22:42:56,309 — INFO — layer 1, [1:733], loss=-7.0085
2025-07-21 22:42:56,309 — INFO — layer 1, [1:733], loss=-6.0552


epoch 1, layer 1:   7%|█████▎                                                                    | 783/11016 [01:59<25:21,  6.72it/s, loss=-12]

2025-07-21 22:43:03,807 — INFO — layer 1, [1:783], loss=-0.3795
2025-07-21 22:43:03,808 — INFO — layer 1, [1:783], loss=-15.0509


epoch 1, layer 1:   8%|█████▍                                                                  | 833/11016 [02:06<24:36,  6.90it/s, loss=-7.85]

2025-07-21 22:43:11,494 — INFO — layer 1, [1:833], loss=-11.0604
2025-07-21 22:43:11,494 — INFO — layer 1, [1:833], loss=-7.5189


epoch 1, layer 1:   8%|█████▊                                                                  | 883/11016 [02:14<24:48,  6.81it/s, loss=0.594]

2025-07-21 22:43:18,989 — INFO — layer 1, [1:883], loss=-15.4364
2025-07-21 22:43:18,989 — INFO — layer 1, [1:883], loss=-5.1007


epoch 1, layer 1:   8%|██████                                                                  | 933/11016 [02:22<25:51,  6.50it/s, loss=-11.9]

2025-07-21 22:43:26,672 — INFO — layer 1, [1:933], loss=-17.7309
2025-07-21 22:43:26,672 — INFO — layer 1, [1:933], loss=-19.0900


epoch 1, layer 1:   9%|██████▍                                                                 | 983/11016 [02:29<25:06,  6.66it/s, loss=-11.1]

2025-07-21 22:43:34,242 — INFO — layer 1, [1:983], loss=-15.7189
2025-07-21 22:43:34,243 — INFO — layer 1, [1:983], loss=-15.8203


epoch 1, layer 1:   9%|██████▋                                                                | 1033/11016 [02:37<24:44,  6.72it/s, loss=-10.5]

2025-07-21 22:43:42,015 — INFO — layer 1, [1:1033], loss=-15.5378
2025-07-21 22:43:42,015 — INFO — layer 1, [1:1033], loss=-5.7958


epoch 1, layer 1:  10%|██████▉                                                                | 1083/11016 [02:45<25:26,  6.51it/s, loss=-10.5]

2025-07-21 22:43:49,721 — INFO — layer 1, [1:1083], loss=-11.3036
2025-07-21 22:43:49,721 — INFO — layer 1, [1:1083], loss=-16.0704


epoch 1, layer 1:  10%|███████▎                                                               | 1133/11016 [02:52<26:04,  6.32it/s, loss=-8.56]

2025-07-21 22:43:57,300 — INFO — layer 1, [1:1133], loss=-16.7949
2025-07-21 22:43:57,300 — INFO — layer 1, [1:1133], loss=-5.7399


epoch 1, layer 1:  11%|███████▌                                                               | 1183/11016 [03:00<24:54,  6.58it/s, loss=-9.18]

2025-07-21 22:44:04,902 — INFO — layer 1, [1:1183], loss=-16.5341
2025-07-21 22:44:04,902 — INFO — layer 1, [1:1183], loss=-17.6341


epoch 1, layer 1:  11%|███████▉                                                               | 1233/11016 [03:08<25:17,  6.45it/s, loss=-6.44]

2025-07-21 22:44:12,768 — INFO — layer 1, [1:1233], loss=-10.5440
2025-07-21 22:44:12,768 — INFO — layer 1, [1:1233], loss=-8.3361


epoch 1, layer 1:  12%|████████▎                                                              | 1283/11016 [03:15<25:52,  6.27it/s, loss=-12.5]

2025-07-21 22:44:20,498 — INFO — layer 1, [1:1283], loss=-7.1226
2025-07-21 22:44:20,498 — INFO — layer 1, [1:1283], loss=-8.7137


epoch 1, layer 1:  12%|████████▌                                                              | 1333/11016 [03:23<23:47,  6.79it/s, loss=-5.45]

2025-07-21 22:44:27,998 — INFO — layer 1, [1:1333], loss=0.4031
2025-07-21 22:44:27,998 — INFO — layer 1, [1:1333], loss=-6.3786


epoch 1, layer 1:  13%|████████▉                                                              | 1383/11016 [03:30<22:56,  7.00it/s, loss=-5.32]

2025-07-21 22:44:35,509 — INFO — layer 1, [1:1383], loss=-6.2183
2025-07-21 22:44:35,509 — INFO — layer 1, [1:1383], loss=0.1707


epoch 1, layer 1:  13%|█████████▏                                                             | 1433/11016 [03:38<23:39,  6.75it/s, loss=-3.23]

2025-07-21 22:44:43,167 — INFO — layer 1, [1:1433], loss=-10.8523
2025-07-21 22:44:43,167 — INFO — layer 1, [1:1433], loss=-9.2125


epoch 1, layer 1:  13%|█████████▌                                                             | 1483/11016 [03:46<23:06,  6.88it/s, loss=-6.44]

2025-07-21 22:44:50,894 — INFO — layer 1, [1:1483], loss=-4.3470
2025-07-21 22:44:50,894 — INFO — layer 1, [1:1483], loss=-6.6924


epoch 1, layer 1:  14%|█████████▉                                                             | 1533/11016 [03:53<23:03,  6.86it/s, loss=-5.55]

2025-07-21 22:44:58,431 — INFO — layer 1, [1:1533], loss=-11.4521
2025-07-21 22:44:58,431 — INFO — layer 1, [1:1533], loss=-3.8047


epoch 1, layer 1:  14%|██████████▏                                                            | 1583/11016 [04:01<23:13,  6.77it/s, loss=0.351]

2025-07-21 22:45:06,011 — INFO — layer 1, [1:1583], loss=-13.7917
2025-07-21 22:45:06,011 — INFO — layer 1, [1:1583], loss=-1.7152


epoch 1, layer 1:  15%|██████████▊                                                              | 1633/11016 [04:08<23:31,  6.65it/s, loss=-12]

2025-07-21 22:45:13,519 — INFO — layer 1, [1:1633], loss=-16.3321
2025-07-21 22:45:13,519 — INFO — layer 1, [1:1633], loss=-3.8968


epoch 1, layer 1:  15%|██████████▊                                                            | 1683/11016 [04:16<23:49,  6.53it/s, loss=-13.4]

2025-07-21 22:45:21,185 — INFO — layer 1, [1:1683], loss=-20.2009
2025-07-21 22:45:21,185 — INFO — layer 1, [1:1683], loss=-17.8180


epoch 1, layer 1:  16%|███████████▏                                                           | 1733/11016 [04:24<23:58,  6.45it/s, loss=-11.9]

2025-07-21 22:45:28,913 — INFO — layer 1, [1:1733], loss=-8.4009
2025-07-21 22:45:28,913 — INFO — layer 1, [1:1733], loss=-4.6770


epoch 1, layer 1:  16%|███████████▍                                                           | 1783/11016 [04:32<25:20,  6.07it/s, loss=-7.45]

2025-07-21 22:45:36,689 — INFO — layer 1, [1:1783], loss=-13.0716
2025-07-21 22:45:36,689 — INFO — layer 1, [1:1783], loss=-5.8342


epoch 1, layer 1:  17%|███████████▊                                                           | 1833/11016 [04:39<22:03,  6.94it/s, loss=-1.12]

2025-07-21 22:45:44,277 — INFO — layer 1, [1:1833], loss=-12.3675
2025-07-21 22:45:44,277 — INFO — layer 1, [1:1833], loss=-20.0755


epoch 1, layer 1:  17%|████████████▏                                                          | 1883/11016 [04:47<23:01,  6.61it/s, loss=-9.86]

2025-07-21 22:45:51,889 — INFO — layer 1, [1:1883], loss=-12.0020
2025-07-21 22:45:51,889 — INFO — layer 1, [1:1883], loss=-8.9222


epoch 1, layer 1:  18%|████████████▍                                                          | 1933/11016 [04:54<22:32,  6.71it/s, loss=-8.62]

2025-07-21 22:45:59,415 — INFO — layer 1, [1:1933], loss=-14.2334
2025-07-21 22:45:59,415 — INFO — layer 1, [1:1933], loss=-17.6400


epoch 1, layer 1:  18%|████████████▊                                                          | 1983/11016 [05:02<22:49,  6.60it/s, loss=-5.52]

2025-07-21 22:46:07,018 — INFO — layer 1, [1:1983], loss=-3.6773
2025-07-21 22:46:07,018 — INFO — layer 1, [1:1983], loss=-21.1029


epoch 1, layer 1:  18%|█████████████▍                                                           | 2033/11016 [05:10<24:15,  6.17it/s, loss=-13]

2025-07-21 22:46:14,816 — INFO — layer 1, [1:2033], loss=-9.1676
2025-07-21 22:46:14,817 — INFO — layer 1, [1:2033], loss=-11.0748


epoch 1, layer 1:  19%|█████████████▍                                                         | 2083/11016 [05:17<22:47,  6.53it/s, loss=-5.55]

2025-07-21 22:46:22,461 — INFO — layer 1, [1:2083], loss=-7.4188
2025-07-21 22:46:22,461 — INFO — layer 1, [1:2083], loss=-9.1286


epoch 1, layer 1:  19%|█████████████▉                                                          | 2133/11016 [05:25<22:20,  6.63it/s, loss=-6.1]

2025-07-21 22:46:30,064 — INFO — layer 1, [1:2133], loss=-0.5306
2025-07-21 22:46:30,064 — INFO — layer 1, [1:2133], loss=-13.6265


epoch 1, layer 1:  20%|██████████████▎                                                         | 2183/11016 [05:33<21:25,  6.87it/s, loss=-4.4]

2025-07-21 22:46:37,638 — INFO — layer 1, [1:2183], loss=-10.0394
2025-07-21 22:46:37,638 — INFO — layer 1, [1:2183], loss=-0.7874


epoch 1, layer 1:  20%|██████████████▍                                                        | 2233/11016 [05:40<22:27,  6.52it/s, loss=-10.2]

2025-07-21 22:46:45,213 — INFO — layer 1, [1:2233], loss=-5.6910
2025-07-21 22:46:45,213 — INFO — layer 1, [1:2233], loss=-6.7278


epoch 1, layer 1:  21%|██████████████▋                                                        | 2283/11016 [05:48<21:53,  6.65it/s, loss=-3.24]

2025-07-21 22:46:53,024 — INFO — layer 1, [1:2283], loss=-20.7334
2025-07-21 22:46:53,024 — INFO — layer 1, [1:2283], loss=-3.8044


epoch 1, layer 1:  21%|███████████████                                                        | 2333/11016 [05:55<22:09,  6.53it/s, loss=-3.93]

2025-07-21 22:47:00,576 — INFO — layer 1, [1:2333], loss=-9.0215
2025-07-21 22:47:00,576 — INFO — layer 1, [1:2333], loss=-13.6023


epoch 1, layer 1:  22%|███████████████▎                                                       | 2383/11016 [06:03<21:00,  6.85it/s, loss=-12.3]

2025-07-21 22:47:08,163 — INFO — layer 1, [1:2383], loss=-6.0211
2025-07-21 22:47:08,163 — INFO — layer 1, [1:2383], loss=-5.6867


epoch 1, layer 1:  22%|███████████████▋                                                       | 2433/11016 [06:11<21:02,  6.80it/s, loss=-17.6]

2025-07-21 22:47:15,857 — INFO — layer 1, [1:2433], loss=-6.6126
2025-07-21 22:47:15,857 — INFO — layer 1, [1:2433], loss=-9.5498


epoch 1, layer 1:  23%|████████████████                                                       | 2483/11016 [06:18<22:10,  6.41it/s, loss=-10.4]

2025-07-21 22:47:23,488 — INFO — layer 1, [1:2483], loss=-22.7082
2025-07-21 22:47:23,488 — INFO — layer 1, [1:2483], loss=-8.4951


epoch 1, layer 1:  23%|████████████████▎                                                      | 2533/11016 [06:26<20:22,  6.94it/s, loss=-9.57]

2025-07-21 22:47:30,937 — INFO — layer 1, [1:2533], loss=-14.5350
2025-07-21 22:47:30,937 — INFO — layer 1, [1:2533], loss=-11.6562


epoch 1, layer 1:  23%|████████████████▋                                                      | 2583/11016 [06:33<22:17,  6.31it/s, loss=-13.8]

2025-07-21 22:47:38,531 — INFO — layer 1, [1:2583], loss=-4.9514
2025-07-21 22:47:38,531 — INFO — layer 1, [1:2583], loss=-17.4023


epoch 1, layer 1:  24%|████████████████▉                                                      | 2633/11016 [06:41<21:58,  6.36it/s, loss=-9.51]

2025-07-21 22:47:46,085 — INFO — layer 1, [1:2633], loss=-5.1296
2025-07-21 22:47:46,085 — INFO — layer 1, [1:2633], loss=0.9027


epoch 1, layer 1:  24%|█████████████████▎                                                     | 2683/11016 [06:49<20:20,  6.83it/s, loss=-13.2]

2025-07-21 22:47:53,819 — INFO — layer 1, [1:2683], loss=-9.7497
2025-07-21 22:47:53,819 — INFO — layer 1, [1:2683], loss=-15.9176


epoch 1, layer 1:  25%|█████████████████▌                                                     | 2733/11016 [06:56<21:19,  6.47it/s, loss=-10.8]

2025-07-21 22:48:01,344 — INFO — layer 1, [1:2733], loss=-3.5507
2025-07-21 22:48:01,344 — INFO — layer 1, [1:2733], loss=-12.4756


epoch 1, layer 1:  25%|█████████████████▉                                                     | 2783/11016 [07:04<20:26,  6.71it/s, loss=-17.6]

2025-07-21 22:48:08,998 — INFO — layer 1, [1:2783], loss=-13.2776
2025-07-21 22:48:08,999 — INFO — layer 1, [1:2783], loss=-16.2603


epoch 1, layer 1:  26%|██████████████████▎                                                    | 2833/11016 [07:11<20:06,  6.78it/s, loss=-6.27]

2025-07-21 22:48:16,477 — INFO — layer 1, [1:2833], loss=-10.9928
2025-07-21 22:48:16,477 — INFO — layer 1, [1:2833], loss=-9.7781


epoch 1, layer 1:  26%|███████████████████▎                                                      | 2883/11016 [07:19<20:29,  6.61it/s, loss=-4]

2025-07-21 22:48:24,132 — INFO — layer 1, [1:2883], loss=-1.5336
2025-07-21 22:48:24,132 — INFO — layer 1, [1:2883], loss=-5.6150


epoch 1, layer 1:  27%|██████████████████▉                                                    | 2933/11016 [07:27<20:38,  6.53it/s, loss=-19.7]

2025-07-21 22:48:31,897 — INFO — layer 1, [1:2933], loss=-15.4867
2025-07-21 22:48:31,897 — INFO — layer 1, [1:2933], loss=-12.1990


epoch 1, layer 1:  27%|███████████████████▏                                                   | 2983/11016 [07:34<19:32,  6.85it/s, loss=-18.5]

2025-07-21 22:48:39,381 — INFO — layer 1, [1:2983], loss=-10.2613
2025-07-21 22:48:39,381 — INFO — layer 1, [1:2983], loss=-13.0314


epoch 1, layer 1:  28%|███████████████████▌                                                   | 3033/11016 [07:42<21:43,  6.12it/s, loss=-9.54]

2025-07-21 22:48:47,010 — INFO — layer 1, [1:3033], loss=-11.7568
2025-07-21 22:48:47,010 — INFO — layer 1, [1:3033], loss=-11.4998


epoch 1, layer 1:  28%|███████████████████▊                                                   | 3083/11016 [07:50<19:54,  6.64it/s, loss=-15.5]

2025-07-21 22:48:54,690 — INFO — layer 1, [1:3083], loss=-13.5400
2025-07-21 22:48:54,690 — INFO — layer 1, [1:3083], loss=-17.6620


epoch 1, layer 1:  28%|████████████████████▏                                                  | 3133/11016 [07:57<18:47,  6.99it/s, loss=-6.08]

2025-07-21 22:49:02,154 — INFO — layer 1, [1:3133], loss=-9.5179
2025-07-21 22:49:02,154 — INFO — layer 1, [1:3133], loss=-8.2622


epoch 1, layer 1:  29%|████████████████████▌                                                  | 3183/11016 [08:05<19:19,  6.75it/s, loss=-9.16]

2025-07-21 22:49:09,790 — INFO — layer 1, [1:3183], loss=-0.1099
2025-07-21 22:49:09,790 — INFO — layer 1, [1:3183], loss=-16.4428


epoch 1, layer 1:  29%|████████████████████▊                                                  | 3233/11016 [08:12<20:36,  6.29it/s, loss=-7.41]

2025-07-21 22:49:17,533 — INFO — layer 1, [1:3233], loss=-5.1755
2025-07-21 22:49:17,533 — INFO — layer 1, [1:3233], loss=-19.9491


epoch 1, layer 1:  30%|█████████████████████▏                                                 | 3283/11016 [08:20<19:38,  6.56it/s, loss=-8.18]

2025-07-21 22:49:25,130 — INFO — layer 1, [1:3283], loss=-14.2730
2025-07-21 22:49:25,130 — INFO — layer 1, [1:3283], loss=-6.5071


epoch 1, layer 1:  30%|█████████████████████▍                                                 | 3333/11016 [08:28<19:04,  6.72it/s, loss=-5.67]

2025-07-21 22:49:32,942 — INFO — layer 1, [1:3333], loss=-12.2455
2025-07-21 22:49:32,942 — INFO — layer 1, [1:3333], loss=-1.2255


epoch 1, layer 1:  31%|█████████████████████▊                                                 | 3383/11016 [08:35<18:36,  6.84it/s, loss=-6.64]

2025-07-21 22:49:40,526 — INFO — layer 1, [1:3383], loss=-9.0049
2025-07-21 22:49:40,526 — INFO — layer 1, [1:3383], loss=-13.8678


epoch 1, layer 1:  31%|██████████████████████▏                                                | 3433/11016 [08:43<18:40,  6.77it/s, loss=-9.38]

2025-07-21 22:49:48,162 — INFO — layer 1, [1:3433], loss=0.1706
2025-07-21 22:49:48,162 — INFO — layer 1, [1:3433], loss=-5.1214


epoch 1, layer 1:  32%|██████████████████████▍                                                | 3483/11016 [08:51<20:27,  6.14it/s, loss=-18.6]

2025-07-21 22:49:55,966 — INFO — layer 1, [1:3483], loss=-7.1552
2025-07-21 22:49:55,966 — INFO — layer 1, [1:3483], loss=2.2621


epoch 1, layer 1:  32%|██████████████████████▊                                                | 3533/11016 [08:59<18:51,  6.62it/s, loss=-11.1]

2025-07-21 22:50:03,595 — INFO — layer 1, [1:3533], loss=-8.0942
2025-07-21 22:50:03,596 — INFO — layer 1, [1:3533], loss=-10.1403


epoch 1, layer 1:  33%|███████████████████████                                                | 3583/11016 [09:06<19:13,  6.44it/s, loss=-9.61]

2025-07-21 22:50:11,181 — INFO — layer 1, [1:3583], loss=0.1673
2025-07-21 22:50:11,181 — INFO — layer 1, [1:3583], loss=-24.9055


epoch 1, layer 1:  33%|███████████████████████▍                                               | 3633/11016 [09:14<18:42,  6.58it/s, loss=-12.3]

2025-07-21 22:50:18,787 — INFO — layer 1, [1:3633], loss=-13.0011
2025-07-21 22:50:18,787 — INFO — layer 1, [1:3633], loss=-11.1451


epoch 1, layer 1:  33%|███████████████████████▋                                               | 3683/11016 [09:21<18:16,  6.69it/s, loss=-9.45]

2025-07-21 22:50:26,267 — INFO — layer 1, [1:3683], loss=-16.9268
2025-07-21 22:50:26,267 — INFO — layer 1, [1:3683], loss=-16.5562


epoch 1, layer 1:  34%|████████████████████████                                               | 3733/11016 [09:29<18:50,  6.44it/s, loss=-15.2]

2025-07-21 22:50:33,864 — INFO — layer 1, [1:3733], loss=-2.7014
2025-07-21 22:50:33,864 — INFO — layer 1, [1:3733], loss=-13.0154


epoch 1, layer 1:  34%|████████████████████████▍                                              | 3783/11016 [09:36<19:21,  6.23it/s, loss=-15.5]

2025-07-21 22:50:41,435 — INFO — layer 1, [1:3783], loss=-15.2470
2025-07-21 22:50:41,435 — INFO — layer 1, [1:3783], loss=-14.9854


epoch 1, layer 1:  35%|████████████████████████▋                                              | 3833/11016 [09:44<17:52,  6.70it/s, loss=-9.26]

2025-07-21 22:50:48,884 — INFO — layer 1, [1:3833], loss=-9.2711
2025-07-21 22:50:48,884 — INFO — layer 1, [1:3833], loss=-13.2345


epoch 1, layer 1:  35%|█████████████████████████                                              | 3883/11016 [09:51<18:14,  6.51it/s, loss=-9.26]

2025-07-21 22:50:56,451 — INFO — layer 1, [1:3883], loss=2.1482
2025-07-21 22:50:56,451 — INFO — layer 1, [1:3883], loss=-9.3684


epoch 1, layer 1:  36%|█████████████████████████▋                                              | 3933/11016 [09:59<18:16,  6.46it/s, loss=-4.6]

2025-07-21 22:51:04,156 — INFO — layer 1, [1:3933], loss=-10.2662
2025-07-21 22:51:04,156 — INFO — layer 1, [1:3933], loss=-16.5538


epoch 1, layer 1:  36%|██████████████████████████▍                                              | 3983/11016 [10:07<17:51,  6.56it/s, loss=-15]

2025-07-21 22:51:11,610 — INFO — layer 1, [1:3983], loss=-20.5841
2025-07-21 22:51:11,610 — INFO — layer 1, [1:3983], loss=-4.5761


epoch 1, layer 1:  37%|█████████████████████████▉                                             | 4033/11016 [10:14<17:08,  6.79it/s, loss=-2.53]

2025-07-21 22:51:19,201 — INFO — layer 1, [1:4033], loss=-11.7003
2025-07-21 22:51:19,201 — INFO — layer 1, [1:4033], loss=-6.4412


epoch 1, layer 1:  37%|██████████████████████████▎                                            | 4083/11016 [10:22<17:38,  6.55it/s, loss=0.629]

2025-07-21 22:51:26,795 — INFO — layer 1, [1:4083], loss=-15.1859
2025-07-21 22:51:26,795 — INFO — layer 1, [1:4083], loss=-9.3839


epoch 1, layer 1:  38%|██████████████████████████▋                                            | 4133/11016 [10:29<17:19,  6.62it/s, loss=-11.6]

2025-07-21 22:51:34,368 — INFO — layer 1, [1:4133], loss=-6.9134
2025-07-21 22:51:34,368 — INFO — layer 1, [1:4133], loss=-13.0984


epoch 1, layer 1:  38%|██████████████████████████▉                                            | 4183/11016 [10:37<18:03,  6.30it/s, loss=-19.3]

2025-07-21 22:51:41,998 — INFO — layer 1, [1:4183], loss=-5.8717
2025-07-21 22:51:41,998 — INFO — layer 1, [1:4183], loss=-14.4801


epoch 1, layer 1:  38%|███████████████████████████▎                                           | 4233/11016 [10:44<17:07,  6.60it/s, loss=-17.1]

2025-07-21 22:51:49,564 — INFO — layer 1, [1:4233], loss=-17.0655
2025-07-21 22:51:49,564 — INFO — layer 1, [1:4233], loss=-19.3430


epoch 1, layer 1:  39%|███████████████████████████▌                                           | 4283/11016 [10:52<16:45,  6.70it/s, loss=-3.64]

2025-07-21 22:51:56,945 — INFO — layer 1, [1:4283], loss=-4.8779
2025-07-21 22:51:56,945 — INFO — layer 1, [1:4283], loss=-13.3504


epoch 1, layer 1:  39%|███████████████████████████▉                                           | 4333/11016 [11:00<18:04,  6.16it/s, loss=-11.2]

2025-07-21 22:52:04,616 — INFO — layer 1, [1:4333], loss=-22.0742
2025-07-21 22:52:04,616 — INFO — layer 1, [1:4333], loss=-8.8927


epoch 1, layer 1:  40%|████████████████████████████▏                                          | 4383/11016 [11:07<16:49,  6.57it/s, loss=-6.09]

2025-07-21 22:52:12,351 — INFO — layer 1, [1:4383], loss=-12.4243
2025-07-21 22:52:12,351 — INFO — layer 1, [1:4383], loss=-16.4852


epoch 1, layer 1:  40%|████████████████████████████▌                                          | 4433/11016 [11:15<16:51,  6.51it/s, loss=-10.5]

2025-07-21 22:52:19,860 — INFO — layer 1, [1:4433], loss=-16.5838
2025-07-21 22:52:19,860 — INFO — layer 1, [1:4433], loss=-9.8408


epoch 1, layer 1:  41%|████████████████████████████▉                                          | 4483/11016 [11:22<16:20,  6.66it/s, loss=-11.8]

2025-07-21 22:52:27,515 — INFO — layer 1, [1:4483], loss=-16.3118
2025-07-21 22:52:27,515 — INFO — layer 1, [1:4483], loss=-9.4737


epoch 1, layer 1:  41%|█████████████████████████████▏                                         | 4533/11016 [11:30<16:10,  6.68it/s, loss=-16.3]

2025-07-21 22:52:35,323 — INFO — layer 1, [1:4533], loss=-8.0331
2025-07-21 22:52:35,323 — INFO — layer 1, [1:4533], loss=-14.3023


epoch 1, layer 1:  42%|█████████████████████████████▌                                         | 4583/11016 [11:38<16:47,  6.39it/s, loss=-8.77]

2025-07-21 22:52:43,109 — INFO — layer 1, [1:4583], loss=-2.5741
2025-07-21 22:52:43,109 — INFO — layer 1, [1:4583], loss=-17.5491


epoch 1, layer 1:  42%|█████████████████████████████▊                                         | 4633/11016 [11:46<16:41,  6.37it/s, loss=-6.14]

2025-07-21 22:52:50,861 — INFO — layer 1, [1:4633], loss=-15.7461
2025-07-21 22:52:50,861 — INFO — layer 1, [1:4633], loss=-21.3272


epoch 1, layer 1:  43%|██████████████████████████████▏                                        | 4683/11016 [11:53<16:19,  6.47it/s, loss=-13.5]

2025-07-21 22:52:58,548 — INFO — layer 1, [1:4683], loss=-5.6305
2025-07-21 22:52:58,548 — INFO — layer 1, [1:4683], loss=-11.9008


epoch 1, layer 1:  43%|███████████████████████████████▎                                         | 4733/11016 [12:01<14:42,  7.12it/s, loss=-13]

2025-07-21 22:53:06,127 — INFO — layer 1, [1:4733], loss=-3.9654
2025-07-21 22:53:06,127 — INFO — layer 1, [1:4733], loss=-10.7886


epoch 1, layer 1:  43%|██████████████████████████████▊                                        | 4783/11016 [12:09<16:07,  6.44it/s, loss=-13.2]

2025-07-21 22:53:13,836 — INFO — layer 1, [1:4783], loss=-5.9145
2025-07-21 22:53:13,836 — INFO — layer 1, [1:4783], loss=-11.8354


epoch 1, layer 1:  44%|███████████████████████████████▏                                       | 4833/11016 [12:16<17:09,  6.00it/s, loss=-8.43]

2025-07-21 22:53:21,406 — INFO — layer 1, [1:4833], loss=-9.8127
2025-07-21 22:53:21,406 — INFO — layer 1, [1:4833], loss=-7.7144


epoch 1, layer 1:  44%|████████████████████████████████▎                                        | 4883/11016 [12:24<14:36,  6.99it/s, loss=-10]

2025-07-21 22:53:28,849 — INFO — layer 1, [1:4883], loss=-17.4154
2025-07-21 22:53:28,849 — INFO — layer 1, [1:4883], loss=-16.1980


epoch 1, layer 1:  45%|███████████████████████████████▊                                       | 4933/11016 [12:31<14:53,  6.81it/s, loss=-16.4]

2025-07-21 22:53:36,419 — INFO — layer 1, [1:4933], loss=-1.4695
2025-07-21 22:53:36,419 — INFO — layer 1, [1:4933], loss=-11.6817


epoch 1, layer 1:  45%|████████████████████████████████                                       | 4983/11016 [12:39<15:55,  6.31it/s, loss=-20.9]

2025-07-21 22:53:43,969 — INFO — layer 1, [1:4983], loss=-9.4887
2025-07-21 22:53:43,969 — INFO — layer 1, [1:4983], loss=-7.4999


epoch 1, layer 1:  46%|████████████████████████████████▍                                      | 5033/11016 [12:46<14:53,  6.69it/s, loss=-19.7]

2025-07-21 22:53:51,480 — INFO — layer 1, [1:5033], loss=-13.2963
2025-07-21 22:53:51,480 — INFO — layer 1, [1:5033], loss=-7.1213


epoch 1, layer 1:  46%|████████████████████████████████▊                                      | 5083/11016 [12:54<15:19,  6.45it/s, loss=-10.9]

2025-07-21 22:53:59,137 — INFO — layer 1, [1:5083], loss=-9.2691
2025-07-21 22:53:59,137 — INFO — layer 1, [1:5083], loss=-14.8700


epoch 1, layer 1:  47%|█████████████████████████████████                                      | 5133/11016 [13:02<15:15,  6.43it/s, loss=-4.62]

2025-07-21 22:54:06,761 — INFO — layer 1, [1:5133], loss=-1.3584
2025-07-21 22:54:06,761 — INFO — layer 1, [1:5133], loss=-5.7864


epoch 1, layer 1:  47%|█████████████████████████████████▍                                     | 5183/11016 [13:09<14:16,  6.81it/s, loss=-13.7]

2025-07-21 22:54:14,375 — INFO — layer 1, [1:5183], loss=-13.0450
2025-07-21 22:54:14,375 — INFO — layer 1, [1:5183], loss=-12.1870


epoch 1, layer 1:  48%|█████████████████████████████████▋                                     | 5233/11016 [13:17<14:26,  6.67it/s, loss=-5.82]

2025-07-21 22:54:21,906 — INFO — layer 1, [1:5233], loss=-10.3204


epoch 1, layer 1:  48%|█████████████████████████████████▋                                     | 5234/11016 [13:17<14:56,  6.45it/s, loss=-10.3]

2025-07-21 22:54:21,906 — INFO — layer 1, [1:5233], loss=-12.7631


epoch 1, layer 1:  48%|██████████████████████████████████                                     | 5283/11016 [13:25<15:42,  6.08it/s, loss=-12.3]

2025-07-21 22:54:29,610 — INFO — layer 1, [1:5283], loss=-6.1377
2025-07-21 22:54:29,610 — INFO — layer 1, [1:5283], loss=-12.6896


epoch 1, layer 1:  48%|██████████████████████████████████▎                                    | 5333/11016 [13:32<14:30,  6.53it/s, loss=-10.7]

2025-07-21 22:54:37,297 — INFO — layer 1, [1:5333], loss=-17.2903
2025-07-21 22:54:37,297 — INFO — layer 1, [1:5333], loss=-16.5055


epoch 1, layer 1:  49%|██████████████████████████████████▋                                    | 5383/11016 [13:40<13:57,  6.73it/s, loss=-13.6]

2025-07-21 22:54:44,937 — INFO — layer 1, [1:5383], loss=-16.5400
2025-07-21 22:54:44,937 — INFO — layer 1, [1:5383], loss=-17.6955


epoch 1, layer 1:  49%|███████████████████████████████████                                    | 5433/11016 [13:48<15:10,  6.13it/s, loss=-14.8]

2025-07-21 22:54:52,871 — INFO — layer 1, [1:5433], loss=-11.6274
2025-07-21 22:54:52,871 — INFO — layer 1, [1:5433], loss=-9.7706


epoch 1, layer 1:  50%|███████████████████████████████████▎                                   | 5483/11016 [13:55<13:23,  6.89it/s, loss=-12.5]

2025-07-21 22:55:00,371 — INFO — layer 1, [1:5483], loss=-5.6778
2025-07-21 22:55:00,371 — INFO — layer 1, [1:5483], loss=-11.5129


epoch 1, layer 1:  50%|███████████████████████████████████▋                                   | 5533/11016 [14:03<13:41,  6.67it/s, loss=-17.2]

2025-07-21 22:55:07,995 — INFO — layer 1, [1:5533], loss=-6.7511
2025-07-21 22:55:07,995 — INFO — layer 1, [1:5533], loss=-5.4349


epoch 1, layer 1:  51%|███████████████████████████████████▉                                   | 5583/11016 [14:10<12:49,  7.06it/s, loss=-13.7]

2025-07-21 22:55:15,447 — INFO — layer 1, [1:5583], loss=-9.5198
2025-07-21 22:55:15,447 — INFO — layer 1, [1:5583], loss=-12.5496


epoch 1, layer 1:  51%|████████████████████████████████████▎                                  | 5633/11016 [14:18<13:56,  6.43it/s, loss=-12.1]

2025-07-21 22:55:23,059 — INFO — layer 1, [1:5633], loss=-12.4387
2025-07-21 22:55:23,060 — INFO — layer 1, [1:5633], loss=-16.9990


epoch 1, layer 1:  52%|████████████████████████████████████▋                                  | 5683/11016 [14:26<13:20,  6.66it/s, loss=-9.14]

2025-07-21 22:55:30,642 — INFO — layer 1, [1:5683], loss=-18.7292
2025-07-21 22:55:30,642 — INFO — layer 1, [1:5683], loss=-7.4671


epoch 1, layer 1:  52%|████████████████████████████████████▉                                  | 5733/11016 [14:33<13:58,  6.30it/s, loss=-18.3]

2025-07-21 22:55:38,213 — INFO — layer 1, [1:5733], loss=-18.3224
2025-07-21 22:55:38,213 — INFO — layer 1, [1:5733], loss=-16.5373


epoch 1, layer 1:  52%|█████████████████████████████████████▎                                 | 5783/11016 [14:41<13:17,  6.56it/s, loss=-6.88]

2025-07-21 22:55:45,697 — INFO — layer 1, [1:5783], loss=-7.3484
2025-07-21 22:55:45,697 — INFO — layer 1, [1:5783], loss=-8.7699


epoch 1, layer 1:  53%|█████████████████████████████████████▌                                 | 5833/11016 [14:48<13:23,  6.45it/s, loss=-7.73]

2025-07-21 22:55:53,492 — INFO — layer 1, [1:5833], loss=-14.4460
2025-07-21 22:55:53,492 — INFO — layer 1, [1:5833], loss=-14.3965


epoch 1, layer 1:  53%|█████████████████████████████████████▉                                 | 5883/11016 [14:56<13:01,  6.57it/s, loss=-15.3]

2025-07-21 22:56:01,167 — INFO — layer 1, [1:5883], loss=-6.1523
2025-07-21 22:56:01,167 — INFO — layer 1, [1:5883], loss=-6.2791


epoch 1, layer 1:  54%|██████████████████████████████████████▏                                | 5933/11016 [15:04<12:31,  6.76it/s, loss=-10.3]

2025-07-21 22:56:08,826 — INFO — layer 1, [1:5933], loss=-19.9582
2025-07-21 22:56:08,826 — INFO — layer 1, [1:5933], loss=-3.2652


epoch 1, layer 1:  54%|███████████████████████████████████████▋                                 | 5983/11016 [15:11<12:47,  6.56it/s, loss=-17]

2025-07-21 22:56:16,453 — INFO — layer 1, [1:5983], loss=-20.8107
2025-07-21 22:56:16,453 — INFO — layer 1, [1:5983], loss=-14.6564


epoch 1, layer 1:  55%|██████████████████████████████████████▉                                | 6033/11016 [15:19<12:00,  6.92it/s, loss=-12.2]

2025-07-21 22:56:24,046 — INFO — layer 1, [1:6033], loss=-8.4215
2025-07-21 22:56:24,046 — INFO — layer 1, [1:6033], loss=-8.9701


epoch 1, layer 1:  55%|███████████████████████████████████████▏                               | 6083/11016 [15:27<13:02,  6.31it/s, loss=-12.5]

2025-07-21 22:56:31,626 — INFO — layer 1, [1:6083], loss=-15.0266
2025-07-21 22:56:31,626 — INFO — layer 1, [1:6083], loss=0.8998


epoch 1, layer 1:  56%|███████████████████████████████████████▌                               | 6133/11016 [15:34<12:43,  6.39it/s, loss=-8.96]

2025-07-21 22:56:39,201 — INFO — layer 1, [1:6133], loss=-19.9901
2025-07-21 22:56:39,201 — INFO — layer 1, [1:6133], loss=-11.6966


epoch 1, layer 1:  56%|███████████████████████████████████████▊                               | 6183/11016 [15:42<12:02,  6.69it/s, loss=-13.7]

2025-07-21 22:56:46,736 — INFO — layer 1, [1:6183], loss=-19.0761
2025-07-21 22:56:46,736 — INFO — layer 1, [1:6183], loss=-17.0567


epoch 1, layer 1:  57%|████████████████████████████████████████▏                              | 6233/11016 [15:49<12:04,  6.60it/s, loss=-17.3]

2025-07-21 22:56:54,332 — INFO — layer 1, [1:6233], loss=-8.5501
2025-07-21 22:56:54,332 — INFO — layer 1, [1:6233], loss=-12.0226


epoch 1, layer 1:  57%|████████████████████████████████████████▍                              | 6283/11016 [15:57<12:50,  6.15it/s, loss=-13.8]

2025-07-21 22:57:02,044 — INFO — layer 1, [1:6283], loss=-4.4112
2025-07-21 22:57:02,044 — INFO — layer 1, [1:6283], loss=-7.3598


epoch 1, layer 1:  57%|████████████████████████████████████████▊                              | 6333/11016 [16:05<12:17,  6.35it/s, loss=-2.54]

2025-07-21 22:57:09,706 — INFO — layer 1, [1:6333], loss=-14.4891
2025-07-21 22:57:09,706 — INFO — layer 1, [1:6333], loss=-17.0920


epoch 1, layer 1:  58%|█████████████████████████████████████████▏                             | 6383/11016 [16:12<11:41,  6.60it/s, loss=-20.8]

2025-07-21 22:57:17,394 — INFO — layer 1, [1:6383], loss=-14.5842
2025-07-21 22:57:17,394 — INFO — layer 1, [1:6383], loss=-10.4780


epoch 1, layer 1:  58%|█████████████████████████████████████████▍                             | 6433/11016 [16:20<11:20,  6.73it/s, loss=-6.89]

2025-07-21 22:57:25,045 — INFO — layer 1, [1:6433], loss=-15.1051
2025-07-21 22:57:25,045 — INFO — layer 1, [1:6433], loss=-12.6886


epoch 1, layer 1:  59%|█████████████████████████████████████████▊                             | 6483/11016 [16:27<11:18,  6.68it/s, loss=-4.39]

2025-07-21 22:57:32,510 — INFO — layer 1, [1:6483], loss=-12.4711
2025-07-21 22:57:32,510 — INFO — layer 1, [1:6483], loss=-20.7077


epoch 1, layer 1:  59%|██████████████████████████████████████████                             | 6533/11016 [16:35<11:19,  6.60it/s, loss=-12.2]

2025-07-21 22:57:40,020 — INFO — layer 1, [1:6533], loss=-11.7456
2025-07-21 22:57:40,020 — INFO — layer 1, [1:6533], loss=-15.4426


epoch 1, layer 1:  60%|███████████████████████████████████████████▌                             | 6583/11016 [16:43<11:43,  6.30it/s, loss=-14]

2025-07-21 22:57:47,653 — INFO — layer 1, [1:6583], loss=-17.9108
2025-07-21 22:57:47,653 — INFO — layer 1, [1:6583], loss=-17.4861


epoch 1, layer 1:  60%|██████████████████████████████████████████▊                            | 6633/11016 [16:50<11:27,  6.37it/s, loss=-9.37]

2025-07-21 22:57:55,258 — INFO — layer 1, [1:6633], loss=-12.5811
2025-07-21 22:57:55,258 — INFO — layer 1, [1:6633], loss=-16.5484


epoch 1, layer 1:  61%|███████████████████████████████████████████                            | 6683/11016 [16:58<11:13,  6.43it/s, loss=-8.69]

2025-07-21 22:58:02,961 — INFO — layer 1, [1:6683], loss=-12.8255
2025-07-21 22:58:02,961 — INFO — layer 1, [1:6683], loss=-16.1270


epoch 1, layer 1:  61%|███████████████████████████████████████████▍                           | 6733/11016 [17:05<10:54,  6.54it/s, loss=-20.7]

2025-07-21 22:58:10,565 — INFO — layer 1, [1:6733], loss=-14.2984
2025-07-21 22:58:10,565 — INFO — layer 1, [1:6733], loss=-9.9891


epoch 1, layer 1:  62%|███████████████████████████████████████████▋                           | 6783/11016 [17:13<10:32,  6.70it/s, loss=-11.6]

2025-07-21 22:58:18,098 — INFO — layer 1, [1:6783], loss=-13.5907
2025-07-21 22:58:18,098 — INFO — layer 1, [1:6783], loss=-3.3714


epoch 1, layer 1:  62%|████████████████████████████████████████████                           | 6833/11016 [17:21<10:54,  6.39it/s, loss=-5.14]

2025-07-21 22:58:25,721 — INFO — layer 1, [1:6833], loss=-18.3313
2025-07-21 22:58:25,721 — INFO — layer 1, [1:6833], loss=-10.7964


epoch 1, layer 1:  62%|████████████████████████████████████████████▎                          | 6883/11016 [17:28<10:48,  6.37it/s, loss=-15.9]

2025-07-21 22:58:33,424 — INFO — layer 1, [1:6883], loss=-15.3606
2025-07-21 22:58:33,424 — INFO — layer 1, [1:6883], loss=-6.5652


epoch 1, layer 1:  63%|████████████████████████████████████████████▋                          | 6933/11016 [17:36<10:20,  6.58it/s, loss=-16.6]

2025-07-21 22:58:41,050 — INFO — layer 1, [1:6933], loss=-11.1003
2025-07-21 22:58:41,050 — INFO — layer 1, [1:6933], loss=-4.8657


epoch 1, layer 1:  63%|█████████████████████████████████████████████                          | 6983/11016 [17:44<10:35,  6.35it/s, loss=-15.7]

2025-07-21 22:58:48,862 — INFO — layer 1, [1:6983], loss=-20.0507
2025-07-21 22:58:48,862 — INFO — layer 1, [1:6983], loss=-22.0974


epoch 1, layer 1:  64%|██████████████████████████████████████████████▌                          | 7033/11016 [17:52<10:50,  6.13it/s, loss=-22]

2025-07-21 22:58:56,877 — INFO — layer 1, [1:7033], loss=-7.3932
2025-07-21 22:58:56,877 — INFO — layer 1, [1:7033], loss=-17.8961


epoch 1, layer 1:  64%|█████████████████████████████████████████████▋                         | 7083/11016 [17:59<09:24,  6.97it/s, loss=-16.7]

2025-07-21 22:59:04,501 — INFO — layer 1, [1:7083], loss=-6.8519
2025-07-21 22:59:04,501 — INFO — layer 1, [1:7083], loss=-7.8660


epoch 1, layer 1:  65%|█████████████████████████████████████████████▉                         | 7133/11016 [18:07<09:29,  6.81it/s, loss=-2.02]

2025-07-21 22:59:12,203 — INFO — layer 1, [1:7133], loss=-15.2486
2025-07-21 22:59:12,203 — INFO — layer 1, [1:7133], loss=-13.7460


epoch 1, layer 1:  65%|██████████████████████████████████████████████▎                        | 7183/11016 [18:15<09:23,  6.81it/s, loss=-13.3]

2025-07-21 22:59:19,727 — INFO — layer 1, [1:7183], loss=-9.8623
2025-07-21 22:59:19,728 — INFO — layer 1, [1:7183], loss=-9.3540


epoch 1, layer 1:  66%|██████████████████████████████████████████████▌                        | 7233/11016 [18:22<09:14,  6.82it/s, loss=-6.54]

2025-07-21 22:59:27,315 — INFO — layer 1, [1:7233], loss=-7.0390
2025-07-21 22:59:27,315 — INFO — layer 1, [1:7233], loss=-13.4298


epoch 1, layer 1:  66%|██████████████████████████████████████████████▉                        | 7283/11016 [18:30<09:51,  6.31it/s, loss=-15.2]

2025-07-21 22:59:35,084 — INFO — layer 1, [1:7283], loss=-6.8660
2025-07-21 22:59:35,084 — INFO — layer 1, [1:7283], loss=-14.6667


epoch 1, layer 1:  67%|███████████████████████████████████████████████▎                       | 7333/11016 [18:38<09:18,  6.60it/s, loss=-18.6]

2025-07-21 22:59:42,662 — INFO — layer 1, [1:7333], loss=-15.6377
2025-07-21 22:59:42,662 — INFO — layer 1, [1:7333], loss=-14.3593


epoch 1, layer 1:  67%|███████████████████████████████████████████████▌                       | 7383/11016 [18:45<09:27,  6.40it/s, loss=-11.3]

2025-07-21 22:59:50,444 — INFO — layer 1, [1:7383], loss=-24.6991
2025-07-21 22:59:50,444 — INFO — layer 1, [1:7383], loss=-16.7730


epoch 1, layer 1:  67%|███████████████████████████████████████████████▉                       | 7433/11016 [18:53<09:01,  6.62it/s, loss=-9.24]

2025-07-21 22:59:57,875 — INFO — layer 1, [1:7433], loss=-20.1077
2025-07-21 22:59:57,875 — INFO — layer 1, [1:7433], loss=-11.8202


epoch 1, layer 1:  68%|████████████████████████████████████████████████▏                      | 7483/11016 [19:00<09:56,  5.92it/s, loss=-10.7]

2025-07-21 23:00:05,536 — INFO — layer 1, [1:7483], loss=-12.3091
2025-07-21 23:00:05,536 — INFO — layer 1, [1:7483], loss=-13.6675


epoch 1, layer 1:  68%|████████████████████████████████████████████████▌                      | 7533/11016 [19:08<08:42,  6.67it/s, loss=-13.3]

2025-07-21 23:00:13,092 — INFO — layer 1, [1:7533], loss=-7.4290
2025-07-21 23:00:13,092 — INFO — layer 1, [1:7533], loss=-11.7596


epoch 1, layer 1:  69%|████████████████████████████████████████████████▊                      | 7583/11016 [19:16<08:47,  6.51it/s, loss=-14.3]

2025-07-21 23:00:20,696 — INFO — layer 1, [1:7583], loss=-11.4829
2025-07-21 23:00:20,696 — INFO — layer 1, [1:7583], loss=-8.0446


epoch 1, layer 1:  69%|█████████████████████████████████████████████████▏                     | 7633/11016 [19:23<07:52,  7.17it/s, loss=-9.93]

2025-07-21 23:00:28,112 — INFO — layer 1, [1:7633], loss=-23.2620
2025-07-21 23:00:28,112 — INFO — layer 1, [1:7633], loss=-21.8081


epoch 1, layer 1:  70%|█████████████████████████████████████████████████▌                     | 7683/11016 [19:31<08:29,  6.54it/s, loss=-24.2]

2025-07-21 23:00:35,693 — INFO — layer 1, [1:7683], loss=-4.8206
2025-07-21 23:00:35,693 — INFO — layer 1, [1:7683], loss=-9.1522


epoch 1, layer 1:  70%|█████████████████████████████████████████████████▊                     | 7733/11016 [19:38<08:37,  6.34it/s, loss=-22.5]

2025-07-21 23:00:43,344 — INFO — layer 1, [1:7733], loss=-9.0928
2025-07-21 23:00:43,344 — INFO — layer 1, [1:7733], loss=-12.8969


epoch 1, layer 1:  71%|██████████████████████████████████████████████████▏                    | 7783/11016 [19:46<07:38,  7.05it/s, loss=-1.69]

2025-07-21 23:00:51,002 — INFO — layer 1, [1:7783], loss=-16.8554
2025-07-21 23:00:51,002 — INFO — layer 1, [1:7783], loss=-16.7570


epoch 1, layer 1:  71%|██████████████████████████████████████████████████▍                    | 7833/11016 [19:53<07:48,  6.80it/s, loss=-17.5]

2025-07-21 23:00:58,469 — INFO — layer 1, [1:7833], loss=-18.3111
2025-07-21 23:00:58,469 — INFO — layer 1, [1:7833], loss=-5.0418


epoch 1, layer 1:  72%|██████████████████████████████████████████████████▊                    | 7883/11016 [20:01<07:47,  6.70it/s, loss=-18.7]

2025-07-21 23:01:05,915 — INFO — layer 1, [1:7883], loss=-17.4860
2025-07-21 23:01:05,915 — INFO — layer 1, [1:7883], loss=-17.3849


epoch 1, layer 1:  72%|███████████████████████████████████████████████████▏                   | 7933/11016 [20:08<07:19,  7.01it/s, loss=-23.6]

2025-07-21 23:01:13,521 — INFO — layer 1, [1:7933], loss=-17.6905
2025-07-21 23:01:13,521 — INFO — layer 1, [1:7933], loss=-17.1947


epoch 1, layer 1:  72%|███████████████████████████████████████████████████▍                   | 7983/11016 [20:16<08:09,  6.20it/s, loss=-7.87]

2025-07-21 23:01:21,354 — INFO — layer 1, [1:7983], loss=-23.3376
2025-07-21 23:01:21,354 — INFO — layer 1, [1:7983], loss=-10.4557


epoch 1, layer 1:  73%|███████████████████████████████████████████████████▊                   | 8033/11016 [20:24<07:44,  6.42it/s, loss=-10.1]

2025-07-21 23:01:28,856 — INFO — layer 1, [1:8033], loss=-10.3615
2025-07-21 23:01:28,856 — INFO — layer 1, [1:8033], loss=-0.6393


epoch 1, layer 1:  73%|████████████████████████████████████████████████████                   | 8083/11016 [20:31<07:01,  6.96it/s, loss=-4.37]

2025-07-21 23:01:36,483 — INFO — layer 1, [1:8083], loss=-21.4180
2025-07-21 23:01:36,483 — INFO — layer 1, [1:8083], loss=-3.9788


epoch 1, layer 1:  74%|████████████████████████████████████████████████████▍                  | 8133/11016 [20:39<07:13,  6.65it/s, loss=-7.74]

2025-07-21 23:01:44,033 — INFO — layer 1, [1:8133], loss=-12.8120
2025-07-21 23:01:44,033 — INFO — layer 1, [1:8133], loss=-11.4579


epoch 1, layer 1:  74%|█████████████████████████████████████████████████████▍                  | 8183/11016 [20:47<07:25,  6.36it/s, loss=-2.2]

2025-07-21 23:01:51,749 — INFO — layer 1, [1:8183], loss=-3.8723
2025-07-21 23:01:51,749 — INFO — layer 1, [1:8183], loss=-20.0292


epoch 1, layer 1:  75%|█████████████████████████████████████████████████████                  | 8233/11016 [20:55<07:24,  6.27it/s, loss=-13.9]

2025-07-21 23:01:59,590 — INFO — layer 1, [1:8233], loss=-14.4900
2025-07-21 23:01:59,590 — INFO — layer 1, [1:8233], loss=-6.4741


epoch 1, layer 1:  75%|██████████████████████████████████████████████████████▉                  | 8283/11016 [21:02<06:49,  6.68it/s, loss=-18]

2025-07-21 23:02:07,203 — INFO — layer 1, [1:8283], loss=-14.2899
2025-07-21 23:02:07,204 — INFO — layer 1, [1:8283], loss=-8.4661


epoch 1, layer 1:  76%|█████████████████████████████████████████████████████▋                 | 8333/11016 [21:10<07:10,  6.23it/s, loss=-17.3]

2025-07-21 23:02:14,939 — INFO — layer 1, [1:8333], loss=-13.7819
2025-07-21 23:02:14,939 — INFO — layer 1, [1:8333], loss=-12.7582


epoch 1, layer 1:  76%|██████████████████████████████████████████████████████                 | 8383/11016 [21:17<06:39,  6.59it/s, loss=-7.27]

2025-07-21 23:02:22,436 — INFO — layer 1, [1:8383], loss=-8.9501
2025-07-21 23:02:22,436 — INFO — layer 1, [1:8383], loss=-3.2881


epoch 1, layer 1:  77%|██████████████████████████████████████████████████████▎                | 8433/11016 [21:25<06:56,  6.20it/s, loss=-16.7]

2025-07-21 23:02:30,226 — INFO — layer 1, [1:8433], loss=-0.5749
2025-07-21 23:02:30,226 — INFO — layer 1, [1:8433], loss=-23.2462


epoch 1, layer 1:  77%|██████████████████████████████████████████████████████▋                | 8483/11016 [21:33<06:35,  6.41it/s, loss=-13.9]

2025-07-21 23:02:37,904 — INFO — layer 1, [1:8483], loss=-12.3092
2025-07-21 23:02:37,905 — INFO — layer 1, [1:8483], loss=-22.4290


epoch 1, layer 1:  77%|██████████████████████████████████████████████████████▉                | 8533/11016 [21:40<06:13,  6.64it/s, loss=-18.3]

2025-07-21 23:02:45,511 — INFO — layer 1, [1:8533], loss=-14.4328
2025-07-21 23:02:45,511 — INFO — layer 1, [1:8533], loss=-12.5985


epoch 1, layer 1:  78%|███████████████████████████████████████████████████████▎               | 8583/11016 [21:48<06:09,  6.58it/s, loss=-13.9]

2025-07-21 23:02:52,962 — INFO — layer 1, [1:8583], loss=-3.0102
2025-07-21 23:02:52,962 — INFO — layer 1, [1:8583], loss=-11.2516


epoch 1, layer 1:  78%|███████████████████████████████████████████████████████▋               | 8633/11016 [21:55<05:51,  6.78it/s, loss=-6.64]

2025-07-21 23:03:00,564 — INFO — layer 1, [1:8633], loss=-10.5083
2025-07-21 23:03:00,564 — INFO — layer 1, [1:8633], loss=-10.4722


epoch 1, layer 1:  79%|███████████████████████████████████████████████████████▉               | 8683/11016 [22:03<05:42,  6.80it/s, loss=-13.4]

2025-07-21 23:03:08,028 — INFO — layer 1, [1:8683], loss=-13.1698
2025-07-21 23:03:08,028 — INFO — layer 1, [1:8683], loss=-5.4764


epoch 1, layer 1:  79%|████████████████████████████████████████████████████████▎              | 8733/11016 [22:11<06:18,  6.03it/s, loss=-12.5]

2025-07-21 23:03:15,664 — INFO — layer 1, [1:8733], loss=-1.7670
2025-07-21 23:03:15,664 — INFO — layer 1, [1:8733], loss=-10.0082


epoch 1, layer 1:  80%|████████████████████████████████████████████████████████▌              | 8783/11016 [22:18<05:43,  6.50it/s, loss=-4.29]

2025-07-21 23:03:23,366 — INFO — layer 1, [1:8783], loss=-19.7366
2025-07-21 23:03:23,366 — INFO — layer 1, [1:8783], loss=-1.4513


epoch 1, layer 1:  80%|██████████████████████████████████████████████████████████▌              | 8833/11016 [22:26<05:46,  6.30it/s, loss=-11]

2025-07-21 23:03:30,967 — INFO — layer 1, [1:8833], loss=-20.3414
2025-07-21 23:03:30,967 — INFO — layer 1, [1:8833], loss=-12.1975


epoch 1, layer 1:  81%|█████████████████████████████████████████████████████████▎             | 8883/11016 [22:33<05:32,  6.42it/s, loss=-22.8]

2025-07-21 23:03:38,562 — INFO — layer 1, [1:8883], loss=-12.7895
2025-07-21 23:03:38,562 — INFO — layer 1, [1:8883], loss=-15.4256


epoch 1, layer 1:  81%|█████████████████████████████████████████████████████████▌             | 8933/11016 [22:41<05:32,  6.26it/s, loss=-23.9]

2025-07-21 23:03:46,138 — INFO — layer 1, [1:8933], loss=-2.5376
2025-07-21 23:03:46,138 — INFO — layer 1, [1:8933], loss=-10.7488


epoch 1, layer 1:  82%|█████████████████████████████████████████████████████████▉             | 8983/11016 [22:49<05:06,  6.64it/s, loss=-4.89]

2025-07-21 23:03:53,680 — INFO — layer 1, [1:8983], loss=-13.0528
2025-07-21 23:03:53,680 — INFO — layer 1, [1:8983], loss=-5.9989


epoch 1, layer 1:  82%|██████████████████████████████████████████████████████████▏            | 9033/11016 [22:56<04:54,  6.74it/s, loss=-12.3]

2025-07-21 23:04:01,398 — INFO — layer 1, [1:9033], loss=-23.5727
2025-07-21 23:04:01,398 — INFO — layer 1, [1:9033], loss=-6.8011


epoch 1, layer 1:  82%|██████████████████████████████████████████████████████████▌            | 9083/11016 [23:04<05:12,  6.19it/s, loss=-21.3]

2025-07-21 23:04:08,997 — INFO — layer 1, [1:9083], loss=-15.9543
2025-07-21 23:04:08,997 — INFO — layer 1, [1:9083], loss=-0.9075


epoch 1, layer 1:  83%|██████████████████████████████████████████████████████████▊            | 9133/11016 [23:12<04:35,  6.84it/s, loss=-10.8]

2025-07-21 23:04:16,659 — INFO — layer 1, [1:9133], loss=-13.5991
2025-07-21 23:04:16,659 — INFO — layer 1, [1:9133], loss=-6.5985


epoch 1, layer 1:  83%|███████████████████████████████████████████████████████████▏           | 9183/11016 [23:19<04:49,  6.32it/s, loss=-10.5]

2025-07-21 23:04:24,233 — INFO — layer 1, [1:9183], loss=-0.6972
2025-07-21 23:04:24,233 — INFO — layer 1, [1:9183], loss=-18.1872


epoch 1, layer 1:  84%|███████████████████████████████████████████████████████████▌           | 9233/11016 [23:27<04:28,  6.63it/s, loss=-14.3]

2025-07-21 23:04:31,660 — INFO — layer 1, [1:9233], loss=-14.9100
2025-07-21 23:04:31,660 — INFO — layer 1, [1:9233], loss=-10.1968


epoch 1, layer 1:  84%|███████████████████████████████████████████████████████████▊           | 9283/11016 [23:34<04:28,  6.45it/s, loss=-9.91]

2025-07-21 23:04:39,395 — INFO — layer 1, [1:9283], loss=-8.3630
2025-07-21 23:04:39,395 — INFO — layer 1, [1:9283], loss=-14.9545


epoch 1, layer 1:  85%|████████████████████████████████████████████████████████████▏          | 9333/11016 [23:42<04:06,  6.83it/s, loss=-15.3]

2025-07-21 23:04:46,897 — INFO — layer 1, [1:9333], loss=-2.3204
2025-07-21 23:04:46,897 — INFO — layer 1, [1:9333], loss=-1.2185


epoch 1, layer 1:  85%|████████████████████████████████████████████████████████████▍          | 9383/11016 [23:49<04:12,  6.46it/s, loss=-16.9]

2025-07-21 23:04:54,489 — INFO — layer 1, [1:9383], loss=-6.4755
2025-07-21 23:04:54,489 — INFO — layer 1, [1:9383], loss=-5.6932


epoch 1, layer 1:  86%|████████████████████████████████████████████████████████████▊          | 9433/11016 [23:57<03:58,  6.65it/s, loss=-8.13]

2025-07-21 23:05:02,153 — INFO — layer 1, [1:9433], loss=-15.0305
2025-07-21 23:05:02,153 — INFO — layer 1, [1:9433], loss=-12.6903


epoch 1, layer 1:  86%|█████████████████████████████████████████████████████████████          | 9483/11016 [24:05<04:08,  6.16it/s, loss=-4.35]

2025-07-21 23:05:09,816 — INFO — layer 1, [1:9483], loss=-17.7551
2025-07-21 23:05:09,816 — INFO — layer 1, [1:9483], loss=-12.9746


epoch 1, layer 1:  87%|█████████████████████████████████████████████████████████████▍         | 9533/11016 [24:13<03:52,  6.38it/s, loss=-12.6]

2025-07-21 23:05:17,663 — INFO — layer 1, [1:9533], loss=-18.1317
2025-07-21 23:05:17,663 — INFO — layer 1, [1:9533], loss=-14.5546


epoch 1, layer 1:  87%|█████████████████████████████████████████████████████████████▊         | 9583/11016 [24:20<03:29,  6.83it/s, loss=-7.35]

2025-07-21 23:05:25,221 — INFO — layer 1, [1:9583], loss=-17.7227
2025-07-21 23:05:25,221 — INFO — layer 1, [1:9583], loss=-11.5854


epoch 1, layer 1:  87%|██████████████████████████████████████████████████████████████         | 9633/11016 [24:28<03:24,  6.77it/s, loss=-19.7]

2025-07-21 23:05:32,663 — INFO — layer 1, [1:9633], loss=-8.7265
2025-07-21 23:05:32,663 — INFO — layer 1, [1:9633], loss=-7.7114


epoch 1, layer 1:  88%|██████████████████████████████████████████████████████████████▍        | 9683/11016 [24:35<03:14,  6.85it/s, loss=-19.3]

2025-07-21 23:05:40,299 — INFO — layer 1, [1:9683], loss=-17.8283
2025-07-21 23:05:40,299 — INFO — layer 1, [1:9683], loss=-20.1234


epoch 1, layer 1:  88%|██████████████████████████████████████████████████████████████▋        | 9733/11016 [24:43<03:09,  6.77it/s, loss=-23.5]

2025-07-21 23:05:47,737 — INFO — layer 1, [1:9733], loss=-9.9860
2025-07-21 23:05:47,737 — INFO — layer 1, [1:9733], loss=-10.0966


epoch 1, layer 1:  89%|███████████████████████████████████████████████████████████████        | 9783/11016 [24:50<03:02,  6.76it/s, loss=-17.2]

2025-07-21 23:05:55,282 — INFO — layer 1, [1:9783], loss=-8.1222
2025-07-21 23:05:55,283 — INFO — layer 1, [1:9783], loss=-14.9330


epoch 1, layer 1:  89%|█████████████████████████████████████████████████████████████████▏       | 9833/11016 [24:58<03:04,  6.43it/s, loss=-15]

2025-07-21 23:06:03,010 — INFO — layer 1, [1:9833], loss=-2.8504
2025-07-21 23:06:03,010 — INFO — layer 1, [1:9833], loss=-13.3986


epoch 1, layer 1:  90%|███████████████████████████████████████████████████████████████▋       | 9883/11016 [25:06<02:52,  6.55it/s, loss=-12.6]

2025-07-21 23:06:10,668 — INFO — layer 1, [1:9883], loss=-16.8473
2025-07-21 23:06:10,668 — INFO — layer 1, [1:9883], loss=-9.7094


epoch 1, layer 1:  90%|████████████████████████████████████████████████████████████████       | 9933/11016 [25:13<02:49,  6.40it/s, loss=-13.3]

2025-07-21 23:06:18,271 — INFO — layer 1, [1:9933], loss=-21.2149
2025-07-21 23:06:18,271 — INFO — layer 1, [1:9933], loss=-19.0214


epoch 1, layer 1:  91%|████████████████████████████████████████████████████████████████▎      | 9983/11016 [25:21<02:39,  6.49it/s, loss=-20.3]

2025-07-21 23:06:25,829 — INFO — layer 1, [1:9983], loss=-12.4642
2025-07-21 23:06:25,829 — INFO — layer 1, [1:9983], loss=-14.8061


epoch 1, layer 1:  91%|███████████████████████████████████████████████████████████████▊      | 10033/11016 [25:28<02:27,  6.67it/s, loss=-12.2]

2025-07-21 23:06:33,458 — INFO — layer 1, [1:10033], loss=-4.5276
2025-07-21 23:06:33,458 — INFO — layer 1, [1:10033], loss=-10.4854


epoch 1, layer 1:  92%|████████████████████████████████████████████████████████████████      | 10083/11016 [25:36<02:28,  6.30it/s, loss=-18.8]

2025-07-21 23:06:40,973 — INFO — layer 1, [1:10083], loss=-14.3281
2025-07-21 23:06:40,973 — INFO — layer 1, [1:10083], loss=-12.4978


epoch 1, layer 1:  92%|██████████████████████████████████████████████████████████████████▏     | 10133/11016 [25:43<02:10,  6.76it/s, loss=-14]

2025-07-21 23:06:48,496 — INFO — layer 1, [1:10133], loss=-6.9943
2025-07-21 23:06:48,496 — INFO — layer 1, [1:10133], loss=-10.1762


epoch 1, layer 1:  92%|████████████████████████████████████████████████████████████████▋     | 10183/11016 [25:51<02:08,  6.48it/s, loss=-8.34]

2025-07-21 23:06:56,024 — INFO — layer 1, [1:10183], loss=-13.7813
2025-07-21 23:06:56,024 — INFO — layer 1, [1:10183], loss=-12.8954


epoch 1, layer 1:  93%|█████████████████████████████████████████████████████████████████     | 10233/11016 [25:58<01:57,  6.69it/s, loss=-12.1]

2025-07-21 23:07:03,542 — INFO — layer 1, [1:10233], loss=-11.2700
2025-07-21 23:07:03,542 — INFO — layer 1, [1:10233], loss=-5.3770


epoch 1, layer 1:  93%|█████████████████████████████████████████████████████████████████▎    | 10283/11016 [26:06<01:56,  6.30it/s, loss=-10.8]

2025-07-21 23:07:11,139 — INFO — layer 1, [1:10283], loss=-6.5858
2025-07-21 23:07:11,139 — INFO — layer 1, [1:10283], loss=-16.0450


epoch 1, layer 1:  94%|█████████████████████████████████████████████████████████████████▋    | 10333/11016 [26:14<01:48,  6.27it/s, loss=-18.4]

2025-07-21 23:07:18,817 — INFO — layer 1, [1:10333], loss=-13.7923
2025-07-21 23:07:18,817 — INFO — layer 1, [1:10333], loss=-11.9868


epoch 1, layer 1:  94%|█████████████████████████████████████████████████████████████████▉    | 10383/11016 [26:21<01:37,  6.51it/s, loss=-9.88]

2025-07-21 23:07:26,525 — INFO — layer 1, [1:10383], loss=-11.7014
2025-07-21 23:07:26,525 — INFO — layer 1, [1:10383], loss=-16.3235


epoch 1, layer 1:  95%|██████████████████████████████████████████████████████████████████▎   | 10433/11016 [26:29<01:34,  6.15it/s, loss=-13.4]

2025-07-21 23:07:34,270 — INFO — layer 1, [1:10433], loss=-11.0476
2025-07-21 23:07:34,270 — INFO — layer 1, [1:10433], loss=-15.6487


epoch 1, layer 1:  95%|████████████████████████████████████████████████████████████████████▌   | 10483/11016 [26:37<01:16,  6.95it/s, loss=-12]

2025-07-21 23:07:41,815 — INFO — layer 1, [1:10483], loss=-12.0467
2025-07-21 23:07:41,815 — INFO — layer 1, [1:10483], loss=-7.9645


epoch 1, layer 1:  96%|██████████████████████████████████████████████████████████████████▉   | 10533/11016 [26:44<01:09,  6.91it/s, loss=-19.8]

2025-07-21 23:07:49,376 — INFO — layer 1, [1:10533], loss=-5.0797
2025-07-21 23:07:49,376 — INFO — layer 1, [1:10533], loss=-2.5602


epoch 1, layer 1:  96%|█████████████████████████████████████████████████████████████████████▏  | 10583/11016 [26:52<01:03,  6.79it/s, loss=-26]

2025-07-21 23:07:56,808 — INFO — layer 1, [1:10583], loss=-11.6358
2025-07-21 23:07:56,809 — INFO — layer 1, [1:10583], loss=-13.8598


epoch 1, layer 1:  97%|███████████████████████████████████████████████████████████████████▌  | 10633/11016 [27:00<01:05,  5.88it/s, loss=-13.7]

2025-07-21 23:08:04,696 — INFO — layer 1, [1:10633], loss=-13.3751
2025-07-21 23:08:04,696 — INFO — layer 1, [1:10633], loss=-14.4885


epoch 1, layer 1:  97%|███████████████████████████████████████████████████████████████████▉  | 10683/11016 [27:07<00:49,  6.67it/s, loss=-10.1]

2025-07-21 23:08:12,269 — INFO — layer 1, [1:10683], loss=-17.5255
2025-07-21 23:08:12,269 — INFO — layer 1, [1:10683], loss=-10.7768


epoch 1, layer 1:  97%|████████████████████████████████████████████████████████████████████▏ | 10733/11016 [27:15<00:44,  6.36it/s, loss=-23.1]

2025-07-21 23:08:19,819 — INFO — layer 1, [1:10733], loss=-21.4418
2025-07-21 23:08:19,819 — INFO — layer 1, [1:10733], loss=-10.2749


epoch 1, layer 1:  98%|████████████████████████████████████████████████████████████████████▌ | 10783/11016 [27:22<00:34,  6.84it/s, loss=-13.5]

2025-07-21 23:08:27,232 — INFO — layer 1, [1:10783], loss=0.3192
2025-07-21 23:08:27,231 — INFO — layer 1, [1:10783], loss=-12.7177


epoch 1, layer 1:  98%|████████████████████████████████████████████████████████████████████▊ | 10833/11016 [27:30<00:27,  6.74it/s, loss=-17.7]

2025-07-21 23:08:34,843 — INFO — layer 1, [1:10833], loss=-16.4949
2025-07-21 23:08:34,843 — INFO — layer 1, [1:10833], loss=-13.1627


epoch 1, layer 1:  99%|█████████████████████████████████████████████████████████████████████▏| 10883/11016 [27:37<00:19,  6.81it/s, loss=-9.48]

2025-07-21 23:08:42,480 — INFO — layer 1, [1:10883], loss=-8.9237
2025-07-21 23:08:42,480 — INFO — layer 1, [1:10883], loss=-1.0904


epoch 1, layer 1:  99%|█████████████████████████████████████████████████████████████████████▍| 10933/11016 [27:45<00:12,  6.57it/s, loss=-9.07]

2025-07-21 23:08:50,135 — INFO — layer 1, [1:10933], loss=-3.9556
2025-07-21 23:08:50,135 — INFO — layer 1, [1:10933], loss=-15.5251


epoch 1, layer 1: 100%|█████████████████████████████████████████████████████████████████████▊| 10983/11016 [27:53<00:04,  6.79it/s, loss=-7.46]

2025-07-21 23:08:57,745 — INFO — layer 1, [1:10983], loss=-16.4436
2025-07-21 23:08:57,745 — INFO — layer 1, [1:10983], loss=-1.1870


epoch 2, layer 1:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 23:09:05,792 — INFO — layer 1, [2:17], loss=-12.4933
2025-07-21 23:09:05,792 — INFO — layer 1, [2:17], loss=-14.8276


epoch 2, layer 1:   1%|▍                                                                        | 67/11016 [00:10<28:23,  6.43it/s, loss=-7.61]

2025-07-21 23:09:13,392 — INFO — layer 1, [2:67], loss=-6.7768
2025-07-21 23:09:13,392 — INFO — layer 1, [2:67], loss=-14.7449


epoch 2, layer 1:   1%|▊                                                                       | 117/11016 [00:18<28:01,  6.48it/s, loss=-5.57]

2025-07-21 23:09:21,022 — INFO — layer 1, [2:117], loss=-11.7247
2025-07-21 23:09:21,022 — INFO — layer 1, [2:117], loss=-21.3038


epoch 2, layer 1:   2%|█                                                                       | 167/11016 [00:25<27:35,  6.55it/s, loss=-4.35]

2025-07-21 23:09:28,571 — INFO — layer 1, [2:167], loss=-23.7327
2025-07-21 23:09:28,571 — INFO — layer 1, [2:167], loss=-18.8752


epoch 2, layer 1:   2%|█▍                                                                      | 217/11016 [00:33<28:03,  6.41it/s, loss=-13.7]

2025-07-21 23:09:36,292 — INFO — layer 1, [2:217], loss=-9.8215
2025-07-21 23:09:36,293 — INFO — layer 1, [2:217], loss=-5.1724


epoch 2, layer 1:   2%|█▋                                                                      | 267/11016 [00:40<26:35,  6.74it/s, loss=-7.48]

2025-07-21 23:09:43,730 — INFO — layer 1, [2:267], loss=-16.9781
2025-07-21 23:09:43,730 — INFO — layer 1, [2:267], loss=-10.7865


epoch 2, layer 1:   3%|██                                                                      | 317/11016 [00:48<27:53,  6.39it/s, loss=-13.9]

2025-07-21 23:09:51,436 — INFO — layer 1, [2:317], loss=-18.3436
2025-07-21 23:09:51,436 — INFO — layer 1, [2:317], loss=-1.4243


epoch 2, layer 1:   3%|██▍                                                                     | 367/11016 [00:56<27:59,  6.34it/s, loss=-11.1]

2025-07-21 23:09:59,070 — INFO — layer 1, [2:367], loss=0.5931
2025-07-21 23:09:59,070 — INFO — layer 1, [2:367], loss=-12.1160


epoch 2, layer 1:   4%|██▋                                                                     | 417/11016 [01:03<26:04,  6.77it/s, loss=-8.86]

2025-07-21 23:10:06,802 — INFO — layer 1, [2:417], loss=-15.6151
2025-07-21 23:10:06,802 — INFO — layer 1, [2:417], loss=-9.9381


epoch 2, layer 1:   4%|███                                                                    | 467/11016 [01:11<25:14,  6.97it/s, loss=-0.443]

2025-07-21 23:10:14,521 — INFO — layer 1, [2:467], loss=-14.8959
2025-07-21 23:10:14,521 — INFO — layer 1, [2:467], loss=-14.4030


epoch 2, layer 1:   5%|███▍                                                                    | 517/11016 [01:19<26:16,  6.66it/s, loss=-20.5]

2025-07-21 23:10:22,285 — INFO — layer 1, [2:517], loss=-18.4780
2025-07-21 23:10:22,285 — INFO — layer 1, [2:517], loss=-13.6325


epoch 2, layer 1:   5%|███▋                                                                    | 567/11016 [01:26<26:29,  6.58it/s, loss=-5.49]

2025-07-21 23:10:29,742 — INFO — layer 1, [2:567], loss=-14.2496
2025-07-21 23:10:29,742 — INFO — layer 1, [2:567], loss=-12.7151


epoch 2, layer 1:   6%|████                                                                    | 617/11016 [01:34<26:02,  6.65it/s, loss=-2.85]

2025-07-21 23:10:37,256 — INFO — layer 1, [2:617], loss=-6.2201
2025-07-21 23:10:37,256 — INFO — layer 1, [2:617], loss=-17.2573


epoch 2, layer 1:   6%|████▎                                                                   | 667/11016 [01:42<25:42,  6.71it/s, loss=-12.9]

2025-07-21 23:10:44,991 — INFO — layer 1, [2:667], loss=-6.2815
2025-07-21 23:10:44,991 — INFO — layer 1, [2:667], loss=-15.5902


epoch 2, layer 1:   7%|████▋                                                                   | 717/11016 [01:49<26:04,  6.58it/s, loss=-10.9]

2025-07-21 23:10:52,544 — INFO — layer 1, [2:717], loss=-5.8347
2025-07-21 23:10:52,544 — INFO — layer 1, [2:717], loss=-7.7077


epoch 2, layer 1:   7%|█████                                                                   | 767/11016 [01:57<28:12,  6.06it/s, loss=-8.99]

2025-07-21 23:11:00,207 — INFO — layer 1, [2:767], loss=-14.0137
2025-07-21 23:11:00,207 — INFO — layer 1, [2:767], loss=-13.4274


epoch 2, layer 1:   7%|█████▎                                                                  | 817/11016 [02:04<25:39,  6.63it/s, loss=-10.6]

2025-07-21 23:11:07,914 — INFO — layer 1, [2:817], loss=-17.6639
2025-07-21 23:11:07,914 — INFO — layer 1, [2:817], loss=-16.5656


epoch 2, layer 1:   8%|█████▋                                                                  | 867/11016 [02:12<26:43,  6.33it/s, loss=-14.3]

2025-07-21 23:11:15,851 — INFO — layer 1, [2:867], loss=-2.9565
2025-07-21 23:11:15,851 — INFO — layer 1, [2:867], loss=-11.5508


epoch 2, layer 1:   8%|█████▉                                                                  | 917/11016 [02:20<25:05,  6.71it/s, loss=-19.3]

2025-07-21 23:11:23,631 — INFO — layer 1, [2:917], loss=-3.6511
2025-07-21 23:11:23,631 — INFO — layer 1, [2:917], loss=-19.7228


epoch 2, layer 1:   9%|██████▎                                                                 | 967/11016 [02:28<24:36,  6.81it/s, loss=-10.9]

2025-07-21 23:11:31,226 — INFO — layer 1, [2:967], loss=-7.9303
2025-07-21 23:11:31,227 — INFO — layer 1, [2:967], loss=-7.0255


epoch 2, layer 1:   9%|██████▋                                                                  | 1017/11016 [02:35<25:46,  6.46it/s, loss=-16]

2025-07-21 23:11:38,891 — INFO — layer 1, [2:1017], loss=-13.9121
2025-07-21 23:11:38,891 — INFO — layer 1, [2:1017], loss=-20.7665


epoch 2, layer 1:  10%|██████▉                                                                 | 1067/11016 [02:43<25:12,  6.58it/s, loss=-4.3]

2025-07-21 23:11:46,643 — INFO — layer 1, [2:1067], loss=-3.8584
2025-07-21 23:11:46,643 — INFO — layer 1, [2:1067], loss=0.0296


epoch 2, layer 1:  10%|███████▏                                                               | 1117/11016 [02:51<24:28,  6.74it/s, loss=-2.99]

2025-07-21 23:11:54,210 — INFO — layer 1, [2:1117], loss=-12.6272
2025-07-21 23:11:54,210 — INFO — layer 1, [2:1117], loss=-11.3441


epoch 2, layer 1:  11%|███████▌                                                               | 1167/11016 [02:58<25:44,  6.38it/s, loss=-9.49]

2025-07-21 23:12:01,786 — INFO — layer 1, [2:1167], loss=-10.7843
2025-07-21 23:12:01,786 — INFO — layer 1, [2:1167], loss=-10.7127


epoch 2, layer 1:  11%|███████▊                                                               | 1217/11016 [03:06<25:19,  6.45it/s, loss=-17.1]

2025-07-21 23:12:09,421 — INFO — layer 1, [2:1217], loss=-2.0876
2025-07-21 23:12:09,422 — INFO — layer 1, [2:1217], loss=-17.7621


epoch 2, layer 1:  12%|████████▏                                                              | 1267/11016 [03:14<25:25,  6.39it/s, loss=-12.2]

2025-07-21 23:12:17,028 — INFO — layer 1, [2:1267], loss=-16.9058
2025-07-21 23:12:17,028 — INFO — layer 1, [2:1267], loss=-0.0947


epoch 2, layer 1:  12%|████████▍                                                              | 1317/11016 [03:21<24:54,  6.49it/s, loss=-13.6]

2025-07-21 23:12:24,630 — INFO — layer 1, [2:1317], loss=-6.5843
2025-07-21 23:12:24,630 — INFO — layer 1, [2:1317], loss=-17.7644


epoch 2, layer 1:  12%|████████▊                                                              | 1367/11016 [03:29<24:38,  6.53it/s, loss=-10.9]

2025-07-21 23:12:32,151 — INFO — layer 1, [2:1367], loss=-6.7294
2025-07-21 23:12:32,151 — INFO — layer 1, [2:1367], loss=-11.8158


epoch 2, layer 1:  13%|█████████▎                                                              | 1417/11016 [03:36<24:32,  6.52it/s, loss=-1.5]

2025-07-21 23:12:39,686 — INFO — layer 1, [2:1417], loss=-9.9968
2025-07-21 23:12:39,686 — INFO — layer 1, [2:1417], loss=-8.9726


epoch 2, layer 1:  13%|█████████▍                                                             | 1467/11016 [03:44<25:48,  6.17it/s, loss=-12.7]

2025-07-21 23:12:47,166 — INFO — layer 1, [2:1467], loss=-1.8837
2025-07-21 23:12:47,166 — INFO — layer 1, [2:1467], loss=-17.8829


epoch 2, layer 1:  14%|█████████▊                                                             | 1517/11016 [03:51<24:02,  6.59it/s, loss=-16.1]

2025-07-21 23:12:54,819 — INFO — layer 1, [2:1517], loss=-10.3922
2025-07-21 23:12:54,819 — INFO — layer 1, [2:1517], loss=-21.5368


epoch 2, layer 1:  14%|██████████                                                             | 1567/11016 [03:59<22:30,  7.00it/s, loss=-15.1]

2025-07-21 23:13:02,290 — INFO — layer 1, [2:1567], loss=-10.6628
2025-07-21 23:13:02,290 — INFO — layer 1, [2:1567], loss=-16.9401


epoch 2, layer 1:  15%|██████████▊                                                               | 1617/11016 [04:06<22:57,  6.82it/s, loss=-7]

2025-07-21 23:13:09,913 — INFO — layer 1, [2:1617], loss=-13.7477
2025-07-21 23:13:09,913 — INFO — layer 1, [2:1617], loss=-17.0890


epoch 2, layer 1:  15%|██████████▋                                                            | 1667/11016 [04:14<22:11,  7.02it/s, loss=-19.5]

2025-07-21 23:13:17,407 — INFO — layer 1, [2:1667], loss=-19.3318
2025-07-21 23:13:17,407 — INFO — layer 1, [2:1667], loss=-10.1597


epoch 2, layer 1:  16%|███████████                                                            | 1717/11016 [04:22<22:49,  6.79it/s, loss=-17.5]

2025-07-21 23:13:25,055 — INFO — layer 1, [2:1717], loss=-15.6243
2025-07-21 23:13:25,055 — INFO — layer 1, [2:1717], loss=-3.7872


epoch 2, layer 1:  16%|███████████▍                                                           | 1767/11016 [04:29<23:16,  6.62it/s, loss=-18.3]

2025-07-21 23:13:32,712 — INFO — layer 1, [2:1767], loss=-15.4223
2025-07-21 23:13:32,712 — INFO — layer 1, [2:1767], loss=-7.1253


epoch 2, layer 1:  16%|███████████▋                                                           | 1817/11016 [04:37<24:42,  6.20it/s, loss=-22.3]

2025-07-21 23:13:40,382 — INFO — layer 1, [2:1817], loss=-28.4554
2025-07-21 23:13:40,382 — INFO — layer 1, [2:1817], loss=-14.1205


epoch 2, layer 1:  17%|████████████▎                                                            | 1867/11016 [04:45<23:04,  6.61it/s, loss=-13]

2025-07-21 23:13:47,969 — INFO — layer 1, [2:1867], loss=-2.2519
2025-07-21 23:13:47,969 — INFO — layer 1, [2:1867], loss=-8.8373


epoch 2, layer 1:  17%|████████████▎                                                          | 1917/11016 [04:52<22:48,  6.65it/s, loss=-3.16]

2025-07-21 23:13:55,646 — INFO — layer 1, [2:1917], loss=-5.4777


epoch 2, layer 1:  17%|████████████▎                                                          | 1918/11016 [04:52<23:14,  6.53it/s, loss=-5.48]

2025-07-21 23:13:55,647 — INFO — layer 1, [2:1917], loss=-0.4172


epoch 2, layer 1:  18%|████████████▋                                                          | 1967/11016 [05:00<21:57,  6.87it/s, loss=-22.8]

2025-07-21 23:14:03,167 — INFO — layer 1, [2:1967], loss=-8.4479
2025-07-21 23:14:03,167 — INFO — layer 1, [2:1967], loss=-9.4125


epoch 2, layer 1:  18%|████████████▉                                                          | 2017/11016 [05:07<22:52,  6.56it/s, loss=-16.2]

2025-07-21 23:14:10,890 — INFO — layer 1, [2:2017], loss=-10.9443
2025-07-21 23:14:10,890 — INFO — layer 1, [2:2017], loss=-8.6604


epoch 2, layer 1:  19%|█████████████▎                                                         | 2067/11016 [05:15<22:16,  6.69it/s, loss=-8.88]

2025-07-21 23:14:18,350 — INFO — layer 1, [2:2067], loss=-10.2153
2025-07-21 23:14:18,350 — INFO — layer 1, [2:2067], loss=-18.1916


epoch 2, layer 1:  19%|█████████████▋                                                         | 2117/11016 [05:23<22:57,  6.46it/s, loss=-5.08]

2025-07-21 23:14:26,124 — INFO — layer 1, [2:2117], loss=-17.2455
2025-07-21 23:14:26,124 — INFO — layer 1, [2:2117], loss=-15.8523


epoch 2, layer 1:  20%|█████████████▉                                                         | 2167/11016 [05:30<22:06,  6.67it/s, loss=-11.9]

2025-07-21 23:14:33,795 — INFO — layer 1, [2:2167], loss=-12.5246
2025-07-21 23:14:33,795 — INFO — layer 1, [2:2167], loss=-4.9374


epoch 2, layer 1:  20%|██████████████▎                                                        | 2217/11016 [05:38<24:05,  6.09it/s, loss=-21.1]

2025-07-21 23:14:41,311 — INFO — layer 1, [2:2217], loss=-15.3642
2025-07-21 23:14:41,311 — INFO — layer 1, [2:2217], loss=-9.0552


epoch 2, layer 1:  21%|██████████████▌                                                        | 2267/11016 [05:46<22:19,  6.53it/s, loss=-20.7]

2025-07-21 23:14:49,000 — INFO — layer 1, [2:2267], loss=-18.1605
2025-07-21 23:14:49,000 — INFO — layer 1, [2:2267], loss=-10.3256


epoch 2, layer 1:  21%|██████████████▉                                                        | 2317/11016 [05:53<21:52,  6.63it/s, loss=-17.7]

2025-07-21 23:14:56,499 — INFO — layer 1, [2:2317], loss=0.2262
2025-07-21 23:14:56,499 — INFO — layer 1, [2:2317], loss=-11.7200


epoch 2, layer 1:  21%|███████████████▎                                                       | 2367/11016 [06:01<21:18,  6.76it/s, loss=-17.6]

2025-07-21 23:15:04,089 — INFO — layer 1, [2:2367], loss=-16.3992
2025-07-21 23:15:04,089 — INFO — layer 1, [2:2367], loss=-15.1379


epoch 2, layer 1:  22%|███████████████▌                                                       | 2417/11016 [06:08<22:48,  6.28it/s, loss=-2.97]

2025-07-21 23:15:11,798 — INFO — layer 1, [2:2417], loss=-3.7727
2025-07-21 23:15:11,798 — INFO — layer 1, [2:2417], loss=-17.9543


epoch 2, layer 1:  22%|███████████████▉                                                       | 2467/11016 [06:16<21:29,  6.63it/s, loss=-16.8]

2025-07-21 23:15:19,556 — INFO — layer 1, [2:2467], loss=-8.1050
2025-07-21 23:15:19,556 — INFO — layer 1, [2:2467], loss=-6.7273


epoch 2, layer 1:  23%|████████████████▍                                                       | 2517/11016 [06:24<21:56,  6.46it/s, loss=-7.5]

2025-07-21 23:15:27,255 — INFO — layer 1, [2:2517], loss=-3.0996
2025-07-21 23:15:27,255 — INFO — layer 1, [2:2517], loss=-9.3573


epoch 2, layer 1:  23%|████████████████▌                                                      | 2567/11016 [06:31<21:02,  6.69it/s, loss=-8.07]

2025-07-21 23:15:34,875 — INFO — layer 1, [2:2567], loss=-23.8159
2025-07-21 23:15:34,875 — INFO — layer 1, [2:2567], loss=-11.0189


epoch 2, layer 1:  24%|████████████████▊                                                      | 2617/11016 [06:39<22:09,  6.32it/s, loss=-17.1]

2025-07-21 23:15:42,481 — INFO — layer 1, [2:2617], loss=-18.9287
2025-07-21 23:15:42,481 — INFO — layer 1, [2:2617], loss=-14.5225


epoch 2, layer 1:  24%|█████████████████▏                                                     | 2667/11016 [06:47<20:30,  6.79it/s, loss=-10.2]

2025-07-21 23:15:50,122 — INFO — layer 1, [2:2667], loss=-13.9007
2025-07-21 23:15:50,122 — INFO — layer 1, [2:2667], loss=-14.5638


epoch 2, layer 1:  25%|█████████████████▌                                                     | 2717/11016 [06:54<20:49,  6.64it/s, loss=-14.6]

2025-07-21 23:15:57,695 — INFO — layer 1, [2:2717], loss=-15.2655
2025-07-21 23:15:57,695 — INFO — layer 1, [2:2717], loss=-17.4496


epoch 2, layer 1:  25%|█████████████████▊                                                     | 2767/11016 [07:02<21:12,  6.48it/s, loss=-23.9]

2025-07-21 23:16:05,177 — INFO — layer 1, [2:2767], loss=-8.8464
2025-07-21 23:16:05,177 — INFO — layer 1, [2:2767], loss=-5.1199


epoch 2, layer 1:  26%|██████████████████▋                                                      | 2817/11016 [07:09<21:06,  6.47it/s, loss=-14]

2025-07-21 23:16:12,788 — INFO — layer 1, [2:2817], loss=-17.4340
2025-07-21 23:16:12,788 — INFO — layer 1, [2:2817], loss=-5.0183


epoch 2, layer 1:  26%|██████████████████▍                                                    | 2867/11016 [07:17<20:18,  6.69it/s, loss=-17.7]

2025-07-21 23:16:20,377 — INFO — layer 1, [2:2867], loss=-21.8725
2025-07-21 23:16:20,377 — INFO — layer 1, [2:2867], loss=-3.6751


epoch 2, layer 1:  26%|██████████████████▊                                                    | 2917/11016 [07:25<20:14,  6.67it/s, loss=-16.5]

2025-07-21 23:16:27,997 — INFO — layer 1, [2:2917], loss=-18.2019
2025-07-21 23:16:27,997 — INFO — layer 1, [2:2917], loss=-10.8715


epoch 2, layer 1:  27%|███████████████████                                                    | 2967/11016 [07:32<20:39,  6.49it/s, loss=-10.7]

2025-07-21 23:16:35,580 — INFO — layer 1, [2:2967], loss=-16.7696
2025-07-21 23:16:35,581 — INFO — layer 1, [2:2967], loss=-1.9178


epoch 2, layer 1:  27%|███████████████████▍                                                   | 3017/11016 [07:40<21:03,  6.33it/s, loss=-9.13]

2025-07-21 23:16:43,331 — INFO — layer 1, [2:3017], loss=-12.1479
2025-07-21 23:16:43,331 — INFO — layer 1, [2:3017], loss=-17.9284


epoch 2, layer 1:  28%|███████████████████▊                                                   | 3067/11016 [07:48<20:14,  6.55it/s, loss=-8.88]

2025-07-21 23:16:51,016 — INFO — layer 1, [2:3067], loss=-11.2410
2025-07-21 23:16:51,016 — INFO — layer 1, [2:3067], loss=-7.3318


epoch 2, layer 1:  28%|████████████████████                                                   | 3117/11016 [07:55<19:35,  6.72it/s, loss=-17.3]

2025-07-21 23:16:58,536 — INFO — layer 1, [2:3117], loss=-15.9798
2025-07-21 23:16:58,536 — INFO — layer 1, [2:3117], loss=-16.5924


epoch 2, layer 1:  29%|████████████████████▍                                                  | 3167/11016 [08:03<19:16,  6.79it/s, loss=-6.97]

2025-07-21 23:17:06,216 — INFO — layer 1, [2:3167], loss=-12.8319
2025-07-21 23:17:06,216 — INFO — layer 1, [2:3167], loss=-10.2699


epoch 2, layer 1:  29%|████████████████████▋                                                  | 3217/11016 [08:10<20:29,  6.35it/s, loss=-18.3]

2025-07-21 23:17:13,861 — INFO — layer 1, [2:3217], loss=-8.5222
2025-07-21 23:17:13,861 — INFO — layer 1, [2:3217], loss=-15.9205


epoch 2, layer 1:  30%|█████████████████████                                                  | 3267/11016 [08:18<18:57,  6.81it/s, loss=-10.7]

2025-07-21 23:17:21,467 — INFO — layer 1, [2:3267], loss=-15.3697
2025-07-21 23:17:21,467 — INFO — layer 1, [2:3267], loss=-7.9990


epoch 2, layer 1:  30%|█████████████████████▍                                                 | 3317/11016 [08:26<19:30,  6.58it/s, loss=-21.4]

2025-07-21 23:17:29,158 — INFO — layer 1, [2:3317], loss=-7.6408
2025-07-21 23:17:29,158 — INFO — layer 1, [2:3317], loss=-5.9079


epoch 2, layer 1:  31%|█████████████████████▋                                                 | 3367/11016 [08:33<18:45,  6.80it/s, loss=-5.53]

2025-07-21 23:17:36,739 — INFO — layer 1, [2:3367], loss=-19.7107
2025-07-21 23:17:36,739 — INFO — layer 1, [2:3367], loss=-14.1666


epoch 2, layer 1:  31%|██████████████████████                                                 | 3417/11016 [08:41<17:52,  7.08it/s, loss=-4.88]

2025-07-21 23:17:44,332 — INFO — layer 1, [2:3417], loss=-3.3243
2025-07-21 23:17:44,332 — INFO — layer 1, [2:3417], loss=-13.2111


epoch 2, layer 1:  31%|██████████████████████▎                                                | 3467/11016 [08:48<18:56,  6.64it/s, loss=-11.2]

2025-07-21 23:17:51,874 — INFO — layer 1, [2:3467], loss=-12.0568
2025-07-21 23:17:51,874 — INFO — layer 1, [2:3467], loss=-8.2607


epoch 2, layer 1:  32%|██████████████████████▋                                                | 3517/11016 [08:56<18:53,  6.61it/s, loss=-10.2]

2025-07-21 23:17:59,405 — INFO — layer 1, [2:3517], loss=-14.3536
2025-07-21 23:17:59,406 — INFO — layer 1, [2:3517], loss=-18.4031


epoch 2, layer 1:  32%|██████████████████████▉                                                | 3567/11016 [09:04<18:02,  6.88it/s, loss=-16.5]

2025-07-21 23:18:07,076 — INFO — layer 1, [2:3567], loss=-8.1903
2025-07-21 23:18:07,076 — INFO — layer 1, [2:3567], loss=-6.8075


epoch 2, layer 1:  33%|███████████████████████▎                                               | 3617/11016 [09:11<19:15,  6.41it/s, loss=-18.2]

2025-07-21 23:18:14,650 — INFO — layer 1, [2:3617], loss=-7.6187
2025-07-21 23:18:14,650 — INFO — layer 1, [2:3617], loss=-14.1737


epoch 2, layer 1:  33%|███████████████████████▋                                               | 3667/11016 [09:19<18:49,  6.50it/s, loss=-8.81]

2025-07-21 23:18:22,339 — INFO — layer 1, [2:3667], loss=-16.5146
2025-07-21 23:18:22,339 — INFO — layer 1, [2:3667], loss=-9.1396


epoch 2, layer 1:  34%|███████████████████████▉                                               | 3717/11016 [09:26<18:04,  6.73it/s, loss=-16.4]

2025-07-21 23:18:29,961 — INFO — layer 1, [2:3717], loss=-13.2648
2025-07-21 23:18:29,961 — INFO — layer 1, [2:3717], loss=-6.5080


epoch 2, layer 1:  34%|████████████████████████▎                                              | 3767/11016 [09:34<18:11,  6.64it/s, loss=-11.8]

2025-07-21 23:18:37,535 — INFO — layer 1, [2:3767], loss=-21.8788
2025-07-21 23:18:37,535 — INFO — layer 1, [2:3767], loss=-26.2784


epoch 2, layer 1:  35%|████████████████████████▌                                              | 3817/11016 [09:42<17:41,  6.78it/s, loss=-5.24]

2025-07-21 23:18:45,124 — INFO — layer 1, [2:3817], loss=-20.9532
2025-07-21 23:18:45,124 — INFO — layer 1, [2:3817], loss=-12.6489


epoch 2, layer 1:  35%|████████████████████████▉                                              | 3867/11016 [09:49<18:25,  6.47it/s, loss=-5.95]

2025-07-21 23:18:52,737 — INFO — layer 1, [2:3867], loss=-19.8142
2025-07-21 23:18:52,737 — INFO — layer 1, [2:3867], loss=-16.0943


epoch 2, layer 1:  36%|█████████████████████████▉                                               | 3917/11016 [09:57<16:57,  6.98it/s, loss=-17]

2025-07-21 23:19:00,447 — INFO — layer 1, [2:3917], loss=-2.9453
2025-07-21 23:19:00,446 — INFO — layer 1, [2:3917], loss=-1.3557


epoch 2, layer 1:  36%|█████████████████████████▌                                             | 3967/11016 [10:05<17:25,  6.74it/s, loss=-11.6]

2025-07-21 23:19:08,050 — INFO — layer 1, [2:3967], loss=-20.8834
2025-07-21 23:19:08,050 — INFO — layer 1, [2:3967], loss=-9.3195


epoch 2, layer 1:  36%|█████████████████████████▉                                             | 4017/11016 [10:12<17:38,  6.61it/s, loss=-5.66]

2025-07-21 23:19:15,626 — INFO — layer 1, [2:4017], loss=-20.6549
2025-07-21 23:19:15,626 — INFO — layer 1, [2:4017], loss=-23.3643


epoch 2, layer 1:  37%|██████████████████████████▏                                            | 4067/11016 [10:20<18:05,  6.40it/s, loss=-19.6]

2025-07-21 23:19:23,278 — INFO — layer 1, [2:4067], loss=-2.6804
2025-07-21 23:19:23,278 — INFO — layer 1, [2:4067], loss=-23.2826


epoch 2, layer 1:  37%|██████████████████████████▌                                            | 4117/11016 [10:28<16:56,  6.78it/s, loss=-6.53]

2025-07-21 23:19:30,914 — INFO — layer 1, [2:4117], loss=-14.7498
2025-07-21 23:19:30,914 — INFO — layer 1, [2:4117], loss=-7.1895


epoch 2, layer 1:  38%|██████████████████████████▊                                            | 4167/11016 [10:35<16:55,  6.74it/s, loss=-4.51]

2025-07-21 23:19:38,636 — INFO — layer 1, [2:4167], loss=-5.3640
2025-07-21 23:19:38,637 — INFO — layer 1, [2:4167], loss=-8.8798


epoch 2, layer 1:  38%|███████████████████████████▏                                           | 4217/11016 [10:43<17:06,  6.63it/s, loss=-11.5]

2025-07-21 23:19:46,373 — INFO — layer 1, [2:4217], loss=-15.3028
2025-07-21 23:19:46,374 — INFO — layer 1, [2:4217], loss=-0.2058


epoch 2, layer 1:  39%|███████████████████████████▌                                           | 4267/11016 [10:51<17:22,  6.48it/s, loss=-12.8]

2025-07-21 23:19:53,979 — INFO — layer 1, [2:4267], loss=-4.2913
2025-07-21 23:19:53,979 — INFO — layer 1, [2:4267], loss=-15.4459


epoch 2, layer 1:  39%|████████████████████████████▌                                            | 4317/11016 [10:58<16:25,  6.80it/s, loss=-10]

2025-07-21 23:20:01,390 — INFO — layer 1, [2:4317], loss=-9.5793
2025-07-21 23:20:01,390 — INFO — layer 1, [2:4317], loss=-17.4927


epoch 2, layer 1:  40%|████████████████████████████▏                                          | 4367/11016 [11:06<16:27,  6.73it/s, loss=-13.9]

2025-07-21 23:20:08,978 — INFO — layer 1, [2:4367], loss=-10.2641
2025-07-21 23:20:08,978 — INFO — layer 1, [2:4367], loss=-16.9449


epoch 2, layer 1:  40%|████████████████████████████▍                                          | 4417/11016 [11:13<16:03,  6.85it/s, loss=-18.9]

2025-07-21 23:20:16,447 — INFO — layer 1, [2:4417], loss=-14.8318
2025-07-21 23:20:16,447 — INFO — layer 1, [2:4417], loss=-11.3453


epoch 2, layer 1:  41%|█████████████████████████████▏                                          | 4467/11016 [11:21<15:46,  6.92it/s, loss=-5.8]

2025-07-21 23:20:24,044 — INFO — layer 1, [2:4467], loss=-1.1050
2025-07-21 23:20:24,044 — INFO — layer 1, [2:4467], loss=-9.8177


epoch 2, layer 1:  41%|█████████████████████████████▉                                           | 4517/11016 [11:28<15:45,  6.87it/s, loss=-12]

2025-07-21 23:20:31,618 — INFO — layer 1, [2:4517], loss=-13.6073
2025-07-21 23:20:31,618 — INFO — layer 1, [2:4517], loss=-17.1116


epoch 2, layer 1:  41%|█████████████████████████████▍                                         | 4567/11016 [11:36<16:17,  6.60it/s, loss=-17.4]

2025-07-21 23:20:39,405 — INFO — layer 1, [2:4567], loss=-10.3916
2025-07-21 23:20:39,405 — INFO — layer 1, [2:4567], loss=-15.2860


epoch 2, layer 1:  42%|██████████████████████████████▌                                          | 4617/11016 [11:44<15:46,  6.76it/s, loss=-10]

2025-07-21 23:20:46,964 — INFO — layer 1, [2:4617], loss=-13.9810
2025-07-21 23:20:46,964 — INFO — layer 1, [2:4617], loss=-11.2563


epoch 2, layer 1:  42%|██████████████████████████████                                         | 4667/11016 [11:51<15:37,  6.77it/s, loss=-3.75]

2025-07-21 23:20:54,448 — INFO — layer 1, [2:4667], loss=-4.8337
2025-07-21 23:20:54,448 — INFO — layer 1, [2:4667], loss=-7.5990


epoch 2, layer 1:  43%|██████████████████████████████▍                                        | 4717/11016 [11:59<14:41,  7.14it/s, loss=-7.29]

2025-07-21 23:21:02,005 — INFO — layer 1, [2:4717], loss=-9.8000
2025-07-21 23:21:02,005 — INFO — layer 1, [2:4717], loss=-22.8765


epoch 2, layer 1:  43%|██████████████████████████████▋                                        | 4767/11016 [12:06<16:44,  6.22it/s, loss=-10.3]

2025-07-21 23:21:09,587 — INFO — layer 1, [2:4767], loss=-16.4155
2025-07-21 23:21:09,587 — INFO — layer 1, [2:4767], loss=-11.9138


epoch 2, layer 1:  44%|███████████████████████████████                                        | 4817/11016 [12:14<15:23,  6.71it/s, loss=-7.64]

2025-07-21 23:21:17,298 — INFO — layer 1, [2:4817], loss=-16.3412
2025-07-21 23:21:17,298 — INFO — layer 1, [2:4817], loss=-12.5829


epoch 2, layer 1:  44%|████████████████████████████████▎                                        | 4867/11016 [12:22<14:43,  6.96it/s, loss=-11]

2025-07-21 23:21:25,021 — INFO — layer 1, [2:4867], loss=-11.7460
2025-07-21 23:21:25,021 — INFO — layer 1, [2:4867], loss=-11.1551


epoch 2, layer 1:  45%|███████████████████████████████▋                                       | 4917/11016 [12:29<15:45,  6.45it/s, loss=-12.2]

2025-07-21 23:21:32,762 — INFO — layer 1, [2:4917], loss=-3.7752
2025-07-21 23:21:32,762 — INFO — layer 1, [2:4917], loss=-11.5399


epoch 2, layer 1:  45%|████████████████████████████████                                       | 4967/11016 [12:37<17:02,  5.92it/s, loss=-11.1]

2025-07-21 23:21:40,495 — INFO — layer 1, [2:4967], loss=-12.4463
2025-07-21 23:21:40,495 — INFO — layer 1, [2:4967], loss=-23.4447


epoch 2, layer 1:  46%|████████████████████████████████▎                                      | 5017/11016 [12:45<15:30,  6.45it/s, loss=-14.1]

2025-07-21 23:21:48,130 — INFO — layer 1, [2:5017], loss=-11.5087
2025-07-21 23:21:48,130 — INFO — layer 1, [2:5017], loss=-15.3358


epoch 2, layer 1:  46%|████████████████████████████████▋                                      | 5067/11016 [12:52<14:27,  6.86it/s, loss=-14.5]

2025-07-21 23:21:55,598 — INFO — layer 1, [2:5067], loss=-9.5281
2025-07-21 23:21:55,598 — INFO — layer 1, [2:5067], loss=-16.5459


epoch 2, layer 1:  46%|████████████████████████████████▉                                      | 5117/11016 [13:00<14:14,  6.90it/s, loss=-12.8]

2025-07-21 23:22:03,257 — INFO — layer 1, [2:5117], loss=-12.7425
2025-07-21 23:22:03,257 — INFO — layer 1, [2:5117], loss=-11.6328


epoch 2, layer 1:  47%|█████████████████████████████████▎                                     | 5167/11016 [13:08<15:09,  6.43it/s, loss=-10.8]

2025-07-21 23:22:11,040 — INFO — layer 1, [2:5167], loss=-13.5762
2025-07-21 23:22:11,040 — INFO — layer 1, [2:5167], loss=-11.4983


epoch 2, layer 1:  47%|█████████████████████████████████▌                                     | 5217/11016 [13:15<14:00,  6.90it/s, loss=-19.9]

2025-07-21 23:22:18,555 — INFO — layer 1, [2:5217], loss=-26.6510
2025-07-21 23:22:18,555 — INFO — layer 1, [2:5217], loss=-11.3294


epoch 2, layer 1:  48%|█████████████████████████████████▉                                     | 5267/11016 [13:23<14:24,  6.65it/s, loss=-11.9]

2025-07-21 23:22:26,298 — INFO — layer 1, [2:5267], loss=-19.9788
2025-07-21 23:22:26,298 — INFO — layer 1, [2:5267], loss=-12.9278


epoch 2, layer 1:  48%|██████████████████████████████████▎                                    | 5317/11016 [13:30<14:43,  6.45it/s, loss=-16.3]

2025-07-21 23:22:33,903 — INFO — layer 1, [2:5317], loss=-20.1597
2025-07-21 23:22:33,903 — INFO — layer 1, [2:5317], loss=-17.8084


epoch 2, layer 1:  49%|██████████████████████████████████▌                                    | 5367/11016 [13:38<14:25,  6.53it/s, loss=-17.2]

2025-07-21 23:22:41,606 — INFO — layer 1, [2:5367], loss=-5.7359
2025-07-21 23:22:41,606 — INFO — layer 1, [2:5367], loss=-8.2196


epoch 2, layer 1:  49%|██████████████████████████████████▉                                    | 5417/11016 [13:46<13:59,  6.67it/s, loss=-10.1]

2025-07-21 23:22:49,308 — INFO — layer 1, [2:5417], loss=-17.5843
2025-07-21 23:22:49,308 — INFO — layer 1, [2:5417], loss=-7.1819


epoch 2, layer 1:  50%|███████████████████████████████████▏                                   | 5467/11016 [13:54<14:06,  6.55it/s, loss=-8.54]

2025-07-21 23:22:57,049 — INFO — layer 1, [2:5467], loss=-15.9976
2025-07-21 23:22:57,049 — INFO — layer 1, [2:5467], loss=-10.6985


epoch 2, layer 1:  50%|███████████████████████████████████▌                                   | 5517/11016 [14:01<13:50,  6.62it/s, loss=-11.2]

2025-07-21 23:23:04,590 — INFO — layer 1, [2:5517], loss=-13.6187
2025-07-21 23:23:04,590 — INFO — layer 1, [2:5517], loss=-6.5604


epoch 2, layer 1:  51%|███████████████████████████████████▉                                   | 5567/11016 [14:09<13:36,  6.67it/s, loss=-10.8]

2025-07-21 23:23:12,125 — INFO — layer 1, [2:5567], loss=-4.0084
2025-07-21 23:23:12,125 — INFO — layer 1, [2:5567], loss=-20.3132


epoch 2, layer 1:  51%|████████████████████████████████████▏                                  | 5617/11016 [14:16<13:49,  6.51it/s, loss=-14.5]

2025-07-21 23:23:19,620 — INFO — layer 1, [2:5617], loss=-6.6401
2025-07-21 23:23:19,620 — INFO — layer 1, [2:5617], loss=-14.7062


epoch 2, layer 1:  51%|█████████████████████████████████████▌                                   | 5667/11016 [14:24<14:25,  6.18it/s, loss=-23]

2025-07-21 23:23:27,175 — INFO — layer 1, [2:5667], loss=-20.8006
2025-07-21 23:23:27,175 — INFO — layer 1, [2:5667], loss=-13.4258


epoch 2, layer 1:  52%|████████████████████████████████████▊                                  | 5717/11016 [14:31<13:27,  6.56it/s, loss=-3.67]

2025-07-21 23:23:34,746 — INFO — layer 1, [2:5717], loss=-6.6819
2025-07-21 23:23:34,746 — INFO — layer 1, [2:5717], loss=-7.3785


epoch 2, layer 1:  52%|█████████████████████████████████████▏                                 | 5767/11016 [14:39<12:39,  6.91it/s, loss=-15.3]

2025-07-21 23:23:42,347 — INFO — layer 1, [2:5767], loss=-8.1000
2025-07-21 23:23:42,347 — INFO — layer 1, [2:5767], loss=-14.1822


epoch 2, layer 1:  53%|█████████████████████████████████████▍                                 | 5817/11016 [14:47<13:24,  6.46it/s, loss=-11.7]

2025-07-21 23:23:50,015 — INFO — layer 1, [2:5817], loss=-4.5865
2025-07-21 23:23:50,015 — INFO — layer 1, [2:5817], loss=-20.8207


epoch 2, layer 1:  53%|█████████████████████████████████████▊                                 | 5867/11016 [14:54<13:36,  6.30it/s, loss=-7.62]

2025-07-21 23:23:57,743 — INFO — layer 1, [2:5867], loss=-17.0145
2025-07-21 23:23:57,743 — INFO — layer 1, [2:5867], loss=-19.1161


epoch 2, layer 1:  54%|██████████████████████████████████████▏                                | 5917/11016 [15:02<12:04,  7.03it/s, loss=-16.1]

2025-07-21 23:24:05,309 — INFO — layer 1, [2:5917], loss=-17.5900
2025-07-21 23:24:05,309 — INFO — layer 1, [2:5917], loss=-21.1684


epoch 2, layer 1:  54%|██████████████████████████████████████▍                                | 5967/11016 [15:10<12:45,  6.59it/s, loss=-10.5]

2025-07-21 23:24:12,996 — INFO — layer 1, [2:5967], loss=-23.0634
2025-07-21 23:24:12,997 — INFO — layer 1, [2:5967], loss=-0.5719


epoch 2, layer 1:  55%|██████████████████████████████████████▊                                | 6017/11016 [15:17<13:18,  6.26it/s, loss=-8.33]

2025-07-21 23:24:20,579 — INFO — layer 1, [2:6017], loss=-15.2033
2025-07-21 23:24:20,579 — INFO — layer 1, [2:6017], loss=-18.5742


epoch 2, layer 1:  55%|███████████████████████████████████████                                | 6067/11016 [15:25<12:34,  6.56it/s, loss=-18.1]

2025-07-21 23:24:28,226 — INFO — layer 1, [2:6067], loss=-14.0963
2025-07-21 23:24:28,226 — INFO — layer 1, [2:6067], loss=-7.2251


epoch 2, layer 1:  56%|███████████████████████████████████████▍                               | 6117/11016 [15:33<11:41,  6.99it/s, loss=-14.1]

2025-07-21 23:24:35,963 — INFO — layer 1, [2:6117], loss=-14.1597
2025-07-21 23:24:35,963 — INFO — layer 1, [2:6117], loss=-12.6252


epoch 2, layer 1:  56%|███████████████████████████████████████▋                               | 6167/11016 [15:40<12:28,  6.48it/s, loss=-21.7]

2025-07-21 23:24:43,450 — INFO — layer 1, [2:6167], loss=-13.4760
2025-07-21 23:24:43,450 — INFO — layer 1, [2:6167], loss=-7.0514


epoch 2, layer 1:  56%|████████████████████████████████████████                               | 6217/11016 [15:47<11:58,  6.68it/s, loss=-6.62]

2025-07-21 23:24:50,920 — INFO — layer 1, [2:6217], loss=-3.4554
2025-07-21 23:24:50,920 — INFO — layer 1, [2:6217], loss=-17.9386


epoch 2, layer 1:  57%|████████████████████████████████████████▍                              | 6267/11016 [15:55<11:49,  6.69it/s, loss=-10.3]

2025-07-21 23:24:58,605 — INFO — layer 1, [2:6267], loss=-16.5336
2025-07-21 23:24:58,605 — INFO — layer 1, [2:6267], loss=-4.1684


epoch 2, layer 1:  57%|████████████████████████████████████████▋                              | 6317/11016 [16:03<11:50,  6.61it/s, loss=-6.06]

2025-07-21 23:25:06,234 — INFO — layer 1, [2:6317], loss=-9.5145
2025-07-21 23:25:06,234 — INFO — layer 1, [2:6317], loss=-12.1361


epoch 2, layer 1:  58%|█████████████████████████████████████████                              | 6367/11016 [16:10<11:42,  6.62it/s, loss=-18.4]

2025-07-21 23:25:13,882 — INFO — layer 1, [2:6367], loss=-15.7819
2025-07-21 23:25:13,882 — INFO — layer 1, [2:6367], loss=-21.6064


epoch 2, layer 1:  58%|█████████████████████████████████████████▎                             | 6417/11016 [16:18<12:03,  6.35it/s, loss=-1.99]

2025-07-21 23:25:21,486 — INFO — layer 1, [2:6417], loss=-16.3092
2025-07-21 23:25:21,486 — INFO — layer 1, [2:6417], loss=-12.7666


epoch 2, layer 1:  59%|█████████████████████████████████████████▋                             | 6467/11016 [16:26<10:44,  7.06it/s, loss=-23.4]

2025-07-21 23:25:29,169 — INFO — layer 1, [2:6467], loss=-8.4603
2025-07-21 23:25:29,169 — INFO — layer 1, [2:6467], loss=-11.0299


epoch 2, layer 1:  59%|███████████████████████████████████████████▏                             | 6517/11016 [16:33<11:14,  6.67it/s, loss=-21]

2025-07-21 23:25:36,764 — INFO — layer 1, [2:6517], loss=-18.1035
2025-07-21 23:25:36,764 — INFO — layer 1, [2:6517], loss=-12.0350


epoch 2, layer 1:  60%|██████████████████████████████████████████▎                            | 6567/11016 [16:41<11:09,  6.65it/s, loss=-4.25]

2025-07-21 23:25:44,328 — INFO — layer 1, [2:6567], loss=-20.5692
2025-07-21 23:25:44,328 — INFO — layer 1, [2:6567], loss=-10.9289


epoch 2, layer 1:  60%|██████████████████████████████████████████▋                            | 6617/11016 [16:48<11:10,  6.56it/s, loss=-8.59]

2025-07-21 23:25:51,793 — INFO — layer 1, [2:6617], loss=-16.1746
2025-07-21 23:25:51,793 — INFO — layer 1, [2:6617], loss=-7.4783


epoch 2, layer 1:  61%|██████████████████████████████████████████▉                            | 6667/11016 [16:56<10:35,  6.85it/s, loss=-7.34]

2025-07-21 23:25:59,502 — INFO — layer 1, [2:6667], loss=-9.7270
2025-07-21 23:25:59,502 — INFO — layer 1, [2:6667], loss=-3.6413


epoch 2, layer 1:  61%|███████████████████████████████████████████▉                            | 6717/11016 [17:04<10:50,  6.60it/s, loss=-8.7]

2025-07-21 23:26:06,964 — INFO — layer 1, [2:6717], loss=-11.3311
2025-07-21 23:26:06,965 — INFO — layer 1, [2:6717], loss=-2.5349


epoch 2, layer 1:  61%|███████████████████████████████████████████▌                           | 6767/11016 [17:11<11:11,  6.33it/s, loss=-10.4]

2025-07-21 23:26:14,684 — INFO — layer 1, [2:6767], loss=-3.7820
2025-07-21 23:26:14,684 — INFO — layer 1, [2:6767], loss=-3.3727


epoch 2, layer 1:  62%|███████████████████████████████████████████▉                           | 6817/11016 [17:19<10:10,  6.88it/s, loss=-20.8]

2025-07-21 23:26:22,281 — INFO — layer 1, [2:6817], loss=-17.8909
2025-07-21 23:26:22,281 — INFO — layer 1, [2:6817], loss=-10.8194


epoch 2, layer 1:  62%|████████████████████████████████████████████▎                          | 6867/11016 [17:27<10:05,  6.85it/s, loss=-18.8]

2025-07-21 23:26:30,004 — INFO — layer 1, [2:6867], loss=-17.9464
2025-07-21 23:26:30,004 — INFO — layer 1, [2:6867], loss=-16.6845


epoch 2, layer 1:  63%|████████████████████████████████████████████▌                          | 6917/11016 [17:35<11:25,  5.98it/s, loss=-15.9]

2025-07-21 23:26:37,926 — INFO — layer 1, [2:6917], loss=-14.3900
2025-07-21 23:26:37,926 — INFO — layer 1, [2:6917], loss=-8.7319


epoch 2, layer 1:  63%|████████████████████████████████████████████▉                          | 6967/11016 [17:42<10:18,  6.54it/s, loss=-9.86]

2025-07-21 23:26:45,555 — INFO — layer 1, [2:6967], loss=-3.5897
2025-07-21 23:26:45,555 — INFO — layer 1, [2:6967], loss=-18.8485


epoch 2, layer 1:  64%|█████████████████████████████████████████████▏                         | 7017/11016 [17:50<09:54,  6.73it/s, loss=-3.68]

2025-07-21 23:26:53,107 — INFO — layer 1, [2:7017], loss=-18.2423
2025-07-21 23:26:53,107 — INFO — layer 1, [2:7017], loss=-13.6359


epoch 2, layer 1:  64%|█████████████████████████████████████████████▌                         | 7067/11016 [17:57<10:22,  6.34it/s, loss=-7.12]

2025-07-21 23:27:00,697 — INFO — layer 1, [2:7067], loss=-19.1553
2025-07-21 23:27:00,697 — INFO — layer 1, [2:7067], loss=-14.4179


epoch 2, layer 1:  65%|█████████████████████████████████████████████▊                         | 7117/11016 [18:05<10:12,  6.37it/s, loss=-9.91]

2025-07-21 23:27:08,263 — INFO — layer 1, [2:7117], loss=-22.9535
2025-07-21 23:27:08,263 — INFO — layer 1, [2:7117], loss=-17.3188


epoch 2, layer 1:  65%|██████████████████████████████████████████████▏                        | 7167/11016 [18:12<09:57,  6.44it/s, loss=-8.03]

2025-07-21 23:27:15,903 — INFO — layer 1, [2:7167], loss=-12.0679
2025-07-21 23:27:15,903 — INFO — layer 1, [2:7167], loss=-22.8820


epoch 2, layer 1:  66%|██████████████████████████████████████████████▌                        | 7217/11016 [18:20<09:29,  6.67it/s, loss=-14.2]

2025-07-21 23:27:23,551 — INFO — layer 1, [2:7217], loss=-16.7928
2025-07-21 23:27:23,551 — INFO — layer 1, [2:7217], loss=-5.4396


epoch 2, layer 1:  66%|██████████████████████████████████████████████▊                        | 7267/11016 [18:28<09:55,  6.30it/s, loss=-14.4]

2025-07-21 23:27:31,264 — INFO — layer 1, [2:7267], loss=-9.6008
2025-07-21 23:27:31,264 — INFO — layer 1, [2:7267], loss=-13.6267


epoch 2, layer 1:  66%|██████████████████████████████████████████████▍                       | 7317/11016 [18:36<09:07,  6.76it/s, loss=-0.901]

2025-07-21 23:27:38,931 — INFO — layer 1, [2:7317], loss=-13.9163
2025-07-21 23:27:38,931 — INFO — layer 1, [2:7317], loss=-18.4796


epoch 2, layer 1:  67%|███████████████████████████████████████████████▍                       | 7367/11016 [18:43<09:13,  6.59it/s, loss=-10.8]

2025-07-21 23:27:46,608 — INFO — layer 1, [2:7367], loss=-20.9594
2025-07-21 23:27:46,608 — INFO — layer 1, [2:7367], loss=-10.2329


epoch 2, layer 1:  67%|███████████████████████████████████████████████▊                       | 7417/11016 [18:51<10:02,  5.98it/s, loss=-10.3]

2025-07-21 23:27:54,249 — INFO — layer 1, [2:7417], loss=-9.6372
2025-07-21 23:27:54,249 — INFO — layer 1, [2:7417], loss=-10.0417


epoch 2, layer 1:  68%|████████████████████████████████████████████████▏                      | 7467/11016 [18:59<09:01,  6.55it/s, loss=-15.6]

2025-07-21 23:28:01,985 — INFO — layer 1, [2:7467], loss=-15.1110
2025-07-21 23:28:01,985 — INFO — layer 1, [2:7467], loss=-8.1596


epoch 2, layer 1:  68%|████████████████████████████████████████████████▍                      | 7517/11016 [19:06<08:48,  6.63it/s, loss=-14.1]

2025-07-21 23:28:09,723 — INFO — layer 1, [2:7517], loss=-8.8439
2025-07-21 23:28:09,723 — INFO — layer 1, [2:7517], loss=-9.6033


epoch 2, layer 1:  69%|████████████████████████████████████████████████▊                      | 7567/11016 [19:14<08:52,  6.48it/s, loss=-17.9]

2025-07-21 23:28:17,322 — INFO — layer 1, [2:7567], loss=-11.5285
2025-07-21 23:28:17,322 — INFO — layer 1, [2:7567], loss=-14.9103


epoch 2, layer 1:  69%|█████████████████████████████████████████████████                      | 7617/11016 [19:22<08:54,  6.36it/s, loss=-18.1]

2025-07-21 23:28:25,008 — INFO — layer 1, [2:7617], loss=-15.8672
2025-07-21 23:28:25,008 — INFO — layer 1, [2:7617], loss=-21.7550


epoch 2, layer 1:  70%|█████████████████████████████████████████████████▍                     | 7667/11016 [19:29<08:30,  6.56it/s, loss=-7.26]

2025-07-21 23:28:32,582 — INFO — layer 1, [2:7667], loss=-8.8530
2025-07-21 23:28:32,582 — INFO — layer 1, [2:7667], loss=-10.8989


epoch 2, layer 1:  70%|█████████████████████████████████████████████████▋                     | 7717/11016 [19:37<08:13,  6.68it/s, loss=-10.8]

2025-07-21 23:28:40,143 — INFO — layer 1, [2:7717], loss=-14.9050
2025-07-21 23:28:40,143 — INFO — layer 1, [2:7717], loss=-18.3176


epoch 2, layer 1:  71%|██████████████████████████████████████████████████                     | 7767/11016 [19:44<08:08,  6.64it/s, loss=-12.5]

2025-07-21 23:28:47,872 — INFO — layer 1, [2:7767], loss=-11.4456
2025-07-21 23:28:47,871 — INFO — layer 1, [2:7767], loss=-10.0723


epoch 2, layer 1:  71%|██████████████████████████████████████████████████▍                    | 7817/11016 [19:52<08:06,  6.57it/s, loss=-1.42]

2025-07-21 23:28:55,488 — INFO — layer 1, [2:7817], loss=-12.7612
2025-07-21 23:28:55,488 — INFO — layer 1, [2:7817], loss=-14.4509


epoch 2, layer 1:  71%|██████████████████████████████████████████████████▋                    | 7867/11016 [20:00<07:46,  6.74it/s, loss=-10.3]

2025-07-21 23:29:03,048 — INFO — layer 1, [2:7867], loss=-4.5462
2025-07-21 23:29:03,048 — INFO — layer 1, [2:7867], loss=-2.9550


epoch 2, layer 1:  72%|███████████████████████████████████████████████████                    | 7917/11016 [20:07<08:15,  6.25it/s, loss=-2.82]

2025-07-21 23:29:10,722 — INFO — layer 1, [2:7917], loss=-1.8467
2025-07-21 23:29:10,722 — INFO — layer 1, [2:7917], loss=-12.1851


epoch 2, layer 1:  72%|███████████████████████████████████████████████████▎                   | 7967/11016 [20:15<07:58,  6.38it/s, loss=-25.6]

2025-07-21 23:29:18,292 — INFO — layer 1, [2:7967], loss=-16.9435
2025-07-21 23:29:18,292 — INFO — layer 1, [2:7967], loss=-19.4541


epoch 2, layer 1:  73%|███████████████████████████████████████████████████▋                   | 8017/11016 [20:23<07:33,  6.61it/s, loss=-6.55]

2025-07-21 23:29:25,929 — INFO — layer 1, [2:8017], loss=-9.1502
2025-07-21 23:29:25,930 — INFO — layer 1, [2:8017], loss=-16.6245


epoch 2, layer 1:  73%|███████████████████████████████████████████████████▉                   | 8067/11016 [20:30<07:27,  6.60it/s, loss=-17.6]

2025-07-21 23:29:33,379 — INFO — layer 1, [2:8067], loss=-18.3223
2025-07-21 23:29:33,379 — INFO — layer 1, [2:8067], loss=-6.9092


epoch 2, layer 1:  74%|████████████████████████████████████████████████████▎                  | 8117/11016 [20:38<07:24,  6.53it/s, loss=-8.05]

2025-07-21 23:29:40,973 — INFO — layer 1, [2:8117], loss=-10.7777
2025-07-21 23:29:40,973 — INFO — layer 1, [2:8117], loss=-14.7562


epoch 2, layer 1:  74%|████████████████████████████████████████████████████▋                  | 8167/11016 [20:45<07:07,  6.66it/s, loss=-12.3]

2025-07-21 23:29:48,597 — INFO — layer 1, [2:8167], loss=-15.9588
2025-07-21 23:29:48,597 — INFO — layer 1, [2:8167], loss=-15.6950


epoch 2, layer 1:  75%|████████████████████████████████████████████████████▉                  | 8217/11016 [20:53<06:58,  6.69it/s, loss=-14.7]

2025-07-21 23:29:56,169 — INFO — layer 1, [2:8217], loss=-16.3929
2025-07-21 23:29:56,169 — INFO — layer 1, [2:8217], loss=-15.3726


epoch 2, layer 1:  75%|█████████████████████████████████████████████████████▎                 | 8267/11016 [21:00<06:55,  6.62it/s, loss=-14.5]

2025-07-21 23:30:03,864 — INFO — layer 1, [2:8267], loss=-20.0054
2025-07-21 23:30:03,864 — INFO — layer 1, [2:8267], loss=-14.5753


epoch 2, layer 1:  75%|█████████████████████████████████████████████████████▌                 | 8317/11016 [21:08<07:04,  6.36it/s, loss=-20.9]

2025-07-21 23:30:11,404 — INFO — layer 1, [2:8317], loss=-9.4160
2025-07-21 23:30:11,404 — INFO — layer 1, [2:8317], loss=-15.5296


epoch 2, layer 1:  76%|█████████████████████████████████████████████████████▉                 | 8367/11016 [21:16<06:41,  6.59it/s, loss=-12.1]

2025-07-21 23:30:18,976 — INFO — layer 1, [2:8367], loss=-15.2285
2025-07-21 23:30:18,976 — INFO — layer 1, [2:8367], loss=-25.8349


epoch 2, layer 1:  76%|██████████████████████████████████████████████████████▏                | 8417/11016 [21:23<06:36,  6.55it/s, loss=-14.4]

2025-07-21 23:30:26,510 — INFO — layer 1, [2:8417], loss=-9.8546
2025-07-21 23:30:26,510 — INFO — layer 1, [2:8417], loss=-23.7385


epoch 2, layer 1:  77%|██████████████████████████████████████████████████████▌                | 8467/11016 [21:31<06:17,  6.75it/s, loss=-5.93]

2025-07-21 23:30:34,028 — INFO — layer 1, [2:8467], loss=-19.9804
2025-07-21 23:30:34,028 — INFO — layer 1, [2:8467], loss=-17.4737


epoch 2, layer 1:  77%|██████████████████████████████████████████████████████▉                | 8517/11016 [21:38<06:23,  6.52it/s, loss=-12.8]

2025-07-21 23:30:41,565 — INFO — layer 1, [2:8517], loss=-10.1083
2025-07-21 23:30:41,565 — INFO — layer 1, [2:8517], loss=-10.9317


epoch 2, layer 1:  78%|███████████████████████████████████████████████████████▏               | 8567/11016 [21:46<06:17,  6.48it/s, loss=-10.6]

2025-07-21 23:30:49,106 — INFO — layer 1, [2:8567], loss=-7.0331
2025-07-21 23:30:49,106 — INFO — layer 1, [2:8567], loss=-10.1778


epoch 2, layer 1:  78%|███████████████████████████████████████████████████████▌               | 8617/11016 [21:53<05:54,  6.76it/s, loss=-6.32]

2025-07-21 23:30:56,619 — INFO — layer 1, [2:8617], loss=-21.1791
2025-07-21 23:30:56,619 — INFO — layer 1, [2:8617], loss=-14.1926


epoch 2, layer 1:  79%|███████████████████████████████████████████████████████▊               | 8667/11016 [22:01<05:59,  6.54it/s, loss=-13.2]

2025-07-21 23:31:04,116 — INFO — layer 1, [2:8667], loss=-17.5861
2025-07-21 23:31:04,116 — INFO — layer 1, [2:8667], loss=-20.5142


epoch 2, layer 1:  79%|████████████████████████████████████████████████████████▏              | 8717/11016 [22:08<05:42,  6.72it/s, loss=-12.4]

2025-07-21 23:31:11,740 — INFO — layer 1, [2:8717], loss=-14.4158
2025-07-21 23:31:11,740 — INFO — layer 1, [2:8717], loss=-25.5077


epoch 2, layer 1:  80%|████████████████████████████████████████████████████████▌              | 8767/11016 [22:16<05:46,  6.49it/s, loss=-7.04]

2025-07-21 23:31:19,385 — INFO — layer 1, [2:8767], loss=-7.7990
2025-07-21 23:31:19,385 — INFO — layer 1, [2:8767], loss=-14.1195


epoch 2, layer 1:  80%|████████████████████████████████████████████████████████▊              | 8817/11016 [22:24<05:47,  6.32it/s, loss=-7.46]

2025-07-21 23:31:26,990 — INFO — layer 1, [2:8817], loss=-17.4323
2025-07-21 23:31:26,990 — INFO — layer 1, [2:8817], loss=-11.2506


epoch 2, layer 1:  80%|█████████████████████████████████████████████████████████▏             | 8867/11016 [22:31<05:36,  6.38it/s, loss=-3.79]

2025-07-21 23:31:34,712 — INFO — layer 1, [2:8867], loss=-12.4958
2025-07-21 23:31:34,712 — INFO — layer 1, [2:8867], loss=-10.1335


epoch 2, layer 1:  81%|█████████████████████████████████████████████████████████▍             | 8917/11016 [22:39<04:53,  7.14it/s, loss=-27.5]

2025-07-21 23:31:42,298 — INFO — layer 1, [2:8917], loss=-17.2313
2025-07-21 23:31:42,298 — INFO — layer 1, [2:8917], loss=-3.7663


epoch 2, layer 1:  81%|███████████████████████████████████████████████████████████▍             | 8967/11016 [22:47<05:04,  6.72it/s, loss=-16]

2025-07-21 23:31:50,042 — INFO — layer 1, [2:8967], loss=-14.7802
2025-07-21 23:31:50,043 — INFO — layer 1, [2:8967], loss=-14.1052


epoch 2, layer 1:  82%|██████████████████████████████████████████████████████████             | 9017/11016 [22:54<05:07,  6.49it/s, loss=-15.4]

2025-07-21 23:31:57,596 — INFO — layer 1, [2:9017], loss=-10.6158
2025-07-21 23:31:57,596 — INFO — layer 1, [2:9017], loss=-11.8139


epoch 2, layer 1:  82%|██████████████████████████████████████████████████████████▍            | 9067/11016 [23:02<04:57,  6.55it/s, loss=-12.1]

2025-07-21 23:32:05,319 — INFO — layer 1, [2:9067], loss=-11.9348
2025-07-21 23:32:05,319 — INFO — layer 1, [2:9067], loss=-2.3000


epoch 2, layer 1:  83%|██████████████████████████████████████████████████████████▊            | 9117/11016 [23:10<04:41,  6.74it/s, loss=-8.57]

2025-07-21 23:32:12,930 — INFO — layer 1, [2:9117], loss=-21.3532
2025-07-21 23:32:12,930 — INFO — layer 1, [2:9117], loss=-9.4144


epoch 2, layer 1:  83%|███████████████████████████████████████████████████████████            | 9167/11016 [23:17<04:58,  6.20it/s, loss=-19.7]

2025-07-21 23:32:20,742 — INFO — layer 1, [2:9167], loss=-0.4943
2025-07-21 23:32:20,742 — INFO — layer 1, [2:9167], loss=-6.1754


epoch 2, layer 1:  84%|███████████████████████████████████████████████████████████▍           | 9217/11016 [23:25<04:36,  6.50it/s, loss=-5.94]

2025-07-21 23:32:28,363 — INFO — layer 1, [2:9217], loss=-14.1970
2025-07-21 23:32:28,363 — INFO — layer 1, [2:9217], loss=-12.4045


epoch 2, layer 1:  84%|███████████████████████████████████████████████████████████▋           | 9267/11016 [23:33<04:11,  6.95it/s, loss=-12.7]

2025-07-21 23:32:36,014 — INFO — layer 1, [2:9267], loss=-13.8292
2025-07-21 23:32:36,014 — INFO — layer 1, [2:9267], loss=-5.7566


epoch 2, layer 1:  85%|████████████████████████████████████████████████████████████           | 9317/11016 [23:40<04:10,  6.79it/s, loss=-14.7]

2025-07-21 23:32:43,784 — INFO — layer 1, [2:9317], loss=-17.3578
2025-07-21 23:32:43,784 — INFO — layer 1, [2:9317], loss=-13.4467


epoch 2, layer 1:  85%|████████████████████████████████████████████████████████████▎          | 9367/11016 [23:48<04:10,  6.57it/s, loss=-7.04]

2025-07-21 23:32:51,365 — INFO — layer 1, [2:9367], loss=-15.0209
2025-07-21 23:32:51,365 — INFO — layer 1, [2:9367], loss=-22.8555


epoch 2, layer 1:  85%|████████████████████████████████████████████████████████████▋          | 9417/11016 [23:56<04:33,  5.85it/s, loss=-15.2]

2025-07-21 23:32:58,989 — INFO — layer 1, [2:9417], loss=-19.5871
2025-07-21 23:32:58,989 — INFO — layer 1, [2:9417], loss=-14.6664


epoch 2, layer 1:  86%|█████████████████████████████████████████████████████████████          | 9467/11016 [24:03<04:06,  6.28it/s, loss=-13.4]

2025-07-21 23:33:06,593 — INFO — layer 1, [2:9467], loss=-3.2345
2025-07-21 23:33:06,593 — INFO — layer 1, [2:9467], loss=-10.7143


epoch 2, layer 1:  86%|█████████████████████████████████████████████████████████████▎         | 9517/11016 [24:11<03:46,  6.61it/s, loss=-21.3]

2025-07-21 23:33:14,130 — INFO — layer 1, [2:9517], loss=-9.8292
2025-07-21 23:33:14,130 — INFO — layer 1, [2:9517], loss=-13.7870


epoch 2, layer 1:  87%|█████████████████████████████████████████████████████████████▋         | 9567/11016 [24:18<03:40,  6.58it/s, loss=-20.2]

2025-07-21 23:33:21,682 — INFO — layer 1, [2:9567], loss=-7.3069
2025-07-21 23:33:21,682 — INFO — layer 1, [2:9567], loss=-1.9970


epoch 2, layer 1:  87%|██████████████████████████████████████████████████████████████▊         | 9617/11016 [24:26<03:50,  6.08it/s, loss=1.79]

2025-07-21 23:33:29,443 — INFO — layer 1, [2:9617], loss=-20.4352
2025-07-21 23:33:29,443 — INFO — layer 1, [2:9617], loss=-13.7717


epoch 2, layer 1:  88%|██████████████████████████████████████████████████████████████▎        | 9667/11016 [24:34<03:30,  6.42it/s, loss=-13.9]

2025-07-21 23:33:37,224 — INFO — layer 1, [2:9667], loss=-2.3561
2025-07-21 23:33:37,224 — INFO — layer 1, [2:9667], loss=-24.2706


epoch 2, layer 1:  88%|████████████████████████████████████████████████████████████████▍        | 9717/11016 [24:41<03:17,  6.59it/s, loss=-16]

2025-07-21 23:33:44,818 — INFO — layer 1, [2:9717], loss=-21.4167
2025-07-21 23:33:44,818 — INFO — layer 1, [2:9717], loss=-11.2744


epoch 2, layer 1:  89%|██████████████████████████████████████████████████████████████▉        | 9767/11016 [24:49<03:01,  6.87it/s, loss=-15.2]

2025-07-21 23:33:52,443 — INFO — layer 1, [2:9767], loss=-9.5820
2025-07-21 23:33:52,443 — INFO — layer 1, [2:9767], loss=-5.0280


epoch 2, layer 1:  89%|███████████████████████████████████████████████████████████████▎       | 9817/11016 [24:57<03:15,  6.15it/s, loss=-4.56]

2025-07-21 23:34:00,240 — INFO — layer 1, [2:9817], loss=-23.6764
2025-07-21 23:34:00,240 — INFO — layer 1, [2:9817], loss=-23.7722


epoch 2, layer 1:  90%|███████████████████████████████████████████████████████████████▌       | 9867/11016 [25:04<03:04,  6.22it/s, loss=-19.9]

2025-07-21 23:34:07,899 — INFO — layer 1, [2:9867], loss=-4.6311
2025-07-21 23:34:07,899 — INFO — layer 1, [2:9867], loss=-12.3835


epoch 2, layer 1:  90%|███████████████████████████████████████████████████████████████▉       | 9917/11016 [25:12<02:40,  6.83it/s, loss=-9.29]

2025-07-21 23:34:15,547 — INFO — layer 1, [2:9917], loss=-22.7899
2025-07-21 23:34:15,547 — INFO — layer 1, [2:9917], loss=-12.0648


epoch 2, layer 1:  90%|████████████████████████████████████████████████████████████████▏      | 9967/11016 [25:20<02:39,  6.56it/s, loss=-14.4]

2025-07-21 23:34:23,123 — INFO — layer 1, [2:9967], loss=-15.0335
2025-07-21 23:34:23,123 — INFO — layer 1, [2:9967], loss=-18.2004


epoch 2, layer 1:  91%|█████████████████████████████████████████████████████████████████▍      | 10017/11016 [25:27<02:30,  6.62it/s, loss=-14]

2025-07-21 23:34:30,662 — INFO — layer 1, [2:10017], loss=-5.8824
2025-07-21 23:34:30,662 — INFO — layer 1, [2:10017], loss=-14.3764


epoch 2, layer 1:  91%|███████████████████████████████████████████████████████████████▉      | 10067/11016 [25:35<02:21,  6.71it/s, loss=-12.4]

2025-07-21 23:34:38,195 — INFO — layer 1, [2:10067], loss=-14.5496
2025-07-21 23:34:38,195 — INFO — layer 1, [2:10067], loss=-10.2165


epoch 2, layer 1:  92%|████████████████████████████████████████████████████████████████▎     | 10117/11016 [25:42<02:14,  6.69it/s, loss=-13.6]

2025-07-21 23:34:45,885 — INFO — layer 1, [2:10117], loss=-9.5261
2025-07-21 23:34:45,885 — INFO — layer 1, [2:10117], loss=-18.3392


epoch 2, layer 1:  92%|████████████████████████████████████████████████████████████████▌     | 10167/11016 [25:50<02:03,  6.89it/s, loss=-14.6]

2025-07-21 23:34:53,480 — INFO — layer 1, [2:10167], loss=-4.6511
2025-07-21 23:34:53,480 — INFO — layer 1, [2:10167], loss=-15.2863


epoch 2, layer 1:  93%|████████████████████████████████████████████████████████████████▉     | 10217/11016 [25:58<02:00,  6.61it/s, loss=-17.9]

2025-07-21 23:35:01,107 — INFO — layer 1, [2:10217], loss=-11.7033
2025-07-21 23:35:01,107 — INFO — layer 1, [2:10217], loss=-17.5709


epoch 2, layer 1:  93%|█████████████████████████████████████████████████████████████████▏    | 10267/11016 [26:05<01:52,  6.65it/s, loss=-4.88]

2025-07-21 23:35:08,740 — INFO — layer 1, [2:10267], loss=-7.1286
2025-07-21 23:35:08,740 — INFO — layer 1, [2:10267], loss=-10.4167


epoch 2, layer 1:  94%|█████████████████████████████████████████████████████████████████▌    | 10317/11016 [26:13<01:44,  6.69it/s, loss=-11.8]

2025-07-21 23:35:16,324 — INFO — layer 1, [2:10317], loss=-14.2464
2025-07-21 23:35:16,324 — INFO — layer 1, [2:10317], loss=-16.8746


epoch 2, layer 1:  94%|█████████████████████████████████████████████████████████████████▉    | 10367/11016 [26:21<01:34,  6.84it/s, loss=-19.8]

2025-07-21 23:35:23,976 — INFO — layer 1, [2:10367], loss=-26.6551
2025-07-21 23:35:23,976 — INFO — layer 1, [2:10367], loss=-23.9313


epoch 2, layer 1:  95%|██████████████████████████████████████████████████████████████████▏   | 10417/11016 [26:28<01:32,  6.45it/s, loss=-7.55]

2025-07-21 23:35:31,672 — INFO — layer 1, [2:10417], loss=-9.7742
2025-07-21 23:35:31,673 — INFO — layer 1, [2:10417], loss=-25.7250


epoch 2, layer 1:  95%|██████████████████████████████████████████████████████████████████▌   | 10467/11016 [26:36<01:20,  6.81it/s, loss=-11.2]

2025-07-21 23:35:39,087 — INFO — layer 1, [2:10467], loss=-16.0532
2025-07-21 23:35:39,087 — INFO — layer 1, [2:10467], loss=-5.6300


epoch 2, layer 1:  95%|██████████████████████████████████████████████████████████████████▊   | 10517/11016 [26:43<01:16,  6.55it/s, loss=-11.8]

2025-07-21 23:35:46,723 — INFO — layer 1, [2:10517], loss=-14.4774
2025-07-21 23:35:46,723 — INFO — layer 1, [2:10517], loss=-20.0606


epoch 2, layer 1:  96%|███████████████████████████████████████████████████████████████████▏  | 10567/11016 [26:51<01:05,  6.84it/s, loss=-15.9]

2025-07-21 23:35:54,269 — INFO — layer 1, [2:10567], loss=-2.2479
2025-07-21 23:35:54,269 — INFO — layer 1, [2:10567], loss=-26.1947


epoch 2, layer 1:  96%|█████████████████████████████████████████████████████████████████████▍  | 10617/11016 [26:58<00:56,  7.02it/s, loss=-25]

2025-07-21 23:36:01,703 — INFO — layer 1, [2:10617], loss=-6.2499
2025-07-21 23:36:01,703 — INFO — layer 1, [2:10617], loss=-9.3500


epoch 2, layer 1:  97%|███████████████████████████████████████████████████████████████████▊  | 10667/11016 [27:06<00:52,  6.63it/s, loss=-21.8]

2025-07-21 23:36:09,254 — INFO — layer 1, [2:10667], loss=-22.5934
2025-07-21 23:36:09,254 — INFO — layer 1, [2:10667], loss=-25.0203


epoch 2, layer 1:  97%|████████████████████████████████████████████████████████████████████  | 10717/11016 [27:13<00:47,  6.35it/s, loss=-14.4]

2025-07-21 23:36:16,867 — INFO — layer 1, [2:10717], loss=-15.7810
2025-07-21 23:36:16,867 — INFO — layer 1, [2:10717], loss=-14.8326


epoch 2, layer 1:  98%|████████████████████████████████████████████████████████████████████▍ | 10767/11016 [27:21<00:38,  6.53it/s, loss=-14.1]

2025-07-21 23:36:24,567 — INFO — layer 1, [2:10767], loss=-14.9749
2025-07-21 23:36:24,567 — INFO — layer 1, [2:10767], loss=-10.5376


epoch 2, layer 1:  98%|████████████████████████████████████████████████████████████████████▋ | 10817/11016 [27:29<00:29,  6.79it/s, loss=-8.66]

2025-07-21 23:36:32,060 — INFO — layer 1, [2:10817], loss=-2.9428
2025-07-21 23:36:32,060 — INFO — layer 1, [2:10817], loss=-25.3867


epoch 2, layer 1:  99%|██████████████████████████████████████████████████████████████████████ | 10867/11016 [27:36<00:23,  6.40it/s, loss=-9.9]

2025-07-21 23:36:39,630 — INFO — layer 1, [2:10867], loss=-13.8537
2025-07-21 23:36:39,630 — INFO — layer 1, [2:10867], loss=-11.2623


epoch 2, layer 1:  99%|█████████████████████████████████████████████████████████████████████▎| 10917/11016 [27:44<00:14,  6.61it/s, loss=-20.8]

2025-07-21 23:36:47,244 — INFO — layer 1, [2:10917], loss=-9.6883
2025-07-21 23:36:47,244 — INFO — layer 1, [2:10917], loss=-18.2578


epoch 2, layer 1: 100%|█████████████████████████████████████████████████████████████████████▋| 10967/11016 [27:52<00:07,  6.16it/s, loss=-12.3]

2025-07-21 23:36:54,991 — INFO — layer 1, [2:10967], loss=-10.9056
2025-07-21 23:36:54,991 — INFO — layer 1, [2:10967], loss=-12.7830


                                                                                                                                               

2025-07-21 23:37:02,363 — INFO — Training decomposer for layer 2
2025-07-21 23:37:02,459 — INFO — Checkpoint for layer 1 saved to checkpoints/decomposer_simple/decomposer_layer1_20250721_204859_8666ba39-8c81-4293-8a3f-714efcc5f6f9
2025-07-21 23:37:02,461 — INFO — Training decomposer for layer 2


epoch 0, layer 2:   0%|                                                                                              | 0/11016 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingf

2025-07-21 23:37:10,569 — INFO — layer 2, [0:49], loss=387.7661
2025-07-21 23:37:10,570 — INFO — layer 2, [0:49], loss=359.9011


epoch 0, layer 2:   1%|▋                                                                          | 99/11016 [00:15<27:29,  6.62it/s, loss=833]

2025-07-21 23:37:18,259 — INFO — layer 2, [0:99], loss=1496.3893
2025-07-21 23:37:18,259 — INFO — layer 2, [0:99], loss=722.0793


epoch 0, layer 2:   1%|█                                                                         | 149/11016 [00:23<26:45,  6.77it/s, loss=983]

2025-07-21 23:37:25,992 — INFO — layer 2, [0:149], loss=676.1331
2025-07-21 23:37:25,992 — INFO — layer 2, [0:149], loss=1580.4630


epoch 0, layer 2:   2%|█▎                                                                       | 199/11016 [00:30<26:38,  6.77it/s, loss=35.9]

2025-07-21 23:37:33,582 — INFO — layer 2, [0:199], loss=31.6968
2025-07-21 23:37:33,582 — INFO — layer 2, [0:199], loss=27.5235


epoch 0, layer 2:   2%|█▋                                                                       | 249/11016 [00:38<27:09,  6.61it/s, loss=41.2]

2025-07-21 23:37:41,191 — INFO — layer 2, [0:249], loss=38.1237
2025-07-21 23:37:41,191 — INFO — layer 2, [0:249], loss=35.0885


epoch 0, layer 2:   3%|█▉                                                                       | 299/11016 [00:46<26:36,  6.71it/s, loss=21.6]

2025-07-21 23:37:48,823 — INFO — layer 2, [0:299], loss=21.2789
2025-07-21 23:37:48,823 — INFO — layer 2, [0:299], loss=21.2862


epoch 0, layer 2:   3%|██▎                                                                       | 349/11016 [00:53<28:19,  6.28it/s, loss=124]

2025-07-21 23:37:56,564 — INFO — layer 2, [0:349], loss=177.2069
2025-07-21 23:37:56,564 — INFO — layer 2, [0:349], loss=117.9856


epoch 0, layer 2:   4%|██▋                                                                      | 399/11016 [01:01<28:30,  6.21it/s, loss=54.7]

2025-07-21 23:38:04,320 — INFO — layer 2, [0:399], loss=56.0841
2025-07-21 23:38:04,320 — INFO — layer 2, [0:399], loss=69.0899


epoch 0, layer 2:   4%|███                                                                       | 449/11016 [01:09<26:02,  6.76it/s, loss=141]

2025-07-21 23:38:11,963 — INFO — layer 2, [0:449], loss=176.4581
2025-07-21 23:38:11,963 — INFO — layer 2, [0:449], loss=87.7908


epoch 0, layer 2:   5%|███▎                                                                     | 499/11016 [01:16<26:14,  6.68it/s, loss=61.4]

2025-07-21 23:38:19,501 — INFO — layer 2, [0:499], loss=25.3952
2025-07-21 23:38:19,501 — INFO — layer 2, [0:499], loss=51.3431


epoch 0, layer 2:   5%|███▋                                                                      | 549/11016 [01:24<26:14,  6.65it/s, loss=111]

2025-07-21 23:38:26,995 — INFO — layer 2, [0:549], loss=160.3794
2025-07-21 23:38:26,995 — INFO — layer 2, [0:549], loss=118.4962


epoch 0, layer 2:   5%|████                                                                      | 599/11016 [01:31<25:27,  6.82it/s, loss=100]

2025-07-21 23:38:34,526 — INFO — layer 2, [0:599], loss=64.3706
2025-07-21 23:38:34,526 — INFO — layer 2, [0:599], loss=108.6823


epoch 0, layer 2:   6%|████▎                                                                    | 649/11016 [01:39<26:42,  6.47it/s, loss=19.6]

2025-07-21 23:38:42,266 — INFO — layer 2, [0:649], loss=20.0052
2025-07-21 23:38:42,266 — INFO — layer 2, [0:649], loss=20.1369


epoch 0, layer 2:   6%|████▋                                                                    | 699/11016 [01:47<25:51,  6.65it/s, loss=23.8]

2025-07-21 23:38:49,923 — INFO — layer 2, [0:699], loss=27.5075
2025-07-21 23:38:49,923 — INFO — layer 2, [0:699], loss=24.9472


epoch 0, layer 2:   7%|████▉                                                                    | 749/11016 [01:54<25:29,  6.71it/s, loss=41.7]

2025-07-21 23:38:57,521 — INFO — layer 2, [0:749], loss=35.8299
2025-07-21 23:38:57,521 — INFO — layer 2, [0:749], loss=28.8056


epoch 0, layer 2:   7%|█████▎                                                                   | 799/11016 [02:02<26:02,  6.54it/s, loss=21.7]

2025-07-21 23:39:05,071 — INFO — layer 2, [0:799], loss=20.9255
2025-07-21 23:39:05,071 — INFO — layer 2, [0:799], loss=25.8353


epoch 0, layer 2:   8%|█████▋                                                                   | 849/11016 [02:09<25:54,  6.54it/s, loss=18.8]

2025-07-21 23:39:12,444 — INFO — layer 2, [0:849], loss=18.4226
2025-07-21 23:39:12,444 — INFO — layer 2, [0:849], loss=18.7764


epoch 0, layer 2:   8%|█████▉                                                                   | 899/11016 [02:17<26:19,  6.40it/s, loss=33.9]

2025-07-21 23:39:20,220 — INFO — layer 2, [0:899], loss=34.6712
2025-07-21 23:39:20,220 — INFO — layer 2, [0:899], loss=22.5563


epoch 0, layer 2:   9%|██████▎                                                                  | 949/11016 [02:25<26:26,  6.34it/s, loss=33.4]

2025-07-21 23:39:28,008 — INFO — layer 2, [0:949], loss=26.6638
2025-07-21 23:39:28,009 — INFO — layer 2, [0:949], loss=33.0291


epoch 0, layer 2:   9%|██████▌                                                                  | 999/11016 [02:32<24:03,  6.94it/s, loss=18.3]

2025-07-21 23:39:35,512 — INFO — layer 2, [0:999], loss=17.7801
2025-07-21 23:39:35,512 — INFO — layer 2, [0:999], loss=17.3059


epoch 0, layer 2:  10%|██████▊                                                                 | 1049/11016 [02:40<25:15,  6.58it/s, loss=47.2]

2025-07-21 23:39:43,101 — INFO — layer 2, [0:1049], loss=27.5548
2025-07-21 23:39:43,101 — INFO — layer 2, [0:1049], loss=27.4581


epoch 0, layer 2:  10%|███████▏                                                                | 1099/11016 [02:47<25:31,  6.48it/s, loss=48.2]

2025-07-21 23:39:50,635 — INFO — layer 2, [0:1099], loss=28.7673
2025-07-21 23:39:50,635 — INFO — layer 2, [0:1099], loss=26.0081


epoch 0, layer 2:  10%|███████▋                                                                  | 1149/11016 [02:55<24:53,  6.61it/s, loss=55]

2025-07-21 23:39:58,112 — INFO — layer 2, [0:1149], loss=57.7039
2025-07-21 23:39:58,112 — INFO — layer 2, [0:1149], loss=44.7530


epoch 0, layer 2:  11%|███████▊                                                                | 1199/11016 [03:02<23:46,  6.88it/s, loss=41.3]

2025-07-21 23:40:05,666 — INFO — layer 2, [0:1199], loss=48.2872
2025-07-21 23:40:05,666 — INFO — layer 2, [0:1199], loss=51.0189


epoch 0, layer 2:  11%|████████▍                                                                 | 1249/11016 [03:10<24:00,  6.78it/s, loss=20]

2025-07-21 23:40:13,324 — INFO — layer 2, [0:1249], loss=19.9596
2025-07-21 23:40:13,324 — INFO — layer 2, [0:1249], loss=18.4246


epoch 0, layer 2:  12%|████████▍                                                               | 1299/11016 [03:18<26:05,  6.21it/s, loss=21.5]

2025-07-21 23:40:20,864 — INFO — layer 2, [0:1299], loss=16.1491
2025-07-21 23:40:20,864 — INFO — layer 2, [0:1299], loss=19.8837


epoch 0, layer 2:  12%|█████████                                                                 | 1349/11016 [03:25<25:07,  6.41it/s, loss=18]

2025-07-21 23:40:28,551 — INFO — layer 2, [0:1349], loss=18.3908
2025-07-21 23:40:28,551 — INFO — layer 2, [0:1349], loss=18.6124


epoch 0, layer 2:  13%|█████████▏                                                              | 1399/11016 [03:33<23:03,  6.95it/s, loss=16.6]

2025-07-21 23:40:36,168 — INFO — layer 2, [0:1399], loss=17.2183
2025-07-21 23:40:36,168 — INFO — layer 2, [0:1399], loss=16.7683


epoch 0, layer 2:  13%|█████████▍                                                              | 1449/11016 [03:41<23:20,  6.83it/s, loss=36.5]

2025-07-21 23:40:43,799 — INFO — layer 2, [0:1449], loss=59.6929
2025-07-21 23:40:43,799 — INFO — layer 2, [0:1449], loss=74.6653


epoch 0, layer 2:  14%|█████████▊                                                              | 1499/11016 [03:48<22:51,  6.94it/s, loss=33.5]

2025-07-21 23:40:51,361 — INFO — layer 2, [0:1499], loss=46.5067
2025-07-21 23:40:51,361 — INFO — layer 2, [0:1499], loss=74.1083


epoch 0, layer 2:  14%|██████████                                                              | 1549/11016 [03:56<25:27,  6.20it/s, loss=32.7]

2025-07-21 23:40:58,918 — INFO — layer 2, [0:1549], loss=77.5732
2025-07-21 23:40:58,918 — INFO — layer 2, [0:1549], loss=31.3035


epoch 0, layer 2:  15%|██████████▍                                                             | 1599/11016 [04:03<24:18,  6.46it/s, loss=53.6]

2025-07-21 23:41:06,593 — INFO — layer 2, [0:1599], loss=24.5812
2025-07-21 23:41:06,593 — INFO — layer 2, [0:1599], loss=29.2907


epoch 0, layer 2:  15%|██████████▊                                                             | 1649/11016 [04:11<23:36,  6.61it/s, loss=79.8]

2025-07-21 23:41:14,231 — INFO — layer 2, [0:1649], loss=100.5672
2025-07-21 23:41:14,231 — INFO — layer 2, [0:1649], loss=111.4547


epoch 0, layer 2:  15%|███████████                                                             | 1699/11016 [04:19<23:27,  6.62it/s, loss=86.2]

2025-07-21 23:41:21,923 — INFO — layer 2, [0:1699], loss=51.9462
2025-07-21 23:41:21,923 — INFO — layer 2, [0:1699], loss=66.0751


epoch 0, layer 2:  16%|███████████▍                                                            | 1749/11016 [04:26<22:50,  6.76it/s, loss=86.8]

2025-07-21 23:41:29,487 — INFO — layer 2, [0:1749], loss=76.2864
2025-07-21 23:41:29,487 — INFO — layer 2, [0:1749], loss=45.8120


epoch 0, layer 2:  16%|███████████▊                                                            | 1799/11016 [04:34<24:12,  6.35it/s, loss=75.7]

2025-07-21 23:41:37,059 — INFO — layer 2, [0:1799], loss=42.0899
2025-07-21 23:41:37,059 — INFO — layer 2, [0:1799], loss=83.6638


epoch 0, layer 2:  17%|████████████                                                            | 1849/11016 [04:42<23:36,  6.47it/s, loss=32.4]

2025-07-21 23:41:44,830 — INFO — layer 2, [0:1849], loss=50.1895
2025-07-21 23:41:44,830 — INFO — layer 2, [0:1849], loss=59.6188


epoch 0, layer 2:  17%|████████████▊                                                             | 1899/11016 [04:49<23:12,  6.55it/s, loss=50]

2025-07-21 23:41:52,421 — INFO — layer 2, [0:1899], loss=54.5031
2025-07-21 23:41:52,421 — INFO — layer 2, [0:1899], loss=30.6588


epoch 0, layer 2:  18%|████████████▋                                                           | 1949/11016 [04:57<24:16,  6.23it/s, loss=36.9]

2025-07-21 23:42:00,050 — INFO — layer 2, [0:1949], loss=38.4078
2025-07-21 23:42:00,050 — INFO — layer 2, [0:1949], loss=48.7176


epoch 0, layer 2:  18%|█████████████                                                           | 1999/11016 [05:04<21:48,  6.89it/s, loss=34.4]

2025-07-21 23:42:07,625 — INFO — layer 2, [0:1999], loss=33.1543
2025-07-21 23:42:07,625 — INFO — layer 2, [0:1999], loss=45.0122


epoch 0, layer 2:  19%|█████████████▊                                                            | 2049/11016 [05:12<22:45,  6.57it/s, loss=21]

2025-07-21 23:42:15,250 — INFO — layer 2, [0:2049], loss=20.3117
2025-07-21 23:42:15,251 — INFO — layer 2, [0:2049], loss=18.9538


epoch 0, layer 2:  19%|█████████████▋                                                          | 2099/11016 [05:20<21:54,  6.78it/s, loss=16.7]

2025-07-21 23:42:22,973 — INFO — layer 2, [0:2099], loss=15.4737
2025-07-21 23:42:22,973 — INFO — layer 2, [0:2099], loss=18.9568


epoch 0, layer 2:  20%|██████████████                                                          | 2149/11016 [05:27<22:25,  6.59it/s, loss=15.6]

2025-07-21 23:42:30,447 — INFO — layer 2, [0:2149], loss=18.9048
2025-07-21 23:42:30,447 — INFO — layer 2, [0:2149], loss=18.9184


epoch 0, layer 2:  20%|██████████████▊                                                           | 2199/11016 [05:35<23:02,  6.38it/s, loss=24]

2025-07-21 23:42:38,008 — INFO — layer 2, [0:2199], loss=20.0286
2025-07-21 23:42:38,008 — INFO — layer 2, [0:2199], loss=16.8208


epoch 0, layer 2:  20%|██████████████▋                                                         | 2249/11016 [05:42<22:33,  6.48it/s, loss=32.8]

2025-07-21 23:42:45,591 — INFO — layer 2, [0:2249], loss=25.5432
2025-07-21 23:42:45,591 — INFO — layer 2, [0:2249], loss=26.1533


epoch 0, layer 2:  21%|███████████████                                                         | 2299/11016 [05:50<24:18,  5.98it/s, loss=44.5]

2025-07-21 23:42:53,323 — INFO — layer 2, [0:2299], loss=30.2846
2025-07-21 23:42:53,323 — INFO — layer 2, [0:2299], loss=32.6000


epoch 0, layer 2:  21%|███████████████▊                                                          | 2349/11016 [05:58<20:56,  6.90it/s, loss=20]

2025-07-21 23:43:00,897 — INFO — layer 2, [0:2349], loss=20.6142
2025-07-21 23:43:00,897 — INFO — layer 2, [0:2349], loss=28.4133


epoch 0, layer 2:  22%|████████████████                                                          | 2399/11016 [06:05<20:38,  6.96it/s, loss=21]

2025-07-21 23:43:08,514 — INFO — layer 2, [0:2399], loss=23.9318
2025-07-21 23:43:08,514 — INFO — layer 2, [0:2399], loss=24.1315


epoch 0, layer 2:  22%|████████████████                                                        | 2449/11016 [06:13<21:07,  6.76it/s, loss=14.5]

2025-07-21 23:43:16,063 — INFO — layer 2, [0:2449], loss=15.2057
2025-07-21 23:43:16,063 — INFO — layer 2, [0:2449], loss=14.9750


epoch 0, layer 2:  23%|████████████████▎                                                       | 2499/11016 [06:21<22:10,  6.40it/s, loss=14.3]

2025-07-21 23:43:23,821 — INFO — layer 2, [0:2499], loss=14.4405
2025-07-21 23:43:23,821 — INFO — layer 2, [0:2499], loss=14.5381


epoch 0, layer 2:  23%|████████████████▋                                                       | 2549/11016 [06:28<20:52,  6.76it/s, loss=14.6]

2025-07-21 23:43:31,357 — INFO — layer 2, [0:2549], loss=14.6619
2025-07-21 23:43:31,357 — INFO — layer 2, [0:2549], loss=14.4251


epoch 0, layer 2:  24%|████████████████▉                                                       | 2599/11016 [06:36<21:39,  6.48it/s, loss=14.6]

2025-07-21 23:43:38,974 — INFO — layer 2, [0:2599], loss=15.0141
2025-07-21 23:43:38,974 — INFO — layer 2, [0:2599], loss=14.4640


epoch 0, layer 2:  24%|█████████████████▎                                                      | 2649/11016 [06:43<20:24,  6.83it/s, loss=42.8]

2025-07-21 23:43:46,488 — INFO — layer 2, [0:2649], loss=30.2265
2025-07-21 23:43:46,488 — INFO — layer 2, [0:2649], loss=57.0589


epoch 0, layer 2:  25%|█████████████████▋                                                      | 2699/11016 [06:51<20:03,  6.91it/s, loss=33.7]

2025-07-21 23:43:54,016 — INFO — layer 2, [0:2699], loss=36.9195
2025-07-21 23:43:54,016 — INFO — layer 2, [0:2699], loss=72.0309


epoch 0, layer 2:  25%|██████████████████▍                                                       | 2749/11016 [06:58<20:25,  6.74it/s, loss=37]

2025-07-21 23:44:01,540 — INFO — layer 2, [0:2749], loss=41.7796
2025-07-21 23:44:01,540 — INFO — layer 2, [0:2749], loss=55.4013


epoch 0, layer 2:  25%|██████████████████▎                                                     | 2799/11016 [07:06<20:37,  6.64it/s, loss=37.1]

2025-07-21 23:44:09,248 — INFO — layer 2, [0:2799], loss=51.5238
2025-07-21 23:44:09,248 — INFO — layer 2, [0:2799], loss=41.9936


epoch 0, layer 2:  26%|██████████████████▌                                                     | 2849/11016 [07:14<20:54,  6.51it/s, loss=73.4]

2025-07-21 23:44:16,845 — INFO — layer 2, [0:2849], loss=108.1438
2025-07-21 23:44:16,845 — INFO — layer 2, [0:2849], loss=52.3229


epoch 0, layer 2:  26%|██████████████████▉                                                     | 2899/11016 [07:21<19:33,  6.92it/s, loss=54.5]

2025-07-21 23:44:24,341 — INFO — layer 2, [0:2899], loss=81.5502
2025-07-21 23:44:24,341 — INFO — layer 2, [0:2899], loss=49.0800


epoch 0, layer 2:  27%|███████████████████▎                                                    | 2949/11016 [07:29<19:18,  6.96it/s, loss=42.7]

2025-07-21 23:44:32,058 — INFO — layer 2, [0:2949], loss=44.5004
2025-07-21 23:44:32,058 — INFO — layer 2, [0:2949], loss=105.4045


epoch 0, layer 2:  27%|███████████████████▊                                                     | 2999/11016 [07:37<21:32,  6.20it/s, loss=125]

2025-07-21 23:44:39,809 — INFO — layer 2, [0:2999], loss=44.7301
2025-07-21 23:44:39,809 — INFO — layer 2, [0:2999], loss=103.7836


epoch 0, layer 2:  28%|███████████████████▉                                                    | 3049/11016 [07:44<20:02,  6.63it/s, loss=20.5]

2025-07-21 23:44:47,451 — INFO — layer 2, [0:3049], loss=37.0697
2025-07-21 23:44:47,451 — INFO — layer 2, [0:3049], loss=35.1207


epoch 0, layer 2:  28%|████████████████████▎                                                   | 3099/11016 [07:52<19:23,  6.80it/s, loss=28.9]

2025-07-21 23:44:55,050 — INFO — layer 2, [0:3099], loss=23.2303
2025-07-21 23:44:55,050 — INFO — layer 2, [0:3099], loss=22.9806


epoch 0, layer 2:  29%|████████████████████▌                                                   | 3149/11016 [07:59<18:27,  7.10it/s, loss=21.4]

2025-07-21 23:45:02,703 — INFO — layer 2, [0:3149], loss=23.6335
2025-07-21 23:45:02,703 — INFO — layer 2, [0:3149], loss=24.8816


epoch 0, layer 2:  29%|████████████████████▉                                                   | 3199/11016 [08:07<19:09,  6.80it/s, loss=25.6]

2025-07-21 23:45:10,421 — INFO — layer 2, [0:3199], loss=34.9806
2025-07-21 23:45:10,420 — INFO — layer 2, [0:3199], loss=36.4549


epoch 0, layer 2:  29%|█████████████████████▏                                                  | 3249/11016 [08:15<20:00,  6.47it/s, loss=16.4]

2025-07-21 23:45:18,031 — INFO — layer 2, [0:3249], loss=16.7341
2025-07-21 23:45:18,031 — INFO — layer 2, [0:3249], loss=15.7126


epoch 0, layer 2:  30%|█████████████████████▌                                                  | 3299/11016 [08:22<20:03,  6.41it/s, loss=16.6]

2025-07-21 23:45:25,643 — INFO — layer 2, [0:3299], loss=16.2209
2025-07-21 23:45:25,643 — INFO — layer 2, [0:3299], loss=14.8827


epoch 0, layer 2:  30%|█████████████████████▉                                                  | 3349/11016 [08:30<20:50,  6.13it/s, loss=15.9]

2025-07-21 23:45:33,345 — INFO — layer 2, [0:3349], loss=13.9703
2025-07-21 23:45:33,345 — INFO — layer 2, [0:3349], loss=15.6092


epoch 0, layer 2:  31%|██████████████████████▏                                                 | 3399/11016 [08:38<19:41,  6.45it/s, loss=15.1]

2025-07-21 23:45:40,933 — INFO — layer 2, [0:3399], loss=14.9665
2025-07-21 23:45:40,933 — INFO — layer 2, [0:3399], loss=14.1980


epoch 0, layer 2:  31%|██████████████████████▌                                                 | 3449/11016 [08:45<20:13,  6.24it/s, loss=24.3]

2025-07-21 23:45:48,728 — INFO — layer 2, [0:3449], loss=30.4456
2025-07-21 23:45:48,728 — INFO — layer 2, [0:3449], loss=17.0372


epoch 0, layer 2:  32%|██████████████████████▊                                                 | 3499/11016 [08:53<18:08,  6.90it/s, loss=20.8]

2025-07-21 23:45:56,340 — INFO — layer 2, [0:3499], loss=20.2651
2025-07-21 23:45:56,340 — INFO — layer 2, [0:3499], loss=16.0571


epoch 0, layer 2:  32%|███████████████████████▏                                                | 3549/11016 [09:01<18:25,  6.75it/s, loss=20.7]

2025-07-21 23:46:03,781 — INFO — layer 2, [0:3549], loss=20.8702
2025-07-21 23:46:03,781 — INFO — layer 2, [0:3549], loss=21.2651


epoch 0, layer 2:  33%|████████████████████████▏                                                 | 3599/11016 [09:08<18:36,  6.64it/s, loss=19]

2025-07-21 23:46:11,504 — INFO — layer 2, [0:3599], loss=17.6344
2025-07-21 23:46:11,504 — INFO — layer 2, [0:3599], loss=18.4473


epoch 0, layer 2:  33%|████████████████████████▌                                                 | 3649/11016 [09:16<17:28,  7.02it/s, loss=16]

2025-07-21 23:46:19,015 — INFO — layer 2, [0:3649], loss=18.7632
2025-07-21 23:46:19,015 — INFO — layer 2, [0:3649], loss=18.9035


epoch 0, layer 2:  34%|████████████████████████▏                                               | 3699/11016 [09:24<18:48,  6.48it/s, loss=21.5]

2025-07-21 23:46:26,799 — INFO — layer 2, [0:3699], loss=23.6031
2025-07-21 23:46:26,799 — INFO — layer 2, [0:3699], loss=34.1436


epoch 0, layer 2:  34%|████████████████████████▌                                               | 3749/11016 [09:31<17:52,  6.77it/s, loss=17.2]

2025-07-21 23:46:34,476 — INFO — layer 2, [0:3749], loss=15.8794
2025-07-21 23:46:34,476 — INFO — layer 2, [0:3749], loss=16.8725


epoch 0, layer 2:  34%|████████████████████████▊                                               | 3799/11016 [09:39<18:46,  6.40it/s, loss=13.5]

2025-07-21 23:46:42,069 — INFO — layer 2, [0:3799], loss=14.0588
2025-07-21 23:46:42,069 — INFO — layer 2, [0:3799], loss=14.8891


epoch 0, layer 2:  35%|█████████████████████████▊                                                | 3849/11016 [09:47<18:47,  6.36it/s, loss=47]

2025-07-21 23:46:49,735 — INFO — layer 2, [0:3849], loss=19.7809
2025-07-21 23:46:49,735 — INFO — layer 2, [0:3849], loss=26.8927


epoch 0, layer 2:  35%|█████████████████████████▍                                              | 3899/11016 [09:54<18:03,  6.57it/s, loss=33.3]

2025-07-21 23:46:57,447 — INFO — layer 2, [0:3899], loss=37.9627
2025-07-21 23:46:57,447 — INFO — layer 2, [0:3899], loss=62.2982


epoch 0, layer 2:  36%|█████████████████████████▊                                              | 3949/11016 [10:02<18:37,  6.32it/s, loss=31.7]

2025-07-21 23:47:05,026 — INFO — layer 2, [0:3949], loss=28.9921
2025-07-21 23:47:05,026 — INFO — layer 2, [0:3949], loss=45.6892


epoch 0, layer 2:  36%|██████████████████████████▏                                             | 3999/11016 [10:09<17:12,  6.79it/s, loss=44.3]

2025-07-21 23:47:12,565 — INFO — layer 2, [0:3999], loss=23.2168
2025-07-21 23:47:12,565 — INFO — layer 2, [0:3999], loss=49.6804


epoch 0, layer 2:  37%|██████████████████████████▍                                             | 4049/11016 [10:17<17:06,  6.79it/s, loss=13.6]

2025-07-21 23:47:20,033 — INFO — layer 2, [0:4049], loss=14.2287
2025-07-21 23:47:20,033 — INFO — layer 2, [0:4049], loss=13.1242


epoch 0, layer 2:  37%|██████████████████████████▊                                             | 4099/11016 [10:24<17:34,  6.56it/s, loss=18.1]

2025-07-21 23:47:27,726 — INFO — layer 2, [0:4099], loss=19.8849
2025-07-21 23:47:27,726 — INFO — layer 2, [0:4099], loss=29.0768


epoch 0, layer 2:  38%|███████████████████████████                                             | 4149/11016 [10:32<16:58,  6.75it/s, loss=13.7]

2025-07-21 23:47:35,233 — INFO — layer 2, [0:4149], loss=13.8350
2025-07-21 23:47:35,233 — INFO — layer 2, [0:4149], loss=14.0322


epoch 0, layer 2:  38%|███████████████████████████▍                                            | 4199/11016 [10:40<17:13,  6.59it/s, loss=13.5]

2025-07-21 23:47:42,832 — INFO — layer 2, [0:4199], loss=14.0647
2025-07-21 23:47:42,832 — INFO — layer 2, [0:4199], loss=13.4034


epoch 0, layer 2:  39%|███████████████████████████▊                                            | 4249/11016 [10:47<17:01,  6.62it/s, loss=53.8]

2025-07-21 23:47:50,351 — INFO — layer 2, [0:4249], loss=24.8348
2025-07-21 23:47:50,351 — INFO — layer 2, [0:4249], loss=17.4814


epoch 0, layer 2:  39%|████████████████████████████                                            | 4299/11016 [10:55<16:46,  6.67it/s, loss=14.3]

2025-07-21 23:47:57,800 — INFO — layer 2, [0:4299], loss=13.7772
2025-07-21 23:47:57,800 — INFO — layer 2, [0:4299], loss=13.1913


epoch 0, layer 2:  39%|█████████████████████████████▏                                            | 4349/11016 [11:02<17:23,  6.39it/s, loss=13]

2025-07-21 23:48:05,441 — INFO — layer 2, [0:4349], loss=12.9987
2025-07-21 23:48:05,441 — INFO — layer 2, [0:4349], loss=12.5580


epoch 0, layer 2:  40%|████████████████████████████▊                                           | 4399/11016 [11:10<17:15,  6.39it/s, loss=20.3]

2025-07-21 23:48:13,175 — INFO — layer 2, [0:4399], loss=19.3082
2025-07-21 23:48:13,175 — INFO — layer 2, [0:4399], loss=27.8251


epoch 0, layer 2:  40%|█████████████████████████████                                           | 4449/11016 [11:18<16:49,  6.50it/s, loss=15.8]

2025-07-21 23:48:20,851 — INFO — layer 2, [0:4449], loss=14.7732
2025-07-21 23:48:20,851 — INFO — layer 2, [0:4449], loss=16.0745


epoch 0, layer 2:  41%|█████████████████████████████▍                                          | 4499/11016 [11:25<16:26,  6.61it/s, loss=13.6]

2025-07-21 23:48:28,399 — INFO — layer 2, [0:4499], loss=13.5556
2025-07-21 23:48:28,399 — INFO — layer 2, [0:4499], loss=12.8677


epoch 0, layer 2:  41%|█████████████████████████████▋                                          | 4549/11016 [11:33<16:42,  6.45it/s, loss=14.2]

2025-07-21 23:48:35,906 — INFO — layer 2, [0:4549], loss=14.6960
2025-07-21 23:48:35,906 — INFO — layer 2, [0:4549], loss=13.5727


epoch 0, layer 2:  42%|██████████████████████████████                                          | 4599/11016 [11:40<16:55,  6.32it/s, loss=32.3]

2025-07-21 23:48:43,554 — INFO — layer 2, [0:4599], loss=22.8694
2025-07-21 23:48:43,554 — INFO — layer 2, [0:4599], loss=17.8718


epoch 0, layer 2:  42%|██████████████████████████████▍                                         | 4649/11016 [11:48<16:34,  6.40it/s, loss=29.4]

2025-07-21 23:48:51,209 — INFO — layer 2, [0:4649], loss=22.9949
2025-07-21 23:48:51,209 — INFO — layer 2, [0:4649], loss=35.7358


epoch 0, layer 2:  43%|██████████████████████████████▋                                         | 4699/11016 [11:56<15:52,  6.63it/s, loss=33.9]

2025-07-21 23:48:58,821 — INFO — layer 2, [0:4699], loss=27.1080
2025-07-21 23:48:58,821 — INFO — layer 2, [0:4699], loss=24.0243


epoch 0, layer 2:  43%|███████████████████████████████                                         | 4749/11016 [12:03<15:38,  6.68it/s, loss=13.3]

2025-07-21 23:49:06,421 — INFO — layer 2, [0:4749], loss=14.5014
2025-07-21 23:49:06,421 — INFO — layer 2, [0:4749], loss=12.5764


epoch 0, layer 2:  44%|███████████████████████████████▎                                        | 4799/11016 [12:11<16:43,  6.19it/s, loss=12.6]

2025-07-21 23:49:14,315 — INFO — layer 2, [0:4799], loss=12.2560
2025-07-21 23:49:14,315 — INFO — layer 2, [0:4799], loss=12.5070


epoch 0, layer 2:  44%|███████████████████████████████▋                                        | 4849/11016 [12:19<15:58,  6.43it/s, loss=26.8]

2025-07-21 23:49:21,876 — INFO — layer 2, [0:4849], loss=17.5119
2025-07-21 23:49:21,876 — INFO — layer 2, [0:4849], loss=24.0251


epoch 0, layer 2:  44%|████████████████████████████████                                        | 4899/11016 [12:26<15:05,  6.75it/s, loss=15.8]

2025-07-21 23:49:29,349 — INFO — layer 2, [0:4899], loss=14.2665
2025-07-21 23:49:29,349 — INFO — layer 2, [0:4899], loss=14.3702


epoch 0, layer 2:  45%|████████████████████████████████▎                                       | 4949/11016 [12:34<15:22,  6.58it/s, loss=12.2]

2025-07-21 23:49:36,837 — INFO — layer 2, [0:4949], loss=12.9162
2025-07-21 23:49:36,837 — INFO — layer 2, [0:4949], loss=13.3158


epoch 0, layer 2:  45%|████████████████████████████████▋                                       | 4999/11016 [12:41<15:33,  6.45it/s, loss=15.5]

2025-07-21 23:49:44,389 — INFO — layer 2, [0:4999], loss=16.5141
2025-07-21 23:49:44,389 — INFO — layer 2, [0:4999], loss=13.7767


epoch 0, layer 2:  46%|█████████████████████████████████                                       | 5049/11016 [12:49<15:14,  6.53it/s, loss=19.9]

2025-07-21 23:49:51,936 — INFO — layer 2, [0:5049], loss=19.0839
2025-07-21 23:49:51,936 — INFO — layer 2, [0:5049], loss=18.1365


epoch 0, layer 2:  46%|█████████████████████████████████▎                                      | 5099/11016 [12:56<14:50,  6.64it/s, loss=34.9]

2025-07-21 23:49:59,547 — INFO — layer 2, [0:5099], loss=27.1766
2025-07-21 23:49:59,548 — INFO — layer 2, [0:5099], loss=43.9211


epoch 0, layer 2:  47%|█████████████████████████████████▋                                      | 5149/11016 [13:04<14:39,  6.67it/s, loss=57.7]

2025-07-21 23:50:07,089 — INFO — layer 2, [0:5149], loss=39.9280
2025-07-21 23:50:07,089 — INFO — layer 2, [0:5149], loss=22.6592


epoch 0, layer 2:  47%|█████████████████████████████████▉                                      | 5199/11016 [13:11<15:15,  6.35it/s, loss=12.9]

2025-07-21 23:50:14,733 — INFO — layer 2, [0:5199], loss=13.5285
2025-07-21 23:50:14,733 — INFO — layer 2, [0:5199], loss=13.7347


epoch 0, layer 2:  48%|██████████████████████████████████▎                                     | 5249/11016 [13:19<14:52,  6.46it/s, loss=16.1]

2025-07-21 23:50:22,475 — INFO — layer 2, [0:5249], loss=14.7650
2025-07-21 23:50:22,475 — INFO — layer 2, [0:5249], loss=14.6883


epoch 0, layer 2:  48%|██████████████████████████████████▋                                     | 5299/11016 [13:27<14:48,  6.43it/s, loss=12.2]

2025-07-21 23:50:30,105 — INFO — layer 2, [0:5299], loss=12.3303
2025-07-21 23:50:30,105 — INFO — layer 2, [0:5299], loss=12.8866


epoch 0, layer 2:  49%|██████████████████████████████████▉                                     | 5349/11016 [13:34<13:58,  6.76it/s, loss=11.6]

2025-07-21 23:50:37,695 — INFO — layer 2, [0:5349], loss=11.6253
2025-07-21 23:50:37,695 — INFO — layer 2, [0:5349], loss=11.9356


epoch 0, layer 2:  49%|███████████████████████████████████▎                                    | 5399/11016 [13:42<14:32,  6.44it/s, loss=13.7]

2025-07-21 23:50:45,319 — INFO — layer 2, [0:5399], loss=12.4471
2025-07-21 23:50:45,319 — INFO — layer 2, [0:5399], loss=17.7564


epoch 0, layer 2:  49%|███████████████████████████████████▌                                    | 5449/11016 [13:50<14:04,  6.59it/s, loss=15.9]

2025-07-21 23:50:52,999 — INFO — layer 2, [0:5449], loss=19.9250
2025-07-21 23:50:53,000 — INFO — layer 2, [0:5449], loss=14.5478


epoch 0, layer 2:  50%|███████████████████████████████████▉                                    | 5499/11016 [13:57<13:37,  6.75it/s, loss=20.1]

2025-07-21 23:51:00,610 — INFO — layer 2, [0:5499], loss=19.1180
2025-07-21 23:51:00,610 — INFO — layer 2, [0:5499], loss=30.0876


epoch 0, layer 2:  50%|████████████████████████████████████▎                                   | 5549/11016 [14:05<13:47,  6.60it/s, loss=29.8]

2025-07-21 23:51:08,308 — INFO — layer 2, [0:5549], loss=26.8217
2025-07-21 23:51:08,308 — INFO — layer 2, [0:5549], loss=36.8011


epoch 0, layer 2:  51%|████████████████████████████████████▌                                   | 5599/11016 [14:13<14:03,  6.42it/s, loss=11.6]

2025-07-21 23:51:16,066 — INFO — layer 2, [0:5599], loss=11.6310
2025-07-21 23:51:16,066 — INFO — layer 2, [0:5599], loss=11.8447


epoch 0, layer 2:  51%|████████████████████████████████████▉                                   | 5649/11016 [14:20<13:33,  6.60it/s, loss=14.3]

2025-07-21 23:51:23,750 — INFO — layer 2, [0:5649], loss=36.4222
2025-07-21 23:51:23,751 — INFO — layer 2, [0:5649], loss=31.9635


epoch 0, layer 2:  52%|██████████████████████████████████████▎                                   | 5699/11016 [14:28<13:10,  6.72it/s, loss=28]

2025-07-21 23:51:31,323 — INFO — layer 2, [0:5699], loss=52.2418
2025-07-21 23:51:31,323 — INFO — layer 2, [0:5699], loss=37.0578


epoch 0, layer 2:  52%|█████████████████████████████████████▌                                  | 5749/11016 [14:36<14:06,  6.23it/s, loss=12.4]

2025-07-21 23:51:38,999 — INFO — layer 2, [0:5749], loss=12.5306
2025-07-21 23:51:38,999 — INFO — layer 2, [0:5749], loss=11.5813


epoch 0, layer 2:  53%|█████████████████████████████████████▉                                  | 5799/11016 [14:43<13:07,  6.62it/s, loss=20.5]

2025-07-21 23:51:46,622 — INFO — layer 2, [0:5799], loss=16.7096
2025-07-21 23:51:46,622 — INFO — layer 2, [0:5799], loss=14.7561


epoch 0, layer 2:  53%|██████████████████████████████████████▏                                 | 5849/11016 [14:51<14:17,  6.03it/s, loss=66.1]

2025-07-21 23:51:54,174 — INFO — layer 2, [0:5849], loss=24.3452
2025-07-21 23:51:54,174 — INFO — layer 2, [0:5849], loss=14.9391


epoch 0, layer 2:  54%|██████████████████████████████████████▌                                 | 5899/11016 [14:59<13:55,  6.13it/s, loss=13.7]

2025-07-21 23:52:01,900 — INFO — layer 2, [0:5899], loss=12.8388
2025-07-21 23:52:01,900 — INFO — layer 2, [0:5899], loss=12.0149


epoch 0, layer 2:  54%|██████████████████████████████████████▉                                 | 5949/11016 [15:06<12:15,  6.89it/s, loss=28.6]

2025-07-21 23:52:09,395 — INFO — layer 2, [0:5949], loss=48.9084
2025-07-21 23:52:09,395 — INFO — layer 2, [0:5949], loss=33.9226


epoch 0, layer 2:  54%|███████████████████████████████████████▏                                | 5999/11016 [15:14<12:45,  6.55it/s, loss=18.5]

2025-07-21 23:52:17,068 — INFO — layer 2, [0:5999], loss=15.0349
2025-07-21 23:52:17,068 — INFO — layer 2, [0:5999], loss=18.4272


epoch 0, layer 2:  55%|████████████████████████████████████████▋                                 | 6049/11016 [15:21<12:39,  6.54it/s, loss=11]

2025-07-21 23:52:24,712 — INFO — layer 2, [0:6049], loss=11.1876
2025-07-21 23:52:24,712 — INFO — layer 2, [0:6049], loss=10.7855


epoch 0, layer 2:  55%|███████████████████████████████████████▊                                | 6099/11016 [15:29<12:49,  6.39it/s, loss=21.1]

2025-07-21 23:52:32,307 — INFO — layer 2, [0:6099], loss=27.9890
2025-07-21 23:52:32,307 — INFO — layer 2, [0:6099], loss=13.4462


epoch 0, layer 2:  56%|████████████████████████████████████████▏                               | 6149/11016 [15:37<11:59,  6.76it/s, loss=11.7]

2025-07-21 23:52:39,821 — INFO — layer 2, [0:6149], loss=12.5943
2025-07-21 23:52:39,821 — INFO — layer 2, [0:6149], loss=13.0723


epoch 0, layer 2:  56%|████████████████████████████████████████▌                               | 6199/11016 [15:44<12:51,  6.24it/s, loss=11.2]

2025-07-21 23:52:47,331 — INFO — layer 2, [0:6199], loss=11.0413
2025-07-21 23:52:47,331 — INFO — layer 2, [0:6199], loss=11.2197


epoch 0, layer 2:  57%|████████████████████████████████████████▊                               | 6249/11016 [15:51<11:30,  6.91it/s, loss=11.1]

2025-07-21 23:52:54,765 — INFO — layer 2, [0:6249], loss=10.8478
2025-07-21 23:52:54,765 — INFO — layer 2, [0:6249], loss=10.8627


epoch 0, layer 2:  57%|█████████████████████████████████████████▏                              | 6299/11016 [15:59<11:45,  6.69it/s, loss=16.2]

2025-07-21 23:53:02,308 — INFO — layer 2, [0:6299], loss=14.6197
2025-07-21 23:53:02,307 — INFO — layer 2, [0:6299], loss=26.6258


epoch 0, layer 2:  58%|█████████████████████████████████████████▍                              | 6349/11016 [16:07<12:07,  6.42it/s, loss=30.7]

2025-07-21 23:53:10,015 — INFO — layer 2, [0:6349], loss=22.0632
2025-07-21 23:53:10,015 — INFO — layer 2, [0:6349], loss=20.0229


epoch 0, layer 2:  58%|█████████████████████████████████████████▊                              | 6399/11016 [16:14<11:54,  6.47it/s, loss=10.5]

2025-07-21 23:53:17,674 — INFO — layer 2, [0:6399], loss=10.8967
2025-07-21 23:53:17,674 — INFO — layer 2, [0:6399], loss=10.6462


epoch 0, layer 2:  59%|██████████████████████████████████████████▏                             | 6449/11016 [16:22<11:44,  6.48it/s, loss=14.7]

2025-07-21 23:53:25,456 — INFO — layer 2, [0:6449], loss=13.0286
2025-07-21 23:53:25,456 — INFO — layer 2, [0:6449], loss=12.2319


epoch 0, layer 2:  59%|██████████████████████████████████████████▍                             | 6499/11016 [16:30<11:18,  6.65it/s, loss=10.7]

2025-07-21 23:53:33,016 — INFO — layer 2, [0:6499], loss=10.9089
2025-07-21 23:53:33,016 — INFO — layer 2, [0:6499], loss=10.6663


epoch 0, layer 2:  59%|██████████████████████████████████████████▊                             | 6549/11016 [16:37<11:19,  6.58it/s, loss=12.4]

2025-07-21 23:53:40,624 — INFO — layer 2, [0:6549], loss=10.9059
2025-07-21 23:53:40,624 — INFO — layer 2, [0:6549], loss=12.4883


epoch 0, layer 2:  60%|███████████████████████████████████████████▏                            | 6599/11016 [16:45<11:40,  6.31it/s, loss=41.9]

2025-07-21 23:53:48,306 — INFO — layer 2, [0:6599], loss=17.3996
2025-07-21 23:53:48,306 — INFO — layer 2, [0:6599], loss=10.9322


epoch 0, layer 2:  60%|████████████████████████████████████████████▋                             | 6649/11016 [16:53<10:44,  6.78it/s, loss=28]

2025-07-21 23:53:56,057 — INFO — layer 2, [0:6649], loss=14.7435
2025-07-21 23:53:56,057 — INFO — layer 2, [0:6649], loss=14.1975


epoch 0, layer 2:  61%|███████████████████████████████████████████▊                            | 6699/11016 [17:00<10:46,  6.68it/s, loss=10.5]

2025-07-21 23:54:03,675 — INFO — layer 2, [0:6699], loss=10.8079
2025-07-21 23:54:03,675 — INFO — layer 2, [0:6699], loss=10.0844


epoch 0, layer 2:  61%|█████████████████████████████████████████████▎                            | 6749/11016 [17:08<10:58,  6.48it/s, loss=19]

2025-07-21 23:54:11,191 — INFO — layer 2, [0:6749], loss=14.5791
2025-07-21 23:54:11,191 — INFO — layer 2, [0:6749], loss=15.6964


epoch 0, layer 2:  62%|████████████████████████████████████████████▍                           | 6799/11016 [17:16<10:48,  6.50it/s, loss=20.2]

2025-07-21 23:54:18,805 — INFO — layer 2, [0:6799], loss=22.4546
2025-07-21 23:54:18,805 — INFO — layer 2, [0:6799], loss=21.1005


epoch 0, layer 2:  62%|████████████████████████████████████████████▊                           | 6849/11016 [17:23<10:48,  6.43it/s, loss=14.8]

2025-07-21 23:54:26,584 — INFO — layer 2, [0:6849], loss=12.6612
2025-07-21 23:54:26,584 — INFO — layer 2, [0:6849], loss=14.8337


epoch 0, layer 2:  63%|█████████████████████████████████████████████                           | 6899/11016 [17:31<10:30,  6.53it/s, loss=24.8]

2025-07-21 23:54:34,141 — INFO — layer 2, [0:6899], loss=28.6692
2025-07-21 23:54:34,141 — INFO — layer 2, [0:6899], loss=21.9837


epoch 0, layer 2:  63%|█████████████████████████████████████████████▍                          | 6949/11016 [17:39<10:37,  6.38it/s, loss=68.2]

2025-07-21 23:54:41,784 — INFO — layer 2, [0:6949], loss=27.3652
2025-07-21 23:54:41,784 — INFO — layer 2, [0:6949], loss=26.1253


epoch 0, layer 2:  64%|███████████████████████████████████████████████                           | 6999/11016 [17:46<09:43,  6.89it/s, loss=11]

2025-07-21 23:54:49,270 — INFO — layer 2, [0:6999], loss=10.5540
2025-07-21 23:54:49,270 — INFO — layer 2, [0:6999], loss=11.1940


epoch 0, layer 2:  64%|██████████████████████████████████████████████                          | 7049/11016 [17:54<09:46,  6.76it/s, loss=17.2]

2025-07-21 23:54:56,920 — INFO — layer 2, [0:7049], loss=28.0457
2025-07-21 23:54:56,920 — INFO — layer 2, [0:7049], loss=14.4836


epoch 0, layer 2:  64%|██████████████████████████████████████████████▍                         | 7099/11016 [18:01<10:01,  6.52it/s, loss=10.3]

2025-07-21 23:55:04,588 — INFO — layer 2, [0:7099], loss=9.7100
2025-07-21 23:55:04,588 — INFO — layer 2, [0:7099], loss=9.8043


epoch 0, layer 2:  65%|██████████████████████████████████████████████▋                         | 7149/11016 [18:09<09:50,  6.55it/s, loss=16.9]

2025-07-21 23:55:12,208 — INFO — layer 2, [0:7149], loss=13.8846
2025-07-21 23:55:12,208 — INFO — layer 2, [0:7149], loss=35.9638


epoch 0, layer 2:  65%|███████████████████████████████████████████████                         | 7199/11016 [18:17<10:27,  6.08it/s, loss=18.3]

2025-07-21 23:55:19,929 — INFO — layer 2, [0:7199], loss=22.5114
2025-07-21 23:55:19,929 — INFO — layer 2, [0:7199], loss=21.3625


epoch 0, layer 2:  66%|███████████████████████████████████████████████▍                        | 7249/11016 [18:24<09:37,  6.53it/s, loss=11.7]

2025-07-21 23:55:27,498 — INFO — layer 2, [0:7249], loss=11.2745
2025-07-21 23:55:27,498 — INFO — layer 2, [0:7249], loss=10.2161


epoch 0, layer 2:  66%|███████████████████████████████████████████████▋                        | 7299/11016 [18:32<09:54,  6.25it/s, loss=34.8]

2025-07-21 23:55:35,138 — INFO — layer 2, [0:7299], loss=21.3758
2025-07-21 23:55:35,138 — INFO — layer 2, [0:7299], loss=32.6481


epoch 0, layer 2:  67%|████████████████████████████████████████████████                        | 7349/11016 [18:39<09:05,  6.73it/s, loss=9.71]

2025-07-21 23:55:42,737 — INFO — layer 2, [0:7349], loss=10.1355
2025-07-21 23:55:42,737 — INFO — layer 2, [0:7349], loss=9.9709


epoch 0, layer 2:  67%|████████████████████████████████████████████████▎                       | 7399/11016 [18:47<08:58,  6.72it/s, loss=12.8]

2025-07-21 23:55:50,260 — INFO — layer 2, [0:7399], loss=12.0559
2025-07-21 23:55:50,260 — INFO — layer 2, [0:7399], loss=10.6595


epoch 0, layer 2:  68%|████████████████████████████████████████████████▋                       | 7449/11016 [18:55<08:47,  6.76it/s, loss=15.1]

2025-07-21 23:55:58,020 — INFO — layer 2, [0:7449], loss=21.8034
2025-07-21 23:55:58,020 — INFO — layer 2, [0:7449], loss=19.5905


epoch 0, layer 2:  68%|█████████████████████████████████████████████████                       | 7499/11016 [19:02<09:03,  6.48it/s, loss=9.13]

2025-07-21 23:56:05,638 — INFO — layer 2, [0:7499], loss=9.6136
2025-07-21 23:56:05,638 — INFO — layer 2, [0:7499], loss=9.2772


epoch 0, layer 2:  69%|█████████████████████████████████████████████████▎                      | 7549/11016 [19:10<08:33,  6.75it/s, loss=13.9]

2025-07-21 23:56:13,115 — INFO — layer 2, [0:7549], loss=22.6157
2025-07-21 23:56:13,115 — INFO — layer 2, [0:7549], loss=13.6002


epoch 0, layer 2:  69%|█████████████████████████████████████████████████▋                      | 7599/11016 [19:17<08:38,  6.59it/s, loss=29.7]

2025-07-21 23:56:20,655 — INFO — layer 2, [0:7599], loss=17.9325
2025-07-21 23:56:20,655 — INFO — layer 2, [0:7599], loss=36.2625


epoch 0, layer 2:  69%|█████████████████████████████████████████████████▉                      | 7649/11016 [19:25<08:00,  7.01it/s, loss=10.8]

2025-07-21 23:56:28,356 — INFO — layer 2, [0:7649], loss=11.5206
2025-07-21 23:56:28,356 — INFO — layer 2, [0:7649], loss=14.7447


epoch 0, layer 2:  70%|██████████████████████████████████████████████████▎                     | 7699/11016 [19:33<08:39,  6.38it/s, loss=43.6]

2025-07-21 23:56:35,907 — INFO — layer 2, [0:7699], loss=11.1047
2025-07-21 23:56:35,907 — INFO — layer 2, [0:7699], loss=30.3730


epoch 0, layer 2:  70%|██████████████████████████████████████████████████▋                     | 7749/11016 [19:40<08:00,  6.81it/s, loss=9.28]

2025-07-21 23:56:43,479 — INFO — layer 2, [0:7749], loss=10.2296
2025-07-21 23:56:43,480 — INFO — layer 2, [0:7749], loss=9.4097


epoch 0, layer 2:  71%|██████████████████████████████████████████████████▉                     | 7799/11016 [19:48<07:32,  7.10it/s, loss=10.9]

2025-07-21 23:56:50,873 — INFO — layer 2, [0:7799], loss=12.2882


epoch 0, layer 2:  71%|██████████████████████████████████████████████████▉                     | 7799/11016 [19:48<07:32,  7.11it/s, loss=12.3]

2025-07-21 23:56:50,873 — INFO — layer 2, [0:7799], loss=9.8404


epoch 0, layer 2:  71%|███████████████████████████████████████████████████▎                    | 7849/11016 [19:55<07:46,  6.79it/s, loss=10.6]

2025-07-21 23:56:58,559 — INFO — layer 2, [0:7849], loss=9.9199
2025-07-21 23:56:58,559 — INFO — layer 2, [0:7849], loss=9.0569


epoch 0, layer 2:  72%|███████████████████████████████████████████████████▋                    | 7899/11016 [20:03<07:50,  6.63it/s, loss=10.1]

2025-07-21 23:57:06,209 — INFO — layer 2, [0:7899], loss=10.4832
2025-07-21 23:57:06,209 — INFO — layer 2, [0:7899], loss=9.7398


epoch 0, layer 2:  72%|███████████████████████████████████████████████████▉                    | 7949/11016 [20:10<07:30,  6.81it/s, loss=26.1]

2025-07-21 23:57:13,655 — INFO — layer 2, [0:7949], loss=20.7816
2025-07-21 23:57:13,656 — INFO — layer 2, [0:7949], loss=16.0380


epoch 0, layer 2:  73%|████████████████████████████████████████████████████▎                   | 7999/11016 [20:18<07:41,  6.54it/s, loss=15.6]

2025-07-21 23:57:21,236 — INFO — layer 2, [0:7999], loss=15.3715
2025-07-21 23:57:21,236 — INFO — layer 2, [0:7999], loss=16.0425


epoch 0, layer 2:  73%|████████████████████████████████████████████████████▌                   | 8049/11016 [20:26<07:49,  6.32it/s, loss=15.3]

2025-07-21 23:57:28,962 — INFO — layer 2, [0:8049], loss=10.9676
2025-07-21 23:57:28,962 — INFO — layer 2, [0:8049], loss=25.0294


epoch 0, layer 2:  74%|████████████████████████████████████████████████████▉                   | 8099/11016 [20:33<07:05,  6.85it/s, loss=52.9]

2025-07-21 23:57:36,590 — INFO — layer 2, [0:8099], loss=17.0338
2025-07-21 23:57:36,590 — INFO — layer 2, [0:8099], loss=18.9391


epoch 0, layer 2:  74%|█████████████████████████████████████████████████████▎                  | 8149/11016 [20:41<07:27,  6.40it/s, loss=23.7]

2025-07-21 23:57:44,301 — INFO — layer 2, [0:8149], loss=28.1013
2025-07-21 23:57:44,301 — INFO — layer 2, [0:8149], loss=22.2971


epoch 0, layer 2:  74%|█████████████████████████████████████████████████████▌                  | 8199/11016 [20:49<07:21,  6.38it/s, loss=8.02]

2025-07-21 23:57:52,006 — INFO — layer 2, [0:8199], loss=8.0362
2025-07-21 23:57:52,006 — INFO — layer 2, [0:8199], loss=8.3913


epoch 0, layer 2:  75%|███████████████████████████████████████████████████████▍                  | 8249/11016 [20:56<06:46,  6.80it/s, loss=11]

2025-07-21 23:57:59,709 — INFO — layer 2, [0:8249], loss=36.7616
2025-07-21 23:57:59,709 — INFO — layer 2, [0:8249], loss=24.2009


epoch 0, layer 2:  75%|██████████████████████████████████████████████████████▏                 | 8299/11016 [21:04<06:46,  6.68it/s, loss=7.69]

2025-07-21 23:58:07,127 — INFO — layer 2, [0:8299], loss=9.1539
2025-07-21 23:58:07,127 — INFO — layer 2, [0:8299], loss=8.1413


epoch 0, layer 2:  76%|██████████████████████████████████████████████████████▌                 | 8349/11016 [21:12<06:52,  6.47it/s, loss=7.42]

2025-07-21 23:58:14,792 — INFO — layer 2, [0:8349], loss=8.2306
2025-07-21 23:58:14,792 — INFO — layer 2, [0:8349], loss=7.3172


epoch 0, layer 2:  76%|██████████████████████████████████████████████████████▉                 | 8399/11016 [21:19<06:24,  6.80it/s, loss=9.72]

2025-07-21 23:58:22,354 — INFO — layer 2, [0:8399], loss=28.4935
2025-07-21 23:58:22,354 — INFO — layer 2, [0:8399], loss=14.7153


epoch 0, layer 2:  77%|███████████████████████████████████████████████████████▏                | 8449/11016 [21:27<06:26,  6.64it/s, loss=10.3]

2025-07-21 23:58:29,986 — INFO — layer 2, [0:8449], loss=12.0878
2025-07-21 23:58:29,987 — INFO — layer 2, [0:8449], loss=28.9840


epoch 0, layer 2:  77%|███████████████████████████████████████████████████████▌                | 8499/11016 [21:34<06:42,  6.26it/s, loss=30.5]

2025-07-21 23:58:37,664 — INFO — layer 2, [0:8499], loss=90.2870
2025-07-21 23:58:37,664 — INFO — layer 2, [0:8499], loss=44.9639


epoch 0, layer 2:  78%|█████████████████████████████████████████████████████████▍                | 8549/11016 [21:42<06:08,  6.70it/s, loss=64]

2025-07-21 23:58:45,404 — INFO — layer 2, [0:8549], loss=25.1123
2025-07-21 23:58:45,404 — INFO — layer 2, [0:8549], loss=121.5804


epoch 0, layer 2:  78%|████████████████████████████████████████████████████████▏               | 8599/11016 [21:50<05:53,  6.84it/s, loss=8.99]

2025-07-21 23:58:53,272 — INFO — layer 2, [0:8599], loss=9.3591
2025-07-21 23:58:53,272 — INFO — layer 2, [0:8599], loss=10.0345


epoch 0, layer 2:  79%|████████████████████████████████████████████████████████▌               | 8649/11016 [21:58<06:01,  6.55it/s, loss=10.9]

2025-07-21 23:59:00,896 — INFO — layer 2, [0:8649], loss=8.3637
2025-07-21 23:59:00,896 — INFO — layer 2, [0:8649], loss=11.0348


epoch 0, layer 2:  79%|████████████████████████████████████████████████████████▊               | 8699/11016 [22:06<06:04,  6.35it/s, loss=6.02]

2025-07-21 23:59:08,879 — INFO — layer 2, [0:8699], loss=6.3564
2025-07-21 23:59:08,879 — INFO — layer 2, [0:8699], loss=6.6531


epoch 0, layer 2:  79%|█████████████████████████████████████████████████████████▏              | 8749/11016 [22:14<07:30,  5.03it/s, loss=9.68]

2025-07-21 23:59:17,264 — INFO — layer 2, [0:8749], loss=9.0555
2025-07-21 23:59:17,264 — INFO — layer 2, [0:8749], loss=6.9580


epoch 0, layer 2:  80%|█████████████████████████████████████████████████████████▌              | 8799/11016 [22:22<05:34,  6.63it/s, loss=6.69]

2025-07-21 23:59:25,283 — INFO — layer 2, [0:8799], loss=6.0413
2025-07-21 23:59:25,283 — INFO — layer 2, [0:8799], loss=6.0108


epoch 0, layer 2:  80%|█████████████████████████████████████████████████████████▊              | 8849/11016 [22:30<05:18,  6.81it/s, loss=14.6]

2025-07-21 23:59:32,908 — INFO — layer 2, [0:8849], loss=15.1446
2025-07-21 23:59:32,908 — INFO — layer 2, [0:8849], loss=24.6970


epoch 0, layer 2:  81%|██████████████████████████████████████████████████████████▏             | 8899/11016 [22:37<05:31,  6.39it/s, loss=7.12]

2025-07-21 23:59:40,549 — INFO — layer 2, [0:8899], loss=6.8594
2025-07-21 23:59:40,549 — INFO — layer 2, [0:8899], loss=6.9456


epoch 0, layer 2:  81%|██████████████████████████████████████████████████████████▍             | 8949/11016 [22:45<05:08,  6.70it/s, loss=6.71]

2025-07-21 23:59:47,894 — INFO — layer 2, [0:8949], loss=6.2006
2025-07-21 23:59:47,894 — INFO — layer 2, [0:8949], loss=7.0697


epoch 0, layer 2:  82%|██████████████████████████████████████████████████████████▊             | 8999/11016 [22:52<05:09,  6.52it/s, loss=9.55]

2025-07-21 23:59:55,466 — INFO — layer 2, [0:8999], loss=7.6677
2025-07-21 23:59:55,466 — INFO — layer 2, [0:8999], loss=12.7461


epoch 0, layer 2:  82%|███████████████████████████████████████████████████████████▉             | 9049/11016 [23:00<05:01,  6.52it/s, loss=6.7]

2025-07-22 00:00:03,111 — INFO — layer 2, [0:9049], loss=7.1190
2025-07-22 00:00:03,111 — INFO — layer 2, [0:9049], loss=7.3662


epoch 0, layer 2:  83%|███████████████████████████████████████████████████████████▍            | 9099/11016 [23:07<04:47,  6.66it/s, loss=5.88]

2025-07-22 00:00:10,636 — INFO — layer 2, [0:9099], loss=5.7534
2025-07-22 00:00:10,636 — INFO — layer 2, [0:9099], loss=5.9442


epoch 0, layer 2:  83%|███████████████████████████████████████████████████████████▊            | 9149/11016 [23:15<04:37,  6.73it/s, loss=6.81]

2025-07-22 00:00:18,414 — INFO — layer 2, [0:9149], loss=8.0130
2025-07-22 00:00:18,414 — INFO — layer 2, [0:9149], loss=6.5878


epoch 0, layer 2:  84%|████████████████████████████████████████████████████████████            | 9199/11016 [23:23<04:23,  6.90it/s, loss=5.55]

2025-07-22 00:00:25,833 — INFO — layer 2, [0:9199], loss=5.6107
2025-07-22 00:00:25,833 — INFO — layer 2, [0:9199], loss=6.4212


epoch 0, layer 2:  84%|████████████████████████████████████████████████████████████▍           | 9249/11016 [23:30<04:48,  6.11it/s, loss=5.44]

2025-07-22 00:00:33,390 — INFO — layer 2, [0:9249], loss=5.4282
2025-07-22 00:00:33,390 — INFO — layer 2, [0:9249], loss=6.1469


epoch 0, layer 2:  84%|██████████████████████████████████████████████████████████████▍           | 9299/11016 [23:38<04:26,  6.43it/s, loss=32]

2025-07-22 00:00:40,994 — INFO — layer 2, [0:9299], loss=38.8985
2025-07-22 00:00:40,994 — INFO — layer 2, [0:9299], loss=15.4339


epoch 0, layer 2:  85%|█████████████████████████████████████████████████████████████           | 9349/11016 [23:45<04:20,  6.40it/s, loss=5.68]

2025-07-22 00:00:48,650 — INFO — layer 2, [0:9349], loss=4.8619
2025-07-22 00:00:48,650 — INFO — layer 2, [0:9349], loss=4.7906


epoch 0, layer 2:  85%|██████████████████████████████████████████████████████████████▎          | 9399/11016 [23:53<04:14,  6.35it/s, loss=4.8]

2025-07-22 00:00:56,277 — INFO — layer 2, [0:9399], loss=5.0543
2025-07-22 00:00:56,278 — INFO — layer 2, [0:9399], loss=5.1309


epoch 0, layer 2:  86%|█████████████████████████████████████████████████████████████▌          | 9427/11016 [23:57<04:07,  6.43it/s, loss=7.31]

In [8]:
logger

<Logger train_decomposer (INFO)>