In [1]:
import os
import math
import time
import inspect
from dataclasses import dataclass
import torch
import torch.nn as nn
from torch.nn import functional as F
import json
from datetime import datetime
from transformers import GPT2LMHeadModel
import tiktoken
from tqdm import tqdm

In [2]:
class CausalSelfAttention(nn.Module):

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads, but in a batch
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
        # output projection
        self.c_proj = nn.Linear(config.n_embd, config.n_embd)
        self.c_proj.NANGPT_SCALE_INIT = 1
        # regularization
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size)).view(1, 1, config.block_size, config.block_size))

    def forward(self, x):
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)
        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        # nh is "number of heads", hs is "head size", and C (number of channels) = nh * hs
        # e.g. in GPT-2 (124M), n_head=12, hs=64, so nh*hs=C=768 channels in the Transformer
        qkv = self.c_attn(x)
        q, k, v = qkv.split(self.n_embd, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)

        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)

        y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side
        # output projection
        y = self.c_proj(y)
        return y

In [3]:
class MLP(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.c_fc    = nn.Linear(config.n_embd, 4 * config.n_embd)
        self.gelu    = nn.GELU(approximate='tanh')
        self.c_proj  = nn.Linear(4 * config.n_embd, config.n_embd)
        self.c_proj.NANOGPT_SCALE_INIT = 1

    def forward(self, x):
        x = self.c_fc(x)
        x = self.gelu(x)
        x = self.c_proj(x)
        return x

In [4]:
class Block(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embd)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x

In [5]:
@dataclass
class GPTConfig:
    block_size: int = 1024 # max sequence length
    vocab_size: int = 50257 # number of tokens: 50,000 BPE merges + 256 bytes tokens + 1 <|endoftext|> token
    n_layer: int = 12 # number of layers
    n_head: int = 12 # number of heads
    n_embd: int = 768 # embedding dimension

In [6]:
class GPT(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.config = config

        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            wpe = nn.Embedding(config.block_size, config.n_embd),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = nn.LayerNorm(config.n_embd),
        ))
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # weight sharing
        self.transformer.wte.weight = self.lm_head.weight

        # weight initialization
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            std = 0.02
            if hasattr(module, 'NANGPT_SCALE_INIT'):
                std *= (2 * self.config.n_layer) ** -0.5
            torch.nn.init.normal_(module.weight, mean = 0.0, std = std)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std = 0.02)

    def forward(self, idx, targets=None):
        # idx is of shape (B, T)
        B, T = idx.size()
        assert T <= self.config.block_size, f"Cannot forward sequence of length {T}, block size is only {self.config.block_size}"
        # forward the token and posisition embeddings
        pos = torch.arange(0, T, dtype=torch.long, device=idx.device) # shape (T)
        pos_emb = self.transformer.wpe(pos) # position embeddings of shape (T, n_embd)
        tok_emb = self.transformer.wte(idx) # token embeddings of shape (B, T, n_embd)
        x = tok_emb + pos_emb
        # forward the blocks of the transformer
        for block in self.transformer.h:
            x = block(x)
        # forward the final layernorm and the classifier
        x = self.transformer.ln_f(x)
        logits = self.lm_head(x) # (B, T, vocab_size)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
        return logits, loss

    @classmethod
    def from_pretrained(cls, model_type):
        """Loads pretrained GPT-2 model weights from huggingface"""
        assert model_type in {'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl'}
        from transformers import GPT2LMHeadModel
        print("loading weights from pretrained gpt: %s" % model_type)

        # n_layer, n_head and n_embd are determined from model_type
        config_args = {
            'gpt2':         dict(n_layer=12, n_head=12, n_embd=768),  # 124M params
            'gpt2-medium':  dict(n_layer=24, n_head=16, n_embd=1024), # 350M params
            'gpt2-large':   dict(n_layer=36, n_head=20, n_embd=1280), # 774M params
            'gpt2-xl':      dict(n_layer=48, n_head=25, n_embd=1600), # 1558M params
        }[model_type]
        config_args['vocab_size'] = 50257 # always 50257 for GPT model checkpoints
        config_args['block_size'] = 1024 # always 1024 for GPT model checkpoints
        # create a from-scratch initialized minGPT model
        config = GPTConfig(**config_args)
        model = GPT(config)
        sd = model.state_dict()
        sd_keys = sd.keys()
        sd_keys = [k for k in sd_keys if not k.endswith('.attn.bias')] # discard this mask / buffer, not a param

        # init a huggingface/transformers model
        model_hf = GPT2LMHeadModel.from_pretrained(model_type)
        sd_hf = model_hf.state_dict()

        # copy while ensuring all of the parameters are aligned and match in names and shapes
        sd_keys_hf = sd_hf.keys()
        sd_keys_hf = [k for k in sd_keys_hf if not k.endswith('.attn.masked_bias')] # ignore these, just a buffer
        sd_keys_hf = [k for k in sd_keys_hf if not k.endswith('.attn.bias')] # same, just the mask (buffer)
        transposed = ['attn.c_attn.weight', 'attn.c_proj.weight', 'mlp.c_fc.weight', 'mlp.c_proj.weight']
        # basically the openai checkpoints use a "Conv1D" module, but we only want to use a vanilla Linear
        # this means that we have to transpose these weights when we import them
        assert len(sd_keys_hf) == len(sd_keys), f"mismatched keys: {len(sd_keys_hf)} != {len(sd_keys)}"
        for k in sd_keys_hf:
            if any(k.endswith(w) for w in transposed):
                # special treatment for the Conv1D weights we need to transpose
                assert sd_hf[k].shape[::-1] == sd[k].shape
                with torch.no_grad():
                    sd[k].copy_(sd_hf[k].t())
            else:
                # vanilla copy over the other parameters
                assert sd_hf[k].shape == sd[k].shape
                with torch.no_grad():
                    sd[k].copy_(sd_hf[k])

        return model

In [7]:
# model = GPT.from_pretrained('gpt2'

In [8]:
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
print(f"Using device: {device}")

Using device: cuda


In [9]:
# SEED
torch.manual_seed(1566)
if torch.cuda.is_available():
    torch.cuda.manual_seed(1566)

In [10]:
# STOP
num_return_sequences = 5
max_length = 30
NO_DASHES = 100

In [11]:
class DataLoaderLite:
    def __init__(self, B, T):
        self.B = B
        self.T = T

        # at init load tokens from disk and store them in memory
        with open('input.txt', 'r') as f:
            text = f.read()
        enc = tiktoken.get_encoding('gpt2') 
        tokens = enc.encode(text)
        self.tokens = torch.tensor(tokens)
        print(f'loaded {len(self.tokens)} tokens')
        print(f'1 epoch = {len(self.tokens) // (B * T)} batches')

        # state
        self.current_position = 0
    
    def next_batch(self):
        B, T = self.B, self.T
        buf = self.tokens[self.current_position: self.current_position + B * T + 1]
        x = (buf[:-1]).view(B, T) # inputs
        y = (buf[1:]).view(B, T) # targets
        # advance the position in the tensor
        self.current_position += B*T
        # if loading the next batch would be out of bounds, reset
        if self.current_position + (B * T + 1) > len(self.tokens):
            self.current_position = 0
        return x, y

In [12]:
def save_checkpoint(model, optimizer, loss, step, config, save_dir="checkpoints"):
    """Save model checkpoint and training state"""
    # Create checkpoint directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)
    
    # Create timestamp for unique checkpoint name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Save model checkpoint
    checkpoint = {
        'step': step,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'config': {
            'n_layer': config.n_layer,
            'n_head': config.n_head,
            'n_embd': config.n_embd,
            'vocab_size': config.vocab_size,
            'block_size': config.block_size
        }
    }
    
    # Save checkpoint with timestamp and loss
    checkpoint_path = os.path.join(save_dir, f'checkpoint_step{step}_loss{loss:.4f}_{timestamp}.pt')
    torch.save(checkpoint, checkpoint_path)
    
    # Save latest checkpoint (overwrite)
    latest_path = os.path.join(save_dir, 'checkpoint_latest.pt')
    torch.save(checkpoint, latest_path)
    
    # Save training metrics
    metrics_path = os.path.join(save_dir, f'training_metrics_{timestamp}.json')
    metrics = {
        'step': step,
        'loss': loss,
        'timestamp': timestamp
    }
    with open(metrics_path, 'w') as f:
        json.dump(metrics, f, indent=4)
    
    print(f"\nCheckpoint saved: {checkpoint_path}")
    return checkpoint_path

In [13]:
def train_model():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    NO_OF_STEPS = 10000
    SAVE_EVERY = 1000
    SAVE_DIR = "checkpoints"

    model = GPT(GPTConfig())
    model.to(device)

    train_loader = DataLoaderLite(B=32, T=128)
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)
    
    os.makedirs(SAVE_DIR, exist_ok=True)
    
    print(f"\nStarting training for {NO_OF_STEPS} steps...")
    print(f"Saving checkpoints every {SAVE_EVERY} steps to {SAVE_DIR}")

    best_loss = float('inf')
    for i in tqdm(range(NO_OF_STEPS), desc="Training Progress"):
        x, y = train_loader.next_batch()
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
        logits, loss = model(x, y)
        loss.backward()
        optimizer.step()
        
        print(f'Step {i+1}/{NO_OF_STEPS}, Loss: {loss.item():.4f}')
        
        if (i + 1) % SAVE_EVERY == 0:
            checkpoint_path = save_checkpoint(
                model=model,
                optimizer=optimizer,
                loss=loss.item(),
                step=i+1,
                config=model.config,
                save_dir=SAVE_DIR
            )
        
        if loss.item() < best_loss:
            best_loss = loss.item()
            best_checkpoint_path = save_checkpoint(
                model=model,
                optimizer=optimizer,
                loss=loss.item(),
                step=i+1,
                config=model.config,
                save_dir=os.path.join(SAVE_DIR, 'best')
            )
            print(f"\nNew best loss: {best_loss:.4f}")

    final_checkpoint_path = save_checkpoint(
        model=model,
        optimizer=optimizer,
        loss=loss.item(),
        step=NO_OF_STEPS,
        config=model.config,
        save_dir=SAVE_DIR
    )

    print("\nTraining completed!")
    print("=" * NO_DASHES) 
    print(f"Best loss achieved: {best_loss:.4f}")
    print(f"Final loss: {loss.item():.4f}")
    print(f"Checkpoints saved in: {SAVE_DIR}")

In [14]:
#if __name__ == "__main__":
print("Starting training ... ")
print("=" * NO_DASHES) 
train_model()

Starting training ... 
Using device: cuda
loaded 338025 tokens
1 epoch = 82 batches

Starting training for 10000 steps...
Saving checkpoints every 1000 steps to checkpoints


Training Progress:   0%|                                                                     | 0/10000 [00:00<?, ?it/s]

Step 1/10000, Loss: 10.9407


Training Progress:   0%|                                                          | 1/10000 [00:15<41:58:08, 15.11s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1_loss10.9407_20250117_125204.pt

New best loss: 10.9407
Step 2/10000, Loss: 9.3859


Training Progress:   0%|                                                          | 2/10000 [00:35<51:11:20, 18.43s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2_loss9.3859_20250117_125219.pt

New best loss: 9.3859


Training Progress:   0%|                                                          | 3/10000 [00:36<28:22:54, 10.22s/it]

Step 3/10000, Loss: 9.5298
Step 4/10000, Loss: 8.8142


Training Progress:   0%|                                                          | 4/10000 [00:55<38:22:38, 13.82s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4_loss8.8142_20250117_125240.pt

New best loss: 8.8142
Step 5/10000, Loss: 8.4025


Training Progress:   0%|                                                          | 5/10000 [01:17<46:00:29, 16.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5_loss8.4025_20250117_125259.pt

New best loss: 8.4025
Step 6/10000, Loss: 8.0949


Training Progress:   0%|                                                          | 6/10000 [01:37<49:09:02, 17.70s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6_loss8.0949_20250117_125321.pt

New best loss: 8.0949
Step 7/10000, Loss: 7.9842


Training Progress:   0%|                                                          | 7/10000 [01:58<52:39:18, 18.97s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7_loss7.9842_20250117_125341.pt

New best loss: 7.9842
Step 8/10000, Loss: 7.7257


Training Progress:   0%|                                                          | 8/10000 [02:18<53:55:02, 19.43s/it]


Checkpoint saved: checkpoints\best\checkpoint_step8_loss7.7257_20250117_125402.pt

New best loss: 7.7257
Step 9/10000, Loss: 7.6191


Training Progress:   0%|                                                          | 9/10000 [02:39<54:33:10, 19.66s/it]


Checkpoint saved: checkpoints\best\checkpoint_step9_loss7.6191_20250117_125423.pt

New best loss: 7.6191
Step 10/10000, Loss: 7.3675


Training Progress:   0%|                                                         | 10/10000 [03:00<56:22:24, 20.31s/it]


Checkpoint saved: checkpoints\best\checkpoint_step10_loss7.3675_20250117_125443.pt

New best loss: 7.3675


Training Progress:   0%|                                                         | 12/10000 [03:01<27:33:17,  9.93s/it]

Step 11/10000, Loss: 7.3742
Step 12/10000, Loss: 7.3830


Training Progress:   0%|                                                         | 14/10000 [03:01<13:39:13,  4.92s/it]

Step 13/10000, Loss: 7.4602
Step 14/10000, Loss: 7.3780
Step 15/10000, Loss: 7.0144


Training Progress:   0%|                                                         | 15/10000 [03:22<26:26:16,  9.53s/it]


Checkpoint saved: checkpoints\best\checkpoint_step15_loss7.0144_20250117_125505.pt

New best loss: 7.0144


Training Progress:   0%|                                                         | 16/10000 [03:22<18:51:07,  6.80s/it]

Step 16/10000, Loss: 7.0192
Step 17/10000, Loss: 6.8400


Training Progress:   0%|                                                         | 17/10000 [03:44<31:13:36, 11.26s/it]


Checkpoint saved: checkpoints\best\checkpoint_step17_loss6.8400_20250117_125526.pt

New best loss: 6.8400
Step 18/10000, Loss: 6.6634


Training Progress:   0%|                                                         | 18/10000 [04:05<39:25:45, 14.22s/it]


Checkpoint saved: checkpoints\best\checkpoint_step18_loss6.6634_20250117_125548.pt

New best loss: 6.6634


Training Progress:   0%|                                                         | 20/10000 [04:05<19:42:33,  7.11s/it]

Step 19/10000, Loss: 6.7927
Step 20/10000, Loss: 6.8073


Training Progress:   0%|                                                         | 21/10000 [04:06<13:56:01,  5.03s/it]

Step 21/10000, Loss: 6.9710
Step 22/10000, Loss: 6.9372


Training Progress:   0%|▏                                                         | 24/10000 [04:06<5:07:43,  1.85s/it]

Step 23/10000, Loss: 6.7679
Step 24/10000, Loss: 6.8692


Training Progress:   0%|▏                                                         | 26/10000 [04:06<2:45:02,  1.01it/s]

Step 25/10000, Loss: 6.8776
Step 26/10000, Loss: 6.8719


Training Progress:   0%|▏                                                         | 28/10000 [04:07<1:35:38,  1.74it/s]

Step 27/10000, Loss: 6.6956
Step 28/10000, Loss: 6.7467


Training Progress:   0%|▏                                                         | 29/10000 [04:07<1:15:20,  2.21it/s]

Step 29/10000, Loss: 6.7536
Step 30/10000, Loss: 6.5684


Training Progress:   0%|▏                                                        | 30/10000 [04:25<15:32:46,  5.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step30_loss6.5684_20250117_125611.pt

New best loss: 6.5684
Step 31/10000, Loss: 6.5133


Training Progress:   0%|▏                                                        | 31/10000 [04:44<27:17:51,  9.86s/it]


Checkpoint saved: checkpoints\best\checkpoint_step31_loss6.5133_20250117_125629.pt

New best loss: 6.5133
Step 32/10000, Loss: 6.4170


Training Progress:   0%|▏                                                        | 32/10000 [05:07<37:50:39, 13.67s/it]


Checkpoint saved: checkpoints\best\checkpoint_step32_loss6.4170_20250117_125649.pt

New best loss: 6.4170


Training Progress:   0%|▏                                                        | 34/10000 [05:08<18:55:36,  6.84s/it]

Step 33/10000, Loss: 6.4951
Step 34/10000, Loss: 6.6083


Training Progress:   0%|▏                                                         | 36/10000 [05:08<9:31:00,  3.44s/it]

Step 35/10000, Loss: 6.6127
Step 36/10000, Loss: 6.6065


Training Progress:   0%|▏                                                         | 38/10000 [05:08<4:54:01,  1.77s/it]

Step 37/10000, Loss: 6.4476
Step 38/10000, Loss: 6.5757
Step 39/10000, Loss: 6.3818


Training Progress:   0%|▏                                                        | 39/10000 [05:26<18:14:43,  6.59s/it]


Checkpoint saved: checkpoints\best\checkpoint_step39_loss6.3818_20250117_125712.pt

New best loss: 6.3818
Step 40/10000, Loss: 6.2252


Training Progress:   0%|▏                                                        | 40/10000 [05:49<31:28:26, 11.38s/it]


Checkpoint saved: checkpoints\best\checkpoint_step40_loss6.2252_20250117_125730.pt

New best loss: 6.2252


Training Progress:   0%|▏                                                        | 42/10000 [05:49<15:46:16,  5.70s/it]

Step 41/10000, Loss: 6.3233
Step 42/10000, Loss: 6.4208


Training Progress:   0%|▏                                                        | 43/10000 [05:49<11:10:31,  4.04s/it]

Step 43/10000, Loss: 6.2380
Step 44/10000, Loss: 6.2236


Training Progress:   0%|▎                                                        | 44/10000 [06:08<23:11:05,  8.38s/it]


Checkpoint saved: checkpoints\best\checkpoint_step44_loss6.2236_20250117_125753.pt

New best loss: 6.2236


Training Progress:   0%|▎                                                        | 46/10000 [06:08<11:41:43,  4.23s/it]

Step 45/10000, Loss: 6.3870
Step 46/10000, Loss: 6.2735
Step 47/10000, Loss: 6.1321


Training Progress:   0%|▎                                                        | 47/10000 [06:28<24:52:17,  9.00s/it]


Checkpoint saved: checkpoints\best\checkpoint_step47_loss6.1321_20250117_125812.pt

New best loss: 6.1321


Training Progress:   0%|▎                                                        | 49/10000 [06:29<12:36:53,  4.56s/it]

Step 48/10000, Loss: 6.1496
Step 49/10000, Loss: 6.1693
Step 50/10000, Loss: 6.0813


Training Progress:   0%|▎                                                        | 50/10000 [06:48<24:37:46,  8.91s/it]


Checkpoint saved: checkpoints\best\checkpoint_step50_loss6.0813_20250117_125833.pt

New best loss: 6.0813


Training Progress:   1%|▎                                                        | 52/10000 [06:49<12:25:58,  4.50s/it]

Step 51/10000, Loss: 6.1957
Step 52/10000, Loss: 6.1309


Training Progress:   1%|▎                                                         | 54/10000 [06:49<6:19:52,  2.29s/it]

Step 53/10000, Loss: 6.5033
Step 54/10000, Loss: 6.4638


Training Progress:   1%|▎                                                         | 56/10000 [06:49<3:19:56,  1.21s/it]

Step 55/10000, Loss: 6.2838
Step 56/10000, Loss: 6.3595


Training Progress:   1%|▎                                                         | 58/10000 [06:50<1:52:37,  1.47it/s]

Step 57/10000, Loss: 6.6271
Step 58/10000, Loss: 6.5288


Training Progress:   1%|▎                                                         | 60/10000 [06:50<1:09:44,  2.38it/s]

Step 59/10000, Loss: 6.2316
Step 60/10000, Loss: 6.3827


Training Progress:   1%|▎                                                           | 62/10000 [06:50<47:47,  3.47it/s]

Step 61/10000, Loss: 6.2661
Step 62/10000, Loss: 6.2299


Training Progress:   1%|▍                                                           | 64/10000 [06:51<37:45,  4.39it/s]

Step 63/10000, Loss: 6.3518
Step 64/10000, Loss: 6.2024
Step 65/10000, Loss: 6.0417


Training Progress:   1%|▎                                                        | 65/10000 [07:10<16:09:30,  5.86s/it]


Checkpoint saved: checkpoints\best\checkpoint_step65_loss6.0417_20250117_125855.pt

New best loss: 6.0417


Training Progress:   1%|▍                                                         | 67/10000 [07:10<8:17:24,  3.00s/it]

Step 66/10000, Loss: 6.2236
Step 67/10000, Loss: 6.4213


Training Progress:   1%|▍                                                         | 69/10000 [07:11<4:17:50,  1.56s/it]

Step 68/10000, Loss: 6.3111
Step 69/10000, Loss: 6.2794


Training Progress:   1%|▍                                                         | 70/10000 [07:11<3:09:09,  1.14s/it]

Step 70/10000, Loss: 6.0962
Step 71/10000, Loss: 5.9797


Training Progress:   1%|▍                                                        | 71/10000 [07:31<18:36:51,  6.75s/it]


Checkpoint saved: checkpoints\best\checkpoint_step71_loss5.9797_20250117_125915.pt

New best loss: 5.9797


Training Progress:   1%|▍                                                         | 73/10000 [07:31<9:32:22,  3.46s/it]

Step 72/10000, Loss: 6.3207
Step 73/10000, Loss: 6.3025


Training Progress:   1%|▍                                                         | 75/10000 [07:32<4:54:30,  1.78s/it]

Step 74/10000, Loss: 6.0928
Step 75/10000, Loss: 6.1448


Training Progress:   1%|▍                                                         | 77/10000 [07:32<2:38:37,  1.04it/s]

Step 76/10000, Loss: 6.2887
Step 77/10000, Loss: 6.0532
Step 78/10000, Loss: 5.8385


Training Progress:   1%|▍                                                        | 78/10000 [07:51<17:53:03,  6.49s/it]


Checkpoint saved: checkpoints\best\checkpoint_step78_loss5.8385_20250117_125936.pt

New best loss: 5.8385


Training Progress:   1%|▍                                                         | 80/10000 [07:52<9:06:13,  3.30s/it]

Step 79/10000, Loss: 5.8757
Step 80/10000, Loss: 5.8728


Training Progress:   1%|▍                                                         | 82/10000 [07:52<4:42:32,  1.71s/it]

Step 81/10000, Loss: 6.4183
Step 82/10000, Loss: 6.2878


Training Progress:   1%|▍                                                         | 84/10000 [07:53<2:32:08,  1.09it/s]

Step 83/10000, Loss: 6.1710
Step 84/10000, Loss: 6.1632


Training Progress:   1%|▍                                                         | 86/10000 [07:53<1:29:13,  1.85it/s]

Step 85/10000, Loss: 6.1817
Step 86/10000, Loss: 6.1745


Training Progress:   1%|▌                                                         | 87/10000 [07:53<1:10:23,  2.35it/s]

Step 87/10000, Loss: 5.8900
Step 88/10000, Loss: 5.7734


Training Progress:   1%|▌                                                        | 88/10000 [08:13<17:27:52,  6.34s/it]


Checkpoint saved: checkpoints\best\checkpoint_step88_loss5.7734_20250117_125957.pt

New best loss: 5.7734
Step 89/10000, Loss: 5.7587


Training Progress:   1%|▌                                                        | 89/10000 [08:35<30:13:54, 10.98s/it]


Checkpoint saved: checkpoints\best\checkpoint_step89_loss5.7587_20250117_130018.pt

New best loss: 5.7587
Step 90/10000, Loss: 5.7229


Training Progress:   1%|▌                                                        | 90/10000 [08:55<37:27:15, 13.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step90_loss5.7229_20250117_130039.pt

New best loss: 5.7229


Training Progress:   1%|▌                                                        | 91/10000 [08:55<26:29:34,  9.63s/it]

Step 91/10000, Loss: 5.8106
Step 92/10000, Loss: 5.6987


Training Progress:   1%|▌                                                        | 92/10000 [09:16<35:25:59, 12.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step92_loss5.6987_20250117_130059.pt

New best loss: 5.6987


Training Progress:   1%|▌                                                        | 94/10000 [09:16<17:42:11,  6.43s/it]

Step 93/10000, Loss: 5.9496
Step 94/10000, Loss: 6.0268


Training Progress:   1%|▌                                                         | 96/10000 [09:16<8:54:20,  3.24s/it]

Step 95/10000, Loss: 6.1399
Step 96/10000, Loss: 6.1400


Training Progress:   1%|▌                                                         | 98/10000 [09:17<4:35:53,  1.67s/it]

Step 97/10000, Loss: 5.9340
Step 98/10000, Loss: 5.9571


Training Progress:   1%|▌                                                        | 100/10000 [09:17<2:29:14,  1.11it/s]

Step 99/10000, Loss: 5.7748
Step 100/10000, Loss: 5.7601


Training Progress:   1%|▌                                                        | 102/10000 [09:17<1:27:17,  1.89it/s]

Step 101/10000, Loss: 5.9909
Step 102/10000, Loss: 5.8943


Training Progress:   1%|▌                                                          | 104/10000 [09:18<57:07,  2.89it/s]

Step 103/10000, Loss: 6.1391
Step 104/10000, Loss: 5.9361


Training Progress:   1%|▋                                                          | 106/10000 [09:18<41:57,  3.93it/s]

Step 105/10000, Loss: 6.0661
Step 106/10000, Loss: 6.1967


Training Progress:   1%|▋                                                          | 108/10000 [09:18<35:21,  4.66it/s]

Step 107/10000, Loss: 6.2303
Step 108/10000, Loss: 6.1893


Training Progress:   1%|▋                                                          | 110/10000 [09:19<31:25,  5.24it/s]

Step 109/10000, Loss: 6.0397
Step 110/10000, Loss: 6.1225


Training Progress:   1%|▋                                                          | 112/10000 [09:19<29:47,  5.53it/s]

Step 111/10000, Loss: 6.1152
Step 112/10000, Loss: 5.9798


Training Progress:   1%|▋                                                          | 114/10000 [09:19<28:45,  5.73it/s]

Step 113/10000, Loss: 5.8819
Step 114/10000, Loss: 5.7950


Training Progress:   1%|▋                                                          | 116/10000 [09:20<28:32,  5.77it/s]

Step 115/10000, Loss: 5.8787
Step 116/10000, Loss: 5.9995


Training Progress:   1%|▋                                                          | 118/10000 [09:20<28:10,  5.85it/s]

Step 117/10000, Loss: 6.0415
Step 118/10000, Loss: 6.0035


Training Progress:   1%|▋                                                          | 120/10000 [09:20<28:03,  5.87it/s]

Step 119/10000, Loss: 5.8219
Step 120/10000, Loss: 6.0116


Training Progress:   1%|▋                                                          | 121/10000 [09:21<28:27,  5.78it/s]

Step 121/10000, Loss: 5.8292
Step 122/10000, Loss: 5.6673


Training Progress:   1%|▋                                                       | 122/10000 [09:35<12:27:33,  4.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step122_loss5.6673_20250117_130125.pt

New best loss: 5.6673


Training Progress:   1%|▋                                                        | 124/10000 [09:36<6:30:34,  2.37s/it]

Step 123/10000, Loss: 5.7994
Step 124/10000, Loss: 5.8897


Training Progress:   1%|▋                                                        | 126/10000 [09:36<3:25:32,  1.25s/it]

Step 125/10000, Loss: 5.7151
Step 126/10000, Loss: 5.7503


Training Progress:   1%|▋                                                        | 128/10000 [09:37<1:54:58,  1.43it/s]

Step 127/10000, Loss: 6.0016
Step 128/10000, Loss: 5.8733


Training Progress:   1%|▋                                                        | 130/10000 [09:37<1:10:28,  2.33it/s]

Step 129/10000, Loss: 5.7421
Step 130/10000, Loss: 5.6851


Training Progress:   1%|▊                                                          | 131/10000 [09:37<57:57,  2.84it/s]

Step 131/10000, Loss: 5.7147
Step 132/10000, Loss: 5.6328


Training Progress:   1%|▋                                                       | 132/10000 [09:57<16:56:24,  6.18s/it]


Checkpoint saved: checkpoints\best\checkpoint_step132_loss5.6328_20250117_130141.pt

New best loss: 5.6328


Training Progress:   1%|▊                                                        | 134/10000 [09:58<8:43:02,  3.18s/it]

Step 133/10000, Loss: 5.7816
Step 134/10000, Loss: 5.7075


Training Progress:   1%|▊                                                        | 136/10000 [09:58<4:30:14,  1.64s/it]

Step 135/10000, Loss: 6.1409
Step 136/10000, Loss: 6.0643


Training Progress:   1%|▊                                                        | 138/10000 [09:58<2:26:51,  1.12it/s]

Step 137/10000, Loss: 5.8489
Step 138/10000, Loss: 5.9796


Training Progress:   1%|▊                                                        | 140/10000 [09:59<1:26:13,  1.91it/s]

Step 139/10000, Loss: 6.2521
Step 140/10000, Loss: 6.1312


Training Progress:   1%|▊                                                          | 142/10000 [09:59<56:36,  2.90it/s]

Step 141/10000, Loss: 5.8321
Step 142/10000, Loss: 5.9569


Training Progress:   1%|▊                                                          | 144/10000 [09:59<41:45,  3.93it/s]

Step 143/10000, Loss: 5.8709
Step 144/10000, Loss: 5.8116


Training Progress:   1%|▊                                                          | 146/10000 [10:00<34:43,  4.73it/s]

Step 145/10000, Loss: 5.8483
Step 146/10000, Loss: 5.7225
Step 147/10000, Loss: 5.5580


Training Progress:   1%|▊                                                       | 147/10000 [10:19<16:21:54,  5.98s/it]


Checkpoint saved: checkpoints\best\checkpoint_step147_loss5.5580_20250117_130204.pt

New best loss: 5.5580


Training Progress:   1%|▊                                                        | 149/10000 [10:20<8:21:16,  3.05s/it]

Step 148/10000, Loss: 5.7667
Step 149/10000, Loss: 5.8799


Training Progress:   2%|▊                                                        | 151/10000 [10:20<4:20:24,  1.59s/it]

Step 150/10000, Loss: 5.7918
Step 151/10000, Loss: 5.7668


Training Progress:   2%|▊                                                        | 152/10000 [10:20<3:10:43,  1.16s/it]

Step 152/10000, Loss: 5.5682
Step 153/10000, Loss: 5.4494


Training Progress:   2%|▊                                                       | 153/10000 [10:36<15:29:07,  5.66s/it]


Checkpoint saved: checkpoints\best\checkpoint_step153_loss5.4494_20250117_130224.pt

New best loss: 5.4494


Training Progress:   2%|▉                                                        | 155/10000 [10:37<7:55:02,  2.90s/it]

Step 154/10000, Loss: 5.9492
Step 155/10000, Loss: 5.7447


Training Progress:   2%|▉                                                        | 157/10000 [10:37<4:07:13,  1.51s/it]

Step 156/10000, Loss: 5.5406
Step 157/10000, Loss: 5.5936


Training Progress:   2%|▉                                                        | 159/10000 [10:37<2:15:16,  1.21it/s]

Step 158/10000, Loss: 5.7473
Step 159/10000, Loss: 5.5747
Step 160/10000, Loss: 5.3178


Training Progress:   2%|▉                                                       | 160/10000 [10:58<18:00:00,  6.59s/it]


Checkpoint saved: checkpoints\best\checkpoint_step160_loss5.3178_20250117_130242.pt

New best loss: 5.3178
Step 161/10000, Loss: 5.2957


Training Progress:   2%|▉                                                       | 161/10000 [11:19<29:53:06, 10.93s/it]


Checkpoint saved: checkpoints\best\checkpoint_step161_loss5.2957_20250117_130302.pt

New best loss: 5.2957
Step 162/10000, Loss: 5.2948


Training Progress:   2%|▉                                                       | 162/10000 [11:41<39:32:00, 14.47s/it]


Checkpoint saved: checkpoints\best\checkpoint_step162_loss5.2948_20250117_130323.pt

New best loss: 5.2948


Training Progress:   2%|▉                                                       | 164/10000 [11:42<19:47:37,  7.24s/it]

Step 163/10000, Loss: 5.9815
Step 164/10000, Loss: 5.8365


Training Progress:   2%|▉                                                        | 166/10000 [11:42<9:56:07,  3.64s/it]

Step 165/10000, Loss: 5.8959
Step 166/10000, Loss: 5.8581


Training Progress:   2%|▉                                                        | 168/10000 [11:43<5:06:10,  1.87s/it]

Step 167/10000, Loss: 5.8971
Step 168/10000, Loss: 5.8641


Training Progress:   2%|▉                                                        | 170/10000 [11:43<2:43:51,  1.00s/it]

Step 169/10000, Loss: 5.5826
Step 170/10000, Loss: 5.3698


Training Progress:   2%|▉                                                        | 172/10000 [11:43<1:34:33,  1.73it/s]

Step 171/10000, Loss: 5.3956
Step 172/10000, Loss: 5.3685


Training Progress:   2%|▉                                                        | 174/10000 [11:44<1:01:07,  2.68it/s]

Step 173/10000, Loss: 5.5039
Step 174/10000, Loss: 5.3691


Training Progress:   2%|█                                                          | 176/10000 [11:44<44:13,  3.70it/s]

Step 175/10000, Loss: 5.6029
Step 176/10000, Loss: 5.6653


Training Progress:   2%|█                                                          | 178/10000 [11:44<35:15,  4.64it/s]

Step 177/10000, Loss: 5.7520
Step 178/10000, Loss: 5.7156


Training Progress:   2%|█                                                          | 180/10000 [11:45<31:21,  5.22it/s]

Step 179/10000, Loss: 5.5646
Step 180/10000, Loss: 5.5766


Training Progress:   2%|█                                                          | 182/10000 [11:45<29:55,  5.47it/s]

Step 181/10000, Loss: 5.3562
Step 182/10000, Loss: 5.3644


Training Progress:   2%|█                                                          | 184/10000 [11:45<28:43,  5.70it/s]

Step 183/10000, Loss: 5.6007
Step 184/10000, Loss: 5.4700


Training Progress:   2%|█                                                          | 186/10000 [11:46<28:22,  5.76it/s]

Step 185/10000, Loss: 5.7307
Step 186/10000, Loss: 5.4827


Training Progress:   2%|█                                                          | 188/10000 [11:46<27:46,  5.89it/s]

Step 187/10000, Loss: 5.7088
Step 188/10000, Loss: 5.7991


Training Progress:   2%|█                                                          | 190/10000 [11:46<27:50,  5.87it/s]

Step 189/10000, Loss: 5.8540
Step 190/10000, Loss: 5.8217


Training Progress:   2%|█▏                                                         | 192/10000 [11:47<28:03,  5.83it/s]

Step 191/10000, Loss: 5.6749
Step 192/10000, Loss: 5.7364


Training Progress:   2%|█▏                                                         | 194/10000 [11:47<27:38,  5.91it/s]

Step 193/10000, Loss: 5.7433
Step 194/10000, Loss: 5.6094


Training Progress:   2%|█▏                                                         | 196/10000 [11:47<27:32,  5.93it/s]

Step 195/10000, Loss: 5.4807
Step 196/10000, Loss: 5.4093


Training Progress:   2%|█▏                                                         | 198/10000 [11:48<27:40,  5.90it/s]

Step 197/10000, Loss: 5.4959
Step 198/10000, Loss: 5.6429


Training Progress:   2%|█▏                                                         | 200/10000 [11:48<27:42,  5.90it/s]

Step 199/10000, Loss: 5.6910
Step 200/10000, Loss: 5.6486


Training Progress:   2%|█▏                                                         | 202/10000 [11:48<27:19,  5.97it/s]

Step 201/10000, Loss: 5.4572
Step 202/10000, Loss: 5.6641


Training Progress:   2%|█▏                                                         | 204/10000 [11:49<27:46,  5.88it/s]

Step 203/10000, Loss: 5.4686
Step 204/10000, Loss: 5.2952


Training Progress:   2%|█▏                                                         | 206/10000 [11:49<28:07,  5.80it/s]

Step 205/10000, Loss: 5.4262
Step 206/10000, Loss: 5.5523


Training Progress:   2%|█▏                                                         | 208/10000 [11:49<27:52,  5.85it/s]

Step 207/10000, Loss: 5.3125
Step 208/10000, Loss: 5.3584


Training Progress:   2%|█▏                                                         | 210/10000 [11:50<27:53,  5.85it/s]

Step 209/10000, Loss: 5.6432
Step 210/10000, Loss: 5.5202


Training Progress:   2%|█▏                                                         | 211/10000 [11:50<27:32,  5.92it/s]

Step 211/10000, Loss: 5.3794
Step 212/10000, Loss: 5.2850


Training Progress:   2%|█▏                                                      | 212/10000 [12:05<12:21:20,  4.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step212_loss5.2850_20250117_130354.pt

New best loss: 5.2850


Training Progress:   2%|█▏                                                       | 213/10000 [12:05<9:03:15,  3.33s/it]

Step 213/10000, Loss: 5.3605
Step 214/10000, Loss: 5.2707


Training Progress:   2%|█▏                                                      | 214/10000 [12:26<23:31:04,  8.65s/it]


Checkpoint saved: checkpoints\best\checkpoint_step214_loss5.2707_20250117_130409.pt

New best loss: 5.2707


Training Progress:   2%|█▏                                                      | 216/10000 [12:27<11:50:36,  4.36s/it]

Step 215/10000, Loss: 5.4044
Step 216/10000, Loss: 5.2950


Training Progress:   2%|█▏                                                       | 218/10000 [12:27<6:02:36,  2.22s/it]

Step 217/10000, Loss: 5.7194
Step 218/10000, Loss: 5.6668


Training Progress:   2%|█▎                                                       | 220/10000 [12:27<3:11:38,  1.18s/it]

Step 219/10000, Loss: 5.4794
Step 220/10000, Loss: 5.6340


Training Progress:   2%|█▎                                                       | 222/10000 [12:28<1:48:14,  1.51it/s]

Step 221/10000, Loss: 5.9415
Step 222/10000, Loss: 5.7939


Training Progress:   2%|█▎                                                       | 224/10000 [12:28<1:06:42,  2.44it/s]

Step 223/10000, Loss: 5.4130
Step 224/10000, Loss: 5.5452


Training Progress:   2%|█▎                                                         | 226/10000 [12:28<46:55,  3.47it/s]

Step 225/10000, Loss: 5.5008
Step 226/10000, Loss: 5.4329


Training Progress:   2%|█▎                                                         | 228/10000 [12:29<37:24,  4.35it/s]

Step 227/10000, Loss: 5.4190
Step 228/10000, Loss: 5.2904
Step 229/10000, Loss: 5.1656


Training Progress:   2%|█▎                                                      | 229/10000 [12:48<16:14:11,  5.98s/it]


Checkpoint saved: checkpoints\best\checkpoint_step229_loss5.1656_20250117_130433.pt

New best loss: 5.1656


Training Progress:   2%|█▎                                                       | 231/10000 [12:49<8:18:16,  3.06s/it]

Step 230/10000, Loss: 5.3473
Step 231/10000, Loss: 5.4743


Training Progress:   2%|█▎                                                       | 233/10000 [12:49<4:18:23,  1.59s/it]

Step 232/10000, Loss: 5.3859
Step 233/10000, Loss: 5.3645


Training Progress:   2%|█▎                                                       | 234/10000 [12:49<3:09:35,  1.16s/it]

Step 234/10000, Loss: 5.1701
Step 235/10000, Loss: 5.0791


Training Progress:   2%|█▎                                                      | 235/10000 [13:08<17:48:29,  6.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step235_loss5.0791_20250117_130453.pt

New best loss: 5.0791


Training Progress:   2%|█▎                                                       | 237/10000 [13:09<9:09:51,  3.38s/it]

Step 236/10000, Loss: 5.6532
Step 237/10000, Loss: 5.3617


Training Progress:   2%|█▎                                                       | 239/10000 [13:09<4:43:04,  1.74s/it]

Step 238/10000, Loss: 5.1519
Step 239/10000, Loss: 5.1874


Training Progress:   2%|█▎                                                       | 241/10000 [13:10<2:33:08,  1.06it/s]

Step 240/10000, Loss: 5.3469
Step 241/10000, Loss: 5.2011
Step 242/10000, Loss: 4.9396


Training Progress:   2%|█▎                                                      | 242/10000 [13:30<17:59:42,  6.64s/it]


Checkpoint saved: checkpoints\best\checkpoint_step242_loss4.9396_20250117_130514.pt

New best loss: 4.9396
Step 243/10000, Loss: 4.9231


Training Progress:   2%|█▎                                                      | 243/10000 [13:50<28:46:09, 10.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step243_loss4.9231_20250117_130534.pt

New best loss: 4.9231


Training Progress:   2%|█▎                                                      | 245/10000 [13:50<14:30:54,  5.36s/it]

Step 244/10000, Loss: 4.9312
Step 245/10000, Loss: 5.6406


Training Progress:   2%|█▍                                                       | 247/10000 [13:51<7:21:17,  2.71s/it]

Step 246/10000, Loss: 5.4771
Step 247/10000, Loss: 5.5537


Training Progress:   2%|█▍                                                       | 249/10000 [13:51<3:50:08,  1.42s/it]

Step 248/10000, Loss: 5.5114
Step 249/10000, Loss: 5.5625


Training Progress:   3%|█▍                                                       | 251/10000 [13:51<2:06:47,  1.28it/s]

Step 250/10000, Loss: 5.5125
Step 251/10000, Loss: 5.2694


Training Progress:   3%|█▍                                                       | 253/10000 [13:52<1:16:16,  2.13it/s]

Step 252/10000, Loss: 5.0517
Step 253/10000, Loss: 5.1095


Training Progress:   3%|█▌                                                         | 255/10000 [13:52<51:18,  3.17it/s]

Step 254/10000, Loss: 5.0807
Step 255/10000, Loss: 5.1770


Training Progress:   3%|█▌                                                         | 257/10000 [13:52<39:06,  4.15it/s]

Step 256/10000, Loss: 5.1233
Step 257/10000, Loss: 5.3169


Training Progress:   3%|█▌                                                         | 259/10000 [13:53<32:51,  4.94it/s]

Step 258/10000, Loss: 5.3450
Step 259/10000, Loss: 5.3895


Training Progress:   3%|█▌                                                         | 261/10000 [13:53<30:02,  5.40it/s]

Step 260/10000, Loss: 5.4070
Step 261/10000, Loss: 5.2613


Training Progress:   3%|█▌                                                         | 263/10000 [13:53<28:52,  5.62it/s]

Step 262/10000, Loss: 5.2907
Step 263/10000, Loss: 5.0119


Training Progress:   3%|█▌                                                         | 265/10000 [13:54<28:22,  5.72it/s]

Step 264/10000, Loss: 5.0296
Step 265/10000, Loss: 5.3156


Training Progress:   3%|█▌                                                         | 267/10000 [13:54<28:17,  5.73it/s]

Step 266/10000, Loss: 5.1079
Step 267/10000, Loss: 5.3891


Training Progress:   3%|█▌                                                         | 269/10000 [13:54<27:14,  5.96it/s]

Step 268/10000, Loss: 5.0501
Step 269/10000, Loss: 5.4341


Training Progress:   3%|█▌                                                         | 271/10000 [13:55<27:49,  5.83it/s]

Step 270/10000, Loss: 5.4705
Step 271/10000, Loss: 5.5437


Training Progress:   3%|█▌                                                         | 273/10000 [13:55<27:49,  5.83it/s]

Step 272/10000, Loss: 5.5129
Step 273/10000, Loss: 5.3639


Training Progress:   3%|█▌                                                         | 275/10000 [13:55<27:42,  5.85it/s]

Step 274/10000, Loss: 5.4357
Step 275/10000, Loss: 5.4496


Training Progress:   3%|█▋                                                         | 277/10000 [13:56<27:04,  5.98it/s]

Step 276/10000, Loss: 5.3357
Step 277/10000, Loss: 5.1930


Training Progress:   3%|█▋                                                         | 279/10000 [13:56<27:41,  5.85it/s]

Step 278/10000, Loss: 5.1353
Step 279/10000, Loss: 5.2194


Training Progress:   3%|█▋                                                         | 281/10000 [13:56<27:28,  5.90it/s]

Step 280/10000, Loss: 5.3673
Step 281/10000, Loss: 5.4329


Training Progress:   3%|█▋                                                         | 283/10000 [13:57<27:13,  5.95it/s]

Step 282/10000, Loss: 5.3629
Step 283/10000, Loss: 5.1979


Training Progress:   3%|█▋                                                         | 285/10000 [13:57<27:31,  5.88it/s]

Step 284/10000, Loss: 5.3853
Step 285/10000, Loss: 5.2241


Training Progress:   3%|█▋                                                         | 287/10000 [13:57<27:11,  5.95it/s]

Step 286/10000, Loss: 5.0347
Step 287/10000, Loss: 5.1819


Training Progress:   3%|█▋                                                         | 289/10000 [13:58<27:40,  5.85it/s]

Step 288/10000, Loss: 5.3529
Step 289/10000, Loss: 5.0839


Training Progress:   3%|█▋                                                         | 291/10000 [13:58<27:18,  5.92it/s]

Step 290/10000, Loss: 5.1529
Step 291/10000, Loss: 5.4743


Training Progress:   3%|█▋                                                         | 293/10000 [13:59<27:47,  5.82it/s]

Step 292/10000, Loss: 5.3220
Step 293/10000, Loss: 5.1735


Training Progress:   3%|█▋                                                         | 295/10000 [13:59<27:36,  5.86it/s]

Step 294/10000, Loss: 5.0548
Step 295/10000, Loss: 5.1259


Training Progress:   3%|█▊                                                         | 297/10000 [13:59<27:45,  5.83it/s]

Step 296/10000, Loss: 5.0802
Step 297/10000, Loss: 5.2033


Training Progress:   3%|█▊                                                         | 299/10000 [14:00<27:31,  5.87it/s]

Step 298/10000, Loss: 5.0866
Step 299/10000, Loss: 5.5091


Training Progress:   3%|█▊                                                         | 301/10000 [14:00<27:01,  5.98it/s]

Step 300/10000, Loss: 5.4714
Step 301/10000, Loss: 5.2948


Training Progress:   3%|█▊                                                         | 303/10000 [14:00<27:44,  5.83it/s]

Step 302/10000, Loss: 5.4809
Step 303/10000, Loss: 5.8023


Training Progress:   3%|█▊                                                         | 305/10000 [14:01<27:25,  5.89it/s]

Step 304/10000, Loss: 5.5854
Step 305/10000, Loss: 5.2208


Training Progress:   3%|█▊                                                         | 307/10000 [14:01<27:03,  5.97it/s]

Step 306/10000, Loss: 5.3523
Step 307/10000, Loss: 5.3417


Training Progress:   3%|█▊                                                         | 309/10000 [14:01<27:47,  5.81it/s]

Step 308/10000, Loss: 5.2414
Step 309/10000, Loss: 5.1773


Training Progress:   3%|█▊                                                         | 311/10000 [14:02<27:03,  5.97it/s]

Step 310/10000, Loss: 5.0435
Step 311/10000, Loss: 4.9401


Training Progress:   3%|█▊                                                         | 313/10000 [14:02<27:35,  5.85it/s]

Step 312/10000, Loss: 5.1139
Step 313/10000, Loss: 5.2017


Training Progress:   3%|█▊                                                         | 315/10000 [14:02<27:25,  5.88it/s]

Step 314/10000, Loss: 5.1336
Step 315/10000, Loss: 5.1026
Step 316/10000, Loss: 4.9175


Training Progress:   3%|█▊                                                      | 316/10000 [14:17<11:59:33,  4.46s/it]


Checkpoint saved: checkpoints\best\checkpoint_step316_loss4.9175_20250117_130606.pt

New best loss: 4.9175
Step 317/10000, Loss: 4.8375


Training Progress:   3%|█▊                                                      | 317/10000 [14:38<25:49:58,  9.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step317_loss4.8375_20250117_130621.pt

New best loss: 4.8375


Training Progress:   3%|█▊                                                      | 319/10000 [14:39<13:03:02,  4.85s/it]

Step 318/10000, Loss: 5.4616
Step 319/10000, Loss: 5.1491


Training Progress:   3%|█▊                                                       | 321/10000 [14:39<6:37:18,  2.46s/it]

Step 320/10000, Loss: 4.9224
Step 321/10000, Loss: 4.9707


Training Progress:   3%|█▊                                                       | 323/10000 [14:40<3:28:16,  1.29s/it]

Step 322/10000, Loss: 5.1456
Step 323/10000, Loss: 4.9948
Step 324/10000, Loss: 4.7236


Training Progress:   3%|█▊                                                      | 324/10000 [15:01<19:25:18,  7.23s/it]


Checkpoint saved: checkpoints\best\checkpoint_step324_loss4.7236_20250117_130644.pt

New best loss: 4.7236
Step 325/10000, Loss: 4.7127


Training Progress:   3%|█▊                                                      | 325/10000 [15:22<31:06:16, 11.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step325_loss4.7127_20250117_130705.pt

New best loss: 4.7127


Training Progress:   3%|█▊                                                      | 327/10000 [15:23<15:34:17,  5.80s/it]

Step 326/10000, Loss: 4.7189
Step 327/10000, Loss: 5.4671


Training Progress:   3%|█▉                                                       | 329/10000 [15:23<7:51:57,  2.93s/it]

Step 328/10000, Loss: 5.2922
Step 329/10000, Loss: 5.3637


Training Progress:   3%|█▉                                                       | 331/10000 [15:24<4:05:10,  1.52s/it]

Step 330/10000, Loss: 5.3088
Step 331/10000, Loss: 5.3846


Training Progress:   3%|█▉                                                       | 333/10000 [15:24<2:14:17,  1.20it/s]

Step 332/10000, Loss: 5.3423
Step 333/10000, Loss: 5.1182


Training Progress:   3%|█▉                                                       | 335/10000 [15:24<1:19:32,  2.03it/s]

Step 334/10000, Loss: 4.8687
Step 335/10000, Loss: 4.9401


Training Progress:   3%|█▉                                                         | 337/10000 [15:25<52:39,  3.06it/s]

Step 336/10000, Loss: 4.8807
Step 337/10000, Loss: 4.9863


Training Progress:   3%|██                                                         | 339/10000 [15:25<39:57,  4.03it/s]

Step 338/10000, Loss: 4.9601
Step 339/10000, Loss: 5.1566


Training Progress:   3%|██                                                         | 341/10000 [15:25<33:24,  4.82it/s]

Step 340/10000, Loss: 5.1811
Step 341/10000, Loss: 5.1815


Training Progress:   3%|██                                                         | 343/10000 [15:26<30:01,  5.36it/s]

Step 342/10000, Loss: 5.1764
Step 343/10000, Loss: 5.0879


Training Progress:   3%|██                                                         | 345/10000 [15:26<28:47,  5.59it/s]

Step 344/10000, Loss: 5.1248
Step 345/10000, Loss: 4.7977


Training Progress:   3%|██                                                         | 347/10000 [15:26<28:18,  5.68it/s]

Step 346/10000, Loss: 4.8491
Step 347/10000, Loss: 5.1532


Training Progress:   3%|██                                                         | 349/10000 [15:27<27:57,  5.75it/s]

Step 348/10000, Loss: 4.8889
Step 349/10000, Loss: 5.1829


Training Progress:   4%|██                                                         | 351/10000 [15:27<27:10,  5.92it/s]

Step 350/10000, Loss: 4.7744
Step 351/10000, Loss: 5.2306


Training Progress:   4%|██                                                         | 353/10000 [15:27<27:38,  5.82it/s]

Step 352/10000, Loss: 5.2696
Step 353/10000, Loss: 5.3498


Training Progress:   4%|██                                                         | 355/10000 [15:28<27:27,  5.86it/s]

Step 354/10000, Loss: 5.3151
Step 355/10000, Loss: 5.1920


Training Progress:   4%|██                                                         | 357/10000 [15:28<27:05,  5.93it/s]

Step 356/10000, Loss: 5.2467
Step 357/10000, Loss: 5.2508


Training Progress:   4%|██                                                         | 359/10000 [15:28<27:02,  5.94it/s]

Step 358/10000, Loss: 5.1621
Step 359/10000, Loss: 4.9969


Training Progress:   4%|██▏                                                        | 361/10000 [15:29<27:07,  5.92it/s]

Step 360/10000, Loss: 4.9292
Step 361/10000, Loss: 5.0726


Training Progress:   4%|██▏                                                        | 363/10000 [15:29<27:38,  5.81it/s]

Step 362/10000, Loss: 5.2021
Step 363/10000, Loss: 5.2683


Training Progress:   4%|██▏                                                        | 365/10000 [15:29<26:58,  5.95it/s]

Step 364/10000, Loss: 5.1902
Step 365/10000, Loss: 5.0266


Training Progress:   4%|██▏                                                        | 367/10000 [15:30<27:41,  5.80it/s]

Step 366/10000, Loss: 5.2338
Step 367/10000, Loss: 5.0756


Training Progress:   4%|██▏                                                        | 369/10000 [15:30<27:21,  5.87it/s]

Step 368/10000, Loss: 4.8624
Step 369/10000, Loss: 5.0265


Training Progress:   4%|██▏                                                        | 371/10000 [15:30<27:18,  5.88it/s]

Step 370/10000, Loss: 5.1958
Step 371/10000, Loss: 4.9353


Training Progress:   4%|██▏                                                        | 373/10000 [15:31<27:35,  5.81it/s]

Step 372/10000, Loss: 5.0210
Step 373/10000, Loss: 5.3311


Training Progress:   4%|██▏                                                        | 375/10000 [15:31<27:03,  5.93it/s]

Step 374/10000, Loss: 5.1809
Step 375/10000, Loss: 5.0405


Training Progress:   4%|██▏                                                        | 377/10000 [15:31<27:06,  5.92it/s]

Step 376/10000, Loss: 4.9023
Step 377/10000, Loss: 5.0047


Training Progress:   4%|██▏                                                        | 379/10000 [15:32<27:18,  5.87it/s]

Step 378/10000, Loss: 4.9461
Step 379/10000, Loss: 5.0562


Training Progress:   4%|██▏                                                        | 381/10000 [15:32<27:43,  5.78it/s]

Step 380/10000, Loss: 4.9626
Step 381/10000, Loss: 5.3586


Training Progress:   4%|██▎                                                        | 383/10000 [15:32<27:26,  5.84it/s]

Step 382/10000, Loss: 5.3266
Step 383/10000, Loss: 5.1767


Training Progress:   4%|██▎                                                        | 385/10000 [15:33<26:52,  5.96it/s]

Step 384/10000, Loss: 5.3434
Step 385/10000, Loss: 5.6629


Training Progress:   4%|██▎                                                        | 387/10000 [15:33<27:27,  5.84it/s]

Step 386/10000, Loss: 5.4673
Step 387/10000, Loss: 5.0910


Training Progress:   4%|██▎                                                        | 389/10000 [15:33<27:23,  5.85it/s]

Step 388/10000, Loss: 5.2116
Step 389/10000, Loss: 5.2451


Training Progress:   4%|██▎                                                        | 391/10000 [15:34<27:33,  5.81it/s]

Step 390/10000, Loss: 5.1337
Step 391/10000, Loss: 5.0538


Training Progress:   4%|██▎                                                        | 393/10000 [15:34<27:21,  5.85it/s]

Step 392/10000, Loss: 4.9026
Step 393/10000, Loss: 4.8350


Training Progress:   4%|██▎                                                        | 395/10000 [15:35<27:25,  5.84it/s]

Step 394/10000, Loss: 4.9778
Step 395/10000, Loss: 5.0556


Training Progress:   4%|██▎                                                        | 397/10000 [15:35<27:11,  5.88it/s]

Step 396/10000, Loss: 4.9867
Step 397/10000, Loss: 4.9240


Training Progress:   4%|██▎                                                        | 398/10000 [15:35<27:23,  5.84it/s]

Step 398/10000, Loss: 4.7477
Step 399/10000, Loss: 4.6722


Training Progress:   4%|██▏                                                     | 399/10000 [15:50<12:03:15,  4.52s/it]


Checkpoint saved: checkpoints\best\checkpoint_step399_loss4.6722_20250117_130739.pt

New best loss: 4.6722


Training Progress:   4%|██▎                                                      | 401/10000 [15:50<6:17:37,  2.36s/it]

Step 400/10000, Loss: 5.3098
Step 401/10000, Loss: 4.9679


Training Progress:   4%|██▎                                                      | 403/10000 [15:51<3:18:47,  1.24s/it]

Step 402/10000, Loss: 4.7349
Step 403/10000, Loss: 4.7644


Training Progress:   4%|██▎                                                      | 405/10000 [15:51<1:51:34,  1.43it/s]

Step 404/10000, Loss: 4.9514
Step 405/10000, Loss: 4.8305
Step 406/10000, Loss: 4.5401


Training Progress:   4%|██▎                                                     | 406/10000 [16:11<17:21:17,  6.51s/it]


Checkpoint saved: checkpoints\best\checkpoint_step406_loss4.5401_20250117_130755.pt

New best loss: 4.5401
Step 407/10000, Loss: 4.5140


Training Progress:   4%|██▎                                                     | 407/10000 [16:32<29:01:46, 10.89s/it]


Checkpoint saved: checkpoints\best\checkpoint_step407_loss4.5140_20250117_130815.pt

New best loss: 4.5140


Training Progress:   4%|██▎                                                     | 409/10000 [16:33<14:38:22,  5.49s/it]

Step 408/10000, Loss: 4.5317
Step 409/10000, Loss: 5.2741


Training Progress:   4%|██▎                                                      | 411/10000 [16:33<7:24:35,  2.78s/it]

Step 410/10000, Loss: 5.1156
Step 411/10000, Loss: 5.2182


Training Progress:   4%|██▎                                                      | 413/10000 [16:34<3:51:28,  1.45s/it]

Step 412/10000, Loss: 5.1042
Step 413/10000, Loss: 5.2080


Training Progress:   4%|██▎                                                      | 415/10000 [16:34<2:07:18,  1.25it/s]

Step 414/10000, Loss: 5.1516
Step 415/10000, Loss: 4.9318


Training Progress:   4%|██▍                                                      | 417/10000 [16:34<1:16:17,  2.09it/s]

Step 416/10000, Loss: 4.6931
Step 417/10000, Loss: 4.7670


Training Progress:   4%|██▍                                                        | 419/10000 [16:35<50:47,  3.14it/s]

Step 418/10000, Loss: 4.7157
Step 419/10000, Loss: 4.8315


Training Progress:   4%|██▍                                                        | 421/10000 [16:35<38:58,  4.10it/s]

Step 420/10000, Loss: 4.7916
Step 421/10000, Loss: 5.0231


Training Progress:   4%|██▍                                                        | 423/10000 [16:35<32:47,  4.87it/s]

Step 422/10000, Loss: 5.0392
Step 423/10000, Loss: 5.0057


Training Progress:   4%|██▌                                                        | 425/10000 [16:36<30:18,  5.27it/s]

Step 424/10000, Loss: 5.0137
Step 425/10000, Loss: 4.8986


Training Progress:   4%|██▌                                                        | 427/10000 [16:36<28:01,  5.69it/s]

Step 426/10000, Loss: 4.9428
Step 427/10000, Loss: 4.6179


Training Progress:   4%|██▌                                                        | 429/10000 [16:36<28:03,  5.69it/s]

Step 428/10000, Loss: 4.6693
Step 429/10000, Loss: 4.9946


Training Progress:   4%|██▌                                                        | 431/10000 [16:37<27:26,  5.81it/s]

Step 430/10000, Loss: 4.7047
Step 431/10000, Loss: 4.9909


Training Progress:   4%|██▌                                                        | 433/10000 [16:37<27:09,  5.87it/s]

Step 432/10000, Loss: 4.6306
Step 433/10000, Loss: 5.0744


Training Progress:   4%|██▌                                                        | 435/10000 [16:37<27:25,  5.81it/s]

Step 434/10000, Loss: 5.1351
Step 435/10000, Loss: 5.2165


Training Progress:   4%|██▌                                                        | 437/10000 [16:38<27:06,  5.88it/s]

Step 436/10000, Loss: 5.1688
Step 437/10000, Loss: 5.0635


Training Progress:   4%|██▌                                                        | 439/10000 [16:38<27:12,  5.86it/s]

Step 438/10000, Loss: 5.1187
Step 439/10000, Loss: 5.1103


Training Progress:   4%|██▌                                                        | 441/10000 [16:38<27:03,  5.89it/s]

Step 440/10000, Loss: 5.0274
Step 441/10000, Loss: 4.8785


Training Progress:   4%|██▌                                                        | 443/10000 [16:39<27:04,  5.88it/s]

Step 442/10000, Loss: 4.8072
Step 443/10000, Loss: 4.9268


Training Progress:   4%|██▋                                                        | 445/10000 [16:39<27:26,  5.80it/s]

Step 444/10000, Loss: 5.0939
Step 445/10000, Loss: 5.1544


Training Progress:   4%|██▋                                                        | 447/10000 [16:39<26:50,  5.93it/s]

Step 446/10000, Loss: 5.0703
Step 447/10000, Loss: 4.9246


Training Progress:   4%|██▋                                                        | 449/10000 [16:40<26:48,  5.94it/s]

Step 448/10000, Loss: 5.1335
Step 449/10000, Loss: 4.9798


Training Progress:   5%|██▋                                                        | 451/10000 [16:40<26:41,  5.96it/s]

Step 450/10000, Loss: 4.7369
Step 451/10000, Loss: 4.8912


Training Progress:   5%|██▋                                                        | 453/10000 [16:40<27:32,  5.78it/s]

Step 452/10000, Loss: 5.0878
Step 453/10000, Loss: 4.8272


Training Progress:   5%|██▋                                                        | 455/10000 [16:41<27:19,  5.82it/s]

Step 454/10000, Loss: 4.8721
Step 455/10000, Loss: 5.2131


Training Progress:   5%|██▋                                                        | 457/10000 [16:41<27:04,  5.88it/s]

Step 456/10000, Loss: 5.0413
Step 457/10000, Loss: 4.9045


Training Progress:   5%|██▋                                                        | 459/10000 [16:41<27:00,  5.89it/s]

Step 458/10000, Loss: 4.7761
Step 459/10000, Loss: 4.8834


Training Progress:   5%|██▋                                                        | 461/10000 [16:42<27:13,  5.84it/s]

Step 460/10000, Loss: 4.8059
Step 461/10000, Loss: 4.9409


Training Progress:   5%|██▋                                                        | 463/10000 [16:42<26:57,  5.90it/s]

Step 462/10000, Loss: 4.7952
Step 463/10000, Loss: 5.2157


Training Progress:   5%|██▋                                                        | 465/10000 [16:42<26:42,  5.95it/s]

Step 464/10000, Loss: 5.2135
Step 465/10000, Loss: 5.0601


Training Progress:   5%|██▊                                                        | 467/10000 [16:43<27:09,  5.85it/s]

Step 466/10000, Loss: 5.2306
Step 467/10000, Loss: 5.5466


Training Progress:   5%|██▊                                                        | 469/10000 [16:43<27:18,  5.82it/s]

Step 468/10000, Loss: 5.3020
Step 469/10000, Loss: 4.9860


Training Progress:   5%|██▊                                                        | 471/10000 [16:43<27:23,  5.80it/s]

Step 470/10000, Loss: 5.1076
Step 471/10000, Loss: 5.1504


Training Progress:   5%|██▊                                                        | 473/10000 [16:44<27:17,  5.82it/s]

Step 472/10000, Loss: 5.0146
Step 473/10000, Loss: 4.9414


Training Progress:   5%|██▊                                                        | 475/10000 [16:44<27:04,  5.87it/s]

Step 474/10000, Loss: 4.7864
Step 475/10000, Loss: 4.6830


Training Progress:   5%|██▊                                                        | 477/10000 [16:44<26:38,  5.96it/s]

Step 476/10000, Loss: 4.8718
Step 477/10000, Loss: 4.9404


Training Progress:   5%|██▊                                                        | 479/10000 [16:45<27:03,  5.86it/s]

Step 478/10000, Loss: 4.8508
Step 479/10000, Loss: 4.7854


Training Progress:   5%|██▊                                                        | 481/10000 [16:45<27:09,  5.84it/s]

Step 480/10000, Loss: 4.6545
Step 481/10000, Loss: 4.5546


Training Progress:   5%|██▊                                                        | 483/10000 [16:45<27:09,  5.84it/s]

Step 482/10000, Loss: 5.1874
Step 483/10000, Loss: 4.8672


Training Progress:   5%|██▊                                                        | 485/10000 [16:46<26:47,  5.92it/s]

Step 484/10000, Loss: 4.6606
Step 485/10000, Loss: 4.6617


Training Progress:   5%|██▊                                                        | 487/10000 [16:46<27:15,  5.82it/s]

Step 486/10000, Loss: 4.8083
Step 487/10000, Loss: 4.7249
Step 488/10000, Loss: 4.4388


Training Progress:   5%|██▋                                                     | 488/10000 [17:04<14:07:46,  5.35s/it]


Checkpoint saved: checkpoints\best\checkpoint_step488_loss4.4388_20250117_130850.pt

New best loss: 4.4388
Step 489/10000, Loss: 4.3875


Training Progress:   5%|██▋                                                     | 489/10000 [17:24<25:44:01,  9.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step489_loss4.3875_20250117_130908.pt

New best loss: 4.3875


Training Progress:   5%|██▋                                                     | 491/10000 [17:24<12:56:40,  4.90s/it]

Step 490/10000, Loss: 4.4072
Step 491/10000, Loss: 5.1482


Training Progress:   5%|██▊                                                      | 493/10000 [17:24<6:33:35,  2.48s/it]

Step 492/10000, Loss: 5.0049
Step 493/10000, Loss: 5.0877


Training Progress:   5%|██▊                                                      | 495/10000 [17:25<3:26:45,  1.31s/it]

Step 494/10000, Loss: 4.9954
Step 495/10000, Loss: 5.0243


Training Progress:   5%|██▊                                                      | 497/10000 [17:25<1:54:36,  1.38it/s]

Step 496/10000, Loss: 4.9846
Step 497/10000, Loss: 4.7942


Training Progress:   5%|██▊                                                      | 499/10000 [17:25<1:10:22,  2.25it/s]

Step 498/10000, Loss: 4.4792
Step 499/10000, Loss: 4.6021


Training Progress:   5%|██▉                                                        | 501/10000 [17:26<47:56,  3.30it/s]

Step 500/10000, Loss: 4.5737
Step 501/10000, Loss: 4.7034


Training Progress:   5%|██▉                                                        | 503/10000 [17:26<37:34,  4.21it/s]

Step 502/10000, Loss: 4.6356
Step 503/10000, Loss: 4.8793


Training Progress:   5%|██▉                                                        | 505/10000 [17:26<32:08,  4.92it/s]

Step 504/10000, Loss: 4.8758
Step 505/10000, Loss: 4.8120


Training Progress:   5%|██▉                                                        | 507/10000 [17:27<29:27,  5.37it/s]

Step 506/10000, Loss: 4.8626
Step 507/10000, Loss: 4.7134


Training Progress:   5%|███                                                        | 509/10000 [17:27<28:07,  5.62it/s]

Step 508/10000, Loss: 4.8031
Step 509/10000, Loss: 4.5019


Training Progress:   5%|███                                                        | 511/10000 [17:27<27:28,  5.75it/s]

Step 510/10000, Loss: 4.5534
Step 511/10000, Loss: 4.8849


Training Progress:   5%|███                                                        | 513/10000 [17:28<27:09,  5.82it/s]

Step 512/10000, Loss: 4.5434
Step 513/10000, Loss: 4.8625


Training Progress:   5%|███                                                        | 515/10000 [17:28<27:09,  5.82it/s]

Step 514/10000, Loss: 4.5015
Step 515/10000, Loss: 4.9250


Training Progress:   5%|███                                                        | 517/10000 [17:29<27:02,  5.84it/s]

Step 516/10000, Loss: 5.0220
Step 517/10000, Loss: 5.0804


Training Progress:   5%|███                                                        | 519/10000 [17:29<26:54,  5.87it/s]

Step 518/10000, Loss: 5.0553
Step 519/10000, Loss: 4.9279


Training Progress:   5%|███                                                        | 521/10000 [17:29<26:53,  5.87it/s]

Step 520/10000, Loss: 4.9839
Step 521/10000, Loss: 5.0056


Training Progress:   5%|███                                                        | 523/10000 [17:30<26:51,  5.88it/s]

Step 522/10000, Loss: 4.9621
Step 523/10000, Loss: 4.7553


Training Progress:   5%|███                                                        | 525/10000 [17:30<26:54,  5.87it/s]

Step 524/10000, Loss: 4.6715
Step 525/10000, Loss: 4.8642


Training Progress:   5%|███                                                        | 527/10000 [17:30<26:52,  5.88it/s]

Step 526/10000, Loss: 4.9884
Step 527/10000, Loss: 5.0635


Training Progress:   5%|███                                                        | 529/10000 [17:31<26:49,  5.88it/s]

Step 528/10000, Loss: 4.9814
Step 529/10000, Loss: 4.8145


Training Progress:   5%|███▏                                                       | 531/10000 [17:31<26:47,  5.89it/s]

Step 530/10000, Loss: 5.0067
Step 531/10000, Loss: 4.8477


Training Progress:   5%|███▏                                                       | 533/10000 [17:31<26:53,  5.87it/s]

Step 532/10000, Loss: 4.6140
Step 533/10000, Loss: 4.7664


Training Progress:   5%|███▏                                                       | 535/10000 [17:32<26:33,  5.94it/s]

Step 534/10000, Loss: 4.9737
Step 535/10000, Loss: 4.6852


Training Progress:   5%|███▏                                                       | 537/10000 [17:32<26:52,  5.87it/s]

Step 536/10000, Loss: 4.7335
Step 537/10000, Loss: 5.0938


Training Progress:   5%|███▏                                                       | 539/10000 [17:32<26:50,  5.88it/s]

Step 538/10000, Loss: 4.9275
Step 539/10000, Loss: 4.7882


Training Progress:   5%|███▏                                                       | 541/10000 [17:33<26:58,  5.85it/s]

Step 540/10000, Loss: 4.6301
Step 541/10000, Loss: 4.7489


Training Progress:   5%|███▏                                                       | 543/10000 [17:33<26:51,  5.87it/s]

Step 542/10000, Loss: 4.6684
Step 543/10000, Loss: 4.7986


Training Progress:   5%|███▏                                                       | 545/10000 [17:33<26:59,  5.84it/s]

Step 544/10000, Loss: 4.6626
Step 545/10000, Loss: 5.0671


Training Progress:   5%|███▏                                                       | 547/10000 [17:34<26:51,  5.86it/s]

Step 546/10000, Loss: 5.0511
Step 547/10000, Loss: 4.9259


Training Progress:   5%|███▏                                                       | 549/10000 [17:34<26:52,  5.86it/s]

Step 548/10000, Loss: 5.0977
Step 549/10000, Loss: 5.3975


Training Progress:   6%|███▎                                                       | 551/10000 [17:34<26:58,  5.84it/s]

Step 550/10000, Loss: 5.1594
Step 551/10000, Loss: 4.8556


Training Progress:   6%|███▎                                                       | 553/10000 [17:35<26:52,  5.86it/s]

Step 552/10000, Loss: 4.9455
Step 553/10000, Loss: 4.9830


Training Progress:   6%|███▎                                                       | 555/10000 [17:35<26:49,  5.87it/s]

Step 554/10000, Loss: 4.8966
Step 555/10000, Loss: 4.8512


Training Progress:   6%|███▎                                                       | 557/10000 [17:35<26:46,  5.88it/s]

Step 556/10000, Loss: 4.6883
Step 557/10000, Loss: 4.5889


Training Progress:   6%|███▎                                                       | 559/10000 [17:36<26:43,  5.89it/s]

Step 558/10000, Loss: 4.7387
Step 559/10000, Loss: 4.8631


Training Progress:   6%|███▎                                                       | 561/10000 [17:36<26:44,  5.88it/s]

Step 560/10000, Loss: 4.7555
Step 561/10000, Loss: 4.6715


Training Progress:   6%|███▎                                                       | 563/10000 [17:36<26:47,  5.87it/s]

Step 562/10000, Loss: 4.5331
Step 563/10000, Loss: 4.4733


Training Progress:   6%|███▎                                                       | 565/10000 [17:37<26:43,  5.88it/s]

Step 564/10000, Loss: 5.0809
Step 565/10000, Loss: 4.7269


Training Progress:   6%|███▎                                                       | 567/10000 [17:37<26:46,  5.87it/s]

Step 566/10000, Loss: 4.4984
Step 567/10000, Loss: 4.5503


Training Progress:   6%|███▎                                                       | 569/10000 [17:37<26:45,  5.87it/s]

Step 568/10000, Loss: 4.7327
Step 569/10000, Loss: 4.6017
Step 570/10000, Loss: 4.3182


Training Progress:   6%|███▏                                                    | 570/10000 [17:53<12:56:00,  4.94s/it]


Checkpoint saved: checkpoints\best\checkpoint_step570_loss4.3182_20250117_130941.pt

New best loss: 4.3182
Step 571/10000, Loss: 4.2973


Training Progress:   6%|███▏                                                    | 571/10000 [18:14<25:26:09,  9.71s/it]


Checkpoint saved: checkpoints\best\checkpoint_step571_loss4.2973_20250117_130958.pt

New best loss: 4.2973


Training Progress:   6%|███▏                                                    | 573/10000 [18:15<12:50:19,  4.90s/it]

Step 572/10000, Loss: 4.3075
Step 573/10000, Loss: 5.0244


Training Progress:   6%|███▎                                                     | 575/10000 [18:15<6:31:09,  2.49s/it]

Step 574/10000, Loss: 4.8776
Step 575/10000, Loss: 4.9236


Training Progress:   6%|███▎                                                     | 577/10000 [18:16<3:25:04,  1.31s/it]

Step 576/10000, Loss: 4.8178
Step 577/10000, Loss: 4.8693


Training Progress:   6%|███▎                                                     | 579/10000 [18:16<1:54:08,  1.38it/s]

Step 578/10000, Loss: 4.8323
Step 579/10000, Loss: 4.6291


Training Progress:   6%|███▎                                                     | 581/10000 [18:16<1:09:25,  2.26it/s]

Step 580/10000, Loss: 4.3185
Step 581/10000, Loss: 4.4453


Training Progress:   6%|███▍                                                       | 583/10000 [18:17<47:53,  3.28it/s]

Step 582/10000, Loss: 4.4064
Step 583/10000, Loss: 4.5352


Training Progress:   6%|███▍                                                       | 585/10000 [18:17<37:16,  4.21it/s]

Step 584/10000, Loss: 4.5278
Step 585/10000, Loss: 4.7568


Training Progress:   6%|███▍                                                       | 587/10000 [18:17<32:02,  4.90it/s]

Step 586/10000, Loss: 4.7237
Step 587/10000, Loss: 4.6430


Training Progress:   6%|███▍                                                       | 589/10000 [18:18<29:53,  5.25it/s]

Step 588/10000, Loss: 4.6894
Step 589/10000, Loss: 4.6074


Training Progress:   6%|███▍                                                       | 591/10000 [18:18<28:07,  5.58it/s]

Step 590/10000, Loss: 4.6844
Step 591/10000, Loss: 4.3729


Training Progress:   6%|███▍                                                       | 593/10000 [18:18<27:33,  5.69it/s]

Step 592/10000, Loss: 4.4266
Step 593/10000, Loss: 4.7326


Training Progress:   6%|███▌                                                       | 595/10000 [18:19<27:17,  5.74it/s]

Step 594/10000, Loss: 4.3846
Step 595/10000, Loss: 4.6803


Training Progress:   6%|███▌                                                       | 597/10000 [18:19<26:24,  5.93it/s]

Step 596/10000, Loss: 4.3171
Step 597/10000, Loss: 4.8049


Training Progress:   6%|███▌                                                       | 599/10000 [18:19<26:35,  5.89it/s]

Step 598/10000, Loss: 4.8695
Step 599/10000, Loss: 4.9320


Training Progress:   6%|███▌                                                       | 601/10000 [18:20<27:05,  5.78it/s]

Step 600/10000, Loss: 4.9058
Step 601/10000, Loss: 4.7915


Training Progress:   6%|███▌                                                       | 603/10000 [18:20<26:49,  5.84it/s]

Step 602/10000, Loss: 4.8765
Step 603/10000, Loss: 4.8796


Training Progress:   6%|███▌                                                       | 605/10000 [18:20<26:55,  5.82it/s]

Step 604/10000, Loss: 4.7926
Step 605/10000, Loss: 4.6438


Training Progress:   6%|███▌                                                       | 607/10000 [18:21<26:30,  5.91it/s]

Step 606/10000, Loss: 4.5223
Step 607/10000, Loss: 4.6991


Training Progress:   6%|███▌                                                       | 609/10000 [18:21<26:40,  5.87it/s]

Step 608/10000, Loss: 4.9026
Step 609/10000, Loss: 4.9568


Training Progress:   6%|███▌                                                       | 611/10000 [18:21<26:39,  5.87it/s]

Step 610/10000, Loss: 4.8335
Step 611/10000, Loss: 4.6850


Training Progress:   6%|███▌                                                       | 613/10000 [18:22<26:49,  5.83it/s]

Step 612/10000, Loss: 4.8935
Step 613/10000, Loss: 4.7347


Training Progress:   6%|███▋                                                       | 615/10000 [18:22<26:27,  5.91it/s]

Step 614/10000, Loss: 4.4630
Step 615/10000, Loss: 4.6384


Training Progress:   6%|███▋                                                       | 617/10000 [18:22<26:26,  5.91it/s]

Step 616/10000, Loss: 4.8468
Step 617/10000, Loss: 4.5491


Training Progress:   6%|███▋                                                       | 619/10000 [18:23<26:44,  5.85it/s]

Step 618/10000, Loss: 4.5886
Step 619/10000, Loss: 4.9376


Training Progress:   6%|███▋                                                       | 621/10000 [18:23<26:10,  5.97it/s]

Step 620/10000, Loss: 4.7851
Step 621/10000, Loss: 4.6818


Training Progress:   6%|███▋                                                       | 623/10000 [18:23<26:48,  5.83it/s]

Step 622/10000, Loss: 4.5105
Step 623/10000, Loss: 4.6237


Training Progress:   6%|███▋                                                       | 625/10000 [18:24<26:22,  5.92it/s]

Step 624/10000, Loss: 4.5529
Step 625/10000, Loss: 4.6742


Training Progress:   6%|███▋                                                       | 627/10000 [18:24<26:49,  5.82it/s]

Step 626/10000, Loss: 4.5079
Step 627/10000, Loss: 4.9123


Training Progress:   6%|███▋                                                       | 629/10000 [18:24<26:46,  5.83it/s]

Step 628/10000, Loss: 4.9179
Step 629/10000, Loss: 4.7614


Training Progress:   6%|███▋                                                       | 631/10000 [18:25<26:23,  5.92it/s]

Step 630/10000, Loss: 4.9579
Step 631/10000, Loss: 5.2690


Training Progress:   6%|███▋                                                       | 633/10000 [18:25<26:56,  5.80it/s]

Step 632/10000, Loss: 5.0406
Step 633/10000, Loss: 4.7363


Training Progress:   6%|███▋                                                       | 635/10000 [18:25<26:35,  5.87it/s]

Step 634/10000, Loss: 4.8025
Step 635/10000, Loss: 4.8793


Training Progress:   6%|███▊                                                       | 637/10000 [18:26<26:47,  5.82it/s]

Step 636/10000, Loss: 4.7711
Step 637/10000, Loss: 4.7191


Training Progress:   6%|███▊                                                       | 639/10000 [18:26<26:30,  5.89it/s]

Step 638/10000, Loss: 4.5119
Step 639/10000, Loss: 4.4856


Training Progress:   6%|███▊                                                       | 641/10000 [18:26<26:31,  5.88it/s]

Step 640/10000, Loss: 4.6231
Step 641/10000, Loss: 4.6836


Training Progress:   6%|███▊                                                       | 643/10000 [18:27<26:57,  5.78it/s]

Step 642/10000, Loss: 4.6415
Step 643/10000, Loss: 4.6005


Training Progress:   6%|███▊                                                       | 645/10000 [18:27<26:08,  5.96it/s]

Step 644/10000, Loss: 4.4218
Step 645/10000, Loss: 4.3286


Training Progress:   6%|███▊                                                       | 647/10000 [18:27<26:06,  5.97it/s]

Step 646/10000, Loss: 4.9599
Step 647/10000, Loss: 4.6950


Training Progress:   6%|███▊                                                       | 649/10000 [18:28<26:37,  5.85it/s]

Step 648/10000, Loss: 4.4534
Step 649/10000, Loss: 4.4041


Training Progress:   7%|███▊                                                       | 651/10000 [18:28<26:46,  5.82it/s]

Step 650/10000, Loss: 4.5930
Step 651/10000, Loss: 4.5255
Step 652/10000, Loss: 4.2299


Training Progress:   7%|███▋                                                    | 652/10000 [18:43<11:34:33,  4.46s/it]


Checkpoint saved: checkpoints\best\checkpoint_step652_loss4.2299_20250117_131032.pt

New best loss: 4.2299
Step 653/10000, Loss: 4.2043


Training Progress:   7%|███▋                                                    | 653/10000 [19:04<24:35:54,  9.47s/it]


Checkpoint saved: checkpoints\best\checkpoint_step653_loss4.2043_20250117_131047.pt

New best loss: 4.2043
Step 654/10000, Loss: 4.1874


Training Progress:   7%|███▋                                                    | 654/10000 [19:24<33:18:25, 12.83s/it]


Checkpoint saved: checkpoints\best\checkpoint_step654_loss4.1874_20250117_131108.pt

New best loss: 4.1874


Training Progress:   7%|███▋                                                    | 656/10000 [19:25<16:40:52,  6.43s/it]

Step 655/10000, Loss: 4.8945
Step 656/10000, Loss: 4.7761


Training Progress:   7%|███▊                                                     | 658/10000 [19:25<8:23:17,  3.23s/it]

Step 657/10000, Loss: 4.8699
Step 658/10000, Loss: 4.7659


Training Progress:   7%|███▊                                                     | 660/10000 [19:26<4:20:05,  1.67s/it]

Step 659/10000, Loss: 4.7840
Step 660/10000, Loss: 4.7616


Training Progress:   7%|███▊                                                     | 662/10000 [19:26<2:21:15,  1.10it/s]

Step 661/10000, Loss: 4.5668
Step 662/10000, Loss: 4.2422


Training Progress:   7%|███▊                                                     | 664/10000 [19:26<1:22:42,  1.88it/s]

Step 663/10000, Loss: 4.3611
Step 664/10000, Loss: 4.3372


Training Progress:   7%|███▉                                                       | 666/10000 [19:27<54:10,  2.87it/s]

Step 665/10000, Loss: 4.4455
Step 666/10000, Loss: 4.4349


Training Progress:   7%|███▉                                                       | 668/10000 [19:27<39:51,  3.90it/s]

Step 667/10000, Loss: 4.6702
Step 668/10000, Loss: 4.6826


Training Progress:   7%|███▉                                                       | 670/10000 [19:27<33:37,  4.62it/s]

Step 669/10000, Loss: 4.5993
Step 670/10000, Loss: 4.6368


Training Progress:   7%|███▉                                                       | 672/10000 [19:28<29:28,  5.27it/s]

Step 671/10000, Loss: 4.5210
Step 672/10000, Loss: 4.6489


Training Progress:   7%|███▉                                                       | 674/10000 [19:28<28:17,  5.49it/s]

Step 673/10000, Loss: 4.3148
Step 674/10000, Loss: 4.3415


Training Progress:   7%|███▉                                                       | 676/10000 [19:28<27:02,  5.75it/s]

Step 675/10000, Loss: 4.6791
Step 676/10000, Loss: 4.3516


Training Progress:   7%|████                                                       | 678/10000 [19:29<27:03,  5.74it/s]

Step 677/10000, Loss: 4.6312
Step 678/10000, Loss: 4.2292


Training Progress:   7%|████                                                       | 680/10000 [19:29<26:59,  5.75it/s]

Step 679/10000, Loss: 4.7228
Step 680/10000, Loss: 4.7798


Training Progress:   7%|████                                                       | 682/10000 [19:30<26:40,  5.82it/s]

Step 681/10000, Loss: 4.8462
Step 682/10000, Loss: 4.8018


Training Progress:   7%|████                                                       | 684/10000 [19:30<26:51,  5.78it/s]

Step 683/10000, Loss: 4.6616
Step 684/10000, Loss: 4.7626


Training Progress:   7%|████                                                       | 686/10000 [19:30<26:24,  5.88it/s]

Step 685/10000, Loss: 4.7725
Step 686/10000, Loss: 4.6571


Training Progress:   7%|████                                                       | 688/10000 [19:31<26:51,  5.78it/s]

Step 687/10000, Loss: 4.4981
Step 688/10000, Loss: 4.4209


Training Progress:   7%|████                                                       | 690/10000 [19:31<26:25,  5.87it/s]

Step 689/10000, Loss: 4.6069
Step 690/10000, Loss: 4.7695


Training Progress:   7%|████                                                       | 692/10000 [19:31<26:47,  5.79it/s]

Step 691/10000, Loss: 4.8314
Step 692/10000, Loss: 4.7567


Training Progress:   7%|████                                                       | 694/10000 [19:32<26:11,  5.92it/s]

Step 693/10000, Loss: 4.6291
Step 694/10000, Loss: 4.7553


Training Progress:   7%|████                                                       | 696/10000 [19:32<26:37,  5.82it/s]

Step 695/10000, Loss: 4.6263
Step 696/10000, Loss: 4.3844


Training Progress:   7%|████                                                       | 698/10000 [19:32<26:50,  5.78it/s]

Step 697/10000, Loss: 4.5496
Step 698/10000, Loss: 4.7285


Training Progress:   7%|████▏                                                      | 700/10000 [19:33<26:32,  5.84it/s]

Step 699/10000, Loss: 4.4464
Step 700/10000, Loss: 4.5231


Training Progress:   7%|████▏                                                      | 702/10000 [19:33<26:45,  5.79it/s]

Step 701/10000, Loss: 4.8649
Step 702/10000, Loss: 4.6784


Training Progress:   7%|████▏                                                      | 704/10000 [19:33<26:27,  5.85it/s]

Step 703/10000, Loss: 4.5903
Step 704/10000, Loss: 4.4579


Training Progress:   7%|████▏                                                      | 706/10000 [19:34<26:43,  5.79it/s]

Step 705/10000, Loss: 4.5281
Step 706/10000, Loss: 4.4364


Training Progress:   7%|████▏                                                      | 708/10000 [19:34<26:23,  5.87it/s]

Step 707/10000, Loss: 4.5849
Step 708/10000, Loss: 4.4411


Training Progress:   7%|████▏                                                      | 710/10000 [19:34<26:34,  5.83it/s]

Step 709/10000, Loss: 4.8377
Step 710/10000, Loss: 4.8364


Training Progress:   7%|████▏                                                      | 712/10000 [19:35<26:17,  5.89it/s]

Step 711/10000, Loss: 4.6549
Step 712/10000, Loss: 4.8672


Training Progress:   7%|████▏                                                      | 714/10000 [19:35<26:36,  5.82it/s]

Step 713/10000, Loss: 5.1963
Step 714/10000, Loss: 4.9350


Training Progress:   7%|████▏                                                      | 716/10000 [19:35<26:41,  5.80it/s]

Step 715/10000, Loss: 4.6170
Step 716/10000, Loss: 4.6678


Training Progress:   7%|████▏                                                      | 718/10000 [19:36<26:41,  5.80it/s]

Step 717/10000, Loss: 4.7858
Step 718/10000, Loss: 4.6713


Training Progress:   7%|████▏                                                      | 720/10000 [19:36<26:45,  5.78it/s]

Step 719/10000, Loss: 4.5382
Step 720/10000, Loss: 4.3481


Training Progress:   7%|████▎                                                      | 722/10000 [19:36<26:47,  5.77it/s]

Step 721/10000, Loss: 4.3509
Step 722/10000, Loss: 4.5095


Training Progress:   7%|████▎                                                      | 724/10000 [19:37<26:34,  5.82it/s]

Step 723/10000, Loss: 4.5041
Step 724/10000, Loss: 4.4461


Training Progress:   7%|████▎                                                      | 726/10000 [19:37<26:05,  5.92it/s]

Step 725/10000, Loss: 4.4236
Step 726/10000, Loss: 4.2592


Training Progress:   7%|████▎                                                      | 728/10000 [19:37<26:00,  5.94it/s]

Step 727/10000, Loss: 4.1941
Step 728/10000, Loss: 4.8455


Training Progress:   7%|████▎                                                      | 730/10000 [19:38<26:17,  5.88it/s]

Step 729/10000, Loss: 4.5530
Step 730/10000, Loss: 4.3072


Training Progress:   7%|████▎                                                      | 732/10000 [19:38<26:38,  5.80it/s]

Step 731/10000, Loss: 4.2779
Step 732/10000, Loss: 4.4476


Training Progress:   7%|████▎                                                      | 733/10000 [19:38<26:21,  5.86it/s]

Step 733/10000, Loss: 4.3628
Step 734/10000, Loss: 4.0701


Training Progress:   7%|████                                                    | 734/10000 [19:54<12:06:12,  4.70s/it]


Checkpoint saved: checkpoints\best\checkpoint_step734_loss4.0701_20250117_131142.pt

New best loss: 4.0701
Step 735/10000, Loss: 4.0364


Training Progress:   7%|████                                                    | 735/10000 [20:15<24:43:55,  9.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step735_loss4.0364_20250117_131158.pt

New best loss: 4.0364


Training Progress:   7%|████▏                                                   | 737/10000 [20:15<12:30:03,  4.86s/it]

Step 736/10000, Loss: 4.0863
Step 737/10000, Loss: 4.8042


Training Progress:   7%|████▏                                                    | 739/10000 [20:16<6:20:55,  2.47s/it]

Step 738/10000, Loss: 4.6744
Step 739/10000, Loss: 4.7448


Training Progress:   7%|████▏                                                    | 741/10000 [20:16<3:19:57,  1.30s/it]

Step 740/10000, Loss: 4.6271
Step 741/10000, Loss: 4.6937


Training Progress:   7%|████▏                                                    | 743/10000 [20:16<1:51:22,  1.39it/s]

Step 742/10000, Loss: 4.6706
Step 743/10000, Loss: 4.4447


Training Progress:   7%|████▏                                                    | 745/10000 [20:17<1:07:37,  2.28it/s]

Step 744/10000, Loss: 4.1086
Step 745/10000, Loss: 4.2453


Training Progress:   7%|████▍                                                      | 747/10000 [20:17<46:42,  3.30it/s]

Step 746/10000, Loss: 4.2181
Step 747/10000, Loss: 4.3544


Training Progress:   7%|████▍                                                      | 749/10000 [20:17<36:09,  4.26it/s]

Step 748/10000, Loss: 4.3798
Step 749/10000, Loss: 4.5856


Training Progress:   8%|████▍                                                      | 751/10000 [20:18<31:10,  4.94it/s]

Step 750/10000, Loss: 4.5685
Step 751/10000, Loss: 4.4892


Training Progress:   8%|████▍                                                      | 753/10000 [20:18<28:42,  5.37it/s]

Step 752/10000, Loss: 4.5446
Step 753/10000, Loss: 4.4323


Training Progress:   8%|████▍                                                      | 755/10000 [20:18<27:27,  5.61it/s]

Step 754/10000, Loss: 4.5166
Step 755/10000, Loss: 4.1821


Training Progress:   8%|████▍                                                      | 757/10000 [20:19<26:50,  5.74it/s]

Step 756/10000, Loss: 4.2784
Step 757/10000, Loss: 4.6058


Training Progress:   8%|████▍                                                      | 759/10000 [20:19<26:27,  5.82it/s]

Step 758/10000, Loss: 4.2066
Step 759/10000, Loss: 4.4657


Training Progress:   8%|████▍                                                      | 761/10000 [20:19<26:19,  5.85it/s]

Step 760/10000, Loss: 4.0627
Step 761/10000, Loss: 4.6319


Training Progress:   8%|████▌                                                      | 763/10000 [20:20<26:15,  5.86it/s]

Step 762/10000, Loss: 4.6849
Step 763/10000, Loss: 4.7080


Training Progress:   8%|████▌                                                      | 765/10000 [20:20<25:58,  5.92it/s]

Step 764/10000, Loss: 4.6664
Step 765/10000, Loss: 4.5366


Training Progress:   8%|████▌                                                      | 767/10000 [20:20<26:22,  5.83it/s]

Step 766/10000, Loss: 4.6584
Step 767/10000, Loss: 4.6261


Training Progress:   8%|████▌                                                      | 769/10000 [20:21<26:20,  5.84it/s]

Step 768/10000, Loss: 4.5317
Step 769/10000, Loss: 4.3997


Training Progress:   8%|████▌                                                      | 771/10000 [20:21<25:35,  6.01it/s]

Step 770/10000, Loss: 4.3108
Step 771/10000, Loss: 4.4809


Training Progress:   8%|████▌                                                      | 773/10000 [20:21<25:54,  5.94it/s]

Step 772/10000, Loss: 4.6509
Step 773/10000, Loss: 4.6690


Training Progress:   8%|████▌                                                      | 775/10000 [20:22<26:14,  5.86it/s]

Step 774/10000, Loss: 4.6191
Step 775/10000, Loss: 4.4626


Training Progress:   8%|████▌                                                      | 777/10000 [20:22<26:24,  5.82it/s]

Step 776/10000, Loss: 4.6352
Step 777/10000, Loss: 4.5017


Training Progress:   8%|████▌                                                      | 779/10000 [20:22<26:31,  5.80it/s]

Step 778/10000, Loss: 4.2611
Step 779/10000, Loss: 4.3995


Training Progress:   8%|████▌                                                      | 781/10000 [20:23<26:22,  5.83it/s]

Step 780/10000, Loss: 4.6082
Step 781/10000, Loss: 4.3453


Training Progress:   8%|████▌                                                      | 783/10000 [20:23<26:05,  5.89it/s]

Step 782/10000, Loss: 4.4028
Step 783/10000, Loss: 4.7398


Training Progress:   8%|████▋                                                      | 785/10000 [20:23<26:08,  5.87it/s]

Step 784/10000, Loss: 4.5690
Step 785/10000, Loss: 4.4643


Training Progress:   8%|████▋                                                      | 787/10000 [20:24<25:58,  5.91it/s]

Step 786/10000, Loss: 4.2719
Step 787/10000, Loss: 4.3957


Training Progress:   8%|████▋                                                      | 789/10000 [20:24<26:04,  5.89it/s]

Step 788/10000, Loss: 4.3309
Step 789/10000, Loss: 4.4664


Training Progress:   8%|████▋                                                      | 791/10000 [20:24<26:08,  5.87it/s]

Step 790/10000, Loss: 4.2982
Step 791/10000, Loss: 4.6837


Training Progress:   8%|████▋                                                      | 793/10000 [20:25<26:19,  5.83it/s]

Step 792/10000, Loss: 4.6903
Step 793/10000, Loss: 4.5789


Training Progress:   8%|████▋                                                      | 795/10000 [20:25<25:52,  5.93it/s]

Step 794/10000, Loss: 4.7903
Step 795/10000, Loss: 5.0753


Training Progress:   8%|████▋                                                      | 797/10000 [20:25<26:00,  5.90it/s]

Step 796/10000, Loss: 4.8453
Step 797/10000, Loss: 4.5973


Training Progress:   8%|████▋                                                      | 799/10000 [20:26<26:10,  5.86it/s]

Step 798/10000, Loss: 4.6283
Step 799/10000, Loss: 4.7349


Training Progress:   8%|████▋                                                      | 801/10000 [20:26<26:23,  5.81it/s]

Step 800/10000, Loss: 4.5753
Step 801/10000, Loss: 4.4280


Training Progress:   8%|████▋                                                      | 803/10000 [20:26<26:15,  5.84it/s]

Step 802/10000, Loss: 4.3283
Step 803/10000, Loss: 4.3207


Training Progress:   8%|████▋                                                      | 805/10000 [20:27<25:38,  5.98it/s]

Step 804/10000, Loss: 4.4606
Step 805/10000, Loss: 4.3795


Training Progress:   8%|████▊                                                      | 807/10000 [20:27<26:20,  5.82it/s]

Step 806/10000, Loss: 4.3568
Step 807/10000, Loss: 4.3385


Training Progress:   8%|████▊                                                      | 809/10000 [20:27<26:07,  5.86it/s]

Step 808/10000, Loss: 4.1907
Step 809/10000, Loss: 4.1574


Training Progress:   8%|████▊                                                      | 811/10000 [20:28<26:20,  5.81it/s]

Step 810/10000, Loss: 4.8153
Step 811/10000, Loss: 4.4968


Training Progress:   8%|████▊                                                      | 813/10000 [20:28<26:03,  5.88it/s]

Step 812/10000, Loss: 4.2279
Step 813/10000, Loss: 4.2223


Training Progress:   8%|████▊                                                      | 815/10000 [20:28<25:42,  5.96it/s]

Step 814/10000, Loss: 4.4226
Step 815/10000, Loss: 4.2920
Step 816/10000, Loss: 4.0103


Training Progress:   8%|████▌                                                   | 816/10000 [20:44<12:22:46,  4.85s/it]


Checkpoint saved: checkpoints\best\checkpoint_step816_loss4.0103_20250117_131233.pt

New best loss: 4.0103
Step 817/10000, Loss: 3.9944


Training Progress:   8%|████▌                                                   | 817/10000 [21:04<24:03:01,  9.43s/it]


Checkpoint saved: checkpoints\best\checkpoint_step817_loss3.9944_20250117_131249.pt

New best loss: 3.9944


Training Progress:   8%|████▌                                                   | 819/10000 [21:05<12:10:13,  4.77s/it]

Step 818/10000, Loss: 4.0209
Step 819/10000, Loss: 4.6873


Training Progress:   8%|████▋                                                    | 821/10000 [21:05<6:11:22,  2.43s/it]

Step 820/10000, Loss: 4.5542
Step 821/10000, Loss: 4.6470


Training Progress:   8%|████▋                                                    | 823/10000 [21:06<3:15:16,  1.28s/it]

Step 822/10000, Loss: 4.5032
Step 823/10000, Loss: 4.5417


Training Progress:   8%|████▋                                                    | 825/10000 [21:06<1:48:57,  1.40it/s]

Step 824/10000, Loss: 4.4994
Step 825/10000, Loss: 4.3302


Training Progress:   8%|████▋                                                    | 827/10000 [21:06<1:06:23,  2.30it/s]

Step 826/10000, Loss: 4.0667
Step 827/10000, Loss: 4.2353


Training Progress:   8%|████▉                                                      | 829/10000 [21:07<45:53,  3.33it/s]

Step 828/10000, Loss: 4.1720
Step 829/10000, Loss: 4.2352


Training Progress:   8%|████▉                                                      | 831/10000 [21:07<35:25,  4.31it/s]

Step 830/10000, Loss: 4.2647
Step 831/10000, Loss: 4.5447


Training Progress:   8%|████▉                                                      | 833/10000 [21:07<31:06,  4.91it/s]

Step 832/10000, Loss: 4.5806
Step 833/10000, Loss: 4.4447


Training Progress:   8%|████▉                                                      | 835/10000 [21:08<28:12,  5.41it/s]

Step 834/10000, Loss: 4.4306
Step 835/10000, Loss: 4.3300


Training Progress:   8%|████▉                                                      | 837/10000 [21:08<27:21,  5.58it/s]

Step 836/10000, Loss: 4.4756
Step 837/10000, Loss: 4.1456


Training Progress:   8%|████▉                                                      | 839/10000 [21:08<26:34,  5.74it/s]

Step 838/10000, Loss: 4.2296
Step 839/10000, Loss: 4.5399


Training Progress:   8%|████▉                                                      | 841/10000 [21:09<26:28,  5.76it/s]

Step 840/10000, Loss: 4.1542
Step 841/10000, Loss: 4.3899


Training Progress:   8%|████▉                                                      | 843/10000 [21:09<26:01,  5.86it/s]

Step 842/10000, Loss: 4.0079
Step 843/10000, Loss: 4.5434


Training Progress:   8%|████▉                                                      | 845/10000 [21:09<26:03,  5.86it/s]

Step 844/10000, Loss: 4.6100
Step 845/10000, Loss: 4.6355


Training Progress:   8%|████▉                                                      | 847/10000 [21:10<26:13,  5.82it/s]

Step 846/10000, Loss: 4.5741
Step 847/10000, Loss: 4.4201


Training Progress:   8%|█████                                                      | 849/10000 [21:10<25:53,  5.89it/s]

Step 848/10000, Loss: 4.5010
Step 849/10000, Loss: 4.5351


Training Progress:   9%|█████                                                      | 851/10000 [21:10<25:38,  5.95it/s]

Step 850/10000, Loss: 4.4594
Step 851/10000, Loss: 4.2851


Training Progress:   9%|█████                                                      | 853/10000 [21:11<26:03,  5.85it/s]

Step 852/10000, Loss: 4.1725
Step 853/10000, Loss: 4.3523


Training Progress:   9%|█████                                                      | 855/10000 [21:11<26:17,  5.80it/s]

Step 854/10000, Loss: 4.5508
Step 855/10000, Loss: 4.5653


Training Progress:   9%|█████                                                      | 857/10000 [21:11<26:03,  5.85it/s]

Step 856/10000, Loss: 4.5017
Step 857/10000, Loss: 4.3482


Training Progress:   9%|█████                                                      | 859/10000 [21:12<26:12,  5.81it/s]

Step 858/10000, Loss: 4.5125
Step 859/10000, Loss: 4.4001


Training Progress:   9%|█████                                                      | 861/10000 [21:12<25:58,  5.86it/s]

Step 860/10000, Loss: 4.1685
Step 861/10000, Loss: 4.3111


Training Progress:   9%|█████                                                      | 863/10000 [21:13<25:34,  5.96it/s]

Step 862/10000, Loss: 4.4851
Step 863/10000, Loss: 4.1847


Training Progress:   9%|█████                                                      | 865/10000 [21:13<26:02,  5.85it/s]

Step 864/10000, Loss: 4.2635
Step 865/10000, Loss: 4.6173


Training Progress:   9%|█████                                                      | 867/10000 [21:13<25:52,  5.88it/s]

Step 866/10000, Loss: 4.4375
Step 867/10000, Loss: 4.3498


Training Progress:   9%|█████▏                                                     | 869/10000 [21:14<26:13,  5.80it/s]

Step 868/10000, Loss: 4.1518
Step 869/10000, Loss: 4.3018


Training Progress:   9%|█████▏                                                     | 871/10000 [21:14<25:49,  5.89it/s]

Step 870/10000, Loss: 4.1826
Step 871/10000, Loss: 4.3311


Training Progress:   9%|█████▏                                                     | 873/10000 [21:14<26:10,  5.81it/s]

Step 872/10000, Loss: 4.1699
Step 873/10000, Loss: 4.5953


Training Progress:   9%|█████▏                                                     | 874/10000 [21:14<25:41,  5.92it/s]

Step 874/10000, Loss: 4.5571


Training Progress:   9%|█████▏                                                     | 876/10000 [21:15<26:04,  5.83it/s]

Step 875/10000, Loss: 4.4187
Step 876/10000, Loss: 4.6276


Training Progress:   9%|█████▏                                                     | 878/10000 [21:15<26:07,  5.82it/s]

Step 877/10000, Loss: 4.9579
Step 878/10000, Loss: 4.7080


Training Progress:   9%|█████▏                                                     | 880/10000 [21:15<26:01,  5.84it/s]

Step 879/10000, Loss: 4.4502
Step 880/10000, Loss: 4.4427


Training Progress:   9%|█████▏                                                     | 882/10000 [21:16<26:05,  5.83it/s]

Step 881/10000, Loss: 4.5976
Step 882/10000, Loss: 4.4953


Training Progress:   9%|█████▏                                                     | 884/10000 [21:16<25:59,  5.85it/s]

Step 883/10000, Loss: 4.3089
Step 884/10000, Loss: 4.1662


Training Progress:   9%|█████▏                                                     | 886/10000 [21:16<25:43,  5.90it/s]

Step 885/10000, Loss: 4.1239
Step 886/10000, Loss: 4.2725


Training Progress:   9%|█████▏                                                     | 888/10000 [21:17<26:00,  5.84it/s]

Step 887/10000, Loss: 4.3067
Step 888/10000, Loss: 4.3214


Training Progress:   9%|█████▎                                                     | 890/10000 [21:17<25:32,  5.95it/s]

Step 889/10000, Loss: 4.2712
Step 890/10000, Loss: 4.0785


Training Progress:   9%|█████▎                                                     | 892/10000 [21:17<26:10,  5.80it/s]

Step 891/10000, Loss: 4.0412
Step 892/10000, Loss: 4.7330


Training Progress:   9%|█████▎                                                     | 894/10000 [21:18<25:27,  5.96it/s]

Step 893/10000, Loss: 4.4663
Step 894/10000, Loss: 4.2623


Training Progress:   9%|█████▎                                                     | 896/10000 [21:18<26:04,  5.82it/s]

Step 895/10000, Loss: 4.2071
Step 896/10000, Loss: 4.3330


Training Progress:   9%|█████▎                                                     | 897/10000 [21:18<25:30,  5.95it/s]

Step 897/10000, Loss: 4.2288
Step 898/10000, Loss: 3.9389


Training Progress:   9%|█████                                                   | 898/10000 [21:34<12:22:44,  4.90s/it]


Checkpoint saved: checkpoints\best\checkpoint_step898_loss3.9389_20250117_131322.pt

New best loss: 3.9389


Training Progress:   9%|█████▏                                                   | 900/10000 [21:35<6:24:54,  2.54s/it]

Step 899/10000, Loss: 3.9537
Step 900/10000, Loss: 3.9807


Training Progress:   9%|█████▏                                                   | 902/10000 [21:35<3:21:58,  1.33s/it]

Step 901/10000, Loss: 4.6848
Step 902/10000, Loss: 4.5543


Training Progress:   9%|█████▏                                                   | 904/10000 [21:36<1:52:03,  1.35it/s]

Step 903/10000, Loss: 4.5941
Step 904/10000, Loss: 4.4025


Training Progress:   9%|█████▏                                                   | 906/10000 [21:36<1:07:39,  2.24it/s]

Step 905/10000, Loss: 4.4886
Step 906/10000, Loss: 4.4875


Training Progress:   9%|█████▎                                                     | 908/10000 [21:36<46:06,  3.29it/s]

Step 907/10000, Loss: 4.3076
Step 908/10000, Loss: 3.9462


Training Progress:   9%|█████▎                                                     | 910/10000 [21:37<35:56,  4.22it/s]

Step 909/10000, Loss: 4.1038
Step 910/10000, Loss: 4.0972


Training Progress:   9%|█████▍                                                     | 912/10000 [21:37<31:03,  4.88it/s]

Step 911/10000, Loss: 4.2219
Step 912/10000, Loss: 4.2191


Training Progress:   9%|█████▍                                                     | 914/10000 [21:37<28:40,  5.28it/s]

Step 913/10000, Loss: 4.4114
Step 914/10000, Loss: 4.4121


Training Progress:   9%|█████▍                                                     | 916/10000 [21:38<27:13,  5.56it/s]

Step 915/10000, Loss: 4.3260
Step 916/10000, Loss: 4.3273


Training Progress:   9%|█████▍                                                     | 918/10000 [21:38<26:20,  5.75it/s]

Step 917/10000, Loss: 4.2503
Step 918/10000, Loss: 4.3211


Training Progress:   9%|█████▍                                                     | 920/10000 [21:38<26:19,  5.75it/s]

Step 919/10000, Loss: 3.9897
Step 920/10000, Loss: 4.0937


Training Progress:   9%|█████▍                                                     | 922/10000 [21:39<25:53,  5.84it/s]

Step 921/10000, Loss: 4.4535
Step 922/10000, Loss: 4.0531


Training Progress:   9%|█████▍                                                     | 923/10000 [21:39<26:02,  5.81it/s]

Step 923/10000, Loss: 4.2843
Step 924/10000, Loss: 3.9330


Training Progress:   9%|█████▏                                                  | 924/10000 [21:55<12:34:32,  4.99s/it]


Checkpoint saved: checkpoints\best\checkpoint_step924_loss3.9330_20250117_131343.pt

New best loss: 3.9330


Training Progress:   9%|█████▎                                                   | 926/10000 [21:56<6:32:15,  2.59s/it]

Step 925/10000, Loss: 4.4452
Step 926/10000, Loss: 4.5029


Training Progress:   9%|█████▎                                                   | 928/10000 [21:56<3:25:04,  1.36s/it]

Step 927/10000, Loss: 4.5131
Step 928/10000, Loss: 4.4756


Training Progress:   9%|█████▎                                                   | 930/10000 [21:56<1:53:41,  1.33it/s]

Step 929/10000, Loss: 4.3748
Step 930/10000, Loss: 4.4161


Training Progress:   9%|█████▎                                                   | 932/10000 [21:57<1:08:35,  2.20it/s]

Step 931/10000, Loss: 4.4143
Step 932/10000, Loss: 4.3566


Training Progress:   9%|█████▌                                                     | 934/10000 [21:57<47:04,  3.21it/s]

Step 933/10000, Loss: 4.2161
Step 934/10000, Loss: 4.1194


Training Progress:   9%|█████▌                                                     | 936/10000 [21:57<35:47,  4.22it/s]

Step 935/10000, Loss: 4.3158
Step 936/10000, Loss: 4.4760


Training Progress:   9%|█████▌                                                     | 938/10000 [21:58<30:26,  4.96it/s]

Step 937/10000, Loss: 4.4844
Step 938/10000, Loss: 4.4248


Training Progress:   9%|█████▌                                                     | 940/10000 [21:58<28:24,  5.31it/s]

Step 939/10000, Loss: 4.2960
Step 940/10000, Loss: 4.4557


Training Progress:   9%|█████▌                                                     | 942/10000 [21:58<27:02,  5.58it/s]

Step 941/10000, Loss: 4.3157
Step 942/10000, Loss: 4.0820


Training Progress:   9%|█████▌                                                     | 944/10000 [21:59<26:23,  5.72it/s]

Step 943/10000, Loss: 4.2229
Step 944/10000, Loss: 4.3737


Training Progress:   9%|█████▌                                                     | 946/10000 [21:59<25:52,  5.83it/s]

Step 945/10000, Loss: 4.1208
Step 946/10000, Loss: 4.1784


Training Progress:   9%|█████▌                                                     | 948/10000 [21:59<25:49,  5.84it/s]

Step 947/10000, Loss: 4.5229
Step 948/10000, Loss: 4.3290


Training Progress:  10%|█████▌                                                     | 950/10000 [22:00<25:51,  5.83it/s]

Step 949/10000, Loss: 4.2413
Step 950/10000, Loss: 4.0428


Training Progress:  10%|█████▌                                                     | 952/10000 [22:00<25:21,  5.95it/s]

Step 951/10000, Loss: 4.2021
Step 952/10000, Loss: 4.0947


Training Progress:  10%|█████▋                                                     | 954/10000 [22:00<25:18,  5.96it/s]

Step 953/10000, Loss: 4.2280
Step 954/10000, Loss: 4.0747


Training Progress:  10%|█████▋                                                     | 956/10000 [22:01<25:19,  5.95it/s]

Step 955/10000, Loss: 4.4520
Step 956/10000, Loss: 4.4353


Training Progress:  10%|█████▋                                                     | 958/10000 [22:01<25:45,  5.85it/s]

Step 957/10000, Loss: 4.3364
Step 958/10000, Loss: 4.5158


Training Progress:  10%|█████▋                                                     | 960/10000 [22:01<25:49,  5.84it/s]

Step 959/10000, Loss: 4.7957
Step 960/10000, Loss: 4.5737


Training Progress:  10%|█████▋                                                     | 962/10000 [22:02<25:06,  6.00it/s]

Step 961/10000, Loss: 4.3624
Step 962/10000, Loss: 4.3936


Training Progress:  10%|█████▋                                                     | 964/10000 [22:02<25:41,  5.86it/s]

Step 963/10000, Loss: 4.5076
Step 964/10000, Loss: 4.3449


Training Progress:  10%|█████▋                                                     | 966/10000 [22:02<25:43,  5.85it/s]

Step 965/10000, Loss: 4.1993
Step 966/10000, Loss: 4.1004


Training Progress:  10%|█████▋                                                     | 968/10000 [22:03<25:43,  5.85it/s]

Step 967/10000, Loss: 4.0535
Step 968/10000, Loss: 4.1728


Training Progress:  10%|█████▋                                                     | 970/10000 [22:03<25:33,  5.89it/s]

Step 969/10000, Loss: 4.1214
Step 970/10000, Loss: 4.1222


Training Progress:  10%|█████▋                                                     | 972/10000 [22:03<25:32,  5.89it/s]

Step 971/10000, Loss: 4.1666
Step 972/10000, Loss: 4.0998


Training Progress:  10%|█████▋                                                     | 974/10000 [22:04<25:44,  5.84it/s]

Step 973/10000, Loss: 3.9828
Step 974/10000, Loss: 4.6131


Training Progress:  10%|█████▊                                                     | 976/10000 [22:04<25:57,  5.79it/s]

Step 975/10000, Loss: 4.2822
Step 976/10000, Loss: 4.0686


Training Progress:  10%|█████▊                                                     | 978/10000 [22:04<25:09,  5.98it/s]

Step 977/10000, Loss: 4.0753
Step 978/10000, Loss: 4.2897


Training Progress:  10%|█████▊                                                     | 979/10000 [22:05<25:16,  5.95it/s]

Step 979/10000, Loss: 4.2344
Step 980/10000, Loss: 3.9069


Training Progress:  10%|█████▍                                                  | 980/10000 [22:20<11:46:34,  4.70s/it]


Checkpoint saved: checkpoints\best\checkpoint_step980_loss3.9069_20250117_131409.pt

New best loss: 3.9069
Step 981/10000, Loss: 3.8528


Training Progress:  10%|█████▍                                                  | 981/10000 [22:41<24:11:17,  9.65s/it]


Checkpoint saved: checkpoints\best\checkpoint_step981_loss3.8528_20250117_131424.pt

New best loss: 3.8528
Step 982/10000, Loss: 3.8295


Training Progress:  10%|█████▍                                                  | 982/10000 [23:02<32:56:04, 13.15s/it]


Checkpoint saved: checkpoints\best\checkpoint_step982_loss3.8295_20250117_131445.pt

New best loss: 3.8295


Training Progress:  10%|█████▌                                                  | 984/10000 [23:03<16:28:44,  6.58s/it]

Step 983/10000, Loss: 4.5073
Step 984/10000, Loss: 4.4076


Training Progress:  10%|█████▌                                                   | 986/10000 [23:03<8:17:37,  3.31s/it]

Step 985/10000, Loss: 4.4678
Step 986/10000, Loss: 4.3682


Training Progress:  10%|█████▋                                                   | 988/10000 [23:04<4:16:39,  1.71s/it]

Step 987/10000, Loss: 4.4146
Step 988/10000, Loss: 4.3738


Training Progress:  10%|█████▋                                                   | 990/10000 [23:04<2:19:08,  1.08it/s]

Step 989/10000, Loss: 4.1511
Step 990/10000, Loss: 3.8413


Training Progress:  10%|█████▋                                                   | 992/10000 [23:04<1:21:23,  1.84it/s]

Step 991/10000, Loss: 3.9798
Step 992/10000, Loss: 3.9876


Training Progress:  10%|█████▊                                                     | 994/10000 [23:05<52:56,  2.84it/s]

Step 993/10000, Loss: 4.1056
Step 994/10000, Loss: 4.1814


Training Progress:  10%|█████▉                                                     | 996/10000 [23:05<38:53,  3.86it/s]

Step 995/10000, Loss: 4.3779
Step 996/10000, Loss: 4.3103


Training Progress:  10%|█████▉                                                     | 998/10000 [23:05<32:04,  4.68it/s]

Step 997/10000, Loss: 4.2360
Step 998/10000, Loss: 4.2569


Training Progress:  10%|█████▉                                                     | 999/10000 [23:06<29:58,  5.00it/s]

Step 999/10000, Loss: 4.2293
Step 1000/10000, Loss: 4.3493


Training Progress:  10%|█████▌                                                 | 1000/10000 [23:26<15:55:52,  6.37s/it]


Checkpoint saved: checkpoints\checkpoint_step1000_loss4.3493_20250117_131510.pt


Training Progress:  10%|█████▌                                                  | 1002/10000 [23:27<8:11:36,  3.28s/it]

Step 1001/10000, Loss: 3.9769
Step 1002/10000, Loss: 4.0184


Training Progress:  10%|█████▌                                                  | 1004/10000 [23:27<4:13:36,  1.69s/it]

Step 1003/10000, Loss: 4.3393
Step 1004/10000, Loss: 4.0305


Training Progress:  10%|█████▋                                                  | 1006/10000 [23:28<2:17:20,  1.09it/s]

Step 1005/10000, Loss: 4.2704
Step 1006/10000, Loss: 3.9020


Training Progress:  10%|█████▋                                                  | 1008/10000 [23:28<1:19:56,  1.87it/s]

Step 1007/10000, Loss: 4.4056
Step 1008/10000, Loss: 4.4592


Training Progress:  10%|█████▊                                                    | 1010/10000 [23:28<52:42,  2.84it/s]

Step 1009/10000, Loss: 4.5275
Step 1010/10000, Loss: 4.5420


Training Progress:  10%|█████▊                                                    | 1012/10000 [23:29<38:33,  3.88it/s]

Step 1011/10000, Loss: 4.3567
Step 1012/10000, Loss: 4.4011


Training Progress:  10%|█████▉                                                    | 1014/10000 [23:29<31:45,  4.72it/s]

Step 1013/10000, Loss: 4.3805
Step 1014/10000, Loss: 4.3139


Training Progress:  10%|█████▉                                                    | 1016/10000 [23:29<29:02,  5.16it/s]

Step 1015/10000, Loss: 4.1699
Step 1016/10000, Loss: 4.0427


Training Progress:  10%|█████▉                                                    | 1018/10000 [23:30<26:39,  5.61it/s]

Step 1017/10000, Loss: 4.2720
Step 1018/10000, Loss: 4.4874


Training Progress:  10%|█████▉                                                    | 1020/10000 [23:30<26:34,  5.63it/s]

Step 1019/10000, Loss: 4.4922
Step 1020/10000, Loss: 4.4143


Training Progress:  10%|█████▉                                                    | 1022/10000 [23:30<25:44,  5.81it/s]

Step 1021/10000, Loss: 4.2341
Step 1022/10000, Loss: 4.3579


Training Progress:  10%|█████▉                                                    | 1024/10000 [23:31<25:38,  5.83it/s]

Step 1023/10000, Loss: 4.2725
Step 1024/10000, Loss: 4.0767


Training Progress:  10%|█████▉                                                    | 1026/10000 [23:31<25:36,  5.84it/s]

Step 1025/10000, Loss: 4.2093
Step 1026/10000, Loss: 4.3374


Training Progress:  10%|█████▉                                                    | 1028/10000 [23:31<25:11,  5.94it/s]

Step 1027/10000, Loss: 4.0933
Step 1028/10000, Loss: 4.1513


Training Progress:  10%|█████▉                                                    | 1030/10000 [23:32<25:36,  5.84it/s]

Step 1029/10000, Loss: 4.5057
Step 1030/10000, Loss: 4.3149


Training Progress:  10%|█████▉                                                    | 1032/10000 [23:32<25:34,  5.85it/s]

Step 1031/10000, Loss: 4.2082
Step 1032/10000, Loss: 4.0579


Training Progress:  10%|█████▉                                                    | 1034/10000 [23:32<25:30,  5.86it/s]

Step 1033/10000, Loss: 4.1702
Step 1034/10000, Loss: 4.0630


Training Progress:  10%|██████                                                    | 1036/10000 [23:33<25:35,  5.84it/s]

Step 1035/10000, Loss: 4.1935
Step 1036/10000, Loss: 4.0575


Training Progress:  10%|██████                                                    | 1038/10000 [23:33<25:35,  5.84it/s]

Step 1037/10000, Loss: 4.4248
Step 1038/10000, Loss: 4.3977


Training Progress:  10%|██████                                                    | 1040/10000 [23:33<25:29,  5.86it/s]

Step 1039/10000, Loss: 4.2865
Step 1040/10000, Loss: 4.4786


Training Progress:  10%|██████                                                    | 1042/10000 [23:34<25:32,  5.84it/s]

Step 1041/10000, Loss: 4.7501
Step 1042/10000, Loss: 4.5199


Training Progress:  10%|██████                                                    | 1044/10000 [23:34<25:16,  5.91it/s]

Step 1043/10000, Loss: 4.2928
Step 1044/10000, Loss: 4.3174


Training Progress:  10%|██████                                                    | 1046/10000 [23:34<25:42,  5.80it/s]

Step 1045/10000, Loss: 4.4336
Step 1046/10000, Loss: 4.3215


Training Progress:  10%|██████                                                    | 1048/10000 [23:35<25:20,  5.89it/s]

Step 1047/10000, Loss: 4.1937
Step 1048/10000, Loss: 4.0466


Training Progress:  10%|██████                                                    | 1050/10000 [23:35<25:19,  5.89it/s]

Step 1049/10000, Loss: 3.9914
Step 1050/10000, Loss: 4.1087


Training Progress:  11%|██████                                                    | 1052/10000 [23:36<25:27,  5.86it/s]

Step 1051/10000, Loss: 4.1031
Step 1052/10000, Loss: 4.0837


Training Progress:  11%|██████                                                    | 1054/10000 [23:36<24:57,  5.97it/s]

Step 1053/10000, Loss: 4.0717
Step 1054/10000, Loss: 3.9289


Training Progress:  11%|██████                                                    | 1056/10000 [23:36<25:36,  5.82it/s]

Step 1055/10000, Loss: 3.9013
Step 1056/10000, Loss: 4.5700


Training Progress:  11%|██████▏                                                   | 1058/10000 [23:37<25:16,  5.90it/s]

Step 1057/10000, Loss: 4.3284
Step 1058/10000, Loss: 4.0638


Training Progress:  11%|██████▏                                                   | 1060/10000 [23:37<25:40,  5.81it/s]

Step 1059/10000, Loss: 3.9594
Step 1060/10000, Loss: 4.1107


Training Progress:  11%|██████▏                                                   | 1062/10000 [23:37<25:45,  5.78it/s]

Step 1061/10000, Loss: 4.0786
Step 1062/10000, Loss: 3.8586


Training Progress:  11%|██████▏                                                   | 1064/10000 [23:38<25:33,  5.83it/s]

Step 1063/10000, Loss: 3.8535
Step 1064/10000, Loss: 3.8714


Training Progress:  11%|██████▏                                                   | 1066/10000 [23:38<25:18,  5.88it/s]

Step 1065/10000, Loss: 4.5165
Step 1066/10000, Loss: 4.3254


Training Progress:  11%|██████▏                                                   | 1068/10000 [23:38<25:35,  5.82it/s]

Step 1067/10000, Loss: 4.3672
Step 1068/10000, Loss: 4.2484


Training Progress:  11%|██████▏                                                   | 1070/10000 [23:39<25:19,  5.88it/s]

Step 1069/10000, Loss: 4.3325
Step 1070/10000, Loss: 4.3176


Training Progress:  11%|██████▏                                                   | 1072/10000 [23:39<24:55,  5.97it/s]

Step 1071/10000, Loss: 4.1226
Step 1072/10000, Loss: 3.8487


Training Progress:  11%|██████▏                                                   | 1074/10000 [23:39<25:27,  5.84it/s]

Step 1073/10000, Loss: 3.9494
Step 1074/10000, Loss: 3.9424


Training Progress:  11%|██████▏                                                   | 1076/10000 [23:40<25:13,  5.90it/s]

Step 1075/10000, Loss: 4.0239
Step 1076/10000, Loss: 4.0708


Training Progress:  11%|██████▎                                                   | 1078/10000 [23:40<25:33,  5.82it/s]

Step 1077/10000, Loss: 4.2808
Step 1078/10000, Loss: 4.2553


Training Progress:  11%|██████▎                                                   | 1080/10000 [23:40<25:12,  5.90it/s]

Step 1079/10000, Loss: 4.2062
Step 1080/10000, Loss: 4.2371


Training Progress:  11%|██████▎                                                   | 1082/10000 [23:41<25:36,  5.80it/s]

Step 1081/10000, Loss: 4.1329
Step 1082/10000, Loss: 4.2747


Training Progress:  11%|██████▎                                                   | 1084/10000 [23:41<25:19,  5.87it/s]

Step 1083/10000, Loss: 3.9662
Step 1084/10000, Loss: 4.0150


Training Progress:  11%|██████▎                                                   | 1086/10000 [23:41<25:30,  5.82it/s]

Step 1085/10000, Loss: 4.3086
Step 1086/10000, Loss: 3.9254


Training Progress:  11%|██████▎                                                   | 1087/10000 [23:41<25:18,  5.87it/s]

Step 1087/10000, Loss: 4.1658
Step 1088/10000, Loss: 3.7867


Training Progress:  11%|█████▉                                                 | 1088/10000 [23:56<11:22:27,  4.59s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1088_loss3.7867_20250117_131545.pt

New best loss: 3.7867


Training Progress:  11%|██████                                                  | 1090/10000 [23:57<5:54:57,  2.39s/it]

Step 1089/10000, Loss: 4.3324
Step 1090/10000, Loss: 4.4850


Training Progress:  11%|██████                                                  | 1092/10000 [23:57<3:06:49,  1.26s/it]

Step 1091/10000, Loss: 4.5309
Step 1092/10000, Loss: 4.4610


Training Progress:  11%|██████▏                                                 | 1094/10000 [23:58<1:44:24,  1.42it/s]

Step 1093/10000, Loss: 4.2856
Step 1094/10000, Loss: 4.3331


Training Progress:  11%|██████▏                                                 | 1096/10000 [23:58<1:03:40,  2.33it/s]

Step 1095/10000, Loss: 4.3605
Step 1096/10000, Loss: 4.3231


Training Progress:  11%|██████▎                                                   | 1098/10000 [23:58<43:59,  3.37it/s]

Step 1097/10000, Loss: 4.1979
Step 1098/10000, Loss: 4.0331


Training Progress:  11%|██████▍                                                   | 1100/10000 [23:59<34:21,  4.32it/s]

Step 1099/10000, Loss: 4.2025
Step 1100/10000, Loss: 4.3673


Training Progress:  11%|██████▍                                                   | 1102/10000 [23:59<30:00,  4.94it/s]

Step 1101/10000, Loss: 4.3614
Step 1102/10000, Loss: 4.3270


Training Progress:  11%|██████▍                                                   | 1104/10000 [23:59<27:44,  5.35it/s]

Step 1103/10000, Loss: 4.2510
Step 1104/10000, Loss: 4.3780


Training Progress:  11%|██████▍                                                   | 1106/10000 [24:00<26:32,  5.58it/s]

Step 1105/10000, Loss: 4.2720
Step 1106/10000, Loss: 4.0008


Training Progress:  11%|██████▍                                                   | 1108/10000 [24:00<25:37,  5.78it/s]

Step 1107/10000, Loss: 4.0867
Step 1108/10000, Loss: 4.2576


Training Progress:  11%|██████▍                                                   | 1110/10000 [24:00<25:17,  5.86it/s]

Step 1109/10000, Loss: 4.0422
Step 1110/10000, Loss: 4.1218


Training Progress:  11%|██████▍                                                   | 1112/10000 [24:01<25:22,  5.84it/s]

Step 1111/10000, Loss: 4.4883
Step 1112/10000, Loss: 4.3013


Training Progress:  11%|██████▍                                                   | 1114/10000 [24:01<25:36,  5.78it/s]

Step 1113/10000, Loss: 4.2204
Step 1114/10000, Loss: 3.9710


Training Progress:  11%|██████▍                                                   | 1116/10000 [24:01<25:27,  5.82it/s]

Step 1115/10000, Loss: 4.0596
Step 1116/10000, Loss: 3.9613


Training Progress:  11%|██████▍                                                   | 1118/10000 [24:02<25:27,  5.81it/s]

Step 1117/10000, Loss: 4.1556
Step 1118/10000, Loss: 4.0237


Training Progress:  11%|██████▍                                                   | 1120/10000 [24:02<25:09,  5.88it/s]

Step 1119/10000, Loss: 4.3794
Step 1120/10000, Loss: 4.3708


Training Progress:  11%|██████▌                                                   | 1122/10000 [24:02<25:08,  5.89it/s]

Step 1121/10000, Loss: 4.2225
Step 1122/10000, Loss: 4.3749


Training Progress:  11%|██████▌                                                   | 1124/10000 [24:03<25:36,  5.78it/s]

Step 1123/10000, Loss: 4.6084
Step 1124/10000, Loss: 4.4002


Training Progress:  11%|██████▌                                                   | 1126/10000 [24:03<25:27,  5.81it/s]

Step 1125/10000, Loss: 4.2242
Step 1126/10000, Loss: 4.2743


Training Progress:  11%|██████▌                                                   | 1128/10000 [24:03<25:21,  5.83it/s]

Step 1127/10000, Loss: 4.3879
Step 1128/10000, Loss: 4.2697


Training Progress:  11%|██████▌                                                   | 1130/10000 [24:04<25:17,  5.85it/s]

Step 1129/10000, Loss: 4.0825
Step 1130/10000, Loss: 3.9627


Training Progress:  11%|██████▌                                                   | 1132/10000 [24:04<25:12,  5.86it/s]

Step 1131/10000, Loss: 3.9366
Step 1132/10000, Loss: 4.0613


Training Progress:  11%|██████▌                                                   | 1134/10000 [24:04<25:11,  5.86it/s]

Step 1133/10000, Loss: 4.0561
Step 1134/10000, Loss: 4.0371


Training Progress:  11%|██████▌                                                   | 1136/10000 [24:05<25:12,  5.86it/s]

Step 1135/10000, Loss: 4.0101
Step 1136/10000, Loss: 3.8325


Training Progress:  11%|██████▌                                                   | 1138/10000 [24:05<25:14,  5.85it/s]

Step 1137/10000, Loss: 3.8092
Step 1138/10000, Loss: 4.4388


Training Progress:  11%|██████▌                                                   | 1140/10000 [24:06<25:14,  5.85it/s]

Step 1139/10000, Loss: 4.1949
Step 1140/10000, Loss: 3.9659


Training Progress:  11%|██████▌                                                   | 1142/10000 [24:06<24:55,  5.92it/s]

Step 1141/10000, Loss: 3.8998
Step 1142/10000, Loss: 4.0586


Training Progress:  11%|██████▋                                                   | 1143/10000 [24:06<25:14,  5.85it/s]

Step 1143/10000, Loss: 3.9741
Step 1144/10000, Loss: 3.7103


Training Progress:  11%|██████▎                                                | 1144/10000 [24:22<11:58:06,  4.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1144_loss3.7103_20250117_131610.pt

New best loss: 3.7103
Step 1145/10000, Loss: 3.6731


Training Progress:  11%|██████▎                                                | 1145/10000 [24:42<22:57:33,  9.33s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1145_loss3.6731_20250117_131626.pt

New best loss: 3.6731


Training Progress:  11%|██████▎                                                | 1147/10000 [24:42<11:34:10,  4.70s/it]

Step 1146/10000, Loss: 3.7084
Step 1147/10000, Loss: 4.4408


Training Progress:  11%|██████▍                                                 | 1149/10000 [24:42<5:52:42,  2.39s/it]

Step 1148/10000, Loss: 4.3211
Step 1149/10000, Loss: 4.3441


Training Progress:  12%|██████▍                                                 | 1151/10000 [24:43<3:05:58,  1.26s/it]

Step 1150/10000, Loss: 4.1596
Step 1151/10000, Loss: 4.2075


Training Progress:  12%|██████▍                                                 | 1153/10000 [24:43<1:43:32,  1.42it/s]

Step 1152/10000, Loss: 4.1847
Step 1153/10000, Loss: 3.9959


Training Progress:  12%|██████▍                                                 | 1155/10000 [24:44<1:03:35,  2.32it/s]

Step 1154/10000, Loss: 3.7691
Step 1155/10000, Loss: 3.8775


Training Progress:  12%|██████▋                                                   | 1157/10000 [24:44<44:01,  3.35it/s]

Step 1156/10000, Loss: 3.8668
Step 1157/10000, Loss: 3.9297


Training Progress:  12%|██████▋                                                   | 1159/10000 [24:44<34:40,  4.25it/s]

Step 1158/10000, Loss: 3.9549
Step 1159/10000, Loss: 4.1904


Training Progress:  12%|██████▋                                                   | 1161/10000 [24:45<29:39,  4.97it/s]

Step 1160/10000, Loss: 4.1021
Step 1161/10000, Loss: 4.0060


Training Progress:  12%|██████▋                                                   | 1163/10000 [24:45<26:58,  5.46it/s]

Step 1162/10000, Loss: 4.0660
Step 1163/10000, Loss: 4.0134


Training Progress:  12%|██████▊                                                   | 1165/10000 [24:45<26:13,  5.61it/s]

Step 1164/10000, Loss: 4.1593
Step 1165/10000, Loss: 3.7825


Training Progress:  12%|██████▊                                                   | 1167/10000 [24:46<25:47,  5.71it/s]

Step 1166/10000, Loss: 3.8734
Step 1167/10000, Loss: 4.1683


Training Progress:  12%|██████▊                                                   | 1169/10000 [24:46<24:48,  5.93it/s]

Step 1168/10000, Loss: 3.8168
Step 1169/10000, Loss: 4.0492


Training Progress:  12%|██████▊                                                   | 1171/10000 [24:46<25:06,  5.86it/s]

Step 1170/10000, Loss: 3.7145
Step 1171/10000, Loss: 4.2142


Training Progress:  12%|██████▊                                                   | 1173/10000 [24:47<24:51,  5.92it/s]

Step 1172/10000, Loss: 4.3164
Step 1173/10000, Loss: 4.3216


Training Progress:  12%|██████▊                                                   | 1175/10000 [24:47<25:11,  5.84it/s]

Step 1174/10000, Loss: 4.2960
Step 1175/10000, Loss: 4.1898


Training Progress:  12%|██████▊                                                   | 1177/10000 [24:47<24:40,  5.96it/s]

Step 1176/10000, Loss: 4.2318
Step 1177/10000, Loss: 4.2168


Training Progress:  12%|██████▊                                                   | 1179/10000 [24:48<25:14,  5.82it/s]

Step 1178/10000, Loss: 4.1167
Step 1179/10000, Loss: 3.9966


Training Progress:  12%|██████▊                                                   | 1181/10000 [24:48<24:55,  5.90it/s]

Step 1180/10000, Loss: 3.9104
Step 1181/10000, Loss: 4.1062


Training Progress:  12%|██████▊                                                   | 1183/10000 [24:48<25:06,  5.85it/s]

Step 1182/10000, Loss: 4.3322
Step 1183/10000, Loss: 4.3282


Training Progress:  12%|██████▊                                                   | 1185/10000 [24:49<25:00,  5.87it/s]

Step 1184/10000, Loss: 4.2319
Step 1185/10000, Loss: 4.1039


Training Progress:  12%|██████▉                                                   | 1187/10000 [24:49<24:37,  5.96it/s]

Step 1186/10000, Loss: 4.2081
Step 1187/10000, Loss: 4.1580


Training Progress:  12%|██████▉                                                   | 1189/10000 [24:49<24:55,  5.89it/s]

Step 1188/10000, Loss: 3.9445
Step 1189/10000, Loss: 4.0598


Training Progress:  12%|██████▉                                                   | 1191/10000 [24:50<24:42,  5.94it/s]

Step 1190/10000, Loss: 4.2113
Step 1191/10000, Loss: 3.9225


Training Progress:  12%|██████▉                                                   | 1193/10000 [24:50<25:05,  5.85it/s]

Step 1192/10000, Loss: 3.9460
Step 1193/10000, Loss: 4.3177


Training Progress:  12%|██████▉                                                   | 1195/10000 [24:50<25:15,  5.81it/s]

Step 1194/10000, Loss: 4.1562
Step 1195/10000, Loss: 4.1025


Training Progress:  12%|██████▉                                                   | 1197/10000 [24:51<25:07,  5.84it/s]

Step 1196/10000, Loss: 3.8691
Step 1197/10000, Loss: 4.0313


Training Progress:  12%|██████▉                                                   | 1199/10000 [24:51<25:03,  5.85it/s]

Step 1198/10000, Loss: 3.9151
Step 1199/10000, Loss: 4.0402


Training Progress:  12%|██████▉                                                   | 1201/10000 [24:51<24:58,  5.87it/s]

Step 1200/10000, Loss: 3.8820
Step 1201/10000, Loss: 4.2176


Training Progress:  12%|██████▉                                                   | 1203/10000 [24:52<25:00,  5.86it/s]

Step 1202/10000, Loss: 4.2083
Step 1203/10000, Loss: 4.1413


Training Progress:  12%|██████▉                                                   | 1205/10000 [24:52<24:29,  5.98it/s]

Step 1204/10000, Loss: 4.3414
Step 1205/10000, Loss: 4.5810


Training Progress:  12%|███████                                                   | 1207/10000 [24:52<25:03,  5.85it/s]

Step 1206/10000, Loss: 4.3240
Step 1207/10000, Loss: 4.1119


Training Progress:  12%|███████                                                   | 1209/10000 [24:53<24:45,  5.92it/s]

Step 1208/10000, Loss: 4.1050
Step 1209/10000, Loss: 4.2237


Training Progress:  12%|███████                                                   | 1211/10000 [24:53<25:06,  5.84it/s]

Step 1210/10000, Loss: 4.1560
Step 1211/10000, Loss: 4.0055


Training Progress:  12%|███████                                                   | 1213/10000 [24:53<25:05,  5.84it/s]

Step 1212/10000, Loss: 3.8620
Step 1213/10000, Loss: 3.8489


Training Progress:  12%|███████                                                   | 1215/10000 [24:54<25:02,  5.85it/s]

Step 1214/10000, Loss: 3.9606
Step 1215/10000, Loss: 3.9488


Training Progress:  12%|███████                                                   | 1217/10000 [24:54<25:02,  5.85it/s]

Step 1216/10000, Loss: 3.8751
Step 1217/10000, Loss: 3.8738


Training Progress:  12%|███████                                                   | 1219/10000 [24:54<25:06,  5.83it/s]

Step 1218/10000, Loss: 3.7159
Step 1219/10000, Loss: 3.7295


Training Progress:  12%|███████                                                   | 1221/10000 [24:55<25:04,  5.84it/s]

Step 1220/10000, Loss: 4.3248
Step 1221/10000, Loss: 4.0848


Training Progress:  12%|███████                                                   | 1223/10000 [24:55<25:00,  5.85it/s]

Step 1222/10000, Loss: 3.8453
Step 1223/10000, Loss: 3.7905


Training Progress:  12%|███████                                                   | 1225/10000 [24:55<24:57,  5.86it/s]

Step 1224/10000, Loss: 3.9431
Step 1225/10000, Loss: 3.8532
Step 1226/10000, Loss: 3.6327


Training Progress:  12%|██████▋                                                | 1226/10000 [25:12<12:38:01,  5.18s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1226_loss3.6327_20250117_131659.pt

New best loss: 3.6327
Step 1227/10000, Loss: 3.6224


Training Progress:  12%|██████▋                                                | 1227/10000 [25:31<22:51:15,  9.38s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1227_loss3.6224_20250117_131717.pt

New best loss: 3.6224
Step 1228/10000, Loss: 3.6215


Training Progress:  12%|██████▊                                                | 1228/10000 [25:51<30:32:30, 12.53s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1228_loss3.6215_20250117_131736.pt

New best loss: 3.6215


Training Progress:  12%|██████▊                                                | 1230/10000 [25:52<15:16:26,  6.27s/it]

Step 1229/10000, Loss: 4.2557
Step 1230/10000, Loss: 4.1456


Training Progress:  12%|██████▉                                                 | 1232/10000 [25:52<7:41:48,  3.16s/it]

Step 1231/10000, Loss: 4.1534
Step 1232/10000, Loss: 4.0460


Training Progress:  12%|██████▉                                                 | 1234/10000 [25:53<3:58:51,  1.63s/it]

Step 1233/10000, Loss: 4.1361
Step 1234/10000, Loss: 4.0774


Training Progress:  12%|██████▉                                                 | 1235/10000 [25:53<2:54:53,  1.20s/it]

Step 1235/10000, Loss: 3.8881
Step 1236/10000, Loss: 3.6146


Training Progress:  12%|██████▊                                                | 1236/10000 [26:13<17:03:19,  7.01s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1236_loss3.6146_20250117_131757.pt

New best loss: 3.6146


Training Progress:  12%|██████▉                                                 | 1238/10000 [26:14<8:41:43,  3.57s/it]

Step 1237/10000, Loss: 3.7346
Step 1238/10000, Loss: 3.7175


Training Progress:  12%|██████▉                                                 | 1240/10000 [26:14<4:28:15,  1.84s/it]

Step 1239/10000, Loss: 3.8177
Step 1240/10000, Loss: 3.8846


Training Progress:  12%|██████▉                                                 | 1242/10000 [26:15<2:23:41,  1.02it/s]

Step 1241/10000, Loss: 4.0846
Step 1242/10000, Loss: 4.0197


Training Progress:  12%|██████▉                                                 | 1244/10000 [26:15<1:23:24,  1.75it/s]

Step 1243/10000, Loss: 3.9142
Step 1244/10000, Loss: 3.9297


Training Progress:  12%|███████▏                                                  | 1246/10000 [26:15<53:06,  2.75it/s]

Step 1245/10000, Loss: 3.8839
Step 1246/10000, Loss: 4.0112


Training Progress:  12%|███████▏                                                  | 1248/10000 [26:16<39:10,  3.72it/s]

Step 1247/10000, Loss: 3.6690
Step 1248/10000, Loss: 3.7727


Training Progress:  12%|███████▎                                                  | 1250/10000 [26:16<31:51,  4.58it/s]

Step 1249/10000, Loss: 4.0626
Step 1250/10000, Loss: 3.7148


Training Progress:  13%|███████▎                                                  | 1251/10000 [26:16<29:49,  4.89it/s]

Step 1251/10000, Loss: 3.9343
Step 1252/10000, Loss: 3.5972


Training Progress:  13%|██████▉                                                | 1252/10000 [26:36<14:36:12,  6.01s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1252_loss3.5972_20250117_131820.pt

New best loss: 3.5972


Training Progress:  13%|███████                                                 | 1254/10000 [26:36<7:29:57,  3.09s/it]

Step 1253/10000, Loss: 4.0495
Step 1254/10000, Loss: 4.1124


Training Progress:  13%|███████                                                 | 1256/10000 [26:37<3:52:56,  1.60s/it]

Step 1255/10000, Loss: 4.1729
Step 1256/10000, Loss: 4.1436


Training Progress:  13%|███████                                                 | 1258/10000 [26:37<2:07:03,  1.15it/s]

Step 1257/10000, Loss: 4.0246
Step 1258/10000, Loss: 4.0573


Training Progress:  13%|███████                                                 | 1260/10000 [26:37<1:14:54,  1.94it/s]

Step 1259/10000, Loss: 4.0625
Step 1260/10000, Loss: 4.0056


Training Progress:  13%|███████▎                                                  | 1262/10000 [26:38<49:25,  2.95it/s]

Step 1261/10000, Loss: 3.8584
Step 1262/10000, Loss: 3.7659


Training Progress:  13%|███████▎                                                  | 1264/10000 [26:38<36:44,  3.96it/s]

Step 1263/10000, Loss: 3.8985
Step 1264/10000, Loss: 4.1453


Training Progress:  13%|███████▎                                                  | 1266/10000 [26:38<30:44,  4.74it/s]

Step 1265/10000, Loss: 4.1530
Step 1266/10000, Loss: 4.0886


Training Progress:  13%|███████▎                                                  | 1268/10000 [26:39<27:43,  5.25it/s]

Step 1267/10000, Loss: 3.9728
Step 1268/10000, Loss: 4.0963


Training Progress:  13%|███████▎                                                  | 1270/10000 [26:39<26:01,  5.59it/s]

Step 1269/10000, Loss: 4.0361
Step 1270/10000, Loss: 3.7795


Training Progress:  13%|███████▍                                                  | 1272/10000 [26:39<25:17,  5.75it/s]

Step 1271/10000, Loss: 3.8587
Step 1272/10000, Loss: 4.0526


Training Progress:  13%|███████▍                                                  | 1274/10000 [26:40<25:10,  5.78it/s]

Step 1273/10000, Loss: 3.8457
Step 1274/10000, Loss: 3.9403


Training Progress:  13%|███████▍                                                  | 1276/10000 [26:40<24:32,  5.93it/s]

Step 1275/10000, Loss: 4.2627
Step 1276/10000, Loss: 4.0277


Training Progress:  13%|███████▍                                                  | 1278/10000 [26:40<25:01,  5.81it/s]

Step 1277/10000, Loss: 3.9404
Step 1278/10000, Loss: 3.7436


Training Progress:  13%|███████▍                                                  | 1280/10000 [26:41<24:42,  5.88it/s]

Step 1279/10000, Loss: 3.9199
Step 1280/10000, Loss: 3.8390


Training Progress:  13%|███████▍                                                  | 1282/10000 [26:41<25:03,  5.80it/s]

Step 1281/10000, Loss: 3.9599
Step 1282/10000, Loss: 3.7841


Training Progress:  13%|███████▍                                                  | 1284/10000 [26:41<24:54,  5.83it/s]

Step 1283/10000, Loss: 4.0691
Step 1284/10000, Loss: 4.0529


Training Progress:  13%|███████▍                                                  | 1286/10000 [26:42<24:16,  5.98it/s]

Step 1285/10000, Loss: 3.9931
Step 1286/10000, Loss: 4.1942


Training Progress:  13%|███████▍                                                  | 1288/10000 [26:42<24:46,  5.86it/s]

Step 1287/10000, Loss: 4.3805
Step 1288/10000, Loss: 4.1822


Training Progress:  13%|███████▍                                                  | 1290/10000 [26:42<24:30,  5.92it/s]

Step 1289/10000, Loss: 4.0007
Step 1290/10000, Loss: 3.9976


Training Progress:  13%|███████▍                                                  | 1292/10000 [26:43<24:55,  5.82it/s]

Step 1291/10000, Loss: 4.1033
Step 1292/10000, Loss: 4.0221


Training Progress:  13%|███████▌                                                  | 1294/10000 [26:43<24:40,  5.88it/s]

Step 1293/10000, Loss: 3.8596
Step 1294/10000, Loss: 3.7260


Training Progress:  13%|███████▌                                                  | 1296/10000 [26:43<24:39,  5.88it/s]

Step 1295/10000, Loss: 3.7341
Step 1296/10000, Loss: 3.8465


Training Progress:  13%|███████▌                                                  | 1298/10000 [26:44<24:51,  5.83it/s]

Step 1297/10000, Loss: 3.7843
Step 1298/10000, Loss: 3.7950


Training Progress:  13%|███████▌                                                  | 1300/10000 [26:44<24:43,  5.86it/s]

Step 1299/10000, Loss: 3.8311
Step 1300/10000, Loss: 3.6569


Training Progress:  13%|███████▌                                                  | 1302/10000 [26:44<24:39,  5.88it/s]

Step 1301/10000, Loss: 3.6364
Step 1302/10000, Loss: 4.2188


Training Progress:  13%|███████▌                                                  | 1304/10000 [26:45<24:33,  5.90it/s]

Step 1303/10000, Loss: 3.9442
Step 1304/10000, Loss: 3.7017


Training Progress:  13%|███████▌                                                  | 1306/10000 [26:45<24:59,  5.80it/s]

Step 1305/10000, Loss: 3.7098
Step 1306/10000, Loss: 3.8719


Training Progress:  13%|███████▌                                                  | 1307/10000 [26:45<24:28,  5.92it/s]

Step 1307/10000, Loss: 3.8004
Step 1308/10000, Loss: 3.5710


Training Progress:  13%|███████▏                                               | 1308/10000 [27:00<11:10:44,  4.63s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1308_loss3.5710_20250117_131849.pt

New best loss: 3.5710
Step 1309/10000, Loss: 3.5591


Training Progress:  13%|███████▏                                               | 1309/10000 [27:23<24:13:43, 10.04s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1309_loss3.5591_20250117_131905.pt

New best loss: 3.5591
Step 1310/10000, Loss: 3.5470


Training Progress:  13%|███████▏                                               | 1310/10000 [27:43<31:20:51, 12.99s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1310_loss3.5470_20250117_131927.pt

New best loss: 3.5470


Training Progress:  13%|███████▏                                               | 1312/10000 [27:44<15:41:53,  6.50s/it]

Step 1311/10000, Loss: 4.1077
Step 1312/10000, Loss: 4.0392


Training Progress:  13%|███████▎                                                | 1314/10000 [27:44<7:54:39,  3.28s/it]

Step 1313/10000, Loss: 4.0918
Step 1314/10000, Loss: 3.9433


Training Progress:  13%|███████▎                                                | 1316/10000 [27:44<4:04:33,  1.69s/it]

Step 1315/10000, Loss: 4.0218
Step 1316/10000, Loss: 3.9889


Training Progress:  13%|███████▍                                                | 1318/10000 [27:45<2:12:48,  1.09it/s]

Step 1317/10000, Loss: 3.8337
Step 1318/10000, Loss: 3.5471


Training Progress:  13%|███████▍                                                | 1320/10000 [27:45<1:17:50,  1.86it/s]

Step 1319/10000, Loss: 3.6609
Step 1320/10000, Loss: 3.6244


Training Progress:  13%|███████▋                                                  | 1322/10000 [27:45<50:45,  2.85it/s]

Step 1321/10000, Loss: 3.7146
Step 1322/10000, Loss: 3.7506


Training Progress:  13%|███████▋                                                  | 1324/10000 [27:46<37:15,  3.88it/s]

Step 1323/10000, Loss: 3.9657
Step 1324/10000, Loss: 3.9018


Training Progress:  13%|███████▋                                                  | 1326/10000 [27:46<30:29,  4.74it/s]

Step 1325/10000, Loss: 3.8112
Step 1326/10000, Loss: 3.8398


Training Progress:  13%|███████▋                                                  | 1328/10000 [27:46<27:40,  5.22it/s]

Step 1327/10000, Loss: 3.7438
Step 1328/10000, Loss: 3.9035


Training Progress:  13%|███████▋                                                  | 1330/10000 [27:47<26:05,  5.54it/s]

Step 1329/10000, Loss: 3.5839
Step 1330/10000, Loss: 3.7230


Training Progress:  13%|███████▋                                                  | 1332/10000 [27:47<25:31,  5.66it/s]

Step 1331/10000, Loss: 3.9722
Step 1332/10000, Loss: 3.5768


Training Progress:  13%|███████▋                                                  | 1333/10000 [27:47<24:50,  5.81it/s]

Step 1333/10000, Loss: 3.7951
Step 1334/10000, Loss: 3.5001


Training Progress:  13%|███████▎                                               | 1334/10000 [28:04<12:21:07,  5.13s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1334_loss3.5001_20250117_131951.pt

New best loss: 3.5001


Training Progress:  13%|███████▍                                                | 1336/10000 [28:04<6:20:59,  2.64s/it]

Step 1335/10000, Loss: 3.9703
Step 1336/10000, Loss: 4.0827


Training Progress:  13%|███████▍                                                | 1338/10000 [28:05<3:18:57,  1.38s/it]

Step 1337/10000, Loss: 4.1282
Step 1338/10000, Loss: 4.0635


Training Progress:  13%|███████▌                                                | 1340/10000 [28:05<1:50:19,  1.31it/s]

Step 1339/10000, Loss: 3.9000
Step 1340/10000, Loss: 3.9456


Training Progress:  13%|███████▌                                                | 1342/10000 [28:05<1:06:47,  2.16it/s]

Step 1341/10000, Loss: 3.9631
Step 1342/10000, Loss: 3.9340


Training Progress:  13%|███████▊                                                  | 1344/10000 [28:06<45:15,  3.19it/s]

Step 1343/10000, Loss: 3.7869
Step 1344/10000, Loss: 3.6775


Training Progress:  13%|███████▊                                                  | 1346/10000 [28:06<34:34,  4.17it/s]

Step 1345/10000, Loss: 3.8288
Step 1346/10000, Loss: 4.0302


Training Progress:  13%|███████▊                                                  | 1348/10000 [28:06<29:32,  4.88it/s]

Step 1347/10000, Loss: 3.9931
Step 1348/10000, Loss: 3.9491


Training Progress:  14%|███████▊                                                  | 1350/10000 [28:07<26:58,  5.34it/s]

Step 1349/10000, Loss: 3.8504
Step 1350/10000, Loss: 3.9663


Training Progress:  14%|███████▊                                                  | 1352/10000 [28:07<26:04,  5.53it/s]

Step 1351/10000, Loss: 3.9080
Step 1352/10000, Loss: 3.6935


Training Progress:  14%|███████▊                                                  | 1354/10000 [28:07<24:53,  5.79it/s]

Step 1353/10000, Loss: 3.8083
Step 1354/10000, Loss: 4.0121


Training Progress:  14%|███████▊                                                  | 1356/10000 [28:08<24:52,  5.79it/s]

Step 1355/10000, Loss: 3.7733
Step 1356/10000, Loss: 3.7892


Training Progress:  14%|███████▉                                                  | 1358/10000 [28:08<24:18,  5.92it/s]

Step 1357/10000, Loss: 4.0960
Step 1358/10000, Loss: 3.9096


Training Progress:  14%|███████▉                                                  | 1360/10000 [28:08<24:14,  5.94it/s]

Step 1359/10000, Loss: 3.8827
Step 1360/10000, Loss: 3.7037


Training Progress:  14%|███████▉                                                  | 1362/10000 [28:09<24:28,  5.88it/s]

Step 1361/10000, Loss: 3.8432
Step 1362/10000, Loss: 3.7186


Training Progress:  14%|███████▉                                                  | 1364/10000 [28:09<24:43,  5.82it/s]

Step 1363/10000, Loss: 3.8238
Step 1364/10000, Loss: 3.7029


Training Progress:  14%|███████▉                                                  | 1366/10000 [28:09<24:42,  5.82it/s]

Step 1365/10000, Loss: 4.0022
Step 1366/10000, Loss: 3.9932


Training Progress:  14%|███████▉                                                  | 1368/10000 [28:10<24:04,  5.97it/s]

Step 1367/10000, Loss: 3.8992
Step 1368/10000, Loss: 4.0510


Training Progress:  14%|███████▉                                                  | 1370/10000 [28:10<24:36,  5.85it/s]

Step 1369/10000, Loss: 4.2381
Step 1370/10000, Loss: 4.0693


Training Progress:  14%|███████▉                                                  | 1372/10000 [28:10<24:35,  5.85it/s]

Step 1371/10000, Loss: 3.9099
Step 1372/10000, Loss: 3.9009


Training Progress:  14%|███████▉                                                  | 1374/10000 [28:11<24:41,  5.82it/s]

Step 1373/10000, Loss: 4.0055
Step 1374/10000, Loss: 3.9032


Training Progress:  14%|███████▉                                                  | 1376/10000 [28:11<24:16,  5.92it/s]

Step 1375/10000, Loss: 3.8012
Step 1376/10000, Loss: 3.6624


Training Progress:  14%|███████▉                                                  | 1378/10000 [28:11<24:20,  5.90it/s]

Step 1377/10000, Loss: 3.6379
Step 1378/10000, Loss: 3.7805


Training Progress:  14%|████████                                                  | 1380/10000 [28:12<24:38,  5.83it/s]

Step 1379/10000, Loss: 3.7449
Step 1380/10000, Loss: 3.7733


Training Progress:  14%|████████                                                  | 1382/10000 [28:12<24:21,  5.90it/s]

Step 1381/10000, Loss: 3.7808
Step 1382/10000, Loss: 3.5890


Training Progress:  14%|████████                                                  | 1384/10000 [28:12<24:05,  5.96it/s]

Step 1383/10000, Loss: 3.5650
Step 1384/10000, Loss: 4.1607


Training Progress:  14%|████████                                                  | 1386/10000 [28:13<24:30,  5.86it/s]

Step 1385/10000, Loss: 3.9903
Step 1386/10000, Loss: 3.7616


Training Progress:  14%|████████                                                  | 1388/10000 [28:13<24:35,  5.84it/s]

Step 1387/10000, Loss: 3.6994
Step 1388/10000, Loss: 3.8492


Training Progress:  14%|████████                                                  | 1390/10000 [28:13<24:32,  5.85it/s]

Step 1389/10000, Loss: 3.7618
Step 1390/10000, Loss: 3.5063
Step 1391/10000, Loss: 3.4982


Training Progress:  14%|███████▋                                               | 1391/10000 [28:30<12:21:45,  5.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1391_loss3.4982_20250117_132018.pt

New best loss: 3.4982


Training Progress:  14%|███████▊                                                | 1393/10000 [28:31<6:22:46,  2.67s/it]

Step 1392/10000, Loss: 3.5077
Step 1393/10000, Loss: 4.0689


Training Progress:  14%|███████▊                                                | 1395/10000 [28:31<3:19:50,  1.39s/it]

Step 1394/10000, Loss: 3.9705
Step 1395/10000, Loss: 4.0549


Training Progress:  14%|███████▊                                                | 1397/10000 [28:32<1:50:45,  1.29it/s]

Step 1396/10000, Loss: 3.9342
Step 1397/10000, Loss: 3.9763


Training Progress:  14%|███████▊                                                | 1399/10000 [28:32<1:06:29,  2.16it/s]

Step 1398/10000, Loss: 3.9347
Step 1399/10000, Loss: 3.7550
Step 1400/10000, Loss: 3.4814


Training Progress:  14%|███████▋                                               | 1400/10000 [28:51<14:14:33,  5.96s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1400_loss3.4814_20250117_132036.pt

New best loss: 3.4814


Training Progress:  14%|███████▊                                                | 1402/10000 [28:51<7:18:17,  3.06s/it]

Step 1401/10000, Loss: 3.5821
Step 1402/10000, Loss: 3.5574


Training Progress:  14%|███████▊                                                | 1404/10000 [28:52<3:46:53,  1.58s/it]

Step 1403/10000, Loss: 3.6457
Step 1404/10000, Loss: 3.6885


Training Progress:  14%|███████▊                                                | 1406/10000 [28:52<2:03:48,  1.16it/s]

Step 1405/10000, Loss: 3.8792
Step 1406/10000, Loss: 3.8577


Training Progress:  14%|███████▉                                                | 1408/10000 [28:52<1:13:21,  1.95it/s]

Step 1407/10000, Loss: 3.7521
Step 1408/10000, Loss: 3.7881


Training Progress:  14%|████████▏                                                 | 1410/10000 [28:53<48:26,  2.96it/s]

Step 1409/10000, Loss: 3.7161
Step 1410/10000, Loss: 3.8569


Training Progress:  14%|████████▏                                                 | 1412/10000 [28:53<35:58,  3.98it/s]

Step 1411/10000, Loss: 3.5568
Step 1412/10000, Loss: 3.6360


Training Progress:  14%|████████▏                                                 | 1414/10000 [28:53<30:19,  4.72it/s]

Step 1413/10000, Loss: 3.8832
Step 1414/10000, Loss: 3.5896


Training Progress:  14%|████████▏                                                 | 1416/10000 [28:54<27:05,  5.28it/s]

Step 1415/10000, Loss: 3.7761
Step 1416/10000, Loss: 3.4821


Training Progress:  14%|████████▏                                                 | 1418/10000 [28:54<25:25,  5.63it/s]

Step 1417/10000, Loss: 3.9335
Step 1418/10000, Loss: 4.0201


Training Progress:  14%|████████▏                                                 | 1420/10000 [28:54<24:49,  5.76it/s]

Step 1419/10000, Loss: 4.0249
Step 1420/10000, Loss: 4.0216


Training Progress:  14%|████████▏                                                 | 1422/10000 [28:55<24:40,  5.79it/s]

Step 1421/10000, Loss: 3.9030
Step 1422/10000, Loss: 3.9614


Training Progress:  14%|████████▎                                                 | 1424/10000 [28:55<24:13,  5.90it/s]

Step 1423/10000, Loss: 4.0051
Step 1424/10000, Loss: 3.8701


Training Progress:  14%|████████▎                                                 | 1426/10000 [28:55<24:20,  5.87it/s]

Step 1425/10000, Loss: 3.7276
Step 1426/10000, Loss: 3.6395


Training Progress:  14%|████████▎                                                 | 1428/10000 [28:56<24:12,  5.90it/s]

Step 1427/10000, Loss: 3.7977
Step 1428/10000, Loss: 3.9720


Training Progress:  14%|████████▎                                                 | 1430/10000 [28:56<24:32,  5.82it/s]

Step 1429/10000, Loss: 3.9571
Step 1430/10000, Loss: 3.8995


Training Progress:  14%|████████▎                                                 | 1432/10000 [28:56<24:24,  5.85it/s]

Step 1431/10000, Loss: 3.8015
Step 1432/10000, Loss: 3.9343


Training Progress:  14%|████████▎                                                 | 1434/10000 [28:57<24:23,  5.85it/s]

Step 1433/10000, Loss: 3.8880
Step 1434/10000, Loss: 3.6570


Training Progress:  14%|████████▎                                                 | 1436/10000 [28:57<24:02,  5.94it/s]

Step 1435/10000, Loss: 3.7379
Step 1436/10000, Loss: 3.9309


Training Progress:  14%|████████▎                                                 | 1438/10000 [28:57<24:34,  5.81it/s]

Step 1437/10000, Loss: 3.7418
Step 1438/10000, Loss: 3.7869


Training Progress:  14%|████████▎                                                 | 1440/10000 [28:58<24:20,  5.86it/s]

Step 1439/10000, Loss: 4.0893
Step 1440/10000, Loss: 3.8921


Training Progress:  14%|████████▎                                                 | 1442/10000 [28:58<23:53,  5.97it/s]

Step 1441/10000, Loss: 3.8253
Step 1442/10000, Loss: 3.5921


Training Progress:  14%|████████▍                                                 | 1444/10000 [28:58<24:35,  5.80it/s]

Step 1443/10000, Loss: 3.7212
Step 1444/10000, Loss: 3.6397


Training Progress:  14%|████████▍                                                 | 1446/10000 [28:59<23:54,  5.96it/s]

Step 1445/10000, Loss: 3.7995
Step 1446/10000, Loss: 3.7012


Training Progress:  14%|████████▍                                                 | 1448/10000 [28:59<24:24,  5.84it/s]

Step 1447/10000, Loss: 4.0069
Step 1448/10000, Loss: 3.9268


Training Progress:  14%|████████▍                                                 | 1450/10000 [28:59<24:00,  5.94it/s]

Step 1449/10000, Loss: 3.8179
Step 1450/10000, Loss: 3.9757


Training Progress:  15%|████████▍                                                 | 1452/10000 [29:00<24:32,  5.80it/s]

Step 1451/10000, Loss: 4.1627
Step 1452/10000, Loss: 3.9945


Training Progress:  15%|████████▍                                                 | 1454/10000 [29:00<24:15,  5.87it/s]

Step 1453/10000, Loss: 3.8135
Step 1454/10000, Loss: 3.8220


Training Progress:  15%|████████▍                                                 | 1456/10000 [29:00<23:57,  5.94it/s]

Step 1455/10000, Loss: 3.9549
Step 1456/10000, Loss: 3.9043


Training Progress:  15%|████████▍                                                 | 1458/10000 [29:01<24:21,  5.84it/s]

Step 1457/10000, Loss: 3.7350
Step 1458/10000, Loss: 3.5841


Training Progress:  15%|████████▍                                                 | 1460/10000 [29:01<24:08,  5.90it/s]

Step 1459/10000, Loss: 3.5694
Step 1460/10000, Loss: 3.6947


Training Progress:  15%|████████▍                                                 | 1462/10000 [29:02<24:17,  5.86it/s]

Step 1461/10000, Loss: 3.6600
Step 1462/10000, Loss: 3.6262


Training Progress:  15%|████████▍                                                 | 1464/10000 [29:02<24:13,  5.87it/s]

Step 1463/10000, Loss: 3.6365
Step 1464/10000, Loss: 3.5236


Training Progress:  15%|████████▌                                                 | 1466/10000 [29:02<24:31,  5.80it/s]

Step 1465/10000, Loss: 3.5726
Step 1466/10000, Loss: 4.1515


Training Progress:  15%|████████▌                                                 | 1468/10000 [29:03<24:12,  5.87it/s]

Step 1467/10000, Loss: 3.9300
Step 1468/10000, Loss: 3.6627


Training Progress:  15%|████████▌                                                 | 1470/10000 [29:03<23:49,  5.97it/s]

Step 1469/10000, Loss: 3.5679
Step 1470/10000, Loss: 3.7536


Training Progress:  15%|████████▌                                                 | 1472/10000 [29:03<24:33,  5.79it/s]

Step 1471/10000, Loss: 3.6966
Step 1472/10000, Loss: 3.4852


Training Progress:  15%|████████▌                                                 | 1474/10000 [29:04<23:58,  5.93it/s]

Step 1473/10000, Loss: 3.4869
Step 1474/10000, Loss: 3.5144


Training Progress:  15%|████████▌                                                 | 1476/10000 [29:04<24:25,  5.82it/s]

Step 1475/10000, Loss: 4.0193
Step 1476/10000, Loss: 3.9119


Training Progress:  15%|████████▌                                                 | 1478/10000 [29:04<24:11,  5.87it/s]

Step 1477/10000, Loss: 3.9600
Step 1478/10000, Loss: 3.8173


Training Progress:  15%|████████▌                                                 | 1480/10000 [29:05<24:29,  5.80it/s]

Step 1479/10000, Loss: 3.9031
Step 1480/10000, Loss: 3.9136


Training Progress:  15%|████████▌                                                 | 1482/10000 [29:05<24:09,  5.87it/s]

Step 1481/10000, Loss: 3.7779
Step 1482/10000, Loss: 3.5211


Training Progress:  15%|████████▌                                                 | 1484/10000 [29:05<24:26,  5.81it/s]

Step 1483/10000, Loss: 3.6203
Step 1484/10000, Loss: 3.5537


Training Progress:  15%|████████▌                                                 | 1486/10000 [29:06<23:52,  5.94it/s]

Step 1485/10000, Loss: 3.5850
Step 1486/10000, Loss: 3.6103


Training Progress:  15%|████████▋                                                 | 1488/10000 [29:06<24:31,  5.79it/s]

Step 1487/10000, Loss: 3.7923
Step 1488/10000, Loss: 3.7607


Training Progress:  15%|████████▋                                                 | 1490/10000 [29:06<24:05,  5.89it/s]

Step 1489/10000, Loss: 3.6651
Step 1490/10000, Loss: 3.7079


Training Progress:  15%|████████▋                                                 | 1492/10000 [29:07<24:18,  5.83it/s]

Step 1491/10000, Loss: 3.6401
Step 1492/10000, Loss: 3.7725
Step 1493/10000, Loss: 3.4677


Training Progress:  15%|████████▏                                              | 1493/10000 [29:21<10:47:16,  4.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1493_loss3.4677_20250117_132111.pt

New best loss: 3.4677


Training Progress:  15%|████████▎                                               | 1495/10000 [29:22<5:38:21,  2.39s/it]

Step 1494/10000, Loss: 3.5474
Step 1495/10000, Loss: 3.7817


Training Progress:  15%|████████▍                                               | 1497/10000 [29:22<2:57:35,  1.25s/it]

Step 1496/10000, Loss: 3.4688
Step 1497/10000, Loss: 3.6595
Step 1498/10000, Loss: 3.3821


Training Progress:  15%|████████▏                                              | 1498/10000 [29:43<16:42:22,  7.07s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1498_loss3.3821_20250117_132126.pt

New best loss: 3.3821


Training Progress:  15%|████████▍                                               | 1500/10000 [29:44<8:31:02,  3.61s/it]

Step 1499/10000, Loss: 3.8382
Step 1500/10000, Loss: 3.9435


Training Progress:  15%|████████▍                                               | 1502/10000 [29:44<4:22:41,  1.85s/it]

Step 1501/10000, Loss: 3.9627
Step 1502/10000, Loss: 3.9280


Training Progress:  15%|████████▍                                               | 1504/10000 [29:44<2:20:53,  1.01it/s]

Step 1503/10000, Loss: 3.8151
Step 1504/10000, Loss: 3.8326


Training Progress:  15%|████████▍                                               | 1506/10000 [29:45<1:21:25,  1.74it/s]

Step 1505/10000, Loss: 3.8645
Step 1506/10000, Loss: 3.7870


Training Progress:  15%|████████▋                                                 | 1508/10000 [29:45<52:31,  2.69it/s]

Step 1507/10000, Loss: 3.6826
Step 1508/10000, Loss: 3.5832


Training Progress:  15%|████████▊                                                 | 1510/10000 [29:45<38:00,  3.72it/s]

Step 1509/10000, Loss: 3.7413
Step 1510/10000, Loss: 3.9418


Training Progress:  15%|████████▊                                                 | 1512/10000 [29:46<30:20,  4.66it/s]

Step 1511/10000, Loss: 3.8936
Step 1512/10000, Loss: 3.8435


Training Progress:  15%|████████▊                                                 | 1514/10000 [29:46<27:30,  5.14it/s]

Step 1513/10000, Loss: 3.7479
Step 1514/10000, Loss: 3.8114


Training Progress:  15%|████████▊                                                 | 1516/10000 [29:46<25:48,  5.48it/s]

Step 1515/10000, Loss: 3.8047
Step 1516/10000, Loss: 3.6071


Training Progress:  15%|████████▊                                                 | 1518/10000 [29:47<25:02,  5.65it/s]

Step 1517/10000, Loss: 3.6945
Step 1518/10000, Loss: 3.8683


Training Progress:  15%|████████▊                                                 | 1520/10000 [29:47<24:27,  5.78it/s]

Step 1519/10000, Loss: 3.6813
Step 1520/10000, Loss: 3.7197


Training Progress:  15%|████████▊                                                 | 1522/10000 [29:47<24:01,  5.88it/s]

Step 1521/10000, Loss: 3.9830
Step 1522/10000, Loss: 3.8123


Training Progress:  15%|████████▊                                                 | 1524/10000 [29:48<24:30,  5.77it/s]

Step 1523/10000, Loss: 3.7813
Step 1524/10000, Loss: 3.5403


Training Progress:  15%|████████▊                                                 | 1526/10000 [29:48<23:44,  5.95it/s]

Step 1525/10000, Loss: 3.7091
Step 1526/10000, Loss: 3.6035


Training Progress:  15%|████████▊                                                 | 1528/10000 [29:48<24:17,  5.81it/s]

Step 1527/10000, Loss: 3.7087
Step 1528/10000, Loss: 3.5755


Training Progress:  15%|████████▊                                                 | 1530/10000 [29:49<23:39,  5.97it/s]

Step 1529/10000, Loss: 3.8266
Step 1530/10000, Loss: 3.8008


Training Progress:  15%|████████▉                                                 | 1532/10000 [29:49<24:01,  5.87it/s]

Step 1531/10000, Loss: 3.7858
Step 1532/10000, Loss: 3.9332


Training Progress:  15%|████████▉                                                 | 1534/10000 [29:49<24:03,  5.87it/s]

Step 1533/10000, Loss: 4.0804
Step 1534/10000, Loss: 3.8784


Training Progress:  15%|████████▉                                                 | 1536/10000 [29:50<24:13,  5.82it/s]

Step 1535/10000, Loss: 3.7316
Step 1536/10000, Loss: 3.7584


Training Progress:  15%|████████▉                                                 | 1538/10000 [29:50<23:56,  5.89it/s]

Step 1537/10000, Loss: 3.8894
Step 1538/10000, Loss: 3.8438


Training Progress:  15%|████████▉                                                 | 1540/10000 [29:50<23:39,  5.96it/s]

Step 1539/10000, Loss: 3.6515
Step 1540/10000, Loss: 3.5288


Training Progress:  15%|████████▉                                                 | 1542/10000 [29:51<24:01,  5.87it/s]

Step 1541/10000, Loss: 3.5454
Step 1542/10000, Loss: 3.6687


Training Progress:  15%|████████▉                                                 | 1544/10000 [29:51<24:16,  5.81it/s]

Step 1543/10000, Loss: 3.5880
Step 1544/10000, Loss: 3.5425


Training Progress:  15%|████████▉                                                 | 1546/10000 [29:52<24:01,  5.86it/s]

Step 1545/10000, Loss: 3.5290
Step 1546/10000, Loss: 3.4305


Training Progress:  15%|████████▉                                                 | 1548/10000 [29:52<24:13,  5.81it/s]

Step 1547/10000, Loss: 3.4734
Step 1548/10000, Loss: 3.9995


Training Progress:  16%|████████▉                                                 | 1550/10000 [29:52<24:11,  5.82it/s]

Step 1549/10000, Loss: 3.8292
Step 1550/10000, Loss: 3.6317


Training Progress:  16%|█████████                                                 | 1552/10000 [29:53<23:38,  5.96it/s]

Step 1551/10000, Loss: 3.5450
Step 1552/10000, Loss: 3.7077


Training Progress:  16%|█████████                                                 | 1553/10000 [29:53<23:53,  5.89it/s]

Step 1553/10000, Loss: 3.6262
Step 1554/10000, Loss: 3.3733


Training Progress:  16%|████████▌                                              | 1554/10000 [30:08<10:57:49,  4.67s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1554_loss3.3733_20250117_132157.pt

New best loss: 3.3733


Training Progress:  16%|████████▋                                               | 1556/10000 [30:08<5:41:19,  2.43s/it]

Step 1555/10000, Loss: 3.3873
Step 1556/10000, Loss: 3.3881


Training Progress:  16%|████████▋                                               | 1558/10000 [30:09<2:59:37,  1.28s/it]

Step 1557/10000, Loss: 3.9281
Step 1558/10000, Loss: 3.9041


Training Progress:  16%|████████▋                                               | 1560/10000 [30:09<1:39:51,  1.41it/s]

Step 1559/10000, Loss: 3.9329
Step 1560/10000, Loss: 3.7822


Training Progress:  16%|████████▋                                               | 1562/10000 [30:09<1:01:32,  2.29it/s]

Step 1561/10000, Loss: 3.8172
Step 1562/10000, Loss: 3.7642


Training Progress:  16%|█████████                                                 | 1564/10000 [30:10<41:53,  3.36it/s]

Step 1563/10000, Loss: 3.6547
Step 1564/10000, Loss: 3.3946


Training Progress:  16%|█████████                                                 | 1566/10000 [30:10<32:36,  4.31it/s]

Step 1565/10000, Loss: 3.6024
Step 1566/10000, Loss: 3.5909


Training Progress:  16%|█████████                                                 | 1568/10000 [30:11<28:17,  4.97it/s]

Step 1567/10000, Loss: 3.6686
Step 1568/10000, Loss: 3.7081


Training Progress:  16%|█████████                                                 | 1570/10000 [30:11<26:20,  5.33it/s]

Step 1569/10000, Loss: 3.8191
Step 1570/10000, Loss: 3.7203


Training Progress:  16%|█████████                                                 | 1572/10000 [30:11<24:58,  5.62it/s]

Step 1571/10000, Loss: 3.5986
Step 1572/10000, Loss: 3.6687


Training Progress:  16%|█████████▏                                                | 1574/10000 [30:12<24:44,  5.68it/s]

Step 1573/10000, Loss: 3.6376
Step 1574/10000, Loss: 3.7660


Training Progress:  16%|█████████▏                                                | 1576/10000 [30:12<24:28,  5.73it/s]

Step 1575/10000, Loss: 3.4591
Step 1576/10000, Loss: 3.5478


Training Progress:  16%|█████████▏                                                | 1578/10000 [30:12<24:28,  5.74it/s]

Step 1577/10000, Loss: 3.8002
Step 1578/10000, Loss: 3.4607


Training Progress:  16%|█████████▏                                                | 1579/10000 [30:12<23:48,  5.89it/s]

Step 1579/10000, Loss: 3.6620
Step 1580/10000, Loss: 3.3514


Training Progress:  16%|████████▋                                              | 1580/10000 [30:30<12:20:43,  5.28s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1580_loss3.3514_20250117_132216.pt

New best loss: 3.3514


Training Progress:  16%|████████▊                                               | 1582/10000 [30:30<6:25:07,  2.74s/it]

Step 1581/10000, Loss: 3.7576
Step 1582/10000, Loss: 3.8843


Training Progress:  16%|████████▊                                               | 1584/10000 [30:31<3:20:51,  1.43s/it]

Step 1583/10000, Loss: 3.8916
Step 1584/10000, Loss: 3.8633


Training Progress:  16%|████████▉                                               | 1586/10000 [30:31<1:50:10,  1.27it/s]

Step 1585/10000, Loss: 3.7618
Step 1586/10000, Loss: 3.7807


Training Progress:  16%|████████▉                                               | 1588/10000 [30:31<1:06:18,  2.11it/s]

Step 1587/10000, Loss: 3.7590
Step 1588/10000, Loss: 3.6963


Training Progress:  16%|█████████▏                                                | 1590/10000 [30:32<44:30,  3.15it/s]

Step 1589/10000, Loss: 3.6002
Step 1590/10000, Loss: 3.5351


Training Progress:  16%|█████████▏                                                | 1592/10000 [30:32<34:09,  4.10it/s]

Step 1591/10000, Loss: 3.6826
Step 1592/10000, Loss: 3.8813


Training Progress:  16%|█████████▏                                                | 1594/10000 [30:32<28:31,  4.91it/s]

Step 1593/10000, Loss: 3.8258
Step 1594/10000, Loss: 3.7561


Training Progress:  16%|█████████▎                                                | 1596/10000 [30:33<26:24,  5.30it/s]

Step 1595/10000, Loss: 3.6505
Step 1596/10000, Loss: 3.7330


Training Progress:  16%|█████████▎                                                | 1598/10000 [30:33<24:56,  5.61it/s]

Step 1597/10000, Loss: 3.7046
Step 1598/10000, Loss: 3.5237


Training Progress:  16%|█████████▎                                                | 1600/10000 [30:33<24:33,  5.70it/s]

Step 1599/10000, Loss: 3.6078
Step 1600/10000, Loss: 3.8070


Training Progress:  16%|█████████▎                                                | 1602/10000 [30:34<23:56,  5.85it/s]

Step 1601/10000, Loss: 3.5815
Step 1602/10000, Loss: 3.6249


Training Progress:  16%|█████████▎                                                | 1604/10000 [30:34<23:53,  5.86it/s]

Step 1603/10000, Loss: 3.8991
Step 1604/10000, Loss: 3.7162


Training Progress:  16%|█████████▎                                                | 1606/10000 [30:34<23:44,  5.89it/s]

Step 1605/10000, Loss: 3.6650
Step 1606/10000, Loss: 3.4851


Training Progress:  16%|█████████▎                                                | 1608/10000 [30:35<23:46,  5.88it/s]

Step 1607/10000, Loss: 3.6422
Step 1608/10000, Loss: 3.5633


Training Progress:  16%|█████████▎                                                | 1610/10000 [30:35<24:07,  5.80it/s]

Step 1609/10000, Loss: 3.6246
Step 1610/10000, Loss: 3.4873


Training Progress:  16%|█████████▎                                                | 1612/10000 [30:35<23:23,  5.98it/s]

Step 1611/10000, Loss: 3.7568
Step 1612/10000, Loss: 3.7326


Training Progress:  16%|█████████▎                                                | 1614/10000 [30:36<23:33,  5.93it/s]

Step 1613/10000, Loss: 3.6802
Step 1614/10000, Loss: 3.8439


Training Progress:  16%|█████████▎                                                | 1616/10000 [30:36<23:50,  5.86it/s]

Step 1615/10000, Loss: 3.9814
Step 1616/10000, Loss: 3.8209


Training Progress:  16%|█████████▍                                                | 1618/10000 [30:36<23:53,  5.85it/s]

Step 1617/10000, Loss: 3.6747
Step 1618/10000, Loss: 3.6311


Training Progress:  16%|█████████▍                                                | 1620/10000 [30:37<23:57,  5.83it/s]

Step 1619/10000, Loss: 3.7546
Step 1620/10000, Loss: 3.7171


Training Progress:  16%|█████████▍                                                | 1622/10000 [30:37<23:51,  5.85it/s]

Step 1621/10000, Loss: 3.6271
Step 1622/10000, Loss: 3.4956


Training Progress:  16%|█████████▍                                                | 1624/10000 [30:37<23:38,  5.90it/s]

Step 1623/10000, Loss: 3.4708
Step 1624/10000, Loss: 3.5865


Training Progress:  16%|█████████▍                                                | 1626/10000 [30:38<23:46,  5.87it/s]

Step 1625/10000, Loss: 3.4972
Step 1626/10000, Loss: 3.4805


Training Progress:  16%|█████████▍                                                | 1628/10000 [30:38<23:23,  5.97it/s]

Step 1627/10000, Loss: 3.4864
Step 1628/10000, Loss: 3.3889


Training Progress:  16%|█████████▍                                                | 1630/10000 [30:38<23:57,  5.82it/s]

Step 1629/10000, Loss: 3.3813
Step 1630/10000, Loss: 3.9050


Training Progress:  16%|█████████▍                                                | 1632/10000 [30:39<23:28,  5.94it/s]

Step 1631/10000, Loss: 3.6646
Step 1632/10000, Loss: 3.5498


Training Progress:  16%|█████████▍                                                | 1634/10000 [30:39<23:54,  5.83it/s]

Step 1633/10000, Loss: 3.4852
Step 1634/10000, Loss: 3.5783


Training Progress:  16%|█████████▍                                                | 1635/10000 [30:39<23:34,  5.91it/s]

Step 1635/10000, Loss: 3.5239
Step 1636/10000, Loss: 3.2947


Training Progress:  16%|████████▉                                              | 1636/10000 [30:56<11:44:13,  5.05s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1636_loss3.2947_20250117_132243.pt

New best loss: 3.2947
Step 1637/10000, Loss: 3.2634


Training Progress:  16%|█████████                                              | 1637/10000 [31:16<22:37:42,  9.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1637_loss3.2634_20250117_132300.pt

New best loss: 3.2634


Training Progress:  16%|█████████                                              | 1639/10000 [31:17<11:26:01,  4.92s/it]

Step 1638/10000, Loss: 3.2903
Step 1639/10000, Loss: 3.8220


Training Progress:  16%|█████████▏                                              | 1641/10000 [31:17<5:48:06,  2.50s/it]

Step 1640/10000, Loss: 3.7575
Step 1641/10000, Loss: 3.7378


Training Progress:  16%|█████████▏                                              | 1643/10000 [31:18<3:02:47,  1.31s/it]

Step 1642/10000, Loss: 3.6444
Step 1643/10000, Loss: 3.6957


Training Progress:  16%|█████████▏                                              | 1645/10000 [31:18<1:41:18,  1.37it/s]

Step 1644/10000, Loss: 3.7515
Step 1645/10000, Loss: 3.6212


Training Progress:  16%|█████████▏                                              | 1647/10000 [31:18<1:01:34,  2.26it/s]

Step 1646/10000, Loss: 3.3352
Step 1647/10000, Loss: 3.4256


Training Progress:  16%|█████████▌                                                | 1649/10000 [31:19<42:24,  3.28it/s]

Step 1648/10000, Loss: 3.3818
Step 1649/10000, Loss: 3.4577


Training Progress:  17%|█████████▌                                                | 1651/10000 [31:19<33:00,  4.22it/s]

Step 1650/10000, Loss: 3.5695
Step 1651/10000, Loss: 3.7859


Training Progress:  17%|█████████▌                                                | 1653/10000 [31:19<28:22,  4.90it/s]

Step 1652/10000, Loss: 3.7317
Step 1653/10000, Loss: 3.6273


Training Progress:  17%|█████████▌                                                | 1655/10000 [31:20<25:34,  5.44it/s]

Step 1654/10000, Loss: 3.6576
Step 1655/10000, Loss: 3.5224


Training Progress:  17%|█████████▌                                                | 1657/10000 [31:20<24:57,  5.57it/s]

Step 1656/10000, Loss: 3.6238
Step 1657/10000, Loss: 3.3223


Training Progress:  17%|█████████▌                                                | 1659/10000 [31:20<24:09,  5.75it/s]

Step 1658/10000, Loss: 3.4029
Step 1659/10000, Loss: 3.6661


Training Progress:  17%|█████████▋                                                | 1661/10000 [31:21<24:05,  5.77it/s]

Step 1660/10000, Loss: 3.4036
Step 1661/10000, Loss: 3.6476


Training Progress:  17%|█████████▋                                                | 1663/10000 [31:21<23:33,  5.90it/s]

Step 1662/10000, Loss: 3.3418
Step 1663/10000, Loss: 3.7637


Training Progress:  17%|█████████▋                                                | 1665/10000 [31:21<23:42,  5.86it/s]

Step 1664/10000, Loss: 3.7940
Step 1665/10000, Loss: 3.7889


Training Progress:  17%|█████████▋                                                | 1667/10000 [31:22<23:51,  5.82it/s]

Step 1666/10000, Loss: 3.7270
Step 1667/10000, Loss: 3.6146


Training Progress:  17%|█████████▋                                                | 1669/10000 [31:22<23:36,  5.88it/s]

Step 1668/10000, Loss: 3.7189
Step 1669/10000, Loss: 3.7053


Training Progress:  17%|█████████▋                                                | 1671/10000 [31:23<23:58,  5.79it/s]

Step 1670/10000, Loss: 3.6445
Step 1671/10000, Loss: 3.5315


Training Progress:  17%|█████████▋                                                | 1673/10000 [31:23<23:31,  5.90it/s]

Step 1672/10000, Loss: 3.4172
Step 1673/10000, Loss: 3.5555


Training Progress:  17%|█████████▋                                                | 1675/10000 [31:23<23:43,  5.85it/s]

Step 1674/10000, Loss: 3.7965
Step 1675/10000, Loss: 3.7501


Training Progress:  17%|█████████▋                                                | 1677/10000 [31:24<23:33,  5.89it/s]

Step 1676/10000, Loss: 3.6963
Step 1677/10000, Loss: 3.6038


Training Progress:  17%|█████████▋                                                | 1679/10000 [31:24<23:16,  5.96it/s]

Step 1678/10000, Loss: 3.6496
Step 1679/10000, Loss: 3.6461


Training Progress:  17%|█████████▋                                                | 1681/10000 [31:24<23:53,  5.80it/s]

Step 1680/10000, Loss: 3.4161
Step 1681/10000, Loss: 3.4916


Training Progress:  17%|█████████▊                                                | 1683/10000 [31:25<23:29,  5.90it/s]

Step 1682/10000, Loss: 3.6630
Step 1683/10000, Loss: 3.4654


Training Progress:  17%|█████████▊                                                | 1685/10000 [31:25<23:48,  5.82it/s]

Step 1684/10000, Loss: 3.5388
Step 1685/10000, Loss: 3.8169


Training Progress:  17%|█████████▊                                                | 1687/10000 [31:25<23:26,  5.91it/s]

Step 1686/10000, Loss: 3.6513
Step 1687/10000, Loss: 3.5874


Training Progress:  17%|█████████▊                                                | 1689/10000 [31:26<23:47,  5.82it/s]

Step 1688/10000, Loss: 3.3763
Step 1689/10000, Loss: 3.5452


Training Progress:  17%|█████████▊                                                | 1691/10000 [31:26<23:36,  5.87it/s]

Step 1690/10000, Loss: 3.4653
Step 1691/10000, Loss: 3.5489


Training Progress:  17%|█████████▊                                                | 1693/10000 [31:26<23:11,  5.97it/s]

Step 1692/10000, Loss: 3.4100
Step 1693/10000, Loss: 3.7144


Training Progress:  17%|█████████▊                                                | 1695/10000 [31:27<23:41,  5.84it/s]

Step 1694/10000, Loss: 3.6930
Step 1695/10000, Loss: 3.6348


Training Progress:  17%|█████████▊                                                | 1697/10000 [31:27<23:28,  5.89it/s]

Step 1696/10000, Loss: 3.7499
Step 1697/10000, Loss: 3.8842


Training Progress:  17%|█████████▊                                                | 1699/10000 [31:27<23:45,  5.82it/s]

Step 1698/10000, Loss: 3.7374
Step 1699/10000, Loss: 3.6067


Training Progress:  17%|█████████▊                                                | 1701/10000 [31:28<23:39,  5.85it/s]

Step 1700/10000, Loss: 3.5514
Step 1701/10000, Loss: 3.6977


Training Progress:  17%|█████████▉                                                | 1703/10000 [31:28<23:08,  5.97it/s]

Step 1702/10000, Loss: 3.6090
Step 1703/10000, Loss: 3.4710


Training Progress:  17%|█████████▉                                                | 1705/10000 [31:28<23:34,  5.86it/s]

Step 1704/10000, Loss: 3.3598
Step 1705/10000, Loss: 3.3712


Training Progress:  17%|█████████▉                                                | 1707/10000 [31:29<23:27,  5.89it/s]

Step 1706/10000, Loss: 3.5068
Step 1707/10000, Loss: 3.4922


Training Progress:  17%|█████████▉                                                | 1709/10000 [31:29<23:48,  5.81it/s]

Step 1708/10000, Loss: 3.4236
Step 1709/10000, Loss: 3.4532


Training Progress:  17%|█████████▉                                                | 1710/10000 [31:29<23:36,  5.85it/s]

Step 1710/10000, Loss: 3.2848
Step 1711/10000, Loss: 3.2604


Training Progress:  17%|█████████▍                                             | 1711/10000 [31:44<10:26:38,  4.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1711_loss3.2604_20250117_132333.pt

New best loss: 3.2604


Training Progress:  17%|█████████▌                                              | 1713/10000 [31:44<5:26:11,  2.36s/it]

Step 1712/10000, Loss: 3.7895
Step 1713/10000, Loss: 3.6177


Training Progress:  17%|█████████▌                                              | 1715/10000 [31:45<2:51:17,  1.24s/it]

Step 1714/10000, Loss: 3.4038
Step 1715/10000, Loss: 3.3815


Training Progress:  17%|█████████▌                                              | 1717/10000 [31:45<1:36:10,  1.44it/s]

Step 1716/10000, Loss: 3.5143
Step 1717/10000, Loss: 3.4325
Step 1718/10000, Loss: 3.1702


Training Progress:  17%|█████████▍                                             | 1718/10000 [32:06<15:12:10,  6.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1718_loss3.1702_20250117_132349.pt

New best loss: 3.1702
Step 1719/10000, Loss: 3.1454


Training Progress:  17%|█████████▍                                             | 1719/10000 [32:27<25:11:55, 10.95s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1719_loss3.1454_20250117_132410.pt

New best loss: 3.1454


Training Progress:  17%|█████████▍                                             | 1721/10000 [32:27<12:40:10,  5.51s/it]

Step 1720/10000, Loss: 3.1737
Step 1721/10000, Loss: 3.7211


Training Progress:  17%|█████████▋                                              | 1723/10000 [32:28<6:24:38,  2.79s/it]

Step 1722/10000, Loss: 3.6203
Step 1723/10000, Loss: 3.6149


Training Progress:  17%|█████████▋                                              | 1725/10000 [32:28<3:20:16,  1.45s/it]

Step 1724/10000, Loss: 3.4957
Step 1725/10000, Loss: 3.5408


Training Progress:  17%|█████████▋                                              | 1727/10000 [32:28<1:50:22,  1.25it/s]

Step 1726/10000, Loss: 3.5303
Step 1727/10000, Loss: 3.4094


Training Progress:  17%|█████████▋                                              | 1729/10000 [32:29<1:05:54,  2.09it/s]

Step 1728/10000, Loss: 3.1555
Step 1729/10000, Loss: 3.2829


Training Progress:  17%|██████████                                                | 1731/10000 [32:29<44:11,  3.12it/s]

Step 1730/10000, Loss: 3.2524
Step 1731/10000, Loss: 3.3328


Training Progress:  17%|██████████                                                | 1733/10000 [32:29<33:44,  4.08it/s]

Step 1732/10000, Loss: 3.4369
Step 1733/10000, Loss: 3.5858


Training Progress:  17%|██████████                                                | 1735/10000 [32:30<28:20,  4.86it/s]

Step 1734/10000, Loss: 3.4915
Step 1735/10000, Loss: 3.3793


Training Progress:  17%|██████████                                                | 1737/10000 [32:30<26:12,  5.26it/s]

Step 1736/10000, Loss: 3.4091
Step 1737/10000, Loss: 3.3846


Training Progress:  17%|██████████                                                | 1739/10000 [32:30<24:55,  5.52it/s]

Step 1738/10000, Loss: 3.5692
Step 1739/10000, Loss: 3.3209


Training Progress:  17%|██████████                                                | 1741/10000 [32:31<23:37,  5.82it/s]

Step 1740/10000, Loss: 3.3752
Step 1741/10000, Loss: 3.5772


Training Progress:  17%|██████████                                                | 1743/10000 [32:31<23:51,  5.77it/s]

Step 1742/10000, Loss: 3.2481
Step 1743/10000, Loss: 3.4296
Step 1744/10000, Loss: 3.1282


Training Progress:  17%|█████████▌                                             | 1744/10000 [32:49<12:22:48,  5.40s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1744_loss3.1282_20250117_132435.pt

New best loss: 3.1282


Training Progress:  17%|█████████▊                                              | 1746/10000 [32:49<6:25:13,  2.80s/it]

Step 1745/10000, Loss: 3.6049
Step 1746/10000, Loss: 3.6702


Training Progress:  17%|█████████▊                                              | 1748/10000 [32:50<3:20:05,  1.45s/it]

Step 1747/10000, Loss: 3.7323
Step 1748/10000, Loss: 3.7003


Training Progress:  18%|█████████▊                                              | 1750/10000 [32:50<1:49:48,  1.25it/s]

Step 1749/10000, Loss: 3.6136
Step 1750/10000, Loss: 3.6029


Training Progress:  18%|█████████▊                                              | 1752/10000 [32:50<1:05:37,  2.09it/s]

Step 1751/10000, Loss: 3.5758
Step 1752/10000, Loss: 3.4936


Training Progress:  18%|██████████▏                                               | 1754/10000 [32:51<44:24,  3.09it/s]

Step 1753/10000, Loss: 3.4282
Step 1754/10000, Loss: 3.3508


Training Progress:  18%|██████████▏                                               | 1756/10000 [32:51<33:35,  4.09it/s]

Step 1755/10000, Loss: 3.4635
Step 1756/10000, Loss: 3.6650


Training Progress:  18%|██████████▏                                               | 1758/10000 [32:51<28:33,  4.81it/s]

Step 1757/10000, Loss: 3.6035
Step 1758/10000, Loss: 3.5951


Training Progress:  18%|██████████▏                                               | 1760/10000 [32:52<25:54,  5.30it/s]

Step 1759/10000, Loss: 3.4712
Step 1760/10000, Loss: 3.5601


Training Progress:  18%|██████████▏                                               | 1762/10000 [32:52<24:35,  5.58it/s]

Step 1761/10000, Loss: 3.5428
Step 1762/10000, Loss: 3.3352


Training Progress:  18%|██████████▏                                               | 1764/10000 [32:52<23:52,  5.75it/s]

Step 1763/10000, Loss: 3.4202
Step 1764/10000, Loss: 3.5826


Training Progress:  18%|██████████▏                                               | 1766/10000 [32:53<23:30,  5.84it/s]

Step 1765/10000, Loss: 3.4078
Step 1766/10000, Loss: 3.4597


Training Progress:  18%|██████████▎                                               | 1768/10000 [32:53<23:37,  5.81it/s]

Step 1767/10000, Loss: 3.7095
Step 1768/10000, Loss: 3.5442


Training Progress:  18%|██████████▎                                               | 1770/10000 [32:53<23:11,  5.91it/s]

Step 1769/10000, Loss: 3.5090
Step 1770/10000, Loss: 3.3268


Training Progress:  18%|██████████▎                                               | 1772/10000 [32:54<23:43,  5.78it/s]

Step 1771/10000, Loss: 3.4866
Step 1772/10000, Loss: 3.4223


Training Progress:  18%|██████████▎                                               | 1774/10000 [32:54<23:14,  5.90it/s]

Step 1773/10000, Loss: 3.4766
Step 1774/10000, Loss: 3.3315


Training Progress:  18%|██████████▎                                               | 1776/10000 [32:54<23:19,  5.88it/s]

Step 1775/10000, Loss: 3.5249
Step 1776/10000, Loss: 3.5275


Training Progress:  18%|██████████▎                                               | 1778/10000 [32:55<23:20,  5.87it/s]

Step 1777/10000, Loss: 3.4989
Step 1778/10000, Loss: 3.6373


Training Progress:  18%|██████████▎                                               | 1780/10000 [32:55<23:00,  5.96it/s]

Step 1779/10000, Loss: 3.8047
Step 1780/10000, Loss: 3.6505


Training Progress:  18%|██████████▎                                               | 1782/10000 [32:55<23:06,  5.93it/s]

Step 1781/10000, Loss: 3.4988
Step 1782/10000, Loss: 3.4456


Training Progress:  18%|██████████▎                                               | 1784/10000 [32:56<23:04,  5.94it/s]

Step 1783/10000, Loss: 3.5751
Step 1784/10000, Loss: 3.5076


Training Progress:  18%|██████████▎                                               | 1786/10000 [32:56<23:33,  5.81it/s]

Step 1785/10000, Loss: 3.3470
Step 1786/10000, Loss: 3.2148


Training Progress:  18%|██████████▎                                               | 1788/10000 [32:56<23:36,  5.80it/s]

Step 1787/10000, Loss: 3.2377
Step 1788/10000, Loss: 3.3682


Training Progress:  18%|██████████▍                                               | 1790/10000 [32:57<23:29,  5.83it/s]

Step 1789/10000, Loss: 3.2967
Step 1790/10000, Loss: 3.3543


Training Progress:  18%|██████████▍                                               | 1792/10000 [32:57<23:15,  5.88it/s]

Step 1791/10000, Loss: 3.4539
Step 1792/10000, Loss: 3.3030


Training Progress:  18%|██████████▍                                               | 1794/10000 [32:57<23:09,  5.90it/s]

Step 1793/10000, Loss: 3.2392
Step 1794/10000, Loss: 3.7257


Training Progress:  18%|██████████▍                                               | 1796/10000 [32:58<23:22,  5.85it/s]

Step 1795/10000, Loss: 3.4690
Step 1796/10000, Loss: 3.2464


Training Progress:  18%|██████████▍                                               | 1798/10000 [32:58<22:59,  5.94it/s]

Step 1797/10000, Loss: 3.2315
Step 1798/10000, Loss: 3.3730


Training Progress:  18%|██████████▍                                               | 1800/10000 [32:58<22:55,  5.96it/s]

Step 1799/10000, Loss: 3.3357
Step 1800/10000, Loss: 3.1801


Training Progress:  18%|██████████▍                                               | 1802/10000 [32:59<23:01,  5.93it/s]

Step 1801/10000, Loss: 3.1676
Step 1802/10000, Loss: 3.1935


Training Progress:  18%|██████████▍                                               | 1804/10000 [32:59<23:25,  5.83it/s]

Step 1803/10000, Loss: 3.6438
Step 1804/10000, Loss: 3.5030


Training Progress:  18%|██████████▍                                               | 1806/10000 [32:59<23:21,  5.85it/s]

Step 1805/10000, Loss: 3.5411
Step 1806/10000, Loss: 3.3948


Training Progress:  18%|██████████▍                                               | 1808/10000 [33:00<23:27,  5.82it/s]

Step 1807/10000, Loss: 3.4446
Step 1808/10000, Loss: 3.4239


Training Progress:  18%|██████████▍                                               | 1809/10000 [33:00<22:56,  5.95it/s]

Step 1809/10000, Loss: 3.3353
Step 1810/10000, Loss: 3.1172


Training Progress:  18%|█████████▉                                             | 1810/10000 [33:15<10:18:26,  4.53s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1810_loss3.1172_20250117_132504.pt

New best loss: 3.1172


Training Progress:  18%|██████████▏                                             | 1812/10000 [33:15<5:21:15,  2.35s/it]

Step 1811/10000, Loss: 3.2202
Step 1812/10000, Loss: 3.1874


Training Progress:  18%|██████████▏                                             | 1814/10000 [33:16<2:49:15,  1.24s/it]

Step 1813/10000, Loss: 3.2376
Step 1814/10000, Loss: 3.3235


Training Progress:  18%|██████████▏                                             | 1816/10000 [33:16<1:34:55,  1.44it/s]

Step 1815/10000, Loss: 3.4765
Step 1816/10000, Loss: 3.4273


Training Progress:  18%|██████████▌                                               | 1818/10000 [33:16<58:23,  2.34it/s]

Step 1817/10000, Loss: 3.3488
Step 1818/10000, Loss: 3.3366


Training Progress:  18%|██████████▌                                               | 1820/10000 [33:17<40:22,  3.38it/s]

Step 1819/10000, Loss: 3.2535
Step 1820/10000, Loss: 3.3850


Training Progress:  18%|██████████▌                                               | 1822/10000 [33:17<31:14,  4.36it/s]

Step 1821/10000, Loss: 3.1408
Step 1822/10000, Loss: 3.2050


Training Progress:  18%|██████████▌                                               | 1824/10000 [33:17<27:22,  4.98it/s]

Step 1823/10000, Loss: 3.4676
Step 1824/10000, Loss: 3.2107


Training Progress:  18%|██████████▌                                               | 1825/10000 [33:17<26:22,  5.16it/s]

Step 1825/10000, Loss: 3.3921
Step 1826/10000, Loss: 3.0911


Training Progress:  18%|██████████                                             | 1826/10000 [33:37<13:53:25,  6.12s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1826_loss3.0911_20250117_132521.pt

New best loss: 3.0911


Training Progress:  18%|██████████▏                                             | 1828/10000 [33:38<7:08:16,  3.14s/it]

Step 1827/10000, Loss: 3.4725
Step 1828/10000, Loss: 3.5450


Training Progress:  18%|██████████▏                                             | 1830/10000 [33:38<3:41:56,  1.63s/it]

Step 1829/10000, Loss: 3.5671
Step 1830/10000, Loss: 3.5265


Training Progress:  18%|██████████▎                                             | 1832/10000 [33:39<2:00:37,  1.13it/s]

Step 1831/10000, Loss: 3.4245
Step 1832/10000, Loss: 3.4825


Training Progress:  18%|██████████▎                                             | 1834/10000 [33:39<1:10:48,  1.92it/s]

Step 1833/10000, Loss: 3.5039
Step 1834/10000, Loss: 3.4298


Training Progress:  18%|██████████▋                                               | 1836/10000 [33:39<46:30,  2.93it/s]

Step 1835/10000, Loss: 3.3427
Step 1836/10000, Loss: 3.2626


Training Progress:  18%|██████████▋                                               | 1838/10000 [33:40<34:40,  3.92it/s]

Step 1837/10000, Loss: 3.3321
Step 1838/10000, Loss: 3.5394


Training Progress:  18%|██████████▋                                               | 1840/10000 [33:40<29:00,  4.69it/s]

Step 1839/10000, Loss: 3.5108
Step 1840/10000, Loss: 3.4583


Training Progress:  18%|██████████▋                                               | 1842/10000 [33:40<25:33,  5.32it/s]

Step 1841/10000, Loss: 3.3569
Step 1842/10000, Loss: 3.4375


Training Progress:  18%|██████████▋                                               | 1844/10000 [33:41<24:28,  5.55it/s]

Step 1843/10000, Loss: 3.4873
Step 1844/10000, Loss: 3.2795


Training Progress:  18%|██████████▋                                               | 1846/10000 [33:41<23:55,  5.68it/s]

Step 1845/10000, Loss: 3.3388
Step 1846/10000, Loss: 3.4606


Training Progress:  18%|██████████▋                                               | 1848/10000 [33:41<23:37,  5.75it/s]

Step 1847/10000, Loss: 3.3258
Step 1848/10000, Loss: 3.3272


Training Progress:  18%|██████████▋                                               | 1850/10000 [33:42<23:05,  5.88it/s]

Step 1849/10000, Loss: 3.6236
Step 1850/10000, Loss: 3.4601


Training Progress:  19%|██████████▋                                               | 1852/10000 [33:42<23:21,  5.81it/s]

Step 1851/10000, Loss: 3.4172
Step 1852/10000, Loss: 3.2182


Training Progress:  19%|██████████▊                                               | 1854/10000 [33:42<23:16,  5.83it/s]

Step 1853/10000, Loss: 3.3836
Step 1854/10000, Loss: 3.3047


Training Progress:  19%|██████████▊                                               | 1856/10000 [33:43<23:18,  5.82it/s]

Step 1855/10000, Loss: 3.3774
Step 1856/10000, Loss: 3.2563


Training Progress:  19%|██████████▊                                               | 1858/10000 [33:43<23:02,  5.89it/s]

Step 1857/10000, Loss: 3.4865
Step 1858/10000, Loss: 3.4808


Training Progress:  19%|██████████▊                                               | 1860/10000 [33:43<23:01,  5.89it/s]

Step 1859/10000, Loss: 3.4375
Step 1860/10000, Loss: 3.5925


Training Progress:  19%|██████████▊                                               | 1862/10000 [33:44<23:18,  5.82it/s]

Step 1861/10000, Loss: 3.6615
Step 1862/10000, Loss: 3.5010


Training Progress:  19%|██████████▊                                               | 1864/10000 [33:44<22:53,  5.92it/s]

Step 1863/10000, Loss: 3.3665
Step 1864/10000, Loss: 3.3287


Training Progress:  19%|██████████▊                                               | 1866/10000 [33:44<23:25,  5.79it/s]

Step 1865/10000, Loss: 3.4924
Step 1866/10000, Loss: 3.4588


Training Progress:  19%|██████████▊                                               | 1868/10000 [33:45<23:00,  5.89it/s]

Step 1867/10000, Loss: 3.3173
Step 1868/10000, Loss: 3.1886


Training Progress:  19%|██████████▊                                               | 1870/10000 [33:45<23:19,  5.81it/s]

Step 1869/10000, Loss: 3.2375
Step 1870/10000, Loss: 3.3018


Training Progress:  19%|██████████▊                                               | 1872/10000 [33:45<23:07,  5.86it/s]

Step 1871/10000, Loss: 3.2214
Step 1872/10000, Loss: 3.1864


Training Progress:  19%|██████████▊                                               | 1874/10000 [33:46<22:40,  5.97it/s]

Step 1873/10000, Loss: 3.2180
Step 1874/10000, Loss: 3.1215


Training Progress:  19%|██████████▉                                               | 1876/10000 [33:46<23:09,  5.85it/s]

Step 1875/10000, Loss: 3.1681
Step 1876/10000, Loss: 3.7344


Training Progress:  19%|██████████▉                                               | 1878/10000 [33:47<22:59,  5.89it/s]

Step 1877/10000, Loss: 3.5316
Step 1878/10000, Loss: 3.2984


Training Progress:  19%|██████████▉                                               | 1880/10000 [33:47<23:14,  5.82it/s]

Step 1879/10000, Loss: 3.1847
Step 1880/10000, Loss: 3.3222


Training Progress:  19%|██████████▉                                               | 1881/10000 [33:47<22:49,  5.93it/s]

Step 1881/10000, Loss: 3.1835
Step 1882/10000, Loss: 3.0300


Training Progress:  19%|██████████▎                                            | 1882/10000 [34:03<11:09:27,  4.95s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1882_loss3.0300_20250117_132551.pt

New best loss: 3.0300
Step 1883/10000, Loss: 2.9986


Training Progress:  19%|██████████▎                                            | 1883/10000 [34:25<22:18:16,  9.89s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1883_loss2.9986_20250117_132608.pt

New best loss: 2.9986


Training Progress:  19%|██████████▎                                            | 1885/10000 [34:25<11:16:30,  5.00s/it]

Step 1884/10000, Loss: 3.0862
Step 1885/10000, Loss: 3.6287


Training Progress:  19%|██████████▌                                             | 1887/10000 [34:26<5:42:57,  2.54s/it]

Step 1886/10000, Loss: 3.5300
Step 1887/10000, Loss: 3.6052


Training Progress:  19%|██████████▌                                             | 1889/10000 [34:26<2:59:23,  1.33s/it]

Step 1888/10000, Loss: 3.4301
Step 1889/10000, Loss: 3.4326


Training Progress:  19%|██████████▌                                             | 1891/10000 [34:26<1:40:05,  1.35it/s]

Step 1890/10000, Loss: 3.3596
Step 1891/10000, Loss: 3.2248


Training Progress:  19%|██████████▌                                             | 1893/10000 [34:27<1:00:39,  2.23it/s]

Step 1892/10000, Loss: 3.0369
Step 1893/10000, Loss: 3.1284


Training Progress:  19%|██████████▉                                               | 1895/10000 [34:27<40:59,  3.29it/s]

Step 1894/10000, Loss: 3.1327
Step 1895/10000, Loss: 3.2136


Training Progress:  19%|███████████                                               | 1897/10000 [34:27<32:02,  4.21it/s]

Step 1896/10000, Loss: 3.2632
Step 1897/10000, Loss: 3.4889


Training Progress:  19%|███████████                                               | 1899/10000 [34:28<27:09,  4.97it/s]

Step 1898/10000, Loss: 3.3910
Step 1899/10000, Loss: 3.3476


Training Progress:  19%|███████████                                               | 1901/10000 [34:28<25:21,  5.32it/s]

Step 1900/10000, Loss: 3.3473
Step 1901/10000, Loss: 3.2597


Training Progress:  19%|███████████                                               | 1903/10000 [34:28<23:51,  5.66it/s]

Step 1902/10000, Loss: 3.3289
Step 1903/10000, Loss: 3.0650


Training Progress:  19%|███████████                                               | 1905/10000 [34:29<23:29,  5.74it/s]

Step 1904/10000, Loss: 3.1296
Step 1905/10000, Loss: 3.3584


Training Progress:  19%|███████████                                               | 1907/10000 [34:29<23:25,  5.76it/s]

Step 1906/10000, Loss: 3.0923
Step 1907/10000, Loss: 3.2547


Training Progress:  19%|███████████                                               | 1909/10000 [34:29<23:19,  5.78it/s]

Step 1908/10000, Loss: 3.0112
Step 1909/10000, Loss: 3.3785


Training Progress:  19%|███████████                                               | 1911/10000 [34:30<23:07,  5.83it/s]

Step 1910/10000, Loss: 3.4375
Step 1911/10000, Loss: 3.4855


Training Progress:  19%|███████████                                               | 1913/10000 [34:30<22:53,  5.89it/s]

Step 1912/10000, Loss: 3.4503
Step 1913/10000, Loss: 3.3217


Training Progress:  19%|███████████                                               | 1915/10000 [34:30<23:00,  5.85it/s]

Step 1914/10000, Loss: 3.4273
Step 1915/10000, Loss: 3.4262


Training Progress:  19%|███████████                                               | 1917/10000 [34:31<23:03,  5.84it/s]

Step 1916/10000, Loss: 3.3714
Step 1917/10000, Loss: 3.2645


Training Progress:  19%|███████████▏                                              | 1919/10000 [34:31<22:33,  5.97it/s]

Step 1918/10000, Loss: 3.1682
Step 1919/10000, Loss: 3.2581


Training Progress:  19%|███████████▏                                              | 1921/10000 [34:31<22:45,  5.92it/s]

Step 1920/10000, Loss: 3.4638
Step 1921/10000, Loss: 3.4357


Training Progress:  19%|███████████▏                                              | 1923/10000 [34:32<22:52,  5.88it/s]

Step 1922/10000, Loss: 3.3870
Step 1923/10000, Loss: 3.2678


Training Progress:  19%|███████████▏                                              | 1925/10000 [34:32<23:12,  5.80it/s]

Step 1924/10000, Loss: 3.3551
Step 1925/10000, Loss: 3.3913


Training Progress:  19%|███████████▏                                              | 1927/10000 [34:32<23:14,  5.79it/s]

Step 1926/10000, Loss: 3.1950
Step 1927/10000, Loss: 3.2748


Training Progress:  19%|███████████▏                                              | 1929/10000 [34:33<22:31,  5.97it/s]

Step 1928/10000, Loss: 3.4127
Step 1929/10000, Loss: 3.2649


Training Progress:  19%|███████████▏                                              | 1931/10000 [34:33<22:59,  5.85it/s]

Step 1930/10000, Loss: 3.2573
Step 1931/10000, Loss: 3.5138


Training Progress:  19%|███████████▏                                              | 1933/10000 [34:33<22:56,  5.86it/s]

Step 1932/10000, Loss: 3.3648
Step 1933/10000, Loss: 3.3322


Training Progress:  19%|███████████▏                                              | 1935/10000 [34:34<22:55,  5.87it/s]

Step 1934/10000, Loss: 3.1227
Step 1935/10000, Loss: 3.2992


Training Progress:  19%|███████████▏                                              | 1937/10000 [34:34<22:46,  5.90it/s]

Step 1936/10000, Loss: 3.1951
Step 1937/10000, Loss: 3.2654


Training Progress:  19%|███████████▏                                              | 1939/10000 [34:34<22:44,  5.91it/s]

Step 1938/10000, Loss: 3.1198
Step 1939/10000, Loss: 3.3650


Training Progress:  19%|███████████▎                                              | 1941/10000 [34:35<22:55,  5.86it/s]

Step 1940/10000, Loss: 3.3566
Step 1941/10000, Loss: 3.3068


Training Progress:  19%|███████████▎                                              | 1943/10000 [34:35<22:31,  5.96it/s]

Step 1942/10000, Loss: 3.4676
Step 1943/10000, Loss: 3.5363


Training Progress:  19%|███████████▎                                              | 1945/10000 [34:35<22:34,  5.95it/s]

Step 1944/10000, Loss: 3.4034
Step 1945/10000, Loss: 3.2610


Training Progress:  19%|███████████▎                                              | 1947/10000 [34:36<22:50,  5.88it/s]

Step 1946/10000, Loss: 3.1873
Step 1947/10000, Loss: 3.3441


Training Progress:  19%|███████████▎                                              | 1949/10000 [34:36<23:03,  5.82it/s]

Step 1948/10000, Loss: 3.2960
Step 1949/10000, Loss: 3.1030


Training Progress:  20%|███████████▎                                              | 1951/10000 [34:36<23:02,  5.82it/s]

Step 1950/10000, Loss: 3.0163
Step 1951/10000, Loss: 3.0969


Training Progress:  20%|███████████▎                                              | 1953/10000 [34:37<22:24,  5.99it/s]

Step 1952/10000, Loss: 3.1969
Step 1953/10000, Loss: 3.0964


Training Progress:  20%|███████████▎                                              | 1955/10000 [34:37<22:55,  5.85it/s]

Step 1954/10000, Loss: 3.0873
Step 1955/10000, Loss: 3.1409


Training Progress:  20%|███████████▎                                              | 1957/10000 [34:37<22:51,  5.86it/s]

Step 1956/10000, Loss: 3.0297
Step 1957/10000, Loss: 3.0060


Training Progress:  20%|███████████▎                                              | 1959/10000 [34:38<23:00,  5.83it/s]

Step 1958/10000, Loss: 3.5011
Step 1959/10000, Loss: 3.2703


Training Progress:  20%|███████████▎                                              | 1961/10000 [34:38<22:43,  5.89it/s]

Step 1960/10000, Loss: 3.1352
Step 1961/10000, Loss: 3.1148


Training Progress:  20%|███████████▍                                              | 1963/10000 [34:38<22:58,  5.83it/s]

Step 1962/10000, Loss: 3.2467
Step 1963/10000, Loss: 3.1644
Step 1964/10000, Loss: 2.9946


Training Progress:  20%|██████████▊                                            | 1964/10000 [34:54<10:53:05,  4.88s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1964_loss2.9946_20250117_132642.pt

New best loss: 2.9946
Step 1965/10000, Loss: 2.9600


Training Progress:  20%|██████████▊                                            | 1965/10000 [35:15<21:09:23,  9.48s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1965_loss2.9600_20250117_132659.pt

New best loss: 2.9600


Training Progress:  20%|██████████▊                                            | 1967/10000 [35:15<10:41:49,  4.79s/it]

Step 1966/10000, Loss: 2.9852
Step 1967/10000, Loss: 3.4349


Training Progress:  20%|███████████                                             | 1969/10000 [35:16<5:25:35,  2.43s/it]

Step 1968/10000, Loss: 3.3039
Step 1969/10000, Loss: 3.3370


Training Progress:  20%|███████████                                             | 1971/10000 [35:16<2:51:24,  1.28s/it]

Step 1970/10000, Loss: 3.2419
Step 1971/10000, Loss: 3.3394


Training Progress:  20%|███████████                                             | 1973/10000 [35:16<1:35:16,  1.40it/s]

Step 1972/10000, Loss: 3.3507
Step 1973/10000, Loss: 3.2658


Training Progress:  20%|███████████▍                                              | 1975/10000 [35:17<58:34,  2.28it/s]

Step 1974/10000, Loss: 3.0210
Step 1975/10000, Loss: 3.0845


Training Progress:  20%|███████████▍                                              | 1977/10000 [35:17<39:50,  3.36it/s]

Step 1976/10000, Loss: 3.0164
Step 1977/10000, Loss: 3.0558


Training Progress:  20%|███████████▍                                              | 1979/10000 [35:17<31:33,  4.24it/s]

Step 1978/10000, Loss: 3.1558
Step 1979/10000, Loss: 3.3289


Training Progress:  20%|███████████▍                                              | 1981/10000 [35:18<26:59,  4.95it/s]

Step 1980/10000, Loss: 3.2778
Step 1981/10000, Loss: 3.2282


Training Progress:  20%|███████████▌                                              | 1983/10000 [35:18<24:44,  5.40it/s]

Step 1982/10000, Loss: 3.2187
Step 1983/10000, Loss: 3.1755


Training Progress:  20%|███████████▌                                              | 1985/10000 [35:18<23:53,  5.59it/s]

Step 1984/10000, Loss: 3.3092
Step 1985/10000, Loss: 3.0272


Training Progress:  20%|███████████▌                                              | 1987/10000 [35:19<23:12,  5.75it/s]

Step 1986/10000, Loss: 3.0993
Step 1987/10000, Loss: 3.3003


Training Progress:  20%|███████████▌                                              | 1989/10000 [35:19<22:52,  5.84it/s]

Step 1988/10000, Loss: 3.0427
Step 1989/10000, Loss: 3.1718
Step 1990/10000, Loss: 2.9272


Training Progress:  20%|██████████▉                                            | 1990/10000 [35:35<11:13:12,  5.04s/it]


Checkpoint saved: checkpoints\best\checkpoint_step1990_loss2.9272_20250117_132723.pt

New best loss: 2.9272


Training Progress:  20%|███████████▏                                            | 1992/10000 [35:36<5:50:06,  2.62s/it]

Step 1991/10000, Loss: 3.2692
Step 1992/10000, Loss: 3.3323


Training Progress:  20%|███████████▏                                            | 1994/10000 [35:36<3:03:22,  1.37s/it]

Step 1993/10000, Loss: 3.3744
Step 1994/10000, Loss: 3.3219


Training Progress:  20%|███████████▏                                            | 1996/10000 [35:37<1:41:20,  1.32it/s]

Step 1995/10000, Loss: 3.2161
Step 1996/10000, Loss: 3.2838


Training Progress:  20%|███████████▏                                            | 1998/10000 [35:37<1:01:24,  2.17it/s]

Step 1997/10000, Loss: 3.2709
Step 1998/10000, Loss: 3.2364


Training Progress:  20%|███████████▌                                              | 1999/10000 [35:37<49:40,  2.68it/s]

Step 1999/10000, Loss: 3.1848
Step 2000/10000, Loss: 3.1056


Training Progress:  20%|███████████                                            | 2000/10000 [35:57<13:35:26,  6.12s/it]


Checkpoint saved: checkpoints\checkpoint_step2000_loss3.1056_20250117_132741.pt


Training Progress:  20%|███████████▏                                            | 2002/10000 [35:57<6:59:50,  3.15s/it]

Step 2001/10000, Loss: 3.1931
Step 2002/10000, Loss: 3.3700


Training Progress:  20%|███████████▏                                            | 2004/10000 [35:58<3:37:08,  1.63s/it]

Step 2003/10000, Loss: 3.3468
Step 2004/10000, Loss: 3.2769


Training Progress:  20%|███████████▏                                            | 2006/10000 [35:58<1:58:13,  1.13it/s]

Step 2005/10000, Loss: 3.1617
Step 2006/10000, Loss: 3.2255


Training Progress:  20%|███████████▏                                            | 2008/10000 [35:58<1:09:25,  1.92it/s]

Step 2007/10000, Loss: 3.2277
Step 2008/10000, Loss: 3.0435


Training Progress:  20%|███████████▋                                              | 2010/10000 [35:59<45:41,  2.91it/s]

Step 2009/10000, Loss: 3.1348
Step 2010/10000, Loss: 3.3101


Training Progress:  20%|███████████▋                                              | 2012/10000 [35:59<33:40,  3.95it/s]

Step 2011/10000, Loss: 3.2018
Step 2012/10000, Loss: 3.2188


Training Progress:  20%|███████████▋                                              | 2014/10000 [35:59<28:01,  4.75it/s]

Step 2013/10000, Loss: 3.4116
Step 2014/10000, Loss: 3.2448


Training Progress:  20%|███████████▋                                              | 2016/10000 [36:00<25:20,  5.25it/s]

Step 2015/10000, Loss: 3.2135
Step 2016/10000, Loss: 3.0284


Training Progress:  20%|███████████▋                                              | 2018/10000 [36:00<24:14,  5.49it/s]

Step 2017/10000, Loss: 3.2214
Step 2018/10000, Loss: 3.1372


Training Progress:  20%|███████████▋                                              | 2020/10000 [36:00<23:27,  5.67it/s]

Step 2019/10000, Loss: 3.1801
Step 2020/10000, Loss: 3.0748


Training Progress:  20%|███████████▋                                              | 2022/10000 [36:01<23:12,  5.73it/s]

Step 2021/10000, Loss: 3.2448
Step 2022/10000, Loss: 3.2280


Training Progress:  20%|███████████▋                                              | 2024/10000 [36:01<22:46,  5.84it/s]

Step 2023/10000, Loss: 3.2023
Step 2024/10000, Loss: 3.3500


Training Progress:  20%|███████████▊                                              | 2026/10000 [36:01<22:49,  5.82it/s]

Step 2025/10000, Loss: 3.4428
Step 2026/10000, Loss: 3.3098


Training Progress:  20%|███████████▊                                              | 2028/10000 [36:02<22:59,  5.78it/s]

Step 2027/10000, Loss: 3.1635
Step 2028/10000, Loss: 3.1184


Training Progress:  20%|███████████▊                                              | 2030/10000 [36:02<22:21,  5.94it/s]

Step 2029/10000, Loss: 3.2321
Step 2030/10000, Loss: 3.1808


Training Progress:  20%|███████████▊                                              | 2031/10000 [36:02<22:44,  5.84it/s]

Step 2031/10000, Loss: 3.0016
Step 2032/10000, Loss: 2.8518


Training Progress:  20%|███████████▏                                           | 2032/10000 [36:18<10:40:06,  4.82s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2032_loss2.8518_20250117_132806.pt

New best loss: 2.8518


Training Progress:  20%|███████████▍                                            | 2034/10000 [36:19<5:32:06,  2.50s/it]

Step 2033/10000, Loss: 2.9395
Step 2034/10000, Loss: 3.0301


Training Progress:  20%|███████████▍                                            | 2036/10000 [36:19<2:54:07,  1.31s/it]

Step 2035/10000, Loss: 2.9081
Step 2036/10000, Loss: 2.9347


Training Progress:  20%|███████████▍                                            | 2038/10000 [36:19<1:36:30,  1.38it/s]

Step 2037/10000, Loss: 2.9816
Step 2038/10000, Loss: 2.8883


Training Progress:  20%|███████████▊                                              | 2040/10000 [36:20<58:48,  2.26it/s]

Step 2039/10000, Loss: 2.8868
Step 2040/10000, Loss: 3.3626


Training Progress:  20%|███████████▊                                              | 2042/10000 [36:20<40:09,  3.30it/s]

Step 2041/10000, Loss: 3.1550
Step 2042/10000, Loss: 2.9630


Training Progress:  20%|███████████▊                                              | 2044/10000 [36:20<31:25,  4.22it/s]

Step 2043/10000, Loss: 2.9434
Step 2044/10000, Loss: 3.0634


Training Progress:  20%|███████████▊                                              | 2045/10000 [36:20<28:25,  4.66it/s]

Step 2045/10000, Loss: 2.9982
Step 2046/10000, Loss: 2.8253


Training Progress:  20%|███████████▎                                           | 2046/10000 [36:39<12:53:09,  5.83s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2046_loss2.8253_20250117_132824.pt

New best loss: 2.8253
Step 2047/10000, Loss: 2.8139


Training Progress:  20%|███████████▎                                           | 2047/10000 [37:01<23:12:14, 10.50s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2047_loss2.8139_20250117_132844.pt

New best loss: 2.8139


Training Progress:  20%|███████████▎                                           | 2049/10000 [37:01<11:40:33,  5.29s/it]

Step 2048/10000, Loss: 2.8777
Step 2049/10000, Loss: 3.3252


Training Progress:  21%|███████████▍                                            | 2051/10000 [37:02<5:54:47,  2.68s/it]

Step 2050/10000, Loss: 3.2602
Step 2051/10000, Loss: 3.2905


Training Progress:  21%|███████████▍                                            | 2053/10000 [37:02<3:04:50,  1.40s/it]

Step 2052/10000, Loss: 3.1413
Step 2053/10000, Loss: 3.1887


Training Progress:  21%|███████████▌                                            | 2055/10000 [37:02<1:42:12,  1.30it/s]

Step 2054/10000, Loss: 3.1776
Step 2055/10000, Loss: 3.0760


Training Progress:  21%|███████████▌                                            | 2057/10000 [37:03<1:01:45,  2.14it/s]

Step 2056/10000, Loss: 2.8802
Step 2057/10000, Loss: 3.0263


Training Progress:  21%|███████████▉                                              | 2059/10000 [37:03<41:33,  3.18it/s]

Step 2058/10000, Loss: 3.0084
Step 2059/10000, Loss: 3.0471


Training Progress:  21%|███████████▉                                              | 2061/10000 [37:03<31:41,  4.17it/s]

Step 2060/10000, Loss: 3.1221
Step 2061/10000, Loss: 3.2315


Training Progress:  21%|███████████▉                                              | 2063/10000 [37:04<27:09,  4.87it/s]

Step 2062/10000, Loss: 3.1360
Step 2063/10000, Loss: 3.0673


Training Progress:  21%|███████████▉                                              | 2065/10000 [37:04<24:47,  5.34it/s]

Step 2064/10000, Loss: 3.1266
Step 2065/10000, Loss: 3.0719


Training Progress:  21%|███████████▉                                              | 2067/10000 [37:04<23:50,  5.55it/s]

Step 2066/10000, Loss: 3.2305
Step 2067/10000, Loss: 2.9411


Training Progress:  21%|████████████                                              | 2069/10000 [37:05<22:42,  5.82it/s]

Step 2068/10000, Loss: 2.9848
Step 2069/10000, Loss: 3.1981


Training Progress:  21%|████████████                                              | 2071/10000 [37:05<22:59,  5.75it/s]

Step 2070/10000, Loss: 2.9415
Step 2071/10000, Loss: 3.0913


Training Progress:  21%|████████████                                              | 2073/10000 [37:05<22:37,  5.84it/s]

Step 2072/10000, Loss: 2.8431
Step 2073/10000, Loss: 3.1580


Training Progress:  21%|████████████                                              | 2075/10000 [37:06<22:45,  5.80it/s]

Step 2074/10000, Loss: 3.2374
Step 2075/10000, Loss: 3.2643


Training Progress:  21%|████████████                                              | 2077/10000 [37:06<22:33,  5.85it/s]

Step 2076/10000, Loss: 3.2478
Step 2077/10000, Loss: 3.1611


Training Progress:  21%|████████████                                              | 2079/10000 [37:06<22:10,  5.95it/s]

Step 2078/10000, Loss: 3.1784
Step 2079/10000, Loss: 3.1615


Training Progress:  21%|████████████                                              | 2081/10000 [37:07<22:29,  5.87it/s]

Step 2080/10000, Loss: 3.1140
Step 2081/10000, Loss: 3.0956


Training Progress:  21%|████████████                                              | 2083/10000 [37:07<22:06,  5.97it/s]

Step 2082/10000, Loss: 2.9718
Step 2083/10000, Loss: 3.0808


Training Progress:  21%|████████████                                              | 2085/10000 [37:07<22:40,  5.82it/s]

Step 2084/10000, Loss: 3.2715
Step 2085/10000, Loss: 3.2504


Training Progress:  21%|████████████                                              | 2087/10000 [37:08<22:07,  5.96it/s]

Step 2086/10000, Loss: 3.1894
Step 2087/10000, Loss: 3.1300


Training Progress:  21%|████████████                                              | 2089/10000 [37:08<22:04,  5.97it/s]

Step 2088/10000, Loss: 3.1626
Step 2089/10000, Loss: 3.1394


Training Progress:  21%|████████████▏                                             | 2091/10000 [37:09<22:25,  5.88it/s]

Step 2090/10000, Loss: 2.9483
Step 2091/10000, Loss: 3.0190


Training Progress:  21%|████████████▏                                             | 2093/10000 [37:09<22:42,  5.80it/s]

Step 2092/10000, Loss: 3.1773
Step 2093/10000, Loss: 3.0569


Training Progress:  21%|████████████▏                                             | 2095/10000 [37:09<22:27,  5.87it/s]

Step 2094/10000, Loss: 3.1092
Step 2095/10000, Loss: 3.3436


Training Progress:  21%|████████████▏                                             | 2097/10000 [37:10<22:06,  5.96it/s]

Step 2096/10000, Loss: 3.2137
Step 2097/10000, Loss: 3.1580


Training Progress:  21%|████████████▏                                             | 2099/10000 [37:10<22:32,  5.84it/s]

Step 2098/10000, Loss: 2.9255
Step 2099/10000, Loss: 3.1071


Training Progress:  21%|████████████▏                                             | 2101/10000 [37:10<22:47,  5.78it/s]

Step 2100/10000, Loss: 3.0050
Step 2101/10000, Loss: 3.0082


Training Progress:  21%|████████████▏                                             | 2103/10000 [37:11<22:34,  5.83it/s]

Step 2102/10000, Loss: 2.9196
Step 2103/10000, Loss: 3.1625


Training Progress:  21%|████████████▏                                             | 2105/10000 [37:11<22:22,  5.88it/s]

Step 2104/10000, Loss: 3.1719
Step 2105/10000, Loss: 3.1629


Training Progress:  21%|████████████▏                                             | 2107/10000 [37:11<22:41,  5.80it/s]

Step 2106/10000, Loss: 3.3265
Step 2107/10000, Loss: 3.3766


Training Progress:  21%|████████████▏                                             | 2109/10000 [37:12<22:21,  5.88it/s]

Step 2108/10000, Loss: 3.2048
Step 2109/10000, Loss: 3.0569


Training Progress:  21%|████████████▏                                             | 2111/10000 [37:12<22:37,  5.81it/s]

Step 2110/10000, Loss: 3.0156
Step 2111/10000, Loss: 3.1548


Training Progress:  21%|████████████▎                                             | 2113/10000 [37:12<22:36,  5.81it/s]

Step 2112/10000, Loss: 3.1199
Step 2113/10000, Loss: 2.9780


Training Progress:  21%|████████████▎                                             | 2115/10000 [37:13<22:00,  5.97it/s]

Step 2114/10000, Loss: 2.8144
Step 2115/10000, Loss: 2.9108


Training Progress:  21%|████████████▎                                             | 2117/10000 [37:13<22:28,  5.85it/s]

Step 2116/10000, Loss: 2.9862
Step 2117/10000, Loss: 2.8479


Training Progress:  21%|████████████▎                                             | 2119/10000 [37:13<22:13,  5.91it/s]

Step 2118/10000, Loss: 2.8410
Step 2119/10000, Loss: 2.8572
Step 2120/10000, Loss: 2.7560


Training Progress:  21%|███████████▋                                           | 2120/10000 [37:29<10:19:14,  4.72s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2120_loss2.7560_20250117_132917.pt

New best loss: 2.7560


Training Progress:  21%|███████████▉                                            | 2122/10000 [37:29<5:20:43,  2.44s/it]

Step 2121/10000, Loss: 2.7698
Step 2122/10000, Loss: 3.2112


Training Progress:  21%|███████████▉                                            | 2124/10000 [37:29<2:48:06,  1.28s/it]

Step 2123/10000, Loss: 3.0259
Step 2124/10000, Loss: 2.8786


Training Progress:  21%|███████████▉                                            | 2126/10000 [37:30<1:34:00,  1.40it/s]

Step 2125/10000, Loss: 2.8581
Step 2126/10000, Loss: 2.9550


Training Progress:  21%|███████████▉                                            | 2127/10000 [37:30<1:12:44,  1.80it/s]

Step 2127/10000, Loss: 2.8805
Step 2128/10000, Loss: 2.7129


Training Progress:  21%|███████████▋                                           | 2128/10000 [37:49<13:10:17,  6.02s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2128_loss2.7129_20250117_132934.pt

New best loss: 2.7129
Step 2129/10000, Loss: 2.6663


Training Progress:  21%|███████████▋                                           | 2129/10000 [38:11<23:52:38, 10.92s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2129_loss2.6663_20250117_132953.pt

New best loss: 2.6663


Training Progress:  21%|███████████▋                                           | 2131/10000 [38:12<12:00:23,  5.49s/it]

Step 2130/10000, Loss: 2.7321
Step 2131/10000, Loss: 3.1480


Training Progress:  21%|███████████▉                                            | 2133/10000 [38:12<6:04:23,  2.78s/it]

Step 2132/10000, Loss: 3.0943
Step 2133/10000, Loss: 3.1504


Training Progress:  21%|███████████▉                                            | 2135/10000 [38:12<3:09:32,  1.45s/it]

Step 2134/10000, Loss: 3.0365
Step 2135/10000, Loss: 3.1148


Training Progress:  21%|███████████▉                                            | 2137/10000 [38:13<1:44:01,  1.26it/s]

Step 2136/10000, Loss: 3.1045
Step 2137/10000, Loss: 2.9874


Training Progress:  21%|███████████▉                                            | 2139/10000 [38:13<1:02:43,  2.09it/s]

Step 2138/10000, Loss: 2.7836
Step 2139/10000, Loss: 2.8686


Training Progress:  21%|████████████▍                                             | 2141/10000 [38:13<42:01,  3.12it/s]

Step 2140/10000, Loss: 2.8554
Step 2141/10000, Loss: 2.9126


Training Progress:  21%|████████████▍                                             | 2143/10000 [38:14<32:04,  4.08it/s]

Step 2142/10000, Loss: 3.0189
Step 2143/10000, Loss: 3.1812


Training Progress:  21%|████████████▍                                             | 2145/10000 [38:14<26:59,  4.85it/s]

Step 2144/10000, Loss: 3.1430
Step 2145/10000, Loss: 3.0888


Training Progress:  21%|████████████▍                                             | 2147/10000 [38:14<24:50,  5.27it/s]

Step 2146/10000, Loss: 3.0495
Step 2147/10000, Loss: 2.9826


Training Progress:  21%|████████████▍                                             | 2149/10000 [38:15<23:35,  5.55it/s]

Step 2148/10000, Loss: 3.1591
Step 2149/10000, Loss: 2.8904


Training Progress:  22%|████████████▍                                             | 2151/10000 [38:15<22:47,  5.74it/s]

Step 2150/10000, Loss: 2.9718
Step 2151/10000, Loss: 3.1890


Training Progress:  22%|████████████▍                                             | 2153/10000 [38:15<22:51,  5.72it/s]

Step 2152/10000, Loss: 2.9692
Step 2153/10000, Loss: 3.0829


Training Progress:  22%|████████████▍                                             | 2155/10000 [38:16<22:30,  5.81it/s]

Step 2154/10000, Loss: 2.8336
Step 2155/10000, Loss: 3.1598


Training Progress:  22%|████████████▌                                             | 2157/10000 [38:16<22:36,  5.78it/s]

Step 2156/10000, Loss: 3.2014
Step 2157/10000, Loss: 3.1920


Training Progress:  22%|████████████▌                                             | 2159/10000 [38:17<22:20,  5.85it/s]

Step 2158/10000, Loss: 3.1720
Step 2159/10000, Loss: 3.1136


Training Progress:  22%|████████████▌                                             | 2161/10000 [38:17<21:55,  5.96it/s]

Step 2160/10000, Loss: 3.1581
Step 2161/10000, Loss: 3.1326


Training Progress:  22%|████████████▌                                             | 2163/10000 [38:17<22:24,  5.83it/s]

Step 2162/10000, Loss: 3.0675
Step 2163/10000, Loss: 3.0257


Training Progress:  22%|████████████▌                                             | 2165/10000 [38:18<21:57,  5.95it/s]

Step 2164/10000, Loss: 2.9040
Step 2165/10000, Loss: 2.9972


Training Progress:  22%|████████████▌                                             | 2167/10000 [38:18<22:22,  5.83it/s]

Step 2166/10000, Loss: 3.2123
Step 2167/10000, Loss: 3.1584


Training Progress:  22%|████████████▌                                             | 2169/10000 [38:18<22:36,  5.77it/s]

Step 2168/10000, Loss: 3.1111
Step 2169/10000, Loss: 3.0237


Training Progress:  22%|████████████▌                                             | 2171/10000 [38:19<22:05,  5.91it/s]

Step 2170/10000, Loss: 3.0668
Step 2171/10000, Loss: 3.0749


Training Progress:  22%|████████████▌                                             | 2173/10000 [38:19<22:12,  5.87it/s]

Step 2172/10000, Loss: 2.8982
Step 2173/10000, Loss: 2.9563


Training Progress:  22%|████████████▌                                             | 2175/10000 [38:19<22:27,  5.81it/s]

Step 2174/10000, Loss: 3.0505
Step 2175/10000, Loss: 2.9246


Training Progress:  22%|████████████▋                                             | 2177/10000 [38:20<22:14,  5.86it/s]

Step 2176/10000, Loss: 2.9751
Step 2177/10000, Loss: 3.2200


Training Progress:  22%|████████████▋                                             | 2179/10000 [38:20<22:06,  5.90it/s]

Step 2178/10000, Loss: 3.1191
Step 2179/10000, Loss: 3.0993


Training Progress:  22%|████████████▋                                             | 2181/10000 [38:20<22:14,  5.86it/s]

Step 2180/10000, Loss: 2.8833
Step 2181/10000, Loss: 3.0694


Training Progress:  22%|████████████▋                                             | 2183/10000 [38:21<22:17,  5.84it/s]

Step 2182/10000, Loss: 2.9609
Step 2183/10000, Loss: 2.9926


Training Progress:  22%|████████████▋                                             | 2185/10000 [38:21<22:20,  5.83it/s]

Step 2184/10000, Loss: 2.8594
Step 2185/10000, Loss: 3.0174


Training Progress:  22%|████████████▋                                             | 2187/10000 [38:21<22:00,  5.92it/s]

Step 2186/10000, Loss: 3.0150
Step 2187/10000, Loss: 3.0277


Training Progress:  22%|████████████▋                                             | 2189/10000 [38:22<22:04,  5.90it/s]

Step 2188/10000, Loss: 3.2102
Step 2189/10000, Loss: 3.2811


Training Progress:  22%|████████████▋                                             | 2191/10000 [38:22<22:16,  5.84it/s]

Step 2190/10000, Loss: 3.1827
Step 2191/10000, Loss: 3.1037


Training Progress:  22%|████████████▋                                             | 2193/10000 [38:22<22:19,  5.83it/s]

Step 2192/10000, Loss: 2.9902
Step 2193/10000, Loss: 3.1188


Training Progress:  22%|████████████▋                                             | 2195/10000 [38:23<21:45,  5.98it/s]

Step 2194/10000, Loss: 3.0590
Step 2195/10000, Loss: 2.8742


Training Progress:  22%|████████████▋                                             | 2197/10000 [38:23<21:59,  5.92it/s]

Step 2196/10000, Loss: 2.7248
Step 2197/10000, Loss: 2.8526


Training Progress:  22%|████████████▊                                             | 2199/10000 [38:23<22:09,  5.87it/s]

Step 2198/10000, Loss: 3.0001
Step 2199/10000, Loss: 2.8298


Training Progress:  22%|████████████▊                                             | 2201/10000 [38:24<21:56,  5.93it/s]

Step 2200/10000, Loss: 2.8601
Step 2201/10000, Loss: 2.9005


Training Progress:  22%|████████████▊                                             | 2203/10000 [38:24<22:20,  5.81it/s]

Step 2202/10000, Loss: 2.7754
Step 2203/10000, Loss: 2.7759


Training Progress:  22%|████████████▊                                             | 2205/10000 [38:24<22:18,  5.82it/s]

Step 2204/10000, Loss: 3.1858
Step 2205/10000, Loss: 2.9422


Training Progress:  22%|████████████▊                                             | 2207/10000 [38:25<22:14,  5.84it/s]

Step 2206/10000, Loss: 2.7793
Step 2207/10000, Loss: 2.7495


Training Progress:  22%|████████████▊                                             | 2209/10000 [38:25<22:11,  5.85it/s]

Step 2208/10000, Loss: 2.8845
Step 2209/10000, Loss: 2.8689


Training Progress:  22%|████████████▊                                             | 2210/10000 [38:25<22:15,  5.83it/s]

Step 2210/10000, Loss: 2.6820
Step 2211/10000, Loss: 2.6443


Training Progress:  22%|████████████▏                                          | 2211/10000 [38:41<10:16:14,  4.75s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2211_loss2.6443_20250117_133029.pt

New best loss: 2.6443


Training Progress:  22%|████████████▍                                           | 2213/10000 [38:41<5:17:41,  2.45s/it]

Step 2212/10000, Loss: 2.6839
Step 2213/10000, Loss: 3.0844


Training Progress:  22%|████████████▍                                           | 2215/10000 [38:41<2:46:32,  1.28s/it]

Step 2214/10000, Loss: 3.0000
Step 2215/10000, Loss: 2.9980


Training Progress:  22%|████████████▍                                           | 2217/10000 [38:42<1:33:03,  1.39it/s]

Step 2216/10000, Loss: 2.8895
Step 2217/10000, Loss: 2.9489


Training Progress:  22%|████████████▊                                             | 2219/10000 [38:42<56:32,  2.29it/s]

Step 2218/10000, Loss: 2.9605
Step 2219/10000, Loss: 2.8588


Training Progress:  22%|████████████▉                                             | 2221/10000 [38:42<39:16,  3.30it/s]

Step 2220/10000, Loss: 2.7098
Step 2221/10000, Loss: 2.8119


Training Progress:  22%|████████████▉                                             | 2223/10000 [38:43<30:21,  4.27it/s]

Step 2222/10000, Loss: 2.8095
Step 2223/10000, Loss: 2.8208


Training Progress:  22%|████████████▉                                             | 2225/10000 [38:43<26:23,  4.91it/s]

Step 2224/10000, Loss: 2.8659
Step 2225/10000, Loss: 3.0151


Training Progress:  22%|████████████▉                                             | 2227/10000 [38:44<24:04,  5.38it/s]

Step 2226/10000, Loss: 3.0044
Step 2227/10000, Loss: 2.9409


Training Progress:  22%|████████████▉                                             | 2229/10000 [38:44<23:16,  5.57it/s]

Step 2228/10000, Loss: 2.9513
Step 2229/10000, Loss: 2.9066


Training Progress:  22%|████████████▉                                             | 2231/10000 [38:44<22:23,  5.78it/s]

Step 2230/10000, Loss: 3.0787
Step 2231/10000, Loss: 2.7868


Training Progress:  22%|████████████▉                                             | 2233/10000 [38:45<21:56,  5.90it/s]

Step 2232/10000, Loss: 2.8308
Step 2233/10000, Loss: 3.0651


Training Progress:  22%|████████████▉                                             | 2235/10000 [38:45<22:16,  5.81it/s]

Step 2234/10000, Loss: 2.8329
Step 2235/10000, Loss: 2.9870


Training Progress:  22%|████████████▉                                             | 2237/10000 [38:45<22:17,  5.81it/s]

Step 2236/10000, Loss: 2.7645
Step 2237/10000, Loss: 3.1283


Training Progress:  22%|████████████▉                                             | 2239/10000 [38:46<22:06,  5.85it/s]

Step 2238/10000, Loss: 3.1432
Step 2239/10000, Loss: 3.1702


Training Progress:  22%|████████████▉                                             | 2241/10000 [38:46<21:51,  5.92it/s]

Step 2240/10000, Loss: 3.1524
Step 2241/10000, Loss: 3.0556


Training Progress:  22%|█████████████                                             | 2243/10000 [38:46<22:06,  5.85it/s]

Step 2242/10000, Loss: 3.0923
Step 2243/10000, Loss: 3.0688


Training Progress:  22%|█████████████                                             | 2245/10000 [38:47<22:13,  5.82it/s]

Step 2244/10000, Loss: 3.0527
Step 2245/10000, Loss: 3.0269


Training Progress:  22%|█████████████                                             | 2247/10000 [38:47<22:11,  5.82it/s]

Step 2246/10000, Loss: 2.8677
Step 2247/10000, Loss: 2.9476


Training Progress:  22%|█████████████                                             | 2249/10000 [38:47<21:49,  5.92it/s]

Step 2248/10000, Loss: 3.1278
Step 2249/10000, Loss: 3.1070


Training Progress:  23%|█████████████                                             | 2251/10000 [38:48<22:13,  5.81it/s]

Step 2250/10000, Loss: 3.0667
Step 2251/10000, Loss: 2.9819


Training Progress:  23%|█████████████                                             | 2253/10000 [38:48<21:57,  5.88it/s]

Step 2252/10000, Loss: 3.0387
Step 2253/10000, Loss: 3.0193


Training Progress:  23%|█████████████                                             | 2255/10000 [38:48<22:09,  5.83it/s]

Step 2254/10000, Loss: 2.8459
Step 2255/10000, Loss: 2.9188


Training Progress:  23%|█████████████                                             | 2257/10000 [38:49<22:04,  5.84it/s]

Step 2256/10000, Loss: 3.0495
Step 2257/10000, Loss: 2.9103


Training Progress:  23%|█████████████                                             | 2259/10000 [38:49<22:12,  5.81it/s]

Step 2258/10000, Loss: 2.9539
Step 2259/10000, Loss: 3.1905


Training Progress:  23%|█████████████                                             | 2261/10000 [38:49<21:46,  5.92it/s]

Step 2260/10000, Loss: 3.0146
Step 2261/10000, Loss: 2.9718


Training Progress:  23%|█████████████▏                                            | 2263/10000 [38:50<21:38,  5.96it/s]

Step 2262/10000, Loss: 2.7828
Step 2263/10000, Loss: 2.9873


Training Progress:  23%|█████████████▏                                            | 2265/10000 [38:50<22:04,  5.84it/s]

Step 2264/10000, Loss: 2.9024
Step 2265/10000, Loss: 2.9644


Training Progress:  23%|█████████████▏                                            | 2267/10000 [38:50<22:05,  5.83it/s]

Step 2266/10000, Loss: 2.8377
Step 2267/10000, Loss: 3.0385


Training Progress:  23%|█████████████▏                                            | 2269/10000 [38:51<21:56,  5.87it/s]

Step 2268/10000, Loss: 3.0168
Step 2269/10000, Loss: 2.9982


Training Progress:  23%|█████████████▏                                            | 2271/10000 [38:51<21:50,  5.90it/s]

Step 2270/10000, Loss: 3.0875
Step 2271/10000, Loss: 3.1527


Training Progress:  23%|█████████████▏                                            | 2273/10000 [38:51<21:54,  5.88it/s]

Step 2272/10000, Loss: 3.0619
Step 2273/10000, Loss: 2.9889


Training Progress:  23%|█████████████▏                                            | 2275/10000 [38:52<21:52,  5.88it/s]

Step 2274/10000, Loss: 2.9364
Step 2275/10000, Loss: 3.1183


Training Progress:  23%|█████████████▏                                            | 2277/10000 [38:52<22:08,  5.81it/s]

Step 2276/10000, Loss: 3.1487
Step 2277/10000, Loss: 2.9508


Training Progress:  23%|█████████████▏                                            | 2279/10000 [38:52<22:04,  5.83it/s]

Step 2278/10000, Loss: 2.7566
Step 2279/10000, Loss: 2.8273


Training Progress:  23%|█████████████▏                                            | 2281/10000 [38:53<21:54,  5.87it/s]

Step 2280/10000, Loss: 2.8787
Step 2281/10000, Loss: 2.7383


Training Progress:  23%|█████████████▏                                            | 2283/10000 [38:53<21:51,  5.88it/s]

Step 2282/10000, Loss: 2.7872
Step 2283/10000, Loss: 2.8388


Training Progress:  23%|█████████████▎                                            | 2285/10000 [38:53<21:52,  5.88it/s]

Step 2284/10000, Loss: 2.8073
Step 2285/10000, Loss: 2.8266


Training Progress:  23%|█████████████▎                                            | 2287/10000 [38:54<22:01,  5.84it/s]

Step 2286/10000, Loss: 3.2471
Step 2287/10000, Loss: 3.0343


Training Progress:  23%|█████████████▎                                            | 2289/10000 [38:54<22:10,  5.80it/s]

Step 2288/10000, Loss: 2.8584
Step 2289/10000, Loss: 2.7964


Training Progress:  23%|█████████████▎                                            | 2291/10000 [38:54<22:01,  5.83it/s]

Step 2290/10000, Loss: 2.8832
Step 2291/10000, Loss: 2.7957
Step 2292/10000, Loss: 2.6441


Training Progress:  23%|████████████▊                                           | 2292/10000 [39:09<9:32:46,  4.46s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2292_loss2.6441_20250117_133058.pt

New best loss: 2.6441
Step 2293/10000, Loss: 2.6278


Training Progress:  23%|████████████▌                                          | 2293/10000 [39:34<22:45:27, 10.63s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2293_loss2.6278_20250117_133113.pt

New best loss: 2.6278


Training Progress:  23%|████████████▌                                          | 2295/10000 [39:35<11:26:49,  5.35s/it]

Step 2294/10000, Loss: 2.6750
Step 2295/10000, Loss: 3.0664


Training Progress:  23%|████████████▊                                           | 2297/10000 [39:35<5:47:24,  2.71s/it]

Step 2296/10000, Loss: 3.0057
Step 2297/10000, Loss: 3.0191


Training Progress:  23%|████████████▊                                           | 2299/10000 [39:35<3:01:07,  1.41s/it]

Step 2298/10000, Loss: 2.8918
Step 2299/10000, Loss: 2.9240


Training Progress:  23%|████████████▉                                           | 2301/10000 [39:36<1:39:59,  1.28it/s]

Step 2300/10000, Loss: 2.9276
Step 2301/10000, Loss: 2.7768
Step 2302/10000, Loss: 2.6163


Training Progress:  23%|████████████▋                                          | 2302/10000 [39:50<10:40:11,  4.99s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2302_loss2.6163_20250117_133140.pt

New best loss: 2.6163


Training Progress:  23%|████████████▉                                           | 2304/10000 [39:51<5:31:00,  2.58s/it]

Step 2303/10000, Loss: 2.7100
Step 2304/10000, Loss: 2.7071


Training Progress:  23%|████████████▉                                           | 2306/10000 [39:51<2:52:58,  1.35s/it]

Step 2305/10000, Loss: 2.7402
Step 2306/10000, Loss: 2.8121


Training Progress:  23%|████████████▉                                           | 2308/10000 [39:52<1:36:12,  1.33it/s]

Step 2307/10000, Loss: 2.9757
Step 2308/10000, Loss: 2.9130


Training Progress:  23%|█████████████▍                                            | 2310/10000 [39:52<58:07,  2.20it/s]

Step 2309/10000, Loss: 2.8736
Step 2310/10000, Loss: 2.8468


Training Progress:  23%|█████████████▍                                            | 2312/10000 [39:52<39:47,  3.22it/s]

Step 2311/10000, Loss: 2.7821
Step 2312/10000, Loss: 2.9659


Training Progress:  23%|█████████████▍                                            | 2314/10000 [39:53<30:30,  4.20it/s]

Step 2313/10000, Loss: 2.6892
Step 2314/10000, Loss: 2.7610


Training Progress:  23%|█████████████▍                                            | 2316/10000 [39:53<25:58,  4.93it/s]

Step 2315/10000, Loss: 3.0225
Step 2316/10000, Loss: 2.7312


Training Progress:  23%|█████████████▍                                            | 2318/10000 [39:53<24:03,  5.32it/s]

Step 2317/10000, Loss: 2.8994
Step 2318/10000, Loss: 2.6761


Training Progress:  23%|█████████████▍                                            | 2320/10000 [39:54<22:45,  5.63it/s]

Step 2319/10000, Loss: 3.0119
Step 2320/10000, Loss: 3.0727


Training Progress:  23%|█████████████▍                                            | 2322/10000 [39:54<22:06,  5.79it/s]

Step 2321/10000, Loss: 3.1353
Step 2322/10000, Loss: 3.0931


Training Progress:  23%|█████████████▍                                            | 2324/10000 [39:54<21:55,  5.83it/s]

Step 2323/10000, Loss: 3.0243
Step 2324/10000, Loss: 3.0769


Training Progress:  23%|█████████████▍                                            | 2326/10000 [39:55<21:41,  5.90it/s]

Step 2325/10000, Loss: 3.0309
Step 2326/10000, Loss: 2.9474


Training Progress:  23%|█████████████▌                                            | 2328/10000 [39:55<22:01,  5.81it/s]

Step 2327/10000, Loss: 2.9771
Step 2328/10000, Loss: 2.8649


Training Progress:  23%|█████████████▌                                            | 2330/10000 [39:55<21:55,  5.83it/s]

Step 2329/10000, Loss: 2.9715
Step 2330/10000, Loss: 3.1873


Training Progress:  23%|█████████████▌                                            | 2332/10000 [39:56<21:54,  5.83it/s]

Step 2331/10000, Loss: 3.1374
Step 2332/10000, Loss: 3.0506


Training Progress:  23%|█████████████▌                                            | 2334/10000 [39:56<21:41,  5.89it/s]

Step 2333/10000, Loss: 2.9420
Step 2334/10000, Loss: 3.0002


Training Progress:  23%|█████████████▌                                            | 2336/10000 [39:56<21:46,  5.87it/s]

Step 2335/10000, Loss: 2.9967
Step 2336/10000, Loss: 2.8217


Training Progress:  23%|█████████████▌                                            | 2338/10000 [39:57<21:48,  5.85it/s]

Step 2337/10000, Loss: 2.8922
Step 2338/10000, Loss: 3.0685


Training Progress:  23%|█████████████▌                                            | 2340/10000 [39:57<21:27,  5.95it/s]

Step 2339/10000, Loss: 2.9102
Step 2340/10000, Loss: 2.9380


Training Progress:  23%|█████████████▌                                            | 2342/10000 [39:57<21:30,  5.93it/s]

Step 2341/10000, Loss: 3.1725
Step 2342/10000, Loss: 2.9944


Training Progress:  23%|█████████████▌                                            | 2344/10000 [39:58<21:28,  5.94it/s]

Step 2343/10000, Loss: 2.9761
Step 2344/10000, Loss: 2.7598


Training Progress:  23%|█████████████▌                                            | 2346/10000 [39:58<21:53,  5.83it/s]

Step 2345/10000, Loss: 2.9358
Step 2346/10000, Loss: 2.8632


Training Progress:  23%|█████████████▌                                            | 2348/10000 [39:58<21:47,  5.85it/s]

Step 2347/10000, Loss: 2.9148
Step 2348/10000, Loss: 2.8018


Training Progress:  24%|█████████████▋                                            | 2350/10000 [39:59<21:52,  5.83it/s]

Step 2349/10000, Loss: 2.9690
Step 2350/10000, Loss: 2.9545


Training Progress:  24%|█████████████▋                                            | 2352/10000 [39:59<21:25,  5.95it/s]

Step 2351/10000, Loss: 2.9530
Step 2352/10000, Loss: 3.0815


Training Progress:  24%|█████████████▋                                            | 2354/10000 [39:59<21:22,  5.96it/s]

Step 2353/10000, Loss: 3.1246
Step 2354/10000, Loss: 3.0045


Training Progress:  24%|█████████████▋                                            | 2356/10000 [40:00<21:42,  5.87it/s]

Step 2355/10000, Loss: 2.8855
Step 2356/10000, Loss: 2.8218


Training Progress:  24%|█████████████▋                                            | 2358/10000 [40:00<21:57,  5.80it/s]

Step 2357/10000, Loss: 2.9600
Step 2358/10000, Loss: 2.9399


Training Progress:  24%|█████████████▋                                            | 2360/10000 [40:00<21:48,  5.84it/s]

Step 2359/10000, Loss: 2.8001
Step 2360/10000, Loss: 2.6669


Training Progress:  24%|█████████████▋                                            | 2362/10000 [40:01<21:21,  5.96it/s]

Step 2361/10000, Loss: 2.7891
Step 2362/10000, Loss: 2.9167


Training Progress:  24%|█████████████▋                                            | 2364/10000 [40:01<21:58,  5.79it/s]

Step 2363/10000, Loss: 2.7275
Step 2364/10000, Loss: 2.7243


Training Progress:  24%|█████████████▋                                            | 2366/10000 [40:01<21:59,  5.78it/s]

Step 2365/10000, Loss: 2.7519
Step 2366/10000, Loss: 2.6697


Training Progress:  24%|█████████████▋                                            | 2368/10000 [40:02<21:45,  5.85it/s]

Step 2367/10000, Loss: 2.6950
Step 2368/10000, Loss: 3.1095


Training Progress:  24%|█████████████▋                                            | 2370/10000 [40:02<21:29,  5.92it/s]

Step 2369/10000, Loss: 2.9712
Step 2370/10000, Loss: 2.7974


Training Progress:  24%|█████████████▊                                            | 2372/10000 [40:03<21:55,  5.80it/s]

Step 2371/10000, Loss: 2.7802
Step 2372/10000, Loss: 2.9117


Training Progress:  24%|█████████████▊                                            | 2374/10000 [40:03<21:38,  5.87it/s]

Step 2373/10000, Loss: 2.7958
Step 2374/10000, Loss: 2.6512


Training Progress:  24%|█████████████▊                                            | 2376/10000 [40:03<22:00,  5.77it/s]

Step 2375/10000, Loss: 2.6385
Step 2376/10000, Loss: 2.7151


Training Progress:  24%|█████████████▊                                            | 2378/10000 [40:04<21:36,  5.88it/s]

Step 2377/10000, Loss: 3.0608
Step 2378/10000, Loss: 3.0055


Training Progress:  24%|█████████████▊                                            | 2380/10000 [40:04<21:53,  5.80it/s]

Step 2379/10000, Loss: 3.0187
Step 2380/10000, Loss: 2.8835


Training Progress:  24%|█████████████▊                                            | 2382/10000 [40:04<22:01,  5.76it/s]

Step 2381/10000, Loss: 2.9173
Step 2382/10000, Loss: 2.9330


Training Progress:  24%|█████████████▊                                            | 2384/10000 [40:05<21:43,  5.84it/s]

Step 2383/10000, Loss: 2.8701
Step 2384/10000, Loss: 2.6741


Training Progress:  24%|█████████████▊                                            | 2386/10000 [40:05<21:32,  5.89it/s]

Step 2385/10000, Loss: 2.7875
Step 2386/10000, Loss: 2.7304


Training Progress:  24%|█████████████▊                                            | 2388/10000 [40:05<21:49,  5.81it/s]

Step 2387/10000, Loss: 2.7500
Step 2388/10000, Loss: 2.8088


Training Progress:  24%|█████████████▊                                            | 2390/10000 [40:06<21:36,  5.87it/s]

Step 2389/10000, Loss: 2.9428
Step 2390/10000, Loss: 2.8739


Training Progress:  24%|█████████████▊                                            | 2392/10000 [40:06<21:51,  5.80it/s]

Step 2391/10000, Loss: 2.8073
Step 2392/10000, Loss: 2.8419


Training Progress:  24%|█████████████▉                                            | 2394/10000 [40:06<21:29,  5.90it/s]

Step 2393/10000, Loss: 2.7733
Step 2394/10000, Loss: 2.9444


Training Progress:  24%|█████████████▉                                            | 2396/10000 [40:07<21:48,  5.81it/s]

Step 2395/10000, Loss: 2.6867
Step 2396/10000, Loss: 2.7009


Training Progress:  24%|█████████████▉                                            | 2398/10000 [40:07<21:35,  5.87it/s]

Step 2397/10000, Loss: 2.9318
Step 2398/10000, Loss: 2.7020


Training Progress:  24%|█████████████▉                                            | 2400/10000 [40:07<21:44,  5.83it/s]

Step 2399/10000, Loss: 2.8549
Step 2400/10000, Loss: 2.6640


Training Progress:  24%|█████████████▉                                            | 2402/10000 [40:08<21:36,  5.86it/s]

Step 2401/10000, Loss: 2.9861
Step 2402/10000, Loss: 3.0348


Training Progress:  24%|█████████████▉                                            | 2404/10000 [40:08<21:50,  5.80it/s]

Step 2403/10000, Loss: 3.0373
Step 2404/10000, Loss: 3.0113


Training Progress:  24%|█████████████▉                                            | 2406/10000 [40:08<21:25,  5.91it/s]

Step 2405/10000, Loss: 2.9526
Step 2406/10000, Loss: 2.9966


Training Progress:  24%|█████████████▉                                            | 2408/10000 [40:09<21:50,  5.79it/s]

Step 2407/10000, Loss: 3.0299
Step 2408/10000, Loss: 2.9440


Training Progress:  24%|█████████████▉                                            | 2410/10000 [40:09<21:36,  5.85it/s]

Step 2409/10000, Loss: 2.9196
Step 2410/10000, Loss: 2.7925


Training Progress:  24%|█████████████▉                                            | 2412/10000 [40:09<21:35,  5.86it/s]

Step 2411/10000, Loss: 2.8765
Step 2412/10000, Loss: 3.0761


Training Progress:  24%|██████████████                                            | 2414/10000 [40:10<21:33,  5.86it/s]

Step 2413/10000, Loss: 3.0694
Step 2414/10000, Loss: 2.9951


Training Progress:  24%|██████████████                                            | 2416/10000 [40:10<21:49,  5.79it/s]

Step 2415/10000, Loss: 2.9393
Step 2416/10000, Loss: 2.9941


Training Progress:  24%|██████████████                                            | 2418/10000 [40:10<21:45,  5.81it/s]

Step 2417/10000, Loss: 2.9803
Step 2418/10000, Loss: 2.7928


Training Progress:  24%|██████████████                                            | 2420/10000 [40:11<21:41,  5.82it/s]

Step 2419/10000, Loss: 2.8315
Step 2420/10000, Loss: 2.9724


Training Progress:  24%|██████████████                                            | 2422/10000 [40:11<21:28,  5.88it/s]

Step 2421/10000, Loss: 2.8899
Step 2422/10000, Loss: 2.9157


Training Progress:  24%|██████████████                                            | 2424/10000 [40:11<21:46,  5.80it/s]

Step 2423/10000, Loss: 3.1556
Step 2424/10000, Loss: 2.9950


Training Progress:  24%|██████████████                                            | 2426/10000 [40:12<21:23,  5.90it/s]

Step 2425/10000, Loss: 2.9880
Step 2426/10000, Loss: 2.7672


Training Progress:  24%|██████████████                                            | 2428/10000 [40:12<21:42,  5.81it/s]

Step 2427/10000, Loss: 2.9379
Step 2428/10000, Loss: 2.8567


Training Progress:  24%|██████████████                                            | 2430/10000 [40:12<21:40,  5.82it/s]

Step 2429/10000, Loss: 2.8804
Step 2430/10000, Loss: 2.7955


Training Progress:  24%|██████████████                                            | 2432/10000 [40:13<21:38,  5.83it/s]

Step 2431/10000, Loss: 2.9612
Step 2432/10000, Loss: 2.9350


Training Progress:  24%|██████████████                                            | 2434/10000 [40:13<21:34,  5.84it/s]

Step 2433/10000, Loss: 2.9201
Step 2434/10000, Loss: 3.0271


Training Progress:  24%|██████████████▏                                           | 2436/10000 [40:13<21:48,  5.78it/s]

Step 2435/10000, Loss: 3.0589
Step 2436/10000, Loss: 2.9655


Training Progress:  24%|██████████████▏                                           | 2438/10000 [40:14<21:10,  5.95it/s]

Step 2437/10000, Loss: 2.8376
Step 2438/10000, Loss: 2.8404


Training Progress:  24%|██████████████▏                                           | 2440/10000 [40:14<21:36,  5.83it/s]

Step 2439/10000, Loss: 2.9221
Step 2440/10000, Loss: 2.9001


Training Progress:  24%|██████████████▏                                           | 2441/10000 [40:14<21:38,  5.82it/s]

Step 2441/10000, Loss: 2.7256
Step 2442/10000, Loss: 2.5978


Training Progress:  24%|█████████████▋                                          | 2442/10000 [40:29<9:12:34,  4.39s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2442_loss2.5978_20250117_133218.pt

New best loss: 2.5978


Training Progress:  24%|█████████████▋                                          | 2444/10000 [40:29<4:47:45,  2.29s/it]

Step 2443/10000, Loss: 2.6698
Step 2444/10000, Loss: 2.7385


Training Progress:  24%|█████████████▋                                          | 2446/10000 [40:29<2:31:44,  1.21s/it]

Step 2445/10000, Loss: 2.6252
Step 2446/10000, Loss: 2.6572


Training Progress:  24%|█████████████▋                                          | 2448/10000 [40:30<1:25:08,  1.48it/s]

Step 2447/10000, Loss: 2.6902
Step 2448/10000, Loss: 2.6590


Training Progress:  24%|██████████████▏                                           | 2450/10000 [40:30<52:57,  2.38it/s]

Step 2449/10000, Loss: 2.6551
Step 2450/10000, Loss: 3.0689


Training Progress:  25%|██████████████▏                                           | 2452/10000 [40:30<36:47,  3.42it/s]

Step 2451/10000, Loss: 2.8257
Step 2452/10000, Loss: 2.7177


Training Progress:  25%|██████████████▏                                           | 2454/10000 [40:31<29:11,  4.31it/s]

Step 2453/10000, Loss: 2.6939
Step 2454/10000, Loss: 2.7758


Training Progress:  25%|██████████████▏                                           | 2455/10000 [40:31<27:04,  4.65it/s]

Step 2455/10000, Loss: 2.6889
Step 2456/10000, Loss: 2.5651


Training Progress:  25%|█████████████▌                                         | 2456/10000 [40:49<11:45:57,  5.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2456_loss2.5651_20250117_133235.pt

New best loss: 2.5651
Step 2457/10000, Loss: 2.5597


Training Progress:  25%|█████████████▌                                         | 2457/10000 [41:10<21:33:37, 10.29s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2457_loss2.5597_20250117_133254.pt

New best loss: 2.5597


Training Progress:  25%|█████████████▌                                         | 2459/10000 [41:11<10:53:43,  5.20s/it]

Step 2458/10000, Loss: 2.6546
Step 2459/10000, Loss: 2.9823


Training Progress:  25%|█████████████▊                                          | 2461/10000 [41:11<5:31:25,  2.64s/it]

Step 2460/10000, Loss: 2.9359
Step 2461/10000, Loss: 3.0258


Training Progress:  25%|█████████████▊                                          | 2463/10000 [41:12<2:53:09,  1.38s/it]

Step 2462/10000, Loss: 2.8562
Step 2463/10000, Loss: 2.9098


Training Progress:  25%|█████████████▊                                          | 2465/10000 [41:12<1:35:44,  1.31it/s]

Step 2464/10000, Loss: 2.8970
Step 2465/10000, Loss: 2.8022


Training Progress:  25%|██████████████▎                                           | 2467/10000 [41:12<57:55,  2.17it/s]

Step 2466/10000, Loss: 2.5760
Step 2467/10000, Loss: 2.6805


Training Progress:  25%|██████████████▎                                           | 2469/10000 [41:13<39:23,  3.19it/s]

Step 2468/10000, Loss: 2.6630
Step 2469/10000, Loss: 2.7148


Training Progress:  25%|██████████████▎                                           | 2471/10000 [41:13<29:52,  4.20it/s]

Step 2470/10000, Loss: 2.8203
Step 2471/10000, Loss: 2.9622


Training Progress:  25%|██████████████▎                                           | 2473/10000 [41:13<25:52,  4.85it/s]

Step 2472/10000, Loss: 2.9419
Step 2473/10000, Loss: 2.8701


Training Progress:  25%|██████████████▎                                           | 2475/10000 [41:14<23:30,  5.34it/s]

Step 2474/10000, Loss: 2.8642
Step 2475/10000, Loss: 2.7686


Training Progress:  25%|██████████████▎                                           | 2477/10000 [41:14<22:30,  5.57it/s]

Step 2476/10000, Loss: 2.8938
Step 2477/10000, Loss: 2.6483


Training Progress:  25%|██████████████▍                                           | 2479/10000 [41:14<21:45,  5.76it/s]

Step 2478/10000, Loss: 2.6907
Step 2479/10000, Loss: 2.8875


Training Progress:  25%|██████████████▍                                           | 2481/10000 [41:15<21:50,  5.74it/s]

Step 2480/10000, Loss: 2.6983
Step 2481/10000, Loss: 2.8465


Training Progress:  25%|██████████████▍                                           | 2483/10000 [41:15<21:27,  5.84it/s]

Step 2482/10000, Loss: 2.6605
Step 2483/10000, Loss: 3.0223


Training Progress:  25%|██████████████▍                                           | 2485/10000 [41:16<21:38,  5.79it/s]

Step 2484/10000, Loss: 3.0471
Step 2485/10000, Loss: 3.0601


Training Progress:  25%|██████████████▍                                           | 2487/10000 [41:16<21:34,  5.81it/s]

Step 2486/10000, Loss: 3.0456
Step 2487/10000, Loss: 2.9239


Training Progress:  25%|██████████████▍                                           | 2489/10000 [41:16<20:59,  5.96it/s]

Step 2488/10000, Loss: 2.9614
Step 2489/10000, Loss: 2.9237


Training Progress:  25%|██████████████▍                                           | 2491/10000 [41:17<21:41,  5.77it/s]

Step 2490/10000, Loss: 2.8771
Step 2491/10000, Loss: 2.8757


Training Progress:  25%|██████████████▍                                           | 2493/10000 [41:17<21:01,  5.95it/s]

Step 2492/10000, Loss: 2.7762
Step 2493/10000, Loss: 2.8813


Training Progress:  25%|██████████████▍                                           | 2495/10000 [41:17<21:40,  5.77it/s]

Step 2494/10000, Loss: 3.0612
Step 2495/10000, Loss: 3.0309


Training Progress:  25%|██████████████▍                                           | 2497/10000 [41:18<21:29,  5.82it/s]

Step 2496/10000, Loss: 2.9273
Step 2497/10000, Loss: 2.8598


Training Progress:  25%|██████████████▍                                           | 2499/10000 [41:18<21:25,  5.84it/s]

Step 2498/10000, Loss: 2.9447
Step 2499/10000, Loss: 2.9645


Training Progress:  25%|██████████████▌                                           | 2501/10000 [41:18<21:27,  5.82it/s]

Step 2500/10000, Loss: 2.8146
Step 2501/10000, Loss: 2.8458


Training Progress:  25%|██████████████▌                                           | 2503/10000 [41:19<21:13,  5.89it/s]

Step 2502/10000, Loss: 2.9755
Step 2503/10000, Loss: 2.8487


Training Progress:  25%|██████████████▌                                           | 2505/10000 [41:19<20:54,  5.97it/s]

Step 2504/10000, Loss: 2.8197
Step 2505/10000, Loss: 3.0313


Training Progress:  25%|██████████████▌                                           | 2507/10000 [41:19<21:25,  5.83it/s]

Step 2506/10000, Loss: 2.9232
Step 2507/10000, Loss: 2.9116


Training Progress:  25%|██████████████▌                                           | 2509/10000 [41:20<20:59,  5.95it/s]

Step 2508/10000, Loss: 2.7346
Step 2509/10000, Loss: 2.9110


Training Progress:  25%|██████████████▌                                           | 2511/10000 [41:20<21:29,  5.81it/s]

Step 2510/10000, Loss: 2.8616
Step 2511/10000, Loss: 2.8887


Training Progress:  25%|██████████████▌                                           | 2513/10000 [41:20<21:03,  5.93it/s]

Step 2512/10000, Loss: 2.8088
Step 2513/10000, Loss: 2.9530


Training Progress:  25%|██████████████▌                                           | 2515/10000 [41:21<21:30,  5.80it/s]

Step 2514/10000, Loss: 2.9353
Step 2515/10000, Loss: 2.8875


Training Progress:  25%|██████████████▌                                           | 2517/10000 [41:21<21:11,  5.88it/s]

Step 2516/10000, Loss: 3.0276
Step 2517/10000, Loss: 3.0520


Training Progress:  25%|██████████████▌                                           | 2519/10000 [41:21<21:21,  5.84it/s]

Step 2518/10000, Loss: 2.9588
Step 2519/10000, Loss: 2.8637


Training Progress:  25%|██████████████▌                                           | 2521/10000 [41:22<21:16,  5.86it/s]

Step 2520/10000, Loss: 2.7790
Step 2521/10000, Loss: 2.9266


Training Progress:  25%|██████████████▋                                           | 2523/10000 [41:22<21:31,  5.79it/s]

Step 2522/10000, Loss: 2.8823
Step 2523/10000, Loss: 2.7131


Training Progress:  25%|██████████████▋                                           | 2525/10000 [41:22<21:00,  5.93it/s]

Step 2524/10000, Loss: 2.6041
Step 2525/10000, Loss: 2.6676


Training Progress:  25%|██████████████▋                                           | 2527/10000 [41:23<21:33,  5.78it/s]

Step 2526/10000, Loss: 2.7211
Step 2527/10000, Loss: 2.6353


Training Progress:  25%|██████████████▋                                           | 2529/10000 [41:23<21:15,  5.86it/s]

Step 2528/10000, Loss: 2.6251
Step 2529/10000, Loss: 2.6553


Training Progress:  25%|██████████████▋                                           | 2531/10000 [41:23<21:11,  5.87it/s]

Step 2530/10000, Loss: 2.5759
Step 2531/10000, Loss: 2.5961


Training Progress:  25%|██████████████▋                                           | 2533/10000 [41:24<21:29,  5.79it/s]

Step 2532/10000, Loss: 3.0327
Step 2533/10000, Loss: 2.8206


Training Progress:  25%|██████████████▋                                           | 2535/10000 [41:24<21:13,  5.86it/s]

Step 2534/10000, Loss: 2.6867
Step 2535/10000, Loss: 2.6431


Training Progress:  25%|██████████████▋                                           | 2537/10000 [41:24<20:48,  5.98it/s]

Step 2536/10000, Loss: 2.7123
Step 2537/10000, Loss: 2.6441
Step 2538/10000, Loss: 2.5103


Training Progress:  25%|██████████████▏                                         | 2538/10000 [41:39<9:13:44,  4.45s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2538_loss2.5103_20250117_133328.pt

New best loss: 2.5103
Step 2539/10000, Loss: 2.4640


Training Progress:  25%|█████████████▉                                         | 2539/10000 [42:01<20:03:12,  9.68s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2539_loss2.4640_20250117_133343.pt

New best loss: 2.4640


Training Progress:  25%|█████████████▉                                         | 2541/10000 [42:01<10:06:18,  4.88s/it]

Step 2540/10000, Loss: 2.5553
Step 2541/10000, Loss: 2.9190


Training Progress:  25%|██████████████▏                                         | 2543/10000 [42:02<5:07:48,  2.48s/it]

Step 2542/10000, Loss: 2.8211
Step 2543/10000, Loss: 2.8708


Training Progress:  25%|██████████████▎                                         | 2545/10000 [42:02<2:41:50,  1.30s/it]

Step 2544/10000, Loss: 2.7217
Step 2545/10000, Loss: 2.7854


Training Progress:  25%|██████████████▎                                         | 2547/10000 [42:02<1:29:45,  1.38it/s]

Step 2546/10000, Loss: 2.8001
Step 2547/10000, Loss: 2.7475


Training Progress:  25%|██████████████▊                                           | 2549/10000 [42:03<54:33,  2.28it/s]

Step 2548/10000, Loss: 2.5096
Step 2549/10000, Loss: 2.6524


Training Progress:  26%|██████████████▊                                           | 2551/10000 [42:03<37:49,  3.28it/s]

Step 2550/10000, Loss: 2.5891
Step 2551/10000, Loss: 2.6055


Training Progress:  26%|██████████████▊                                           | 2553/10000 [42:03<29:22,  4.22it/s]

Step 2552/10000, Loss: 2.6911
Step 2553/10000, Loss: 2.8360


Training Progress:  26%|██████████████▊                                           | 2555/10000 [42:04<25:08,  4.93it/s]

Step 2554/10000, Loss: 2.7945
Step 2555/10000, Loss: 2.7181


Training Progress:  26%|██████████████▊                                           | 2557/10000 [42:04<22:57,  5.40it/s]

Step 2556/10000, Loss: 2.7242
Step 2557/10000, Loss: 2.6833


Training Progress:  26%|██████████████▊                                           | 2559/10000 [42:04<22:00,  5.64it/s]

Step 2558/10000, Loss: 2.8440
Step 2559/10000, Loss: 2.5828


Training Progress:  26%|██████████████▊                                           | 2561/10000 [42:05<21:37,  5.73it/s]

Step 2560/10000, Loss: 2.6213
Step 2561/10000, Loss: 2.8038


Training Progress:  26%|██████████████▊                                           | 2563/10000 [42:05<21:38,  5.73it/s]

Step 2562/10000, Loss: 2.6138
Step 2563/10000, Loss: 2.7114


Training Progress:  26%|██████████████▉                                           | 2565/10000 [42:05<20:54,  5.92it/s]

Step 2564/10000, Loss: 2.5012
Step 2565/10000, Loss: 2.9247


Training Progress:  26%|██████████████▉                                           | 2567/10000 [42:06<20:55,  5.92it/s]

Step 2566/10000, Loss: 2.9625
Step 2567/10000, Loss: 3.0229


Training Progress:  26%|██████████████▉                                           | 2569/10000 [42:06<21:30,  5.76it/s]

Step 2568/10000, Loss: 3.0127
Step 2569/10000, Loss: 2.9229


Training Progress:  26%|██████████████▉                                           | 2571/10000 [42:06<21:15,  5.82it/s]

Step 2570/10000, Loss: 2.9556
Step 2571/10000, Loss: 2.8875


Training Progress:  26%|██████████████▉                                           | 2573/10000 [42:07<21:02,  5.88it/s]

Step 2572/10000, Loss: 2.8271
Step 2573/10000, Loss: 2.7901


Training Progress:  26%|██████████████▉                                           | 2575/10000 [42:07<20:45,  5.96it/s]

Step 2574/10000, Loss: 2.6767
Step 2575/10000, Loss: 2.7806


Training Progress:  26%|██████████████▉                                           | 2577/10000 [42:07<21:20,  5.80it/s]

Step 2576/10000, Loss: 2.9752
Step 2577/10000, Loss: 2.9626


Training Progress:  26%|██████████████▉                                           | 2579/10000 [42:08<20:44,  5.97it/s]

Step 2578/10000, Loss: 2.9115
Step 2579/10000, Loss: 2.8305


Training Progress:  26%|██████████████▉                                           | 2581/10000 [42:08<21:09,  5.84it/s]

Step 2580/10000, Loss: 2.9101
Step 2581/10000, Loss: 2.8772


Training Progress:  26%|██████████████▉                                           | 2583/10000 [42:08<21:07,  5.85it/s]

Step 2582/10000, Loss: 2.7460
Step 2583/10000, Loss: 2.7713


Training Progress:  26%|██████████████▉                                           | 2585/10000 [42:09<21:10,  5.84it/s]

Step 2584/10000, Loss: 2.9270
Step 2585/10000, Loss: 2.8266


Training Progress:  26%|███████████████                                           | 2587/10000 [42:09<21:01,  5.88it/s]

Step 2586/10000, Loss: 2.8110
Step 2587/10000, Loss: 2.9695


Training Progress:  26%|███████████████                                           | 2589/10000 [42:09<20:46,  5.94it/s]

Step 2588/10000, Loss: 2.8245
Step 2589/10000, Loss: 2.8183


Training Progress:  26%|███████████████                                           | 2591/10000 [42:10<21:06,  5.85it/s]

Step 2590/10000, Loss: 2.6463
Step 2591/10000, Loss: 2.8402


Training Progress:  26%|███████████████                                           | 2593/10000 [42:10<21:14,  5.81it/s]

Step 2592/10000, Loss: 2.7543
Step 2593/10000, Loss: 2.7935


Training Progress:  26%|███████████████                                           | 2595/10000 [42:10<21:09,  5.83it/s]

Step 2594/10000, Loss: 2.7424
Step 2595/10000, Loss: 2.9091


Training Progress:  26%|███████████████                                           | 2597/10000 [42:11<20:38,  5.98it/s]

Step 2596/10000, Loss: 2.9173
Step 2597/10000, Loss: 2.8811


Training Progress:  26%|███████████████                                           | 2599/10000 [42:11<21:07,  5.84it/s]

Step 2598/10000, Loss: 2.9886
Step 2599/10000, Loss: 3.0120


Training Progress:  26%|███████████████                                           | 2601/10000 [42:11<20:59,  5.87it/s]

Step 2600/10000, Loss: 2.8708
Step 2601/10000, Loss: 2.8229


Training Progress:  26%|███████████████                                           | 2603/10000 [42:12<21:13,  5.81it/s]

Step 2602/10000, Loss: 2.7684
Step 2603/10000, Loss: 2.8748


Training Progress:  26%|███████████████                                           | 2605/10000 [42:12<21:20,  5.77it/s]

Step 2604/10000, Loss: 2.8423
Step 2605/10000, Loss: 2.7196


Training Progress:  26%|███████████████                                           | 2607/10000 [42:13<21:08,  5.83it/s]

Step 2606/10000, Loss: 2.6163
Step 2607/10000, Loss: 2.6892


Training Progress:  26%|███████████████▏                                          | 2609/10000 [42:13<20:45,  5.93it/s]

Step 2608/10000, Loss: 2.7295
Step 2609/10000, Loss: 2.6286


Training Progress:  26%|███████████████▏                                          | 2611/10000 [42:13<21:13,  5.80it/s]

Step 2610/10000, Loss: 2.6333
Step 2611/10000, Loss: 2.6306


Training Progress:  26%|███████████████▏                                          | 2613/10000 [42:14<20:57,  5.88it/s]

Step 2612/10000, Loss: 2.4982
Step 2613/10000, Loss: 2.4814


Training Progress:  26%|███████████████▏                                          | 2615/10000 [42:14<21:12,  5.81it/s]

Step 2614/10000, Loss: 2.9367
Step 2615/10000, Loss: 2.7169


Training Progress:  26%|███████████████▏                                          | 2617/10000 [42:14<20:57,  5.87it/s]

Step 2616/10000, Loss: 2.6028
Step 2617/10000, Loss: 2.5672


Training Progress:  26%|███████████████▏                                          | 2619/10000 [42:15<20:37,  5.96it/s]

Step 2618/10000, Loss: 2.6669
Step 2619/10000, Loss: 2.5756
Step 2620/10000, Loss: 2.4509


Training Progress:  26%|██████████████▋                                         | 2620/10000 [42:30<9:31:03,  4.64s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2620_loss2.4509_20250117_133419.pt

New best loss: 2.4509
Step 2621/10000, Loss: 2.4222


Training Progress:  26%|██████████████▍                                        | 2621/10000 [42:52<20:08:55,  9.83s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2621_loss2.4222_20250117_133434.pt

New best loss: 2.4222


Training Progress:  26%|██████████████▍                                        | 2623/10000 [42:52<10:06:55,  4.94s/it]

Step 2622/10000, Loss: 2.5066
Step 2623/10000, Loss: 2.8756


Training Progress:  26%|██████████████▋                                         | 2625/10000 [42:52<5:07:59,  2.51s/it]

Step 2624/10000, Loss: 2.7646
Step 2625/10000, Loss: 2.8034


Training Progress:  26%|██████████████▋                                         | 2627/10000 [42:53<2:41:34,  1.31s/it]

Step 2626/10000, Loss: 2.6597
Step 2627/10000, Loss: 2.6642


Training Progress:  26%|██████████████▋                                         | 2629/10000 [42:53<1:29:46,  1.37it/s]

Step 2628/10000, Loss: 2.6544
Step 2629/10000, Loss: 2.5828
Step 2630/10000, Loss: 2.3995


Training Progress:  26%|██████████████▍                                        | 2630/10000 [43:14<14:06:00,  6.89s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2630_loss2.3995_20250117_133457.pt

New best loss: 2.3995


Training Progress:  26%|██████████████▋                                         | 2632/10000 [43:15<7:09:16,  3.50s/it]

Step 2631/10000, Loss: 2.5742
Step 2632/10000, Loss: 2.5495


Training Progress:  26%|██████████████▊                                         | 2634/10000 [43:15<3:40:44,  1.80s/it]

Step 2633/10000, Loss: 2.5263
Step 2634/10000, Loss: 2.6030


Training Progress:  26%|██████████████▊                                         | 2636/10000 [43:16<1:59:00,  1.03it/s]

Step 2635/10000, Loss: 2.7916
Step 2636/10000, Loss: 2.7316


Training Progress:  26%|██████████████▊                                         | 2638/10000 [43:16<1:08:53,  1.78it/s]

Step 2637/10000, Loss: 2.6443
Step 2638/10000, Loss: 2.6484


Training Progress:  26%|███████████████▎                                          | 2640/10000 [43:16<44:43,  2.74it/s]

Step 2639/10000, Loss: 2.5917
Step 2640/10000, Loss: 2.7484


Training Progress:  26%|███████████████▎                                          | 2642/10000 [43:17<32:23,  3.79it/s]

Step 2641/10000, Loss: 2.4621
Step 2642/10000, Loss: 2.5317


Training Progress:  26%|███████████████▎                                          | 2644/10000 [43:17<26:38,  4.60it/s]

Step 2643/10000, Loss: 2.7284
Step 2644/10000, Loss: 2.5244


Training Progress:  26%|███████████████▎                                          | 2646/10000 [43:17<23:39,  5.18it/s]

Step 2645/10000, Loss: 2.6171
Step 2646/10000, Loss: 2.4334


Training Progress:  26%|███████████████▎                                          | 2648/10000 [43:18<21:53,  5.60it/s]

Step 2647/10000, Loss: 2.7529
Step 2648/10000, Loss: 2.8042


Training Progress:  26%|███████████████▎                                          | 2650/10000 [43:18<21:42,  5.64it/s]

Step 2649/10000, Loss: 2.8362
Step 2650/10000, Loss: 2.8348


Training Progress:  27%|███████████████▍                                          | 2652/10000 [43:18<20:52,  5.87it/s]

Step 2651/10000, Loss: 2.7676
Step 2652/10000, Loss: 2.8737


Training Progress:  27%|███████████████▍                                          | 2654/10000 [43:19<21:07,  5.79it/s]

Step 2653/10000, Loss: 2.8157
Step 2654/10000, Loss: 2.7646


Training Progress:  27%|███████████████▍                                          | 2656/10000 [43:19<21:17,  5.75it/s]

Step 2655/10000, Loss: 2.7669
Step 2656/10000, Loss: 2.6464


Training Progress:  27%|███████████████▍                                          | 2658/10000 [43:19<20:57,  5.84it/s]

Step 2657/10000, Loss: 2.6707
Step 2658/10000, Loss: 2.8513


Training Progress:  27%|███████████████▍                                          | 2660/10000 [43:20<20:49,  5.88it/s]

Step 2659/10000, Loss: 2.8335
Step 2660/10000, Loss: 2.7595


Training Progress:  27%|███████████████▍                                          | 2662/10000 [43:20<21:02,  5.81it/s]

Step 2661/10000, Loss: 2.6741
Step 2662/10000, Loss: 2.7840


Training Progress:  27%|███████████████▍                                          | 2664/10000 [43:20<20:43,  5.90it/s]

Step 2663/10000, Loss: 2.7522
Step 2664/10000, Loss: 2.6233


Training Progress:  27%|███████████████▍                                          | 2666/10000 [43:21<21:05,  5.79it/s]

Step 2665/10000, Loss: 2.6451
Step 2666/10000, Loss: 2.8135


Training Progress:  27%|███████████████▍                                          | 2668/10000 [43:21<20:36,  5.93it/s]

Step 2667/10000, Loss: 2.7447
Step 2668/10000, Loss: 2.7851


Training Progress:  27%|███████████████▍                                          | 2670/10000 [43:21<21:01,  5.81it/s]

Step 2669/10000, Loss: 2.9257
Step 2670/10000, Loss: 2.8050


Training Progress:  27%|███████████████▍                                          | 2672/10000 [43:22<20:50,  5.86it/s]

Step 2671/10000, Loss: 2.7541
Step 2672/10000, Loss: 2.5742


Training Progress:  27%|███████████████▌                                          | 2674/10000 [43:22<20:30,  5.95it/s]

Step 2673/10000, Loss: 2.7426
Step 2674/10000, Loss: 2.6722


Training Progress:  27%|███████████████▌                                          | 2676/10000 [43:22<20:42,  5.89it/s]

Step 2675/10000, Loss: 2.7014
Step 2676/10000, Loss: 2.6365


Training Progress:  27%|███████████████▌                                          | 2678/10000 [43:23<20:32,  5.94it/s]

Step 2677/10000, Loss: 2.8266
Step 2678/10000, Loss: 2.7895


Training Progress:  27%|███████████████▌                                          | 2680/10000 [43:23<20:34,  5.93it/s]

Step 2679/10000, Loss: 2.8052
Step 2680/10000, Loss: 2.8910


Training Progress:  27%|███████████████▌                                          | 2682/10000 [43:23<20:54,  5.84it/s]

Step 2681/10000, Loss: 2.8769
Step 2682/10000, Loss: 2.8094


Training Progress:  27%|███████████████▌                                          | 2684/10000 [43:24<20:46,  5.87it/s]

Step 2683/10000, Loss: 2.6895
Step 2684/10000, Loss: 2.6824


Training Progress:  27%|███████████████▌                                          | 2686/10000 [43:24<20:40,  5.89it/s]

Step 2685/10000, Loss: 2.7786
Step 2686/10000, Loss: 2.7431


Training Progress:  27%|███████████████▌                                          | 2688/10000 [43:24<20:36,  5.91it/s]

Step 2687/10000, Loss: 2.6146
Step 2688/10000, Loss: 2.5231


Training Progress:  27%|███████████████▌                                          | 2690/10000 [43:25<20:44,  5.88it/s]

Step 2689/10000, Loss: 2.5914
Step 2690/10000, Loss: 2.7065


Training Progress:  27%|███████████████▌                                          | 2692/10000 [43:25<20:34,  5.92it/s]

Step 2691/10000, Loss: 2.6149
Step 2692/10000, Loss: 2.6167


Training Progress:  27%|███████████████▋                                          | 2694/10000 [43:25<20:55,  5.82it/s]

Step 2693/10000, Loss: 2.5655
Step 2694/10000, Loss: 2.4498


Training Progress:  27%|███████████████▋                                          | 2696/10000 [43:26<20:59,  5.80it/s]

Step 2695/10000, Loss: 2.4209
Step 2696/10000, Loss: 2.8346


Training Progress:  27%|███████████████▋                                          | 2698/10000 [43:26<20:40,  5.89it/s]

Step 2697/10000, Loss: 2.6485
Step 2698/10000, Loss: 2.5256


Training Progress:  27%|███████████████▋                                          | 2700/10000 [43:26<21:00,  5.79it/s]

Step 2699/10000, Loss: 2.5147
Step 2700/10000, Loss: 2.5811


Training Progress:  27%|███████████████▋                                          | 2702/10000 [43:27<20:49,  5.84it/s]

Step 2701/10000, Loss: 2.5393
Step 2702/10000, Loss: 2.4042
Step 2703/10000, Loss: 2.3508


Training Progress:  27%|███████████████▏                                        | 2703/10000 [43:41<8:56:51,  4.41s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2703_loss2.3508_20250117_133531.pt

New best loss: 2.3508


Training Progress:  27%|███████████████▏                                        | 2705/10000 [43:42<4:38:57,  2.29s/it]

Step 2704/10000, Loss: 2.4499
Step 2705/10000, Loss: 2.7960


Training Progress:  27%|███████████████▏                                        | 2707/10000 [43:42<2:27:14,  1.21s/it]

Step 2706/10000, Loss: 2.6992
Step 2707/10000, Loss: 2.7697


Training Progress:  27%|███████████████▏                                        | 2709/10000 [43:42<1:22:45,  1.47it/s]

Step 2708/10000, Loss: 2.6396
Step 2709/10000, Loss: 2.6255


Training Progress:  27%|███████████████▋                                          | 2711/10000 [43:43<51:07,  2.38it/s]

Step 2710/10000, Loss: 2.6019
Step 2711/10000, Loss: 2.5154


Training Progress:  27%|███████████████▋                                          | 2713/10000 [43:43<35:26,  3.43it/s]

Step 2712/10000, Loss: 2.3683
Step 2713/10000, Loss: 2.4507


Training Progress:  27%|███████████████▋                                          | 2715/10000 [43:43<28:10,  4.31it/s]

Step 2714/10000, Loss: 2.4283
Step 2715/10000, Loss: 2.4352


Training Progress:  27%|███████████████▊                                          | 2717/10000 [43:44<24:14,  5.01it/s]

Step 2716/10000, Loss: 2.5137
Step 2717/10000, Loss: 2.6645


Training Progress:  27%|███████████████▊                                          | 2719/10000 [43:44<22:07,  5.49it/s]

Step 2718/10000, Loss: 2.6453
Step 2719/10000, Loss: 2.5822


Training Progress:  27%|███████████████▊                                          | 2721/10000 [43:44<21:14,  5.71it/s]

Step 2720/10000, Loss: 2.5981
Step 2721/10000, Loss: 2.5534


Training Progress:  27%|███████████████▊                                          | 2723/10000 [43:45<20:55,  5.80it/s]

Step 2722/10000, Loss: 2.6530
Step 2723/10000, Loss: 2.4018


Training Progress:  27%|███████████████▊                                          | 2725/10000 [43:45<21:00,  5.77it/s]

Step 2724/10000, Loss: 2.4596
Step 2725/10000, Loss: 2.6424


Training Progress:  27%|███████████████▊                                          | 2727/10000 [43:45<20:51,  5.81it/s]

Step 2726/10000, Loss: 2.4222
Step 2727/10000, Loss: 2.5270


Training Progress:  27%|███████████████▊                                          | 2729/10000 [43:46<20:53,  5.80it/s]

Step 2728/10000, Loss: 2.3511
Step 2729/10000, Loss: 2.6460


Training Progress:  27%|███████████████▊                                          | 2731/10000 [43:46<20:27,  5.92it/s]

Step 2730/10000, Loss: 2.6837
Step 2731/10000, Loss: 2.6949


Training Progress:  27%|███████████████▊                                          | 2733/10000 [43:46<20:22,  5.94it/s]

Step 2732/10000, Loss: 2.6887
Step 2733/10000, Loss: 2.5765


Training Progress:  27%|███████████████▊                                          | 2735/10000 [43:47<20:31,  5.90it/s]

Step 2734/10000, Loss: 2.6691
Step 2735/10000, Loss: 2.6704


Training Progress:  27%|███████████████▊                                          | 2737/10000 [43:47<20:34,  5.88it/s]

Step 2736/10000, Loss: 2.6204
Step 2737/10000, Loss: 2.6318


Training Progress:  27%|███████████████▉                                          | 2739/10000 [43:47<20:44,  5.83it/s]

Step 2738/10000, Loss: 2.5400
Step 2739/10000, Loss: 2.5521


Training Progress:  27%|███████████████▉                                          | 2741/10000 [43:48<20:50,  5.81it/s]

Step 2740/10000, Loss: 2.7389
Step 2741/10000, Loss: 2.7140


Training Progress:  27%|███████████████▉                                          | 2743/10000 [43:48<20:26,  5.92it/s]

Step 2742/10000, Loss: 2.6248
Step 2743/10000, Loss: 2.5676


Training Progress:  27%|███████████████▉                                          | 2745/10000 [43:48<20:21,  5.94it/s]

Step 2744/10000, Loss: 2.6298
Step 2745/10000, Loss: 2.6615


Training Progress:  27%|███████████████▉                                          | 2747/10000 [43:49<20:38,  5.86it/s]

Step 2746/10000, Loss: 2.4871
Step 2747/10000, Loss: 2.5151


Training Progress:  27%|███████████████▉                                          | 2749/10000 [43:49<20:38,  5.86it/s]

Step 2748/10000, Loss: 2.6779
Step 2749/10000, Loss: 2.5781


Training Progress:  28%|███████████████▉                                          | 2751/10000 [43:50<20:43,  5.83it/s]

Step 2750/10000, Loss: 2.6070
Step 2751/10000, Loss: 2.7997


Training Progress:  28%|███████████████▉                                          | 2753/10000 [43:50<20:18,  5.95it/s]

Step 2752/10000, Loss: 2.6628
Step 2753/10000, Loss: 2.6793


Training Progress:  28%|███████████████▉                                          | 2755/10000 [43:50<20:41,  5.84it/s]

Step 2754/10000, Loss: 2.5394
Step 2755/10000, Loss: 2.7064


Training Progress:  28%|███████████████▉                                          | 2757/10000 [43:51<20:22,  5.92it/s]

Step 2756/10000, Loss: 2.6056
Step 2757/10000, Loss: 2.5718


Training Progress:  28%|████████████████                                          | 2759/10000 [43:51<20:41,  5.83it/s]

Step 2758/10000, Loss: 2.5102
Step 2759/10000, Loss: 2.6587


Training Progress:  28%|████████████████                                          | 2761/10000 [43:51<20:45,  5.81it/s]

Step 2760/10000, Loss: 2.6667
Step 2761/10000, Loss: 2.6649


Training Progress:  28%|████████████████                                          | 2763/10000 [43:52<20:37,  5.85it/s]

Step 2762/10000, Loss: 2.8250
Step 2763/10000, Loss: 2.8190


Training Progress:  28%|████████████████                                          | 2765/10000 [43:52<20:28,  5.89it/s]

Step 2764/10000, Loss: 2.7299
Step 2765/10000, Loss: 2.5944


Training Progress:  28%|████████████████                                          | 2767/10000 [43:52<20:44,  5.81it/s]

Step 2766/10000, Loss: 2.5306
Step 2767/10000, Loss: 2.6179


Training Progress:  28%|████████████████                                          | 2769/10000 [43:53<20:47,  5.80it/s]

Step 2768/10000, Loss: 2.6091
Step 2769/10000, Loss: 2.4472


Training Progress:  28%|████████████████                                          | 2771/10000 [43:53<20:44,  5.81it/s]

Step 2770/10000, Loss: 2.3546
Step 2771/10000, Loss: 2.5120


Training Progress:  28%|████████████████                                          | 2773/10000 [43:53<20:20,  5.92it/s]

Step 2772/10000, Loss: 2.5383
Step 2773/10000, Loss: 2.4721


Training Progress:  28%|████████████████                                          | 2775/10000 [43:54<20:42,  5.81it/s]

Step 2774/10000, Loss: 2.4949
Step 2775/10000, Loss: 2.5210


Training Progress:  28%|████████████████                                          | 2777/10000 [43:54<20:29,  5.88it/s]

Step 2776/10000, Loss: 2.4223
Step 2777/10000, Loss: 2.4221


Training Progress:  28%|████████████████                                          | 2779/10000 [43:54<20:41,  5.81it/s]

Step 2778/10000, Loss: 2.8039
Step 2779/10000, Loss: 2.5738


Training Progress:  28%|████████████████▏                                         | 2781/10000 [43:55<20:30,  5.87it/s]

Step 2780/10000, Loss: 2.4311
Step 2781/10000, Loss: 2.3889


Training Progress:  28%|████████████████▏                                         | 2783/10000 [43:55<20:45,  5.79it/s]

Step 2782/10000, Loss: 2.4374
Step 2783/10000, Loss: 2.3678
Step 2784/10000, Loss: 2.2651


Training Progress:  28%|███████████████▌                                        | 2784/10000 [44:09<8:28:51,  4.23s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2784_loss2.2651_20250117_133559.pt

New best loss: 2.2651


Training Progress:  28%|███████████████▌                                        | 2786/10000 [44:09<4:25:35,  2.21s/it]

Step 2785/10000, Loss: 2.2697
Step 2786/10000, Loss: 2.3556


Training Progress:  28%|███████████████▌                                        | 2788/10000 [44:10<2:20:26,  1.17s/it]

Step 2787/10000, Loss: 2.7116
Step 2788/10000, Loss: 2.6413


Training Progress:  28%|███████████████▌                                        | 2790/10000 [44:10<1:19:30,  1.51it/s]

Step 2789/10000, Loss: 2.7462
Step 2790/10000, Loss: 2.5583


Training Progress:  28%|████████████████▏                                         | 2792/10000 [44:10<49:09,  2.44it/s]

Step 2791/10000, Loss: 2.5717
Step 2792/10000, Loss: 2.5509


Training Progress:  28%|████████████████▏                                         | 2794/10000 [44:11<34:49,  3.45it/s]

Step 2793/10000, Loss: 2.4511
Step 2794/10000, Loss: 2.3015


Training Progress:  28%|████████████████▏                                         | 2796/10000 [44:11<27:23,  4.38it/s]

Step 2795/10000, Loss: 2.3986
Step 2796/10000, Loss: 2.3643


Training Progress:  28%|████████████████▏                                         | 2798/10000 [44:11<23:55,  5.02it/s]

Step 2797/10000, Loss: 2.3610
Step 2798/10000, Loss: 2.4727


Training Progress:  28%|████████████████▏                                         | 2800/10000 [44:12<22:07,  5.42it/s]

Step 2799/10000, Loss: 2.5976
Step 2800/10000, Loss: 2.5833


Training Progress:  28%|████████████████▎                                         | 2802/10000 [44:12<21:33,  5.56it/s]

Step 2801/10000, Loss: 2.5329
Step 2802/10000, Loss: 2.4978


Training Progress:  28%|████████████████▎                                         | 2804/10000 [44:12<20:41,  5.80it/s]

Step 2803/10000, Loss: 2.4558
Step 2804/10000, Loss: 2.5749


Training Progress:  28%|████████████████▎                                         | 2806/10000 [44:13<20:56,  5.73it/s]

Step 2805/10000, Loss: 2.3581
Step 2806/10000, Loss: 2.4439


Training Progress:  28%|████████████████▎                                         | 2808/10000 [44:13<20:32,  5.83it/s]

Step 2807/10000, Loss: 2.6260
Step 2808/10000, Loss: 2.4113


Training Progress:  28%|████████████████▎                                         | 2810/10000 [44:13<20:25,  5.87it/s]

Step 2809/10000, Loss: 2.4829
Step 2810/10000, Loss: 2.3083


Training Progress:  28%|████████████████▎                                         | 2812/10000 [44:14<20:25,  5.87it/s]

Step 2811/10000, Loss: 2.6244
Step 2812/10000, Loss: 2.6344


Training Progress:  28%|████████████████▎                                         | 2814/10000 [44:14<20:38,  5.80it/s]

Step 2813/10000, Loss: 2.6392
Step 2814/10000, Loss: 2.5801


Training Progress:  28%|████████████████▎                                         | 2816/10000 [44:14<20:42,  5.78it/s]

Step 2815/10000, Loss: 2.5295
Step 2816/10000, Loss: 2.5709


Training Progress:  28%|████████████████▎                                         | 2818/10000 [44:15<20:34,  5.82it/s]

Step 2817/10000, Loss: 2.5676
Step 2818/10000, Loss: 2.4773


Training Progress:  28%|████████████████▎                                         | 2820/10000 [44:15<20:20,  5.88it/s]

Step 2819/10000, Loss: 2.5247
Step 2820/10000, Loss: 2.4328


Training Progress:  28%|████████████████▎                                         | 2822/10000 [44:15<20:26,  5.85it/s]

Step 2821/10000, Loss: 2.4608
Step 2822/10000, Loss: 2.6527


Training Progress:  28%|████████████████▍                                         | 2824/10000 [44:16<20:24,  5.86it/s]

Step 2823/10000, Loss: 2.6377
Step 2824/10000, Loss: 2.5607


Training Progress:  28%|████████████████▍                                         | 2826/10000 [44:16<20:37,  5.79it/s]

Step 2825/10000, Loss: 2.4711
Step 2826/10000, Loss: 2.5407


Training Progress:  28%|████████████████▍                                         | 2828/10000 [44:16<20:30,  5.83it/s]

Step 2827/10000, Loss: 2.5158
Step 2828/10000, Loss: 2.3788


Training Progress:  28%|████████████████▍                                         | 2830/10000 [44:17<20:27,  5.84it/s]

Step 2829/10000, Loss: 2.4287
Step 2830/10000, Loss: 2.5597


Training Progress:  28%|████████████████▍                                         | 2832/10000 [44:17<20:18,  5.88it/s]

Step 2831/10000, Loss: 2.4736
Step 2832/10000, Loss: 2.5000


Training Progress:  28%|████████████████▍                                         | 2834/10000 [44:17<20:09,  5.92it/s]

Step 2833/10000, Loss: 2.6887
Step 2834/10000, Loss: 2.5428


Training Progress:  28%|████████████████▍                                         | 2836/10000 [44:18<20:23,  5.86it/s]

Step 2835/10000, Loss: 2.5224
Step 2836/10000, Loss: 2.4148


Training Progress:  28%|████████████████▍                                         | 2838/10000 [44:18<20:37,  5.79it/s]

Step 2837/10000, Loss: 2.5725
Step 2838/10000, Loss: 2.5209


Training Progress:  28%|████████████████▍                                         | 2840/10000 [44:18<20:27,  5.83it/s]

Step 2839/10000, Loss: 2.5142
Step 2840/10000, Loss: 2.5141


Training Progress:  28%|████████████████▍                                         | 2842/10000 [44:19<20:33,  5.80it/s]

Step 2841/10000, Loss: 2.6241
Step 2842/10000, Loss: 2.5840


Training Progress:  28%|████████████████▍                                         | 2844/10000 [44:19<20:16,  5.88it/s]

Step 2843/10000, Loss: 2.5648
Step 2844/10000, Loss: 2.6608


Training Progress:  28%|████████████████▌                                         | 2846/10000 [44:19<20:30,  5.82it/s]

Step 2845/10000, Loss: 2.6620
Step 2846/10000, Loss: 2.6274


Training Progress:  28%|████████████████▌                                         | 2848/10000 [44:20<20:19,  5.87it/s]

Step 2847/10000, Loss: 2.5483
Step 2848/10000, Loss: 2.5446


Training Progress:  28%|████████████████▌                                         | 2850/10000 [44:20<20:33,  5.80it/s]

Step 2849/10000, Loss: 2.6700
Step 2850/10000, Loss: 2.6244


Training Progress:  29%|████████████████▌                                         | 2851/10000 [44:20<20:08,  5.91it/s]

Step 2851/10000, Loss: 2.3901
Step 2852/10000, Loss: 2.2351


Training Progress:  29%|███████████████▉                                        | 2852/10000 [44:34<8:33:10,  4.31s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2852_loss2.2351_20250117_133624.pt

New best loss: 2.2351


Training Progress:  29%|███████████████▉                                        | 2854/10000 [44:35<4:26:42,  2.24s/it]

Step 2853/10000, Loss: 2.3794
Step 2854/10000, Loss: 2.4119


Training Progress:  29%|███████████████▉                                        | 2856/10000 [44:35<2:20:37,  1.18s/it]

Step 2855/10000, Loss: 2.3230
Step 2856/10000, Loss: 2.3467


Training Progress:  29%|████████████████                                        | 2858/10000 [44:36<1:19:04,  1.51it/s]

Step 2857/10000, Loss: 2.3799
Step 2858/10000, Loss: 2.3400


Training Progress:  29%|████████████████▌                                         | 2860/10000 [44:36<49:11,  2.42it/s]

Step 2859/10000, Loss: 2.3449
Step 2860/10000, Loss: 2.7427


Training Progress:  29%|████████████████▌                                         | 2862/10000 [44:36<34:48,  3.42it/s]

Step 2861/10000, Loss: 2.5007
Step 2862/10000, Loss: 2.3923


Training Progress:  29%|████████████████▌                                         | 2864/10000 [44:37<27:14,  4.37it/s]

Step 2863/10000, Loss: 2.3501
Step 2864/10000, Loss: 2.4003


Training Progress:  29%|████████████████▌                                         | 2865/10000 [44:37<25:14,  4.71it/s]

Step 2865/10000, Loss: 2.3283
Step 2866/10000, Loss: 2.2229


Training Progress:  29%|███████████████▊                                       | 2866/10000 [44:58<13:06:14,  6.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2866_loss2.2229_20250117_133641.pt

New best loss: 2.2229
Step 2867/10000, Loss: 2.1736


Training Progress:  29%|███████████████▊                                       | 2867/10000 [45:17<20:27:35, 10.33s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2867_loss2.1736_20250117_133703.pt

New best loss: 2.1736


Training Progress:  29%|███████████████▊                                       | 2869/10000 [45:18<10:17:07,  5.19s/it]

Step 2868/10000, Loss: 2.2525
Step 2869/10000, Loss: 2.5717


Training Progress:  29%|████████████████                                        | 2871/10000 [45:18<5:12:40,  2.63s/it]

Step 2870/10000, Loss: 2.4981
Step 2871/10000, Loss: 2.5931


Training Progress:  29%|████████████████                                        | 2873/10000 [45:19<2:43:48,  1.38s/it]

Step 2872/10000, Loss: 2.4574
Step 2873/10000, Loss: 2.5069


Training Progress:  29%|████████████████                                        | 2875/10000 [45:19<1:30:25,  1.31it/s]

Step 2874/10000, Loss: 2.5362
Step 2875/10000, Loss: 2.4549


Training Progress:  29%|████████████████▋                                         | 2877/10000 [45:19<54:52,  2.16it/s]

Step 2876/10000, Loss: 2.2805
Step 2877/10000, Loss: 2.3903


Training Progress:  29%|████████████████▋                                         | 2879/10000 [45:20<37:08,  3.20it/s]

Step 2878/10000, Loss: 2.3298
Step 2879/10000, Loss: 2.3221


Training Progress:  29%|████████████████▋                                         | 2881/10000 [45:20<28:28,  4.17it/s]

Step 2880/10000, Loss: 2.4180
Step 2881/10000, Loss: 2.5599


Training Progress:  29%|████████████████▋                                         | 2883/10000 [45:20<24:30,  4.84it/s]

Step 2882/10000, Loss: 2.5486
Step 2883/10000, Loss: 2.4935


Training Progress:  29%|████████████████▋                                         | 2885/10000 [45:21<21:55,  5.41it/s]

Step 2884/10000, Loss: 2.4717
Step 2885/10000, Loss: 2.4009


Training Progress:  29%|████████████████▋                                         | 2887/10000 [45:21<21:15,  5.58it/s]

Step 2886/10000, Loss: 2.5343
Step 2887/10000, Loss: 2.2811


Training Progress:  29%|████████████████▊                                         | 2889/10000 [45:21<20:52,  5.68it/s]

Step 2888/10000, Loss: 2.3269
Step 2889/10000, Loss: 2.5197


Training Progress:  29%|████████████████▊                                         | 2891/10000 [45:22<20:28,  5.79it/s]

Step 2890/10000, Loss: 2.3220
Step 2891/10000, Loss: 2.3965


Training Progress:  29%|████████████████▊                                         | 2893/10000 [45:22<20:13,  5.86it/s]

Step 2892/10000, Loss: 2.2565
Step 2893/10000, Loss: 2.5513


Training Progress:  29%|████████████████▊                                         | 2895/10000 [45:22<20:15,  5.84it/s]

Step 2894/10000, Loss: 2.5628
Step 2895/10000, Loss: 2.5396


Training Progress:  29%|████████████████▊                                         | 2897/10000 [45:23<20:09,  5.87it/s]

Step 2896/10000, Loss: 2.5103
Step 2897/10000, Loss: 2.4146


Training Progress:  29%|████████████████▊                                         | 2899/10000 [45:23<20:20,  5.82it/s]

Step 2898/10000, Loss: 2.4743
Step 2899/10000, Loss: 2.4736


Training Progress:  29%|████████████████▊                                         | 2901/10000 [45:23<20:04,  5.89it/s]

Step 2900/10000, Loss: 2.3897
Step 2901/10000, Loss: 2.4173


Training Progress:  29%|████████████████▊                                         | 2903/10000 [45:24<20:23,  5.80it/s]

Step 2902/10000, Loss: 2.3387
Step 2903/10000, Loss: 2.3264


Training Progress:  29%|████████████████▊                                         | 2905/10000 [45:24<20:13,  5.85it/s]

Step 2904/10000, Loss: 2.5229
Step 2905/10000, Loss: 2.5211


Training Progress:  29%|████████████████▊                                         | 2907/10000 [45:24<20:14,  5.84it/s]

Step 2906/10000, Loss: 2.4600
Step 2907/10000, Loss: 2.3841


Training Progress:  29%|████████████████▊                                         | 2909/10000 [45:25<20:06,  5.88it/s]

Step 2908/10000, Loss: 2.4692
Step 2909/10000, Loss: 2.4488


Training Progress:  29%|████████████████▉                                         | 2911/10000 [45:25<19:51,  5.95it/s]

Step 2910/10000, Loss: 2.3353
Step 2911/10000, Loss: 2.3427


Training Progress:  29%|████████████████▉                                         | 2913/10000 [45:25<20:02,  5.89it/s]

Step 2912/10000, Loss: 2.4605
Step 2913/10000, Loss: 2.3984


Training Progress:  29%|████████████████▉                                         | 2915/10000 [45:26<19:54,  5.93it/s]

Step 2914/10000, Loss: 2.4013
Step 2915/10000, Loss: 2.5893


Training Progress:  29%|████████████████▉                                         | 2917/10000 [45:26<20:17,  5.82it/s]

Step 2916/10000, Loss: 2.4439
Step 2917/10000, Loss: 2.4506


Training Progress:  29%|████████████████▉                                         | 2919/10000 [45:26<20:24,  5.78it/s]

Step 2918/10000, Loss: 2.3088
Step 2919/10000, Loss: 2.4758


Training Progress:  29%|████████████████▉                                         | 2921/10000 [45:27<20:16,  5.82it/s]

Step 2920/10000, Loss: 2.3963
Step 2921/10000, Loss: 2.3841


Training Progress:  29%|████████████████▉                                         | 2923/10000 [45:27<20:01,  5.89it/s]

Step 2922/10000, Loss: 2.3639
Step 2923/10000, Loss: 2.4841


Training Progress:  29%|████████████████▉                                         | 2925/10000 [45:27<20:04,  5.87it/s]

Step 2924/10000, Loss: 2.5016
Step 2925/10000, Loss: 2.5054


Training Progress:  29%|████████████████▉                                         | 2927/10000 [45:28<20:15,  5.82it/s]

Step 2926/10000, Loss: 2.6642
Step 2927/10000, Loss: 2.6200


Training Progress:  29%|████████████████▉                                         | 2929/10000 [45:28<20:20,  5.79it/s]

Step 2928/10000, Loss: 2.5260
Step 2929/10000, Loss: 2.4681


Training Progress:  29%|████████████████▉                                         | 2931/10000 [45:28<20:12,  5.83it/s]

Step 2930/10000, Loss: 2.3879
Step 2931/10000, Loss: 2.5108


Training Progress:  29%|█████████████████                                         | 2933/10000 [45:29<20:10,  5.84it/s]

Step 2932/10000, Loss: 2.4866
Step 2933/10000, Loss: 2.3241


Training Progress:  29%|█████████████████                                         | 2935/10000 [45:29<19:58,  5.90it/s]

Step 2934/10000, Loss: 2.2374
Step 2935/10000, Loss: 2.3684


Training Progress:  29%|█████████████████                                         | 2937/10000 [45:29<19:56,  5.90it/s]

Step 2936/10000, Loss: 2.4017
Step 2937/10000, Loss: 2.3055


Training Progress:  29%|█████████████████                                         | 2939/10000 [45:30<20:06,  5.85it/s]

Step 2938/10000, Loss: 2.2492
Step 2939/10000, Loss: 2.2781


Training Progress:  29%|█████████████████                                         | 2941/10000 [45:30<20:17,  5.80it/s]

Step 2940/10000, Loss: 2.2211
Step 2941/10000, Loss: 2.2275


Training Progress:  29%|█████████████████                                         | 2943/10000 [45:30<20:09,  5.84it/s]

Step 2942/10000, Loss: 2.6183
Step 2943/10000, Loss: 2.4086


Training Progress:  29%|█████████████████                                         | 2945/10000 [45:31<20:15,  5.81it/s]

Step 2944/10000, Loss: 2.3452
Step 2945/10000, Loss: 2.3215


Training Progress:  29%|█████████████████                                         | 2947/10000 [45:31<20:00,  5.88it/s]

Step 2946/10000, Loss: 2.4012
Step 2947/10000, Loss: 2.3067


Training Progress:  29%|█████████████████                                         | 2948/10000 [45:31<20:17,  5.79it/s]

Step 2948/10000, Loss: 2.1847
Step 2949/10000, Loss: 2.1298


Training Progress:  29%|████████████████▌                                       | 2949/10000 [45:46<9:00:12,  4.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step2949_loss2.1298_20250117_133735.pt

New best loss: 2.1298


Training Progress:  30%|████████████████▌                                       | 2951/10000 [45:47<4:40:30,  2.39s/it]

Step 2950/10000, Loss: 2.1799
Step 2951/10000, Loss: 2.5211


Training Progress:  30%|████████████████▌                                       | 2953/10000 [45:47<2:28:07,  1.26s/it]

Step 2952/10000, Loss: 2.4161
Step 2953/10000, Loss: 2.5157


Training Progress:  30%|████████████████▌                                       | 2955/10000 [45:47<1:22:44,  1.42it/s]

Step 2954/10000, Loss: 2.3879
Step 2955/10000, Loss: 2.4476


Training Progress:  30%|█████████████████▏                                        | 2957/10000 [45:48<50:47,  2.31it/s]

Step 2956/10000, Loss: 2.4188
Step 2957/10000, Loss: 2.3806


Training Progress:  30%|█████████████████▏                                        | 2959/10000 [45:48<34:35,  3.39it/s]

Step 2958/10000, Loss: 2.2049
Step 2959/10000, Loss: 2.2988


Training Progress:  30%|█████████████████▏                                        | 2961/10000 [45:49<27:28,  4.27it/s]

Step 2960/10000, Loss: 2.2857
Step 2961/10000, Loss: 2.2688


Training Progress:  30%|█████████████████▏                                        | 2963/10000 [45:49<23:27,  5.00it/s]

Step 2962/10000, Loss: 2.3631
Step 2963/10000, Loss: 2.5009


Training Progress:  30%|█████████████████▏                                        | 2965/10000 [45:49<21:57,  5.34it/s]

Step 2964/10000, Loss: 2.4548
Step 2965/10000, Loss: 2.4173


Training Progress:  30%|█████████████████▏                                        | 2967/10000 [45:50<20:48,  5.63it/s]

Step 2966/10000, Loss: 2.4207
Step 2967/10000, Loss: 2.3388


Training Progress:  30%|█████████████████▏                                        | 2969/10000 [45:50<20:33,  5.70it/s]

Step 2968/10000, Loss: 2.5012
Step 2969/10000, Loss: 2.2489


Training Progress:  30%|█████████████████▏                                        | 2971/10000 [45:50<20:22,  5.75it/s]

Step 2970/10000, Loss: 2.2736
Step 2971/10000, Loss: 2.4771


Training Progress:  30%|█████████████████▏                                        | 2973/10000 [45:51<19:45,  5.93it/s]

Step 2972/10000, Loss: 2.3042
Step 2973/10000, Loss: 2.3622


Training Progress:  30%|█████████████████▎                                        | 2975/10000 [45:51<20:04,  5.83it/s]

Step 2974/10000, Loss: 2.2107
Step 2975/10000, Loss: 2.4848


Training Progress:  30%|█████████████████▎                                        | 2977/10000 [45:51<19:44,  5.93it/s]

Step 2976/10000, Loss: 2.5105
Step 2977/10000, Loss: 2.5105


Training Progress:  30%|█████████████████▎                                        | 2979/10000 [45:52<20:01,  5.84it/s]

Step 2978/10000, Loss: 2.4503
Step 2979/10000, Loss: 2.3873


Training Progress:  30%|█████████████████▎                                        | 2981/10000 [45:52<19:54,  5.88it/s]

Step 2980/10000, Loss: 2.4698
Step 2981/10000, Loss: 2.4502


Training Progress:  30%|█████████████████▎                                        | 2983/10000 [45:52<20:08,  5.81it/s]

Step 2982/10000, Loss: 2.3434
Step 2983/10000, Loss: 2.3797


Training Progress:  30%|█████████████████▎                                        | 2985/10000 [45:53<19:54,  5.87it/s]

Step 2984/10000, Loss: 2.2978
Step 2985/10000, Loss: 2.2984


Training Progress:  30%|█████████████████▎                                        | 2987/10000 [45:53<20:08,  5.80it/s]

Step 2986/10000, Loss: 2.4487
Step 2987/10000, Loss: 2.4358


Training Progress:  30%|█████████████████▎                                        | 2989/10000 [45:53<19:49,  5.89it/s]

Step 2988/10000, Loss: 2.4020
Step 2989/10000, Loss: 2.3428


Training Progress:  30%|█████████████████▎                                        | 2991/10000 [45:54<19:35,  5.96it/s]

Step 2990/10000, Loss: 2.3950
Step 2991/10000, Loss: 2.3837


Training Progress:  30%|█████████████████▎                                        | 2993/10000 [45:54<19:57,  5.85it/s]

Step 2992/10000, Loss: 2.2809
Step 2993/10000, Loss: 2.2730


Training Progress:  30%|█████████████████▎                                        | 2995/10000 [45:54<20:06,  5.81it/s]

Step 2994/10000, Loss: 2.3772
Step 2995/10000, Loss: 2.3170


Training Progress:  30%|█████████████████▍                                        | 2997/10000 [45:55<19:55,  5.86it/s]

Step 2996/10000, Loss: 2.3065
Step 2997/10000, Loss: 2.5118


Training Progress:  30%|█████████████████▍                                        | 2999/10000 [45:55<19:46,  5.90it/s]

Step 2998/10000, Loss: 2.3657
Step 2999/10000, Loss: 2.3501
Step 3000/10000, Loss: 2.2291


Training Progress:  30%|████████████████▊                                       | 3000/10000 [46:11<9:47:52,  5.04s/it]


Checkpoint saved: checkpoints\checkpoint_step3000_loss2.2291_20250117_133759.pt


Training Progress:  30%|████████████████▊                                       | 3002/10000 [46:12<5:04:57,  2.61s/it]

Step 3001/10000, Loss: 2.3907
Step 3002/10000, Loss: 2.2798


Training Progress:  30%|████████████████▊                                       | 3004/10000 [46:12<2:39:17,  1.37s/it]

Step 3003/10000, Loss: 2.2956
Step 3004/10000, Loss: 2.2915


Training Progress:  30%|████████████████▊                                       | 3006/10000 [46:13<1:28:32,  1.32it/s]

Step 3005/10000, Loss: 2.3842
Step 3006/10000, Loss: 2.3723


Training Progress:  30%|█████████████████▍                                        | 3008/10000 [46:13<53:29,  2.18it/s]

Step 3007/10000, Loss: 2.3949
Step 3008/10000, Loss: 2.5238


Training Progress:  30%|█████████████████▍                                        | 3010/10000 [46:13<36:29,  3.19it/s]

Step 3009/10000, Loss: 2.5384
Step 3010/10000, Loss: 2.4451


Training Progress:  30%|█████████████████▍                                        | 3012/10000 [46:14<27:30,  4.23it/s]

Step 3011/10000, Loss: 2.4215
Step 3012/10000, Loss: 2.3596


Training Progress:  30%|█████████████████▍                                        | 3014/10000 [46:14<24:04,  4.84it/s]

Step 3013/10000, Loss: 2.4285
Step 3014/10000, Loss: 2.4000


Training Progress:  30%|█████████████████▍                                        | 3015/10000 [46:14<22:47,  5.11it/s]

Step 3015/10000, Loss: 2.2407
Step 3016/10000, Loss: 2.1285


Training Progress:  30%|████████████████▌                                      | 3016/10000 [46:33<11:07:37,  5.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3016_loss2.1285_20250117_133818.pt

New best loss: 2.1285


Training Progress:  30%|████████████████▉                                       | 3018/10000 [46:33<5:41:36,  2.94s/it]

Step 3017/10000, Loss: 2.2709
Step 3018/10000, Loss: 2.3264


Training Progress:  30%|████████████████▉                                       | 3020/10000 [46:34<2:57:14,  1.52s/it]

Step 3019/10000, Loss: 2.2561
Step 3020/10000, Loss: 2.2105


Training Progress:  30%|████████████████▉                                       | 3022/10000 [46:34<1:37:11,  1.20it/s]

Step 3021/10000, Loss: 2.2659
Step 3022/10000, Loss: 2.2058


Training Progress:  30%|█████████████████▌                                        | 3024/10000 [46:34<57:45,  2.01it/s]

Step 3023/10000, Loss: 2.1833
Step 3024/10000, Loss: 2.5362


Training Progress:  30%|█████████████████▌                                        | 3026/10000 [46:35<37:58,  3.06it/s]

Step 3025/10000, Loss: 2.3233
Step 3026/10000, Loss: 2.2449


Training Progress:  30%|█████████████████▌                                        | 3028/10000 [46:35<28:57,  4.01it/s]

Step 3027/10000, Loss: 2.2203
Step 3028/10000, Loss: 2.2629


Training Progress:  30%|█████████████████▌                                        | 3029/10000 [46:35<25:57,  4.48it/s]

Step 3029/10000, Loss: 2.1888
Step 3030/10000, Loss: 2.1246


Training Progress:  30%|████████████████▋                                      | 3030/10000 [46:55<11:42:52,  6.05s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3030_loss2.1246_20250117_133839.pt

New best loss: 2.1246
Step 3031/10000, Loss: 2.0789


Training Progress:  30%|████████████████▋                                      | 3031/10000 [47:17<21:02:27, 10.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3031_loss2.0789_20250117_133859.pt

New best loss: 2.0789


Training Progress:  30%|████████████████▋                                      | 3033/10000 [47:18<10:33:51,  5.46s/it]

Step 3032/10000, Loss: 2.1421
Step 3033/10000, Loss: 2.4558


Training Progress:  30%|████████████████▉                                       | 3035/10000 [47:18<5:20:21,  2.76s/it]

Step 3034/10000, Loss: 2.3550
Step 3035/10000, Loss: 2.4860


Training Progress:  30%|█████████████████                                       | 3037/10000 [47:18<2:47:08,  1.44s/it]

Step 3036/10000, Loss: 2.3452
Step 3037/10000, Loss: 2.3558


Training Progress:  30%|█████████████████                                       | 3039/10000 [47:19<1:31:38,  1.27it/s]

Step 3038/10000, Loss: 2.3790
Step 3039/10000, Loss: 2.3219


Training Progress:  30%|█████████████████▋                                        | 3041/10000 [47:19<54:53,  2.11it/s]

Step 3040/10000, Loss: 2.2127
Step 3041/10000, Loss: 2.2558


Training Progress:  30%|█████████████████▋                                        | 3043/10000 [47:19<37:01,  3.13it/s]

Step 3042/10000, Loss: 2.2222
Step 3043/10000, Loss: 2.2015


Training Progress:  30%|█████████████████▋                                        | 3045/10000 [47:20<28:03,  4.13it/s]

Step 3044/10000, Loss: 2.2847
Step 3045/10000, Loss: 2.4412


Training Progress:  30%|█████████████████▋                                        | 3047/10000 [47:20<24:08,  4.80it/s]

Step 3046/10000, Loss: 2.4173
Step 3047/10000, Loss: 2.3992


Training Progress:  30%|█████████████████▋                                        | 3049/10000 [47:20<21:21,  5.43it/s]

Step 3048/10000, Loss: 2.3822
Step 3049/10000, Loss: 2.3129


Training Progress:  31%|█████████████████▋                                        | 3051/10000 [47:21<20:41,  5.60it/s]

Step 3050/10000, Loss: 2.4522
Step 3051/10000, Loss: 2.2187


Training Progress:  31%|█████████████████▋                                        | 3053/10000 [47:21<20:13,  5.72it/s]

Step 3052/10000, Loss: 2.2589
Step 3053/10000, Loss: 2.4170


Training Progress:  31%|█████████████████▋                                        | 3055/10000 [47:21<20:01,  5.78it/s]

Step 3054/10000, Loss: 2.2427
Step 3055/10000, Loss: 2.3184


Training Progress:  31%|█████████████████▋                                        | 3057/10000 [47:22<19:52,  5.82it/s]

Step 3056/10000, Loss: 2.1673
Step 3057/10000, Loss: 2.4310


Training Progress:  31%|█████████████████▋                                        | 3059/10000 [47:22<20:00,  5.78it/s]

Step 3058/10000, Loss: 2.4389
Step 3059/10000, Loss: 2.4945


Training Progress:  31%|█████████████████▊                                        | 3061/10000 [47:22<19:30,  5.93it/s]

Step 3060/10000, Loss: 2.4502
Step 3061/10000, Loss: 2.3502


Training Progress:  31%|█████████████████▊                                        | 3063/10000 [47:23<19:42,  5.87it/s]

Step 3062/10000, Loss: 2.4123
Step 3063/10000, Loss: 2.3782


Training Progress:  31%|█████████████████▊                                        | 3065/10000 [47:23<19:44,  5.85it/s]

Step 3064/10000, Loss: 2.2759
Step 3065/10000, Loss: 2.3150


Training Progress:  31%|█████████████████▊                                        | 3067/10000 [47:23<19:43,  5.86it/s]

Step 3066/10000, Loss: 2.2496
Step 3067/10000, Loss: 2.2512


Training Progress:  31%|█████████████████▊                                        | 3069/10000 [47:24<19:41,  5.86it/s]

Step 3068/10000, Loss: 2.4040
Step 3069/10000, Loss: 2.4046


Training Progress:  31%|█████████████████▊                                        | 3071/10000 [47:24<19:32,  5.91it/s]

Step 3070/10000, Loss: 2.3397
Step 3071/10000, Loss: 2.2829


Training Progress:  31%|█████████████████▊                                        | 3073/10000 [47:24<19:30,  5.92it/s]

Step 3072/10000, Loss: 2.3523
Step 3073/10000, Loss: 2.3518


Training Progress:  31%|█████████████████▊                                        | 3075/10000 [47:25<19:48,  5.83it/s]

Step 3074/10000, Loss: 2.2355
Step 3075/10000, Loss: 2.2369


Training Progress:  31%|█████████████████▊                                        | 3077/10000 [47:25<19:50,  5.82it/s]

Step 3076/10000, Loss: 2.3467
Step 3077/10000, Loss: 2.2723


Training Progress:  31%|█████████████████▊                                        | 3079/10000 [47:25<19:45,  5.84it/s]

Step 3078/10000, Loss: 2.2627
Step 3079/10000, Loss: 2.4654


Training Progress:  31%|█████████████████▊                                        | 3081/10000 [47:26<19:17,  5.98it/s]

Step 3080/10000, Loss: 2.3228
Step 3081/10000, Loss: 2.3206


Training Progress:  31%|█████████████████▉                                        | 3083/10000 [47:26<19:45,  5.84it/s]

Step 3082/10000, Loss: 2.2128
Step 3083/10000, Loss: 2.3052


Training Progress:  31%|█████████████████▉                                        | 3085/10000 [47:26<19:53,  5.79it/s]

Step 3084/10000, Loss: 2.2118
Step 3085/10000, Loss: 2.2440


Training Progress:  31%|█████████████████▉                                        | 3087/10000 [47:27<19:50,  5.81it/s]

Step 3086/10000, Loss: 2.2194
Step 3087/10000, Loss: 2.2980


Training Progress:  31%|█████████████████▉                                        | 3089/10000 [47:27<19:14,  5.99it/s]

Step 3088/10000, Loss: 2.3117
Step 3089/10000, Loss: 2.3579


Training Progress:  31%|█████████████████▉                                        | 3091/10000 [47:27<19:49,  5.81it/s]

Step 3090/10000, Loss: 2.4271
Step 3091/10000, Loss: 2.4084


Training Progress:  31%|█████████████████▉                                        | 3093/10000 [47:28<19:31,  5.90it/s]

Step 3092/10000, Loss: 2.3553
Step 3093/10000, Loss: 2.3345


Training Progress:  31%|█████████████████▉                                        | 3095/10000 [47:28<19:45,  5.82it/s]

Step 3094/10000, Loss: 2.2387
Step 3095/10000, Loss: 2.3665


Training Progress:  31%|█████████████████▉                                        | 3097/10000 [47:28<19:35,  5.87it/s]

Step 3096/10000, Loss: 2.3690
Step 3097/10000, Loss: 2.2261
Step 3098/10000, Loss: 2.0728


Training Progress:  31%|█████████████████▎                                      | 3098/10000 [47:43<8:50:03,  4.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3098_loss2.0728_20250117_133933.pt

New best loss: 2.0728


Training Progress:  31%|█████████████████▎                                      | 3100/10000 [47:44<4:33:59,  2.38s/it]

Step 3099/10000, Loss: 2.2096
Step 3100/10000, Loss: 2.2339


Training Progress:  31%|█████████████████▎                                      | 3102/10000 [47:44<2:24:21,  1.26s/it]

Step 3101/10000, Loss: 2.1545
Step 3102/10000, Loss: 2.0997


Training Progress:  31%|█████████████████▍                                      | 3104/10000 [47:45<1:20:53,  1.42it/s]

Step 3103/10000, Loss: 2.1398
Step 3104/10000, Loss: 2.1057


Training Progress:  31%|██████████████████                                        | 3106/10000 [47:45<49:15,  2.33it/s]

Step 3105/10000, Loss: 2.1045
Step 3106/10000, Loss: 2.4658


Training Progress:  31%|██████████████████                                        | 3108/10000 [47:45<34:04,  3.37it/s]

Step 3107/10000, Loss: 2.2419
Step 3108/10000, Loss: 2.1809


Training Progress:  31%|██████████████████                                        | 3110/10000 [47:46<26:45,  4.29it/s]

Step 3109/10000, Loss: 2.1656
Step 3110/10000, Loss: 2.1701


Training Progress:  31%|██████████████████                                        | 3111/10000 [47:46<24:42,  4.65it/s]

Step 3111/10000, Loss: 2.1383
Step 3112/10000, Loss: 2.0242


Training Progress:  31%|█████████████████                                      | 3112/10000 [48:03<10:15:52,  5.36s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3112_loss2.0242_20250117_133950.pt

New best loss: 2.0242
Step 3113/10000, Loss: 1.9749


Training Progress:  31%|█████████████████                                      | 3113/10000 [48:25<19:39:19, 10.27s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3113_loss1.9749_20250117_134007.pt

New best loss: 1.9749


Training Progress:  31%|█████████████████▍                                      | 3115/10000 [48:26<9:53:59,  5.18s/it]

Step 3114/10000, Loss: 2.0431
Step 3115/10000, Loss: 2.3577


Training Progress:  31%|█████████████████▍                                      | 3117/10000 [48:26<5:01:01,  2.62s/it]

Step 3116/10000, Loss: 2.2637
Step 3117/10000, Loss: 2.3527


Training Progress:  31%|█████████████████▍                                      | 3118/10000 [48:26<3:36:10,  1.88s/it]

Step 3118/10000, Loss: 2.2378


Training Progress:  31%|█████████████████▍                                      | 3120/10000 [48:26<1:57:20,  1.02s/it]

Step 3119/10000, Loss: 2.2745
Step 3120/10000, Loss: 2.3094


Training Progress:  31%|█████████████████▍                                      | 3122/10000 [48:27<1:06:56,  1.71it/s]

Step 3121/10000, Loss: 2.2008
Step 3122/10000, Loss: 2.0685


Training Progress:  31%|██████████████████                                        | 3124/10000 [48:27<42:46,  2.68it/s]

Step 3123/10000, Loss: 2.1115
Step 3124/10000, Loss: 2.1361


Training Progress:  31%|██████████████████▏                                       | 3126/10000 [48:27<30:49,  3.72it/s]

Step 3125/10000, Loss: 2.1434
Step 3126/10000, Loss: 2.2088


Training Progress:  31%|██████████████████▏                                       | 3128/10000 [48:28<24:51,  4.61it/s]

Step 3127/10000, Loss: 2.3283
Step 3128/10000, Loss: 2.2969


Training Progress:  31%|██████████████████▏                                       | 3130/10000 [48:28<22:30,  5.09it/s]

Step 3129/10000, Loss: 2.2674
Step 3130/10000, Loss: 2.2254


Training Progress:  31%|██████████████████▏                                       | 3132/10000 [48:28<20:32,  5.57it/s]

Step 3131/10000, Loss: 2.1870
Step 3132/10000, Loss: 2.3728


Training Progress:  31%|██████████████████▏                                       | 3134/10000 [48:29<19:52,  5.76it/s]

Step 3133/10000, Loss: 2.1323
Step 3134/10000, Loss: 2.1877


Training Progress:  31%|██████████████████▏                                       | 3136/10000 [48:29<19:35,  5.84it/s]

Step 3135/10000, Loss: 2.3524
Step 3136/10000, Loss: 2.1574


Training Progress:  31%|██████████████████▏                                       | 3138/10000 [48:30<19:22,  5.90it/s]

Step 3137/10000, Loss: 2.2429
Step 3138/10000, Loss: 2.0962


Training Progress:  31%|██████████████████▏                                       | 3140/10000 [48:30<19:48,  5.77it/s]

Step 3139/10000, Loss: 2.3506
Step 3140/10000, Loss: 2.3544


Training Progress:  31%|██████████████████▏                                       | 3142/10000 [48:30<19:42,  5.80it/s]

Step 3141/10000, Loss: 2.3789
Step 3142/10000, Loss: 2.3156


Training Progress:  31%|██████████████████▏                                       | 3144/10000 [48:31<19:35,  5.83it/s]

Step 3143/10000, Loss: 2.2493
Step 3144/10000, Loss: 2.3227


Training Progress:  31%|██████████████████▏                                       | 3146/10000 [48:31<19:28,  5.86it/s]

Step 3145/10000, Loss: 2.3326
Step 3146/10000, Loss: 2.2501


Training Progress:  31%|██████████████████▎                                       | 3148/10000 [48:31<19:43,  5.79it/s]

Step 3147/10000, Loss: 2.2690
Step 3148/10000, Loss: 2.1462


Training Progress:  32%|██████████████████▎                                       | 3150/10000 [48:32<19:32,  5.84it/s]

Step 3149/10000, Loss: 2.1637
Step 3150/10000, Loss: 2.3185


Training Progress:  32%|██████████████████▎                                       | 3152/10000 [48:32<19:18,  5.91it/s]

Step 3151/10000, Loss: 2.3322
Step 3152/10000, Loss: 2.2788


Training Progress:  32%|██████████████████▎                                       | 3154/10000 [48:32<19:34,  5.83it/s]

Step 3153/10000, Loss: 2.2110
Step 3154/10000, Loss: 2.2942


Training Progress:  32%|██████████████████▎                                       | 3156/10000 [48:33<19:33,  5.83it/s]

Step 3155/10000, Loss: 2.2944
Step 3156/10000, Loss: 2.1892


Training Progress:  32%|██████████████████▎                                       | 3158/10000 [48:33<19:24,  5.88it/s]

Step 3157/10000, Loss: 2.2119
Step 3158/10000, Loss: 2.3302


Training Progress:  32%|██████████████████▎                                       | 3160/10000 [48:33<19:04,  5.98it/s]

Step 3159/10000, Loss: 2.2495
Step 3160/10000, Loss: 2.2431


Training Progress:  32%|██████████████████▎                                       | 3162/10000 [48:34<19:27,  5.86it/s]

Step 3161/10000, Loss: 2.4225
Step 3162/10000, Loss: 2.2760


Training Progress:  32%|██████████████████▎                                       | 3164/10000 [48:34<19:37,  5.80it/s]

Step 3163/10000, Loss: 2.2642
Step 3164/10000, Loss: 2.1302


Training Progress:  32%|██████████████████▎                                       | 3166/10000 [48:34<19:34,  5.82it/s]

Step 3165/10000, Loss: 2.2467
Step 3166/10000, Loss: 2.2035


Training Progress:  32%|██████████████████▎                                       | 3168/10000 [48:35<19:31,  5.83it/s]

Step 3167/10000, Loss: 2.1939
Step 3168/10000, Loss: 2.1433


Training Progress:  32%|██████████████████▍                                       | 3170/10000 [48:35<19:07,  5.95it/s]

Step 3169/10000, Loss: 2.2566
Step 3170/10000, Loss: 2.2752


Training Progress:  32%|██████████████████▍                                       | 3172/10000 [48:35<19:26,  5.86it/s]

Step 3171/10000, Loss: 2.2630
Step 3172/10000, Loss: 2.3767


Training Progress:  32%|██████████████████▍                                       | 3174/10000 [48:36<19:03,  5.97it/s]

Step 3173/10000, Loss: 2.3187
Step 3174/10000, Loss: 2.2565


Training Progress:  32%|██████████████████▍                                       | 3176/10000 [48:36<19:30,  5.83it/s]

Step 3175/10000, Loss: 2.2356
Step 3176/10000, Loss: 2.1517


Training Progress:  32%|██████████████████▍                                       | 3178/10000 [48:36<19:32,  5.82it/s]

Step 3177/10000, Loss: 2.2644
Step 3178/10000, Loss: 2.2868


Training Progress:  32%|██████████████████▍                                       | 3180/10000 [48:37<19:16,  5.90it/s]

Step 3179/10000, Loss: 2.1468
Step 3180/10000, Loss: 2.0191


Training Progress:  32%|██████████████████▍                                       | 3182/10000 [48:37<19:30,  5.82it/s]

Step 3181/10000, Loss: 2.1620
Step 3182/10000, Loss: 2.1774


Training Progress:  32%|██████████████████▍                                       | 3184/10000 [48:37<19:05,  5.95it/s]

Step 3183/10000, Loss: 2.1063
Step 3184/10000, Loss: 2.0795


Training Progress:  32%|██████████████████▍                                       | 3186/10000 [48:38<19:25,  5.85it/s]

Step 3185/10000, Loss: 2.0913
Step 3186/10000, Loss: 2.0291


Training Progress:  32%|██████████████████▍                                       | 3188/10000 [48:38<19:34,  5.80it/s]

Step 3187/10000, Loss: 2.0375
Step 3188/10000, Loss: 2.3968


Training Progress:  32%|██████████████████▌                                       | 3190/10000 [48:38<19:15,  5.89it/s]

Step 3189/10000, Loss: 2.1518
Step 3190/10000, Loss: 2.1236


Training Progress:  32%|██████████████████▌                                       | 3192/10000 [48:39<19:30,  5.82it/s]

Step 3191/10000, Loss: 2.0845
Step 3192/10000, Loss: 2.1418


Training Progress:  32%|██████████████████▌                                       | 3194/10000 [48:39<19:05,  5.94it/s]

Step 3193/10000, Loss: 2.1120
Step 3194/10000, Loss: 2.0024
Step 3195/10000, Loss: 1.9419


Training Progress:  32%|█████████████████▉                                      | 3195/10000 [48:51<7:11:35,  3.81s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3195_loss1.9419_20250117_134043.pt

New best loss: 1.9419


Training Progress:  32%|█████████████████▉                                      | 3197/10000 [48:52<3:47:10,  2.00s/it]

Step 3196/10000, Loss: 2.0538
Step 3197/10000, Loss: 2.3260


Training Progress:  32%|█████████████████▉                                      | 3199/10000 [48:52<2:01:20,  1.07s/it]

Step 3198/10000, Loss: 2.2191
Step 3199/10000, Loss: 2.3483


Training Progress:  32%|█████████████████▉                                      | 3201/10000 [48:53<1:09:08,  1.64it/s]

Step 3200/10000, Loss: 2.2037
Step 3201/10000, Loss: 2.2431


Training Progress:  32%|██████████████████▌                                       | 3203/10000 [48:53<43:59,  2.58it/s]

Step 3202/10000, Loss: 2.2486
Step 3203/10000, Loss: 2.1758


Training Progress:  32%|██████████████████▌                                       | 3205/10000 [48:53<31:37,  3.58it/s]

Step 3204/10000, Loss: 2.0578
Step 3205/10000, Loss: 2.1076


Training Progress:  32%|██████████████████▌                                       | 3207/10000 [48:54<25:20,  4.47it/s]

Step 3206/10000, Loss: 2.1189
Step 3207/10000, Loss: 2.1086


Training Progress:  32%|██████████████████▌                                       | 3209/10000 [48:54<22:05,  5.12it/s]

Step 3208/10000, Loss: 2.1677
Step 3209/10000, Loss: 2.3185


Training Progress:  32%|██████████████████▌                                       | 3211/10000 [48:54<20:53,  5.41it/s]

Step 3210/10000, Loss: 2.2373
Step 3211/10000, Loss: 2.2246


Training Progress:  32%|██████████████████▋                                       | 3213/10000 [48:55<19:46,  5.72it/s]

Step 3212/10000, Loss: 2.2327
Step 3213/10000, Loss: 2.1332


Training Progress:  32%|██████████████████▋                                       | 3215/10000 [48:55<19:45,  5.72it/s]

Step 3214/10000, Loss: 2.3016
Step 3215/10000, Loss: 2.0763


Training Progress:  32%|██████████████████▋                                       | 3217/10000 [48:55<19:11,  5.89it/s]

Step 3216/10000, Loss: 2.0924
Step 3217/10000, Loss: 2.2578


Training Progress:  32%|██████████████████▋                                       | 3219/10000 [48:56<19:25,  5.82it/s]

Step 3218/10000, Loss: 2.1285
Step 3219/10000, Loss: 2.2284


Training Progress:  32%|██████████████████▋                                       | 3221/10000 [48:56<19:04,  5.92it/s]

Step 3220/10000, Loss: 2.0875
Step 3221/10000, Loss: 2.3365


Training Progress:  32%|██████████████████▋                                       | 3223/10000 [48:56<19:23,  5.82it/s]

Step 3222/10000, Loss: 2.3276
Step 3223/10000, Loss: 2.3563


Training Progress:  32%|██████████████████▋                                       | 3225/10000 [48:57<19:04,  5.92it/s]

Step 3224/10000, Loss: 2.2977
Step 3225/10000, Loss: 2.2236


Training Progress:  32%|██████████████████▋                                       | 3227/10000 [48:57<19:23,  5.82it/s]

Step 3226/10000, Loss: 2.2556
Step 3227/10000, Loss: 2.2355


Training Progress:  32%|██████████████████▋                                       | 3229/10000 [48:57<19:09,  5.89it/s]

Step 3228/10000, Loss: 2.1743
Step 3229/10000, Loss: 2.2122


Training Progress:  32%|██████████████████▋                                       | 3231/10000 [48:58<19:26,  5.80it/s]

Step 3230/10000, Loss: 2.1041
Step 3231/10000, Loss: 2.1511


Training Progress:  32%|██████████████████▊                                       | 3233/10000 [48:58<19:00,  5.94it/s]

Step 3232/10000, Loss: 2.2699
Step 3233/10000, Loss: 2.2535


Training Progress:  32%|██████████████████▊                                       | 3235/10000 [48:58<19:21,  5.82it/s]

Step 3234/10000, Loss: 2.2053
Step 3235/10000, Loss: 2.1220


Training Progress:  32%|██████████████████▊                                       | 3237/10000 [48:59<19:00,  5.93it/s]

Step 3236/10000, Loss: 2.2303
Step 3237/10000, Loss: 2.2418


Training Progress:  32%|██████████████████▊                                       | 3239/10000 [48:59<19:31,  5.77it/s]

Step 3238/10000, Loss: 2.1612
Step 3239/10000, Loss: 2.1339


Training Progress:  32%|██████████████████▊                                       | 3241/10000 [48:59<18:56,  5.95it/s]

Step 3240/10000, Loss: 2.2732
Step 3241/10000, Loss: 2.2211


Training Progress:  32%|██████████████████▊                                       | 3243/10000 [49:00<19:18,  5.83it/s]

Step 3242/10000, Loss: 2.2004
Step 3243/10000, Loss: 2.4118


Training Progress:  32%|██████████████████▊                                       | 3245/10000 [49:00<18:57,  5.94it/s]

Step 3244/10000, Loss: 2.2208
Step 3245/10000, Loss: 2.2324


Training Progress:  32%|██████████████████▊                                       | 3247/10000 [49:00<19:18,  5.83it/s]

Step 3246/10000, Loss: 2.0747
Step 3247/10000, Loss: 2.2222


Training Progress:  32%|██████████████████▊                                       | 3249/10000 [49:01<19:13,  5.85it/s]

Step 3248/10000, Loss: 2.1747
Step 3249/10000, Loss: 2.1488


Training Progress:  33%|██████████████████▊                                       | 3251/10000 [49:01<19:15,  5.84it/s]

Step 3250/10000, Loss: 2.1076
Step 3251/10000, Loss: 2.2259


Training Progress:  33%|██████████████████▊                                       | 3253/10000 [49:02<18:55,  5.94it/s]

Step 3252/10000, Loss: 2.2298
Step 3253/10000, Loss: 2.2368


Training Progress:  33%|██████████████████▉                                       | 3255/10000 [49:02<19:14,  5.84it/s]

Step 3254/10000, Loss: 2.3302
Step 3255/10000, Loss: 2.3109


Training Progress:  33%|██████████████████▉                                       | 3257/10000 [49:02<19:26,  5.78it/s]

Step 3256/10000, Loss: 2.2165
Step 3257/10000, Loss: 2.1840


Training Progress:  33%|██████████████████▉                                       | 3259/10000 [49:03<19:08,  5.87it/s]

Step 3258/10000, Loss: 2.1012
Step 3259/10000, Loss: 2.2377


Training Progress:  33%|██████████████████▉                                       | 3261/10000 [49:03<19:22,  5.80it/s]

Step 3260/10000, Loss: 2.2367
Step 3261/10000, Loss: 2.0851


Training Progress:  33%|██████████████████▉                                       | 3263/10000 [49:03<19:06,  5.88it/s]

Step 3262/10000, Loss: 1.9758
Step 3263/10000, Loss: 2.1152


Training Progress:  33%|██████████████████▉                                       | 3265/10000 [49:04<19:20,  5.80it/s]

Step 3264/10000, Loss: 2.1647
Step 3265/10000, Loss: 2.0742


Training Progress:  33%|██████████████████▉                                       | 3267/10000 [49:04<19:02,  5.89it/s]

Step 3266/10000, Loss: 2.0610
Step 3267/10000, Loss: 2.0979


Training Progress:  33%|██████████████████▉                                       | 3269/10000 [49:04<19:18,  5.81it/s]

Step 3268/10000, Loss: 2.0001
Step 3269/10000, Loss: 1.9946


Training Progress:  33%|██████████████████▉                                       | 3271/10000 [49:05<18:54,  5.93it/s]

Step 3270/10000, Loss: 2.3310
Step 3271/10000, Loss: 2.1342


Training Progress:  33%|██████████████████▉                                       | 3273/10000 [49:05<19:15,  5.82it/s]

Step 3272/10000, Loss: 2.0731
Step 3273/10000, Loss: 2.0282


Training Progress:  33%|██████████████████▉                                       | 3275/10000 [49:05<18:53,  5.94it/s]

Step 3274/10000, Loss: 2.0805
Step 3275/10000, Loss: 2.0501


Training Progress:  33%|███████████████████                                       | 3276/10000 [49:05<19:03,  5.88it/s]

Step 3276/10000, Loss: 1.9478
Step 3277/10000, Loss: 1.9001


Training Progress:  33%|██████████████████▎                                     | 3277/10000 [49:21<9:06:33,  4.88s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3277_loss1.9001_20250117_134109.pt

New best loss: 1.9001


Training Progress:  33%|██████████████████▎                                     | 3279/10000 [49:22<4:44:35,  2.54s/it]

Step 3278/10000, Loss: 2.0163
Step 3279/10000, Loss: 2.2745


Training Progress:  33%|██████████████████▎                                     | 3281/10000 [49:22<2:29:04,  1.33s/it]

Step 3280/10000, Loss: 2.2029
Step 3281/10000, Loss: 2.2891


Training Progress:  33%|██████████████████▍                                     | 3283/10000 [49:23<1:22:30,  1.36it/s]

Step 3282/10000, Loss: 2.1599
Step 3283/10000, Loss: 2.1861


Training Progress:  33%|███████████████████                                       | 3285/10000 [49:23<50:26,  2.22it/s]

Step 3284/10000, Loss: 2.1974
Step 3285/10000, Loss: 2.1398


Training Progress:  33%|███████████████████                                       | 3287/10000 [49:23<34:17,  3.26it/s]

Step 3286/10000, Loss: 2.0316
Step 3287/10000, Loss: 2.0708


Training Progress:  33%|███████████████████                                       | 3289/10000 [49:24<26:47,  4.18it/s]

Step 3288/10000, Loss: 2.0437
Step 3289/10000, Loss: 2.0498


Training Progress:  33%|███████████████████                                       | 3291/10000 [49:24<22:46,  4.91it/s]

Step 3290/10000, Loss: 2.1172
Step 3291/10000, Loss: 2.2788


Training Progress:  33%|███████████████████                                       | 3293/10000 [49:24<20:36,  5.43it/s]

Step 3292/10000, Loss: 2.2434
Step 3293/10000, Loss: 2.2444


Training Progress:  33%|███████████████████                                       | 3295/10000 [49:25<20:07,  5.55it/s]

Step 3294/10000, Loss: 2.2364
Step 3295/10000, Loss: 2.1773


Training Progress:  33%|███████████████████                                       | 3297/10000 [49:25<19:20,  5.77it/s]

Step 3296/10000, Loss: 2.3179
Step 3297/10000, Loss: 2.0922


Training Progress:  33%|███████████████████▏                                      | 3299/10000 [49:25<19:28,  5.74it/s]

Step 3298/10000, Loss: 2.1393
Step 3299/10000, Loss: 2.2463


Training Progress:  33%|███████████████████▏                                      | 3301/10000 [49:26<19:09,  5.83it/s]

Step 3300/10000, Loss: 2.1240
Step 3301/10000, Loss: 2.1786


Training Progress:  33%|███████████████████▏                                      | 3303/10000 [49:26<19:22,  5.76it/s]

Step 3302/10000, Loss: 2.0484
Step 3303/10000, Loss: 2.3122


Training Progress:  33%|███████████████████▏                                      | 3305/10000 [49:26<19:05,  5.84it/s]

Step 3304/10000, Loss: 2.3198
Step 3305/10000, Loss: 2.3152


Training Progress:  33%|███████████████████▏                                      | 3307/10000 [49:27<19:13,  5.80it/s]

Step 3306/10000, Loss: 2.2775
Step 3307/10000, Loss: 2.2261


Training Progress:  33%|███████████████████▏                                      | 3309/10000 [49:27<18:55,  5.89it/s]

Step 3308/10000, Loss: 2.2693
Step 3309/10000, Loss: 2.2592


Training Progress:  33%|███████████████████▏                                      | 3311/10000 [49:27<19:07,  5.83it/s]

Step 3310/10000, Loss: 2.1581
Step 3311/10000, Loss: 2.1847


Training Progress:  33%|███████████████████▏                                      | 3313/10000 [49:28<19:15,  5.79it/s]

Step 3312/10000, Loss: 2.0649
Step 3313/10000, Loss: 2.1210


Training Progress:  33%|███████████████████▏                                      | 3315/10000 [49:28<18:53,  5.90it/s]

Step 3314/10000, Loss: 2.2735
Step 3315/10000, Loss: 2.2497


Training Progress:  33%|███████████████████▏                                      | 3317/10000 [49:28<19:06,  5.83it/s]

Step 3316/10000, Loss: 2.2016
Step 3317/10000, Loss: 2.0868


Training Progress:  33%|███████████████████▎                                      | 3319/10000 [49:29<19:14,  5.79it/s]

Step 3318/10000, Loss: 2.1651
Step 3319/10000, Loss: 2.1775


Training Progress:  33%|███████████████████▎                                      | 3321/10000 [49:29<18:55,  5.88it/s]

Step 3320/10000, Loss: 2.0859
Step 3321/10000, Loss: 2.0460


Training Progress:  33%|███████████████████▎                                      | 3323/10000 [49:29<19:08,  5.81it/s]

Step 3322/10000, Loss: 2.1881
Step 3323/10000, Loss: 2.1865


Training Progress:  33%|███████████████████▎                                      | 3325/10000 [49:30<18:41,  5.95it/s]

Step 3324/10000, Loss: 2.1612
Step 3325/10000, Loss: 2.3569


Training Progress:  33%|███████████████████▎                                      | 3327/10000 [49:30<19:03,  5.84it/s]

Step 3326/10000, Loss: 2.1936
Step 3327/10000, Loss: 2.2011


Training Progress:  33%|███████████████████▎                                      | 3329/10000 [49:30<19:11,  5.79it/s]

Step 3328/10000, Loss: 2.0740
Step 3329/10000, Loss: 2.1810


Training Progress:  33%|███████████████████▎                                      | 3331/10000 [49:31<18:46,  5.92it/s]

Step 3330/10000, Loss: 2.1110
Step 3331/10000, Loss: 2.1061


Training Progress:  33%|███████████████████▎                                      | 3333/10000 [49:31<19:03,  5.83it/s]

Step 3332/10000, Loss: 2.0427
Step 3333/10000, Loss: 2.1340


Training Progress:  33%|███████████████████▎                                      | 3335/10000 [49:32<18:39,  5.95it/s]

Step 3334/10000, Loss: 2.1686
Step 3335/10000, Loss: 2.1890


Training Progress:  33%|███████████████████▎                                      | 3337/10000 [49:32<18:58,  5.85it/s]

Step 3336/10000, Loss: 2.3130
Step 3337/10000, Loss: 2.2670


Training Progress:  33%|███████████████████▎                                      | 3339/10000 [49:32<19:13,  5.77it/s]

Step 3338/10000, Loss: 2.1610
Step 3339/10000, Loss: 2.1503


Training Progress:  33%|███████████████████▍                                      | 3341/10000 [49:33<18:41,  5.94it/s]

Step 3340/10000, Loss: 2.0872
Step 3341/10000, Loss: 2.1751


Training Progress:  33%|███████████████████▍                                      | 3343/10000 [49:33<19:00,  5.84it/s]

Step 3342/10000, Loss: 2.1488
Step 3343/10000, Loss: 2.0238


Training Progress:  33%|███████████████████▍                                      | 3345/10000 [49:33<19:08,  5.79it/s]

Step 3344/10000, Loss: 1.9385
Step 3345/10000, Loss: 2.0598


Training Progress:  33%|███████████████████▍                                      | 3347/10000 [49:34<19:05,  5.81it/s]

Step 3346/10000, Loss: 2.0813
Step 3347/10000, Loss: 1.9779


Training Progress:  33%|███████████████████▍                                      | 3349/10000 [49:34<19:00,  5.83it/s]

Step 3348/10000, Loss: 2.0041
Step 3349/10000, Loss: 2.0470


Training Progress:  34%|███████████████████▍                                      | 3351/10000 [49:34<19:09,  5.78it/s]

Step 3350/10000, Loss: 1.9970
Step 3351/10000, Loss: 1.9575


Training Progress:  34%|███████████████████▍                                      | 3353/10000 [49:35<18:50,  5.88it/s]

Step 3352/10000, Loss: 2.3414
Step 3353/10000, Loss: 2.1589


Training Progress:  34%|███████████████████▍                                      | 3355/10000 [49:35<18:59,  5.83it/s]

Step 3354/10000, Loss: 2.0751
Step 3355/10000, Loss: 2.0044


Training Progress:  34%|███████████████████▍                                      | 3357/10000 [49:35<18:37,  5.95it/s]

Step 3356/10000, Loss: 2.0188
Step 3357/10000, Loss: 1.9610
Step 3358/10000, Loss: 1.8677


Training Progress:  34%|██████████████████▊                                     | 3358/10000 [49:51<8:44:13,  4.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3358_loss1.8677_20250117_134139.pt

New best loss: 1.8677
Step 3359/10000, Loss: 1.8309


Training Progress:  34%|██████████████████▍                                    | 3359/10000 [50:11<17:34:32,  9.53s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3359_loss1.8309_20250117_134155.pt

New best loss: 1.8309


Training Progress:  34%|██████████████████▊                                     | 3361/10000 [50:12<8:51:12,  4.80s/it]

Step 3360/10000, Loss: 1.9394
Step 3361/10000, Loss: 2.1916


Training Progress:  34%|██████████████████▊                                     | 3363/10000 [50:12<4:30:01,  2.44s/it]

Step 3362/10000, Loss: 2.1879
Step 3363/10000, Loss: 2.2500


Training Progress:  34%|██████████████████▊                                     | 3365/10000 [50:13<2:21:43,  1.28s/it]

Step 3364/10000, Loss: 2.1037
Step 3365/10000, Loss: 2.1411


Training Progress:  34%|██████████████████▊                                     | 3367/10000 [50:13<1:19:14,  1.40it/s]

Step 3366/10000, Loss: 2.1335
Step 3367/10000, Loss: 2.0950


Training Progress:  34%|███████████████████▌                                      | 3369/10000 [50:13<48:14,  2.29it/s]

Step 3368/10000, Loss: 1.9621
Step 3369/10000, Loss: 1.9957


Training Progress:  34%|███████████████████▌                                      | 3371/10000 [50:14<33:26,  3.30it/s]

Step 3370/10000, Loss: 1.9669
Step 3371/10000, Loss: 1.9215


Training Progress:  34%|███████████████████▌                                      | 3373/10000 [50:14<25:45,  4.29it/s]

Step 3372/10000, Loss: 2.0382
Step 3373/10000, Loss: 2.1711


Training Progress:  34%|███████████████████▌                                      | 3375/10000 [50:14<22:26,  4.92it/s]

Step 3374/10000, Loss: 2.1859
Step 3375/10000, Loss: 2.1552


Training Progress:  34%|███████████████████▌                                      | 3377/10000 [50:15<20:15,  5.45it/s]

Step 3376/10000, Loss: 2.1294
Step 3377/10000, Loss: 2.0392


Training Progress:  34%|███████████████████▌                                      | 3379/10000 [50:15<19:50,  5.56it/s]

Step 3378/10000, Loss: 2.2056
Step 3379/10000, Loss: 2.0159


Training Progress:  34%|███████████████████▌                                      | 3381/10000 [50:15<18:53,  5.84it/s]

Step 3380/10000, Loss: 2.0577
Step 3381/10000, Loss: 2.1752


Training Progress:  34%|███████████████████▌                                      | 3383/10000 [50:16<19:01,  5.80it/s]

Step 3382/10000, Loss: 2.0732
Step 3383/10000, Loss: 2.1086


Training Progress:  34%|███████████████████▋                                      | 3385/10000 [50:16<19:05,  5.77it/s]

Step 3384/10000, Loss: 1.9910
Step 3385/10000, Loss: 2.2197


Training Progress:  34%|███████████████████▋                                      | 3387/10000 [50:16<18:45,  5.87it/s]

Step 3386/10000, Loss: 2.1814
Step 3387/10000, Loss: 2.2252


Training Progress:  34%|███████████████████▋                                      | 3389/10000 [50:17<18:58,  5.81it/s]

Step 3388/10000, Loss: 2.1958
Step 3389/10000, Loss: 2.1385


Training Progress:  34%|███████████████████▋                                      | 3391/10000 [50:17<18:33,  5.94it/s]

Step 3390/10000, Loss: 2.1735
Step 3391/10000, Loss: 2.1576


Training Progress:  34%|███████████████████▋                                      | 3393/10000 [50:17<18:50,  5.84it/s]

Step 3392/10000, Loss: 2.0948
Step 3393/10000, Loss: 2.1715


Training Progress:  34%|███████████████████▋                                      | 3395/10000 [50:18<18:28,  5.96it/s]

Step 3394/10000, Loss: 2.0686
Step 3395/10000, Loss: 2.0678


Training Progress:  34%|███████████████████▋                                      | 3397/10000 [50:18<18:50,  5.84it/s]

Step 3396/10000, Loss: 2.1968
Step 3397/10000, Loss: 2.2079


Training Progress:  34%|███████████████████▋                                      | 3399/10000 [50:18<19:00,  5.79it/s]

Step 3398/10000, Loss: 2.1608
Step 3399/10000, Loss: 2.0625


Training Progress:  34%|███████████████████▋                                      | 3401/10000 [50:19<18:44,  5.87it/s]

Step 3400/10000, Loss: 2.1009
Step 3401/10000, Loss: 2.1226


Training Progress:  34%|███████████████████▋                                      | 3403/10000 [50:19<18:56,  5.81it/s]

Step 3402/10000, Loss: 2.0292
Step 3403/10000, Loss: 2.0034


Training Progress:  34%|███████████████████▋                                      | 3405/10000 [50:19<18:41,  5.88it/s]

Step 3404/10000, Loss: 2.1545
Step 3405/10000, Loss: 2.0881


Training Progress:  34%|███████████████████▊                                      | 3407/10000 [50:20<18:52,  5.82it/s]

Step 3406/10000, Loss: 2.0520
Step 3407/10000, Loss: 2.2376


Training Progress:  34%|███████████████████▊                                      | 3409/10000 [50:20<18:29,  5.94it/s]

Step 3408/10000, Loss: 2.1141
Step 3409/10000, Loss: 2.1028


Training Progress:  34%|███████████████████▊                                      | 3411/10000 [50:20<18:48,  5.84it/s]

Step 3410/10000, Loss: 2.0109
Step 3411/10000, Loss: 2.1580


Training Progress:  34%|███████████████████▊                                      | 3413/10000 [50:21<18:25,  5.96it/s]

Step 3412/10000, Loss: 2.0683
Step 3413/10000, Loss: 2.0774


Training Progress:  34%|███████████████████▊                                      | 3415/10000 [50:21<18:47,  5.84it/s]

Step 3414/10000, Loss: 2.0253
Step 3415/10000, Loss: 2.1047


Training Progress:  34%|███████████████████▊                                      | 3417/10000 [50:21<18:26,  5.95it/s]

Step 3416/10000, Loss: 2.1229
Step 3417/10000, Loss: 2.1068


Training Progress:  34%|███████████████████▊                                      | 3419/10000 [50:22<18:45,  5.85it/s]

Step 3418/10000, Loss: 2.2207
Step 3419/10000, Loss: 2.2012


Training Progress:  34%|███████████████████▊                                      | 3421/10000 [50:22<18:54,  5.80it/s]

Step 3420/10000, Loss: 2.1047
Step 3421/10000, Loss: 2.0928


Training Progress:  34%|███████████████████▊                                      | 3423/10000 [50:23<18:58,  5.78it/s]

Step 3422/10000, Loss: 2.0432
Step 3423/10000, Loss: 2.1336


Training Progress:  34%|███████████████████▊                                      | 3425/10000 [50:23<18:46,  5.84it/s]

Step 3424/10000, Loss: 2.1189
Step 3425/10000, Loss: 1.9844


Training Progress:  34%|███████████████████▉                                      | 3427/10000 [50:23<18:24,  5.95it/s]

Step 3426/10000, Loss: 1.8780
Step 3427/10000, Loss: 1.9987


Training Progress:  34%|███████████████████▉                                      | 3429/10000 [50:24<18:42,  5.85it/s]

Step 3428/10000, Loss: 2.0100
Step 3429/10000, Loss: 1.9287


Training Progress:  34%|███████████████████▉                                      | 3431/10000 [50:24<18:50,  5.81it/s]

Step 3430/10000, Loss: 1.8959
Step 3431/10000, Loss: 1.9487


Training Progress:  34%|███████████████████▉                                      | 3433/10000 [50:24<18:46,  5.83it/s]

Step 3432/10000, Loss: 1.8988
Step 3433/10000, Loss: 1.9151


Training Progress:  34%|███████████████████▉                                      | 3435/10000 [50:25<18:45,  5.83it/s]

Step 3434/10000, Loss: 2.2449
Step 3435/10000, Loss: 2.0535


Training Progress:  34%|███████████████████▉                                      | 3437/10000 [50:25<18:23,  5.95it/s]

Step 3436/10000, Loss: 2.0096
Step 3437/10000, Loss: 1.9350


Training Progress:  34%|███████████████████▉                                      | 3439/10000 [50:25<18:42,  5.84it/s]

Step 3438/10000, Loss: 1.9873
Step 3439/10000, Loss: 1.9069


Training Progress:  34%|███████████████████▉                                      | 3440/10000 [50:25<18:47,  5.82it/s]

Step 3440/10000, Loss: 1.8367
Step 3441/10000, Loss: 1.7419


Training Progress:  34%|███████████████████▎                                    | 3441/10000 [50:41<8:52:34,  4.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3441_loss1.7419_20250117_134229.pt

New best loss: 1.7419


Training Progress:  34%|███████████████████▎                                    | 3443/10000 [50:42<4:38:10,  2.55s/it]

Step 3442/10000, Loss: 1.8782
Step 3443/10000, Loss: 2.1177


Training Progress:  34%|███████████████████▎                                    | 3445/10000 [50:42<2:25:35,  1.33s/it]

Step 3444/10000, Loss: 2.1062
Step 3445/10000, Loss: 2.1399


Training Progress:  34%|███████████████████▎                                    | 3447/10000 [50:43<1:21:00,  1.35it/s]

Step 3446/10000, Loss: 2.0229
Step 3447/10000, Loss: 2.0899


Training Progress:  34%|████████████████████                                      | 3449/10000 [50:43<48:58,  2.23it/s]

Step 3448/10000, Loss: 2.0832
Step 3449/10000, Loss: 2.0424


Training Progress:  35%|████████████████████                                      | 3451/10000 [50:43<33:39,  3.24it/s]

Step 3450/10000, Loss: 1.9196
Step 3451/10000, Loss: 1.9849


Training Progress:  35%|████████████████████                                      | 3453/10000 [50:44<26:02,  4.19it/s]

Step 3452/10000, Loss: 1.9356
Step 3453/10000, Loss: 1.9102


Training Progress:  35%|████████████████████                                      | 3455/10000 [50:44<22:06,  4.93it/s]

Step 3454/10000, Loss: 2.0186
Step 3455/10000, Loss: 2.1523


Training Progress:  35%|████████████████████                                      | 3457/10000 [50:44<20:28,  5.33it/s]

Step 3456/10000, Loss: 2.1609
Step 3457/10000, Loss: 2.1274


Training Progress:  35%|████████████████████                                      | 3459/10000 [50:45<19:09,  5.69it/s]

Step 3458/10000, Loss: 2.0814
Step 3459/10000, Loss: 2.0268


Training Progress:  35%|████████████████████                                      | 3461/10000 [50:45<19:01,  5.73it/s]

Step 3460/10000, Loss: 2.1615
Step 3461/10000, Loss: 1.9606


Training Progress:  35%|████████████████████                                      | 3463/10000 [50:45<18:57,  5.74it/s]

Step 3462/10000, Loss: 2.0093
Step 3463/10000, Loss: 2.0933


Training Progress:  35%|████████████████████                                      | 3465/10000 [50:46<18:34,  5.86it/s]

Step 3464/10000, Loss: 2.0039
Step 3465/10000, Loss: 2.0453


Training Progress:  35%|████████████████████                                      | 3467/10000 [50:46<18:43,  5.82it/s]

Step 3466/10000, Loss: 1.9096
Step 3467/10000, Loss: 2.1780


Training Progress:  35%|████████████████████                                      | 3469/10000 [50:46<18:20,  5.94it/s]

Step 3468/10000, Loss: 2.1617
Step 3469/10000, Loss: 2.1829


Training Progress:  35%|████████████████████▏                                     | 3471/10000 [50:47<18:35,  5.86it/s]

Step 3470/10000, Loss: 2.1490
Step 3471/10000, Loss: 2.0994


Training Progress:  35%|████████████████████▏                                     | 3473/10000 [50:47<18:44,  5.81it/s]

Step 3472/10000, Loss: 2.1283
Step 3473/10000, Loss: 2.0931


Training Progress:  35%|████████████████████▏                                     | 3475/10000 [50:47<18:16,  5.95it/s]

Step 3474/10000, Loss: 2.0350
Step 3475/10000, Loss: 2.0469


Training Progress:  35%|████████████████████▏                                     | 3477/10000 [50:48<18:34,  5.85it/s]

Step 3476/10000, Loss: 1.9687
Step 3477/10000, Loss: 1.9708


Training Progress:  35%|████████████████████▏                                     | 3479/10000 [50:48<18:42,  5.81it/s]

Step 3478/10000, Loss: 2.0998
Step 3479/10000, Loss: 2.1514


Training Progress:  35%|████████████████████▏                                     | 3481/10000 [50:48<18:15,  5.95it/s]

Step 3480/10000, Loss: 2.0961
Step 3481/10000, Loss: 2.0350


Training Progress:  35%|████████████████████▏                                     | 3483/10000 [50:49<18:32,  5.86it/s]

Step 3482/10000, Loss: 2.0493
Step 3483/10000, Loss: 2.0936


Training Progress:  35%|████████████████████▏                                     | 3485/10000 [50:49<18:42,  5.81it/s]

Step 3484/10000, Loss: 2.0234
Step 3485/10000, Loss: 1.9830


Training Progress:  35%|████████████████████▏                                     | 3487/10000 [50:49<18:14,  5.95it/s]

Step 3486/10000, Loss: 2.0765
Step 3487/10000, Loss: 2.0200


Training Progress:  35%|████████████████████▏                                     | 3489/10000 [50:50<18:32,  5.85it/s]

Step 3488/10000, Loss: 2.0422
Step 3489/10000, Loss: 2.1622


Training Progress:  35%|████████████████████▏                                     | 3491/10000 [50:50<18:42,  5.80it/s]

Step 3490/10000, Loss: 2.0210
Step 3491/10000, Loss: 2.0061


Training Progress:  35%|████████████████████▎                                     | 3493/10000 [50:50<18:26,  5.88it/s]

Step 3492/10000, Loss: 1.9255
Step 3493/10000, Loss: 2.0550


Training Progress:  35%|████████████████████▎                                     | 3495/10000 [50:51<18:37,  5.82it/s]

Step 3494/10000, Loss: 2.0001
Step 3495/10000, Loss: 1.9977


Training Progress:  35%|████████████████████▎                                     | 3497/10000 [50:51<18:42,  5.79it/s]

Step 3496/10000, Loss: 2.0122
Step 3497/10000, Loss: 2.0991


Training Progress:  35%|████████████████████▎                                     | 3499/10000 [50:51<18:24,  5.89it/s]

Step 3498/10000, Loss: 2.0765
Step 3499/10000, Loss: 2.0585


Training Progress:  35%|████████████████████▎                                     | 3501/10000 [50:52<18:37,  5.82it/s]

Step 3500/10000, Loss: 2.1428
Step 3501/10000, Loss: 2.0677


Training Progress:  35%|████████████████████▎                                     | 3503/10000 [50:52<18:14,  5.94it/s]

Step 3502/10000, Loss: 1.9800
Step 3503/10000, Loss: 1.9660


Training Progress:  35%|████████████████████▎                                     | 3505/10000 [50:52<18:30,  5.85it/s]

Step 3504/10000, Loss: 1.9185
Step 3505/10000, Loss: 2.0221


Training Progress:  35%|████████████████████▎                                     | 3507/10000 [50:53<18:39,  5.80it/s]

Step 3506/10000, Loss: 2.0182
Step 3507/10000, Loss: 1.8966


Training Progress:  35%|████████████████████▎                                     | 3509/10000 [50:53<18:23,  5.88it/s]

Step 3508/10000, Loss: 1.7838
Step 3509/10000, Loss: 1.9684


Training Progress:  35%|████████████████████▎                                     | 3511/10000 [50:54<18:42,  5.78it/s]

Step 3510/10000, Loss: 1.9371
Step 3511/10000, Loss: 1.8840


Training Progress:  35%|████████████████████▍                                     | 3513/10000 [50:54<18:37,  5.80it/s]

Step 3512/10000, Loss: 1.8381
Step 3513/10000, Loss: 1.8845


Training Progress:  35%|████████████████████▍                                     | 3515/10000 [50:54<18:14,  5.92it/s]

Step 3514/10000, Loss: 1.8290
Step 3515/10000, Loss: 1.8265


Training Progress:  35%|████████████████████▍                                     | 3517/10000 [50:55<18:30,  5.84it/s]

Step 3516/10000, Loss: 2.1381
Step 3517/10000, Loss: 1.9666


Training Progress:  35%|████████████████████▍                                     | 3519/10000 [50:55<18:07,  5.96it/s]

Step 3518/10000, Loss: 1.8877
Step 3519/10000, Loss: 1.8601


Training Progress:  35%|████████████████████▍                                     | 3521/10000 [50:55<18:24,  5.86it/s]

Step 3520/10000, Loss: 1.9049
Step 3521/10000, Loss: 1.8314


Training Progress:  35%|████████████████████▍                                     | 3522/10000 [50:55<18:29,  5.84it/s]

Step 3522/10000, Loss: 1.7578
Step 3523/10000, Loss: 1.7081


Training Progress:  35%|███████████████████▋                                    | 3523/10000 [51:10<8:15:48,  4.59s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3523_loss1.7081_20250117_134259.pt

New best loss: 1.7081


Training Progress:  35%|███████████████████▋                                    | 3525/10000 [51:11<4:18:11,  2.39s/it]

Step 3524/10000, Loss: 1.8006
Step 3525/10000, Loss: 2.0198


Training Progress:  35%|███████████████████▊                                    | 3527/10000 [51:11<2:15:52,  1.26s/it]

Step 3526/10000, Loss: 2.0032
Step 3527/10000, Loss: 2.0410


Training Progress:  35%|███████████████████▊                                    | 3529/10000 [51:12<1:15:59,  1.42it/s]

Step 3528/10000, Loss: 1.9527
Step 3529/10000, Loss: 1.9964


Training Progress:  35%|████████████████████▍                                     | 3531/10000 [51:12<46:21,  2.33it/s]

Step 3530/10000, Loss: 2.0164
Step 3531/10000, Loss: 1.9725


Training Progress:  35%|████████████████████▍                                     | 3533/10000 [51:12<32:19,  3.34it/s]

Step 3532/10000, Loss: 1.8559
Step 3533/10000, Loss: 1.8830


Training Progress:  35%|████████████████████▌                                     | 3535/10000 [51:13<24:55,  4.32it/s]

Step 3534/10000, Loss: 1.8385
Step 3535/10000, Loss: 1.8308


Training Progress:  35%|████████████████████▌                                     | 3537/10000 [51:13<21:50,  4.93it/s]

Step 3536/10000, Loss: 1.9131
Step 3537/10000, Loss: 2.0480


Training Progress:  35%|████████████████████▌                                     | 3539/10000 [51:13<19:56,  5.40it/s]

Step 3538/10000, Loss: 2.0855
Step 3539/10000, Loss: 2.0806


Training Progress:  35%|████████████████████▌                                     | 3541/10000 [51:14<19:22,  5.56it/s]

Step 3540/10000, Loss: 2.0272
Step 3541/10000, Loss: 1.9527


Training Progress:  35%|████████████████████▌                                     | 3543/10000 [51:14<18:43,  5.75it/s]

Step 3542/10000, Loss: 2.1173
Step 3543/10000, Loss: 1.9308


Training Progress:  35%|████████████████████▌                                     | 3545/10000 [51:14<18:44,  5.74it/s]

Step 3544/10000, Loss: 1.9792
Step 3545/10000, Loss: 1.9901


Training Progress:  35%|████████████████████▌                                     | 3547/10000 [51:15<18:27,  5.83it/s]

Step 3546/10000, Loss: 1.9388
Step 3547/10000, Loss: 1.9792


Training Progress:  35%|████████████████████▌                                     | 3549/10000 [51:15<18:07,  5.93it/s]

Step 3548/10000, Loss: 1.8445
Step 3549/10000, Loss: 2.1161


Training Progress:  36%|████████████████████▌                                     | 3551/10000 [51:15<18:29,  5.81it/s]

Step 3550/10000, Loss: 2.1260
Step 3551/10000, Loss: 2.1360


Training Progress:  36%|████████████████████▌                                     | 3553/10000 [51:16<18:09,  5.92it/s]

Step 3552/10000, Loss: 2.1128
Step 3553/10000, Loss: 2.0082


Training Progress:  36%|████████████████████▌                                     | 3555/10000 [51:16<18:27,  5.82it/s]

Step 3554/10000, Loss: 2.0946
Step 3555/10000, Loss: 2.0743


Training Progress:  36%|████████████████████▋                                     | 3557/10000 [51:16<18:13,  5.89it/s]

Step 3556/10000, Loss: 2.0217
Step 3557/10000, Loss: 2.0356


Training Progress:  36%|████████████████████▋                                     | 3559/10000 [51:17<18:24,  5.83it/s]

Step 3558/10000, Loss: 1.9215
Step 3559/10000, Loss: 1.9498


Training Progress:  36%|████████████████████▋                                     | 3561/10000 [51:17<18:31,  5.79it/s]

Step 3560/10000, Loss: 2.1022
Step 3561/10000, Loss: 2.0783


Training Progress:  36%|████████████████████▋                                     | 3563/10000 [51:17<18:13,  5.89it/s]

Step 3562/10000, Loss: 2.0182
Step 3563/10000, Loss: 1.9729


Training Progress:  36%|████████████████████▋                                     | 3565/10000 [51:18<18:26,  5.82it/s]

Step 3564/10000, Loss: 1.9776
Step 3565/10000, Loss: 1.9896


Training Progress:  36%|████████████████████▋                                     | 3567/10000 [51:18<18:29,  5.80it/s]

Step 3566/10000, Loss: 1.9645
Step 3567/10000, Loss: 1.9362


Training Progress:  36%|████████████████████▋                                     | 3569/10000 [51:18<18:12,  5.89it/s]

Step 3568/10000, Loss: 2.0444
Step 3569/10000, Loss: 1.9789


Training Progress:  36%|████████████████████▋                                     | 3571/10000 [51:19<18:24,  5.82it/s]

Step 3570/10000, Loss: 1.9775
Step 3571/10000, Loss: 2.1270


Training Progress:  36%|████████████████████▋                                     | 3573/10000 [51:19<17:59,  5.95it/s]

Step 3572/10000, Loss: 1.9844
Step 3573/10000, Loss: 1.9899


Training Progress:  36%|████████████████████▋                                     | 3575/10000 [51:19<18:18,  5.85it/s]

Step 3574/10000, Loss: 1.8552
Step 3575/10000, Loss: 2.0197


Training Progress:  36%|████████████████████▋                                     | 3577/10000 [51:20<18:27,  5.80it/s]

Step 3576/10000, Loss: 1.9508
Step 3577/10000, Loss: 1.9254


Training Progress:  36%|████████████████████▊                                     | 3579/10000 [51:20<18:08,  5.90it/s]

Step 3578/10000, Loss: 1.9107
Step 3579/10000, Loss: 1.9863


Training Progress:  36%|████████████████████▊                                     | 3581/10000 [51:20<18:21,  5.83it/s]

Step 3580/10000, Loss: 2.0128
Step 3581/10000, Loss: 2.0011


Training Progress:  36%|████████████████████▊                                     | 3583/10000 [51:21<18:28,  5.79it/s]

Step 3582/10000, Loss: 2.1006
Step 3583/10000, Loss: 2.0620


Training Progress:  36%|████████████████████▊                                     | 3585/10000 [51:21<18:10,  5.88it/s]

Step 3584/10000, Loss: 1.9430
Step 3585/10000, Loss: 1.9624


Training Progress:  36%|████████████████████▊                                     | 3587/10000 [51:21<18:22,  5.82it/s]

Step 3586/10000, Loss: 1.8804
Step 3587/10000, Loss: 1.9633


Training Progress:  36%|████████████████████▊                                     | 3589/10000 [51:22<17:56,  5.96it/s]

Step 3588/10000, Loss: 1.9373
Step 3589/10000, Loss: 1.8276


Training Progress:  36%|████████████████████▊                                     | 3591/10000 [51:22<18:14,  5.85it/s]

Step 3590/10000, Loss: 1.7375
Step 3591/10000, Loss: 1.8655


Training Progress:  36%|████████████████████▊                                     | 3593/10000 [51:23<18:25,  5.80it/s]

Step 3592/10000, Loss: 1.8770
Step 3593/10000, Loss: 1.7975


Training Progress:  36%|████████████████████▊                                     | 3595/10000 [51:23<18:23,  5.81it/s]

Step 3594/10000, Loss: 1.7625
Step 3595/10000, Loss: 1.8051


Training Progress:  36%|████████████████████▊                                     | 3597/10000 [51:23<18:08,  5.88it/s]

Step 3596/10000, Loss: 1.7868
Step 3597/10000, Loss: 1.8108


Training Progress:  36%|████████████████████▊                                     | 3599/10000 [51:24<18:18,  5.83it/s]

Step 3598/10000, Loss: 2.1150
Step 3599/10000, Loss: 1.9590


Training Progress:  36%|████████████████████▉                                     | 3601/10000 [51:24<17:53,  5.96it/s]

Step 3600/10000, Loss: 1.8844
Step 3601/10000, Loss: 1.8400


Training Progress:  36%|████████████████████▉                                     | 3603/10000 [51:24<18:11,  5.86it/s]

Step 3602/10000, Loss: 1.8761
Step 3603/10000, Loss: 1.8038


Training Progress:  36%|████████████████████▉                                     | 3605/10000 [51:25<18:21,  5.81it/s]

Step 3604/10000, Loss: 1.7380
Step 3605/10000, Loss: 1.7154


Training Progress:  36%|████████████████████▉                                     | 3607/10000 [51:25<17:55,  5.95it/s]

Step 3606/10000, Loss: 1.7650
Step 3607/10000, Loss: 1.9896


Training Progress:  36%|████████████████████▉                                     | 3609/10000 [51:25<18:12,  5.85it/s]

Step 3608/10000, Loss: 1.9628
Step 3609/10000, Loss: 1.9818


Training Progress:  36%|████████████████████▉                                     | 3611/10000 [51:26<18:19,  5.81it/s]

Step 3610/10000, Loss: 1.8792
Step 3611/10000, Loss: 1.8934


Training Progress:  36%|████████████████████▉                                     | 3613/10000 [51:26<17:54,  5.95it/s]

Step 3612/10000, Loss: 1.9135
Step 3613/10000, Loss: 1.8964


Training Progress:  36%|████████████████████▉                                     | 3615/10000 [51:26<18:12,  5.84it/s]

Step 3614/10000, Loss: 1.7897
Step 3615/10000, Loss: 1.8340


Training Progress:  36%|████████████████████▉                                     | 3617/10000 [51:27<18:20,  5.80it/s]

Step 3616/10000, Loss: 1.8254
Step 3617/10000, Loss: 1.7733


Training Progress:  36%|████████████████████▉                                     | 3619/10000 [51:27<17:54,  5.94it/s]

Step 3618/10000, Loss: 1.8760
Step 3619/10000, Loss: 1.9676


Training Progress:  36%|█████████████████████                                     | 3621/10000 [51:27<18:13,  5.84it/s]

Step 3620/10000, Loss: 2.0011
Step 3621/10000, Loss: 1.9589


Training Progress:  36%|█████████████████████                                     | 3623/10000 [51:28<18:21,  5.79it/s]

Step 3622/10000, Loss: 1.9602
Step 3623/10000, Loss: 1.8914


Training Progress:  36%|█████████████████████                                     | 3625/10000 [51:28<18:02,  5.89it/s]

Step 3624/10000, Loss: 2.0732
Step 3625/10000, Loss: 1.8562


Training Progress:  36%|█████████████████████                                     | 3627/10000 [51:28<18:12,  5.83it/s]

Step 3626/10000, Loss: 1.8852
Step 3627/10000, Loss: 1.9386


Training Progress:  36%|█████████████████████                                     | 3629/10000 [51:29<18:18,  5.80it/s]

Step 3628/10000, Loss: 1.8506
Step 3629/10000, Loss: 1.8942


Training Progress:  36%|█████████████████████                                     | 3631/10000 [51:29<17:52,  5.94it/s]

Step 3630/10000, Loss: 1.7960
Step 3631/10000, Loss: 2.0102


Training Progress:  36%|█████████████████████                                     | 3633/10000 [51:29<18:09,  5.84it/s]

Step 3632/10000, Loss: 2.0495
Step 3633/10000, Loss: 2.0038


Training Progress:  36%|█████████████████████                                     | 3635/10000 [51:30<18:18,  5.79it/s]

Step 3634/10000, Loss: 2.0211
Step 3635/10000, Loss: 1.9270


Training Progress:  36%|█████████████████████                                     | 3637/10000 [51:30<17:51,  5.94it/s]

Step 3636/10000, Loss: 2.0199
Step 3637/10000, Loss: 1.9909


Training Progress:  36%|█████████████████████                                     | 3639/10000 [51:30<18:08,  5.84it/s]

Step 3638/10000, Loss: 1.8974
Step 3639/10000, Loss: 1.9387


Training Progress:  36%|█████████████████████                                     | 3641/10000 [51:31<18:16,  5.80it/s]

Step 3640/10000, Loss: 1.8582
Step 3641/10000, Loss: 1.8475


Training Progress:  36%|█████████████████████▏                                    | 3643/10000 [51:31<18:22,  5.76it/s]

Step 3642/10000, Loss: 2.0120
Step 3643/10000, Loss: 2.0009


Training Progress:  36%|█████████████████████▏                                    | 3645/10000 [51:31<18:06,  5.85it/s]

Step 3644/10000, Loss: 1.9229
Step 3645/10000, Loss: 1.8965


Training Progress:  36%|█████████████████████▏                                    | 3647/10000 [51:32<18:14,  5.80it/s]

Step 3646/10000, Loss: 1.9389
Step 3647/10000, Loss: 1.9625


Training Progress:  36%|█████████████████████▏                                    | 3649/10000 [51:32<17:49,  5.94it/s]

Step 3648/10000, Loss: 1.8899
Step 3649/10000, Loss: 1.8289


Training Progress:  37%|█████████████████████▏                                    | 3651/10000 [51:32<18:05,  5.85it/s]

Step 3650/10000, Loss: 1.9392
Step 3651/10000, Loss: 1.8799


Training Progress:  37%|█████████████████████▏                                    | 3653/10000 [51:33<18:14,  5.80it/s]

Step 3652/10000, Loss: 1.8892
Step 3653/10000, Loss: 2.0317


Training Progress:  37%|█████████████████████▏                                    | 3655/10000 [51:33<17:48,  5.94it/s]

Step 3654/10000, Loss: 1.9194
Step 3655/10000, Loss: 1.9343


Training Progress:  37%|█████████████████████▏                                    | 3657/10000 [51:33<18:04,  5.85it/s]

Step 3656/10000, Loss: 1.8195
Step 3657/10000, Loss: 1.9492


Training Progress:  37%|█████████████████████▏                                    | 3659/10000 [51:34<18:11,  5.81it/s]

Step 3658/10000, Loss: 1.8805
Step 3659/10000, Loss: 1.8363


Training Progress:  37%|█████████████████████▏                                    | 3661/10000 [51:34<18:17,  5.77it/s]

Step 3660/10000, Loss: 1.8216
Step 3661/10000, Loss: 1.9109


Training Progress:  37%|█████████████████████▏                                    | 3663/10000 [51:34<17:58,  5.88it/s]

Step 3662/10000, Loss: 1.9101
Step 3663/10000, Loss: 1.9419


Training Progress:  37%|█████████████████████▎                                    | 3665/10000 [51:35<18:09,  5.82it/s]

Step 3664/10000, Loss: 2.0229
Step 3665/10000, Loss: 1.9774


Training Progress:  37%|█████████████████████▎                                    | 3667/10000 [51:35<18:13,  5.79it/s]

Step 3666/10000, Loss: 1.8599
Step 3667/10000, Loss: 1.8790


Training Progress:  37%|█████████████████████▎                                    | 3669/10000 [51:36<17:45,  5.94it/s]

Step 3668/10000, Loss: 1.7979
Step 3669/10000, Loss: 1.8885


Training Progress:  37%|█████████████████████▎                                    | 3671/10000 [51:36<18:02,  5.85it/s]

Step 3670/10000, Loss: 1.8453
Step 3671/10000, Loss: 1.7793
Step 3672/10000, Loss: 1.6394


Training Progress:  37%|████████████████████▌                                   | 3672/10000 [51:51<7:58:02,  4.53s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3672_loss1.6394_20250117_134340.pt

New best loss: 1.6394


Training Progress:  37%|████████████████████▌                                   | 3674/10000 [51:51<4:07:47,  2.35s/it]

Step 3673/10000, Loss: 1.7835
Step 3674/10000, Loss: 1.7925


Training Progress:  37%|████████████████████▌                                   | 3676/10000 [51:51<2:10:27,  1.24s/it]

Step 3675/10000, Loss: 1.7280
Step 3676/10000, Loss: 1.6992


Training Progress:  37%|████████████████████▌                                   | 3678/10000 [51:52<1:13:17,  1.44it/s]

Step 3677/10000, Loss: 1.7569
Step 3678/10000, Loss: 1.7117


Training Progress:  37%|█████████████████████▎                                    | 3680/10000 [51:52<44:56,  2.34it/s]

Step 3679/10000, Loss: 1.7205
Step 3680/10000, Loss: 2.0127


Training Progress:  37%|█████████████████████▎                                    | 3682/10000 [51:52<31:22,  3.36it/s]

Step 3681/10000, Loss: 1.8569
Step 3682/10000, Loss: 1.8133


Training Progress:  37%|█████████████████████▎                                    | 3684/10000 [51:53<24:22,  4.32it/s]

Step 3683/10000, Loss: 1.7521
Step 3684/10000, Loss: 1.7883


Training Progress:  37%|█████████████████████▍                                    | 3686/10000 [51:53<21:17,  4.94it/s]

Step 3685/10000, Loss: 1.7357
Step 3686/10000, Loss: 1.6926
Step 3687/10000, Loss: 1.6365


Training Progress:  37%|████████████████████▋                                   | 3687/10000 [52:12<9:57:53,  5.68s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3687_loss1.6365_20250117_134357.pt

New best loss: 1.6365


Training Progress:  37%|████████████████████▋                                   | 3689/10000 [52:12<5:08:43,  2.94s/it]

Step 3688/10000, Loss: 1.7071
Step 3689/10000, Loss: 1.9224


Training Progress:  37%|████████████████████▋                                   | 3691/10000 [52:13<2:40:31,  1.53s/it]

Step 3690/10000, Loss: 1.9014
Step 3691/10000, Loss: 1.9224


Training Progress:  37%|████████████████████▋                                   | 3693/10000 [52:13<1:27:28,  1.20it/s]

Step 3692/10000, Loss: 1.8576
Step 3693/10000, Loss: 1.8651


Training Progress:  37%|█████████████████████▍                                    | 3695/10000 [52:13<52:13,  2.01it/s]

Step 3694/10000, Loss: 1.8973
Step 3695/10000, Loss: 1.8668


Training Progress:  37%|█████████████████████▍                                    | 3697/10000 [52:14<34:25,  3.05it/s]

Step 3696/10000, Loss: 1.7420
Step 3697/10000, Loss: 1.7831


Training Progress:  37%|█████████████████████▍                                    | 3699/10000 [52:14<26:08,  4.02it/s]

Step 3698/10000, Loss: 1.7366
Step 3699/10000, Loss: 1.6928


Training Progress:  37%|█████████████████████▍                                    | 3701/10000 [52:14<21:37,  4.85it/s]

Step 3700/10000, Loss: 1.8523
Step 3701/10000, Loss: 1.8811


Training Progress:  37%|█████████████████████▍                                    | 3703/10000 [52:15<19:54,  5.27it/s]

Step 3702/10000, Loss: 1.9233
Step 3703/10000, Loss: 1.8876


Training Progress:  37%|█████████████████████▍                                    | 3705/10000 [52:15<19:03,  5.51it/s]

Step 3704/10000, Loss: 1.8491
Step 3705/10000, Loss: 1.7901


Training Progress:  37%|█████████████████████▌                                    | 3707/10000 [52:15<18:15,  5.74it/s]

Step 3706/10000, Loss: 1.9832
Step 3707/10000, Loss: 1.7924


Training Progress:  37%|█████████████████████▌                                    | 3709/10000 [52:16<18:14,  5.75it/s]

Step 3708/10000, Loss: 1.8525
Step 3709/10000, Loss: 1.9049


Training Progress:  37%|█████████████████████▌                                    | 3711/10000 [52:16<17:43,  5.91it/s]

Step 3710/10000, Loss: 1.8054
Step 3711/10000, Loss: 1.8360


Training Progress:  37%|█████████████████████▌                                    | 3713/10000 [52:16<17:50,  5.87it/s]

Step 3712/10000, Loss: 1.7302
Step 3713/10000, Loss: 1.9529


Training Progress:  37%|█████████████████████▌                                    | 3715/10000 [52:17<18:03,  5.80it/s]

Step 3714/10000, Loss: 1.9453
Step 3715/10000, Loss: 1.9293


Training Progress:  37%|█████████████████████▌                                    | 3717/10000 [52:17<17:48,  5.88it/s]

Step 3716/10000, Loss: 1.9551
Step 3717/10000, Loss: 1.8704


Training Progress:  37%|█████████████████████▌                                    | 3719/10000 [52:17<18:00,  5.81it/s]

Step 3718/10000, Loss: 1.9547
Step 3719/10000, Loss: 1.9343


Training Progress:  37%|█████████████████████▌                                    | 3721/10000 [52:18<17:44,  5.90it/s]

Step 3720/10000, Loss: 1.8186
Step 3721/10000, Loss: 1.8781


Training Progress:  37%|█████████████████████▌                                    | 3723/10000 [52:18<17:57,  5.83it/s]

Step 3722/10000, Loss: 1.7728
Step 3723/10000, Loss: 1.7862


Training Progress:  37%|█████████████████████▌                                    | 3725/10000 [52:18<17:42,  5.91it/s]

Step 3724/10000, Loss: 1.8822
Step 3725/10000, Loss: 1.8996


Training Progress:  37%|█████████████████████▌                                    | 3727/10000 [52:19<17:56,  5.83it/s]

Step 3726/10000, Loss: 1.8069
Step 3727/10000, Loss: 1.7696


Training Progress:  37%|█████████████████████▋                                    | 3729/10000 [52:19<17:33,  5.95it/s]

Step 3728/10000, Loss: 1.8432
Step 3729/10000, Loss: 1.8682


Training Progress:  37%|█████████████████████▋                                    | 3731/10000 [52:19<17:51,  5.85it/s]

Step 3730/10000, Loss: 1.8115
Step 3731/10000, Loss: 1.7268


Training Progress:  37%|█████████████████████▋                                    | 3733/10000 [52:20<18:00,  5.80it/s]

Step 3732/10000, Loss: 1.8682
Step 3733/10000, Loss: 1.8387


Training Progress:  37%|█████████████████████▋                                    | 3735/10000 [52:20<17:43,  5.89it/s]

Step 3734/10000, Loss: 1.8329
Step 3735/10000, Loss: 1.9812


Training Progress:  37%|█████████████████████▋                                    | 3737/10000 [52:20<17:53,  5.83it/s]

Step 3736/10000, Loss: 1.8640
Step 3737/10000, Loss: 1.8433


Training Progress:  37%|█████████████████████▋                                    | 3739/10000 [52:21<18:00,  5.80it/s]

Step 3738/10000, Loss: 1.7230
Step 3739/10000, Loss: 1.9032


Training Progress:  37%|█████████████████████▋                                    | 3741/10000 [52:21<17:43,  5.89it/s]

Step 3740/10000, Loss: 1.8394
Step 3741/10000, Loss: 1.7962


Training Progress:  37%|█████████████████████▋                                    | 3743/10000 [52:21<17:54,  5.82it/s]

Step 3742/10000, Loss: 1.7667
Step 3743/10000, Loss: 1.8407


Training Progress:  37%|█████████████████████▋                                    | 3745/10000 [52:22<17:31,  5.95it/s]

Step 3744/10000, Loss: 1.8550
Step 3745/10000, Loss: 1.8628


Training Progress:  37%|█████████████████████▋                                    | 3747/10000 [52:22<17:49,  5.84it/s]

Step 3746/10000, Loss: 1.9532
Step 3747/10000, Loss: 1.9331


Training Progress:  37%|█████████████████████▋                                    | 3749/10000 [52:23<17:57,  5.80it/s]

Step 3748/10000, Loss: 1.8224
Step 3749/10000, Loss: 1.8440


Training Progress:  38%|█████████████████████▊                                    | 3751/10000 [52:23<17:40,  5.89it/s]

Step 3750/10000, Loss: 1.7282
Step 3751/10000, Loss: 1.8386


Training Progress:  38%|█████████████████████▊                                    | 3753/10000 [52:23<17:55,  5.81it/s]

Step 3752/10000, Loss: 1.8297
Step 3753/10000, Loss: 1.7187


Training Progress:  38%|█████████████████████▊                                    | 3755/10000 [52:24<17:39,  5.89it/s]

Step 3754/10000, Loss: 1.6379
Step 3755/10000, Loss: 1.7568


Training Progress:  38%|█████████████████████▊                                    | 3757/10000 [52:24<17:51,  5.83it/s]

Step 3756/10000, Loss: 1.7450
Step 3757/10000, Loss: 1.6878


Training Progress:  38%|█████████████████████▊                                    | 3759/10000 [52:24<17:56,  5.80it/s]

Step 3758/10000, Loss: 1.6488
Step 3759/10000, Loss: 1.6898
Step 3760/10000, Loss: 1.6217


Training Progress:  38%|█████████████████████                                   | 3760/10000 [52:38<7:32:28,  4.35s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3760_loss1.6217_20250117_134428.pt

New best loss: 1.6217


Training Progress:  38%|█████████████████████                                   | 3762/10000 [52:39<3:55:21,  2.26s/it]

Step 3761/10000, Loss: 1.6590
Step 3762/10000, Loss: 1.9420


Training Progress:  38%|█████████████████████                                   | 3764/10000 [52:39<2:04:30,  1.20s/it]

Step 3763/10000, Loss: 1.8111
Step 3764/10000, Loss: 1.7071


Training Progress:  38%|█████████████████████                                   | 3766/10000 [52:40<1:09:50,  1.49it/s]

Step 3765/10000, Loss: 1.6677
Step 3766/10000, Loss: 1.6917


Training Progress:  38%|█████████████████████▊                                    | 3768/10000 [52:40<43:26,  2.39it/s]

Step 3767/10000, Loss: 1.6536
Step 3768/10000, Loss: 1.6226
Step 3769/10000, Loss: 1.5589


Training Progress:  38%|████████████████████▋                                  | 3769/10000 [52:58<10:08:38,  5.86s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3769_loss1.5589_20250117_134444.pt

New best loss: 1.5589


Training Progress:  38%|█████████████████████                                   | 3771/10000 [52:59<5:12:56,  3.01s/it]

Step 3770/10000, Loss: 1.6528
Step 3771/10000, Loss: 1.8558


Training Progress:  38%|█████████████████████▏                                  | 3773/10000 [52:59<2:42:31,  1.57s/it]

Step 3772/10000, Loss: 1.7808
Step 3773/10000, Loss: 1.8217


Training Progress:  38%|█████████████████████▏                                  | 3775/10000 [53:00<1:28:31,  1.17it/s]

Step 3774/10000, Loss: 1.7827
Step 3775/10000, Loss: 1.7792


Training Progress:  38%|█████████████████████▉                                    | 3777/10000 [53:00<52:36,  1.97it/s]

Step 3776/10000, Loss: 1.7994
Step 3777/10000, Loss: 1.7560


Training Progress:  38%|█████████████████████▉                                    | 3779/10000 [53:00<34:50,  2.98it/s]

Step 3778/10000, Loss: 1.6982
Step 3779/10000, Loss: 1.7405


Training Progress:  38%|█████████████████████▉                                    | 3781/10000 [53:01<26:09,  3.96it/s]

Step 3780/10000, Loss: 1.7322
Step 3781/10000, Loss: 1.6513


Training Progress:  38%|█████████████████████▉                                    | 3783/10000 [53:01<21:40,  4.78it/s]

Step 3782/10000, Loss: 1.7522
Step 3783/10000, Loss: 1.8158


Training Progress:  38%|█████████████████████▉                                    | 3785/10000 [53:01<19:50,  5.22it/s]

Step 3784/10000, Loss: 1.8435
Step 3785/10000, Loss: 1.8014


Training Progress:  38%|█████████████████████▉                                    | 3787/10000 [53:02<18:35,  5.57it/s]

Step 3786/10000, Loss: 1.8143
Step 3787/10000, Loss: 1.7233


Training Progress:  38%|█████████████████████▉                                    | 3789/10000 [53:02<18:10,  5.70it/s]

Step 3788/10000, Loss: 1.9173
Step 3789/10000, Loss: 1.7579


Training Progress:  38%|█████████████████████▉                                    | 3791/10000 [53:02<17:54,  5.78it/s]

Step 3790/10000, Loss: 1.7709
Step 3791/10000, Loss: 1.8563


Training Progress:  38%|█████████████████████▉                                    | 3793/10000 [53:03<17:40,  5.85it/s]

Step 3792/10000, Loss: 1.7974
Step 3793/10000, Loss: 1.7869


Training Progress:  38%|██████████████████████                                    | 3795/10000 [53:03<17:53,  5.78it/s]

Step 3794/10000, Loss: 1.7109
Step 3795/10000, Loss: 1.8944


Training Progress:  38%|██████████████████████                                    | 3797/10000 [53:04<17:39,  5.85it/s]

Step 3796/10000, Loss: 1.8458
Step 3797/10000, Loss: 1.8572


Training Progress:  38%|██████████████████████                                    | 3799/10000 [53:04<17:41,  5.84it/s]

Step 3798/10000, Loss: 1.8400
Step 3799/10000, Loss: 1.7630


Training Progress:  38%|██████████████████████                                    | 3801/10000 [53:04<17:41,  5.84it/s]

Step 3800/10000, Loss: 1.8560
Step 3801/10000, Loss: 1.8952


Training Progress:  38%|██████████████████████                                    | 3803/10000 [53:05<17:24,  5.93it/s]

Step 3802/10000, Loss: 1.7724
Step 3803/10000, Loss: 1.8374


Training Progress:  38%|██████████████████████                                    | 3805/10000 [53:05<17:45,  5.81it/s]

Step 3804/10000, Loss: 1.7576
Step 3805/10000, Loss: 1.7371


Training Progress:  38%|██████████████████████                                    | 3807/10000 [53:05<17:36,  5.86it/s]

Step 3806/10000, Loss: 1.8798
Step 3807/10000, Loss: 1.8280


Training Progress:  38%|██████████████████████                                    | 3809/10000 [53:06<17:34,  5.87it/s]

Step 3808/10000, Loss: 1.7467
Step 3809/10000, Loss: 1.7195


Training Progress:  38%|██████████████████████                                    | 3811/10000 [53:06<17:37,  5.85it/s]

Step 3810/10000, Loss: 1.8003
Step 3811/10000, Loss: 1.8007


Training Progress:  38%|██████████████████████                                    | 3813/10000 [53:06<17:18,  5.96it/s]

Step 3812/10000, Loss: 1.7844
Step 3813/10000, Loss: 1.6815


Training Progress:  38%|██████████████████████▏                                   | 3815/10000 [53:07<17:37,  5.85it/s]

Step 3814/10000, Loss: 1.8759
Step 3815/10000, Loss: 1.7999


Training Progress:  38%|██████████████████████▏                                   | 3817/10000 [53:07<17:15,  5.97it/s]

Step 3816/10000, Loss: 1.7847
Step 3817/10000, Loss: 1.9171


Training Progress:  38%|██████████████████████▏                                   | 3819/10000 [53:07<17:36,  5.85it/s]

Step 3818/10000, Loss: 1.7966
Step 3819/10000, Loss: 1.8006


Training Progress:  38%|██████████████████████▏                                   | 3821/10000 [53:08<17:44,  5.80it/s]

Step 3820/10000, Loss: 1.6820
Step 3821/10000, Loss: 1.8102


Training Progress:  38%|██████████████████████▏                                   | 3823/10000 [53:08<17:29,  5.89it/s]

Step 3822/10000, Loss: 1.7714
Step 3823/10000, Loss: 1.7487


Training Progress:  38%|██████████████████████▏                                   | 3825/10000 [53:08<17:42,  5.81it/s]

Step 3824/10000, Loss: 1.7207
Step 3825/10000, Loss: 1.7713


Training Progress:  38%|██████████████████████▏                                   | 3827/10000 [53:09<17:27,  5.89it/s]

Step 3826/10000, Loss: 1.8200
Step 3827/10000, Loss: 1.8416


Training Progress:  38%|██████████████████████▏                                   | 3829/10000 [53:09<17:41,  5.82it/s]

Step 3828/10000, Loss: 1.8997
Step 3829/10000, Loss: 1.8262


Training Progress:  38%|██████████████████████▏                                   | 3831/10000 [53:09<17:17,  5.94it/s]

Step 3830/10000, Loss: 1.7655
Step 3831/10000, Loss: 1.7689


Training Progress:  38%|██████████████████████▏                                   | 3833/10000 [53:10<17:34,  5.85it/s]

Step 3832/10000, Loss: 1.6794
Step 3833/10000, Loss: 1.7893


Training Progress:  38%|██████████████████████▏                                   | 3835/10000 [53:10<17:15,  5.95it/s]

Step 3834/10000, Loss: 1.7786
Step 3835/10000, Loss: 1.6768


Training Progress:  38%|██████████████████████▎                                   | 3837/10000 [53:10<17:28,  5.88it/s]

Step 3836/10000, Loss: 1.5764
Step 3837/10000, Loss: 1.7334


Training Progress:  38%|██████████████████████▎                                   | 3839/10000 [53:11<17:40,  5.81it/s]

Step 3838/10000, Loss: 1.7237
Step 3839/10000, Loss: 1.6016


Training Progress:  38%|██████████████████████▎                                   | 3841/10000 [53:11<17:26,  5.89it/s]

Step 3840/10000, Loss: 1.5941
Step 3841/10000, Loss: 1.6404


Training Progress:  38%|██████████████████████▎                                   | 3843/10000 [53:11<17:37,  5.82it/s]

Step 3842/10000, Loss: 1.5719
Step 3843/10000, Loss: 1.6233


Training Progress:  38%|██████████████████████▎                                   | 3845/10000 [53:12<17:15,  5.95it/s]

Step 3844/10000, Loss: 1.8861
Step 3845/10000, Loss: 1.7052


Training Progress:  38%|██████████████████████▎                                   | 3847/10000 [53:12<17:30,  5.86it/s]

Step 3846/10000, Loss: 1.6475
Step 3847/10000, Loss: 1.6223


Training Progress:  38%|██████████████████████▎                                   | 3849/10000 [53:12<17:40,  5.80it/s]

Step 3848/10000, Loss: 1.6376
Step 3849/10000, Loss: 1.6040


Training Progress:  38%|██████████████████████▎                                   | 3850/10000 [53:13<17:15,  5.94it/s]

Step 3850/10000, Loss: 1.5922
Step 3851/10000, Loss: 1.5183


Training Progress:  39%|█████████████████████▌                                  | 3851/10000 [53:31<9:38:09,  5.64s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3851_loss1.5183_20250117_134517.pt

New best loss: 1.5183


Training Progress:  39%|█████████████████████▌                                  | 3853/10000 [53:31<4:55:55,  2.89s/it]

Step 3852/10000, Loss: 1.5863
Step 3853/10000, Loss: 1.7683


Training Progress:  39%|█████████████████████▌                                  | 3855/10000 [53:32<2:34:01,  1.50s/it]

Step 3854/10000, Loss: 1.7421
Step 3855/10000, Loss: 1.7616


Training Progress:  39%|█████████████████████▌                                  | 3857/10000 [53:32<1:24:31,  1.21it/s]

Step 3856/10000, Loss: 1.7192
Step 3857/10000, Loss: 1.7132


Training Progress:  39%|██████████████████████▍                                   | 3859/10000 [53:32<50:09,  2.04it/s]

Step 3858/10000, Loss: 1.7101
Step 3859/10000, Loss: 1.6934


Training Progress:  39%|██████████████████████▍                                   | 3861/10000 [53:33<33:38,  3.04it/s]

Step 3860/10000, Loss: 1.6129
Step 3861/10000, Loss: 1.6876


Training Progress:  39%|██████████████████████▍                                   | 3863/10000 [53:33<25:13,  4.06it/s]

Step 3862/10000, Loss: 1.6898
Step 3863/10000, Loss: 1.6248


Training Progress:  39%|██████████████████████▍                                   | 3865/10000 [53:34<21:26,  4.77it/s]

Step 3864/10000, Loss: 1.7111
Step 3865/10000, Loss: 1.7894


Training Progress:  39%|██████████████████████▍                                   | 3867/10000 [53:34<19:05,  5.35it/s]

Step 3866/10000, Loss: 1.7767
Step 3867/10000, Loss: 1.7533


Training Progress:  39%|██████████████████████▍                                   | 3869/10000 [53:34<18:24,  5.55it/s]

Step 3868/10000, Loss: 1.7427
Step 3869/10000, Loss: 1.6934


Training Progress:  39%|██████████████████████▍                                   | 3871/10000 [53:35<17:36,  5.80it/s]

Step 3870/10000, Loss: 1.8273
Step 3871/10000, Loss: 1.6896


Training Progress:  39%|██████████████████████▍                                   | 3873/10000 [53:35<17:41,  5.77it/s]

Step 3872/10000, Loss: 1.7249
Step 3873/10000, Loss: 1.7690


Training Progress:  39%|██████████████████████▍                                   | 3875/10000 [53:35<17:41,  5.77it/s]

Step 3874/10000, Loss: 1.7199
Step 3875/10000, Loss: 1.7309


Training Progress:  39%|██████████████████████▍                                   | 3877/10000 [53:36<17:35,  5.80it/s]

Step 3876/10000, Loss: 1.6381
Step 3877/10000, Loss: 1.8324


Training Progress:  39%|██████████████████████▍                                   | 3879/10000 [53:36<17:30,  5.83it/s]

Step 3878/10000, Loss: 1.7951
Step 3879/10000, Loss: 1.8163


Training Progress:  39%|██████████████████████▌                                   | 3881/10000 [53:36<17:16,  5.90it/s]

Step 3880/10000, Loss: 1.8049
Step 3881/10000, Loss: 1.7007


Training Progress:  39%|██████████████████████▌                                   | 3883/10000 [53:37<17:29,  5.83it/s]

Step 3882/10000, Loss: 1.7864
Step 3883/10000, Loss: 1.7659


Training Progress:  39%|██████████████████████▌                                   | 3885/10000 [53:37<17:06,  5.95it/s]

Step 3884/10000, Loss: 1.6981
Step 3885/10000, Loss: 1.7606


Training Progress:  39%|██████████████████████▌                                   | 3887/10000 [53:37<17:27,  5.84it/s]

Step 3886/10000, Loss: 1.6762
Step 3887/10000, Loss: 1.6973


Training Progress:  39%|██████████████████████▌                                   | 3889/10000 [53:38<17:31,  5.81it/s]

Step 3888/10000, Loss: 1.7982
Step 3889/10000, Loss: 1.7945


Training Progress:  39%|██████████████████████▌                                   | 3891/10000 [53:38<17:15,  5.90it/s]

Step 3890/10000, Loss: 1.7388
Step 3891/10000, Loss: 1.6640


Training Progress:  39%|██████████████████████▌                                   | 3893/10000 [53:38<17:29,  5.82it/s]

Step 3892/10000, Loss: 1.7257
Step 3893/10000, Loss: 1.7205


Training Progress:  39%|██████████████████████▌                                   | 3895/10000 [53:39<17:33,  5.79it/s]

Step 3894/10000, Loss: 1.6857
Step 3895/10000, Loss: 1.5907


Training Progress:  39%|██████████████████████▌                                   | 3897/10000 [53:39<17:18,  5.88it/s]

Step 3896/10000, Loss: 1.7949
Step 3897/10000, Loss: 1.7645


Training Progress:  39%|██████████████████████▌                                   | 3899/10000 [53:39<17:28,  5.82it/s]

Step 3898/10000, Loss: 1.7098
Step 3899/10000, Loss: 1.8590


Training Progress:  39%|██████████████████████▋                                   | 3901/10000 [53:40<17:07,  5.94it/s]

Step 3900/10000, Loss: 1.7416
Step 3901/10000, Loss: 1.7345


Training Progress:  39%|██████████████████████▋                                   | 3903/10000 [53:40<17:23,  5.84it/s]

Step 3902/10000, Loss: 1.6561
Step 3903/10000, Loss: 1.7402


Training Progress:  39%|██████████████████████▋                                   | 3905/10000 [53:40<17:01,  5.96it/s]

Step 3904/10000, Loss: 1.7158
Step 3905/10000, Loss: 1.6716


Training Progress:  39%|██████████████████████▋                                   | 3907/10000 [53:41<17:20,  5.86it/s]

Step 3906/10000, Loss: 1.6375
Step 3907/10000, Loss: 1.6789


Training Progress:  39%|██████████████████████▋                                   | 3909/10000 [53:41<17:28,  5.81it/s]

Step 3908/10000, Loss: 1.7097
Step 3909/10000, Loss: 1.7361


Training Progress:  39%|██████████████████████▋                                   | 3911/10000 [53:41<17:29,  5.80it/s]

Step 3910/10000, Loss: 1.7981
Step 3911/10000, Loss: 1.7491


Training Progress:  39%|██████████████████████▋                                   | 3913/10000 [53:42<17:15,  5.88it/s]

Step 3912/10000, Loss: 1.6997
Step 3913/10000, Loss: 1.7136


Training Progress:  39%|██████████████████████▋                                   | 3915/10000 [53:42<17:26,  5.82it/s]

Step 3914/10000, Loss: 1.6231
Step 3915/10000, Loss: 1.7273


Training Progress:  39%|██████████████████████▋                                   | 3917/10000 [53:42<17:01,  5.96it/s]

Step 3916/10000, Loss: 1.7077
Step 3917/10000, Loss: 1.6043


Training Progress:  39%|██████████████████████▋                                   | 3919/10000 [53:43<17:17,  5.86it/s]

Step 3918/10000, Loss: 1.5244
Step 3919/10000, Loss: 1.6479


Training Progress:  39%|██████████████████████▋                                   | 3921/10000 [53:43<17:30,  5.79it/s]

Step 3920/10000, Loss: 1.6308
Step 3921/10000, Loss: 1.5344
Step 3922/10000, Loss: 1.5090


Training Progress:  39%|█████████████████████▉                                  | 3922/10000 [53:58<7:34:39,  4.49s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3922_loss1.5090_20250117_134547.pt

New best loss: 1.5090


Training Progress:  39%|█████████████████████▉                                  | 3924/10000 [53:58<3:57:24,  2.34s/it]

Step 3923/10000, Loss: 1.5623
Step 3924/10000, Loss: 1.5203


Training Progress:  39%|█████████████████████▉                                  | 3926/10000 [53:59<2:04:59,  1.23s/it]

Step 3925/10000, Loss: 1.5558
Step 3926/10000, Loss: 1.7944


Training Progress:  39%|█████████████████████▉                                  | 3928/10000 [53:59<1:10:12,  1.44it/s]

Step 3927/10000, Loss: 1.7066
Step 3928/10000, Loss: 1.6112


Training Progress:  39%|██████████████████████▊                                   | 3930/10000 [53:59<42:56,  2.36it/s]

Step 3929/10000, Loss: 1.6015
Step 3930/10000, Loss: 1.6004


Training Progress:  39%|██████████████████████▊                                   | 3932/10000 [54:00<30:02,  3.37it/s]

Step 3931/10000, Loss: 1.5741
Step 3932/10000, Loss: 1.5377
Step 3933/10000, Loss: 1.4516


Training Progress:  39%|█████████████████████▋                                 | 3933/10000 [54:20<10:26:08,  6.19s/it]


Checkpoint saved: checkpoints\best\checkpoint_step3933_loss1.4516_20250117_134604.pt

New best loss: 1.4516


Training Progress:  39%|██████████████████████                                  | 3935/10000 [54:20<5:22:12,  3.19s/it]

Step 3934/10000, Loss: 1.5412
Step 3935/10000, Loss: 1.7368


Training Progress:  39%|██████████████████████                                  | 3937/10000 [54:21<2:46:47,  1.65s/it]

Step 3936/10000, Loss: 1.6981
Step 3937/10000, Loss: 1.7164


Training Progress:  39%|██████████████████████                                  | 3939/10000 [54:21<1:30:39,  1.11it/s]

Step 3938/10000, Loss: 1.6712
Step 3939/10000, Loss: 1.6660


Training Progress:  39%|██████████████████████▊                                   | 3941/10000 [54:21<53:01,  1.90it/s]

Step 3940/10000, Loss: 1.6754
Step 3941/10000, Loss: 1.6452


Training Progress:  39%|██████████████████████▊                                   | 3943/10000 [54:22<34:56,  2.89it/s]

Step 3942/10000, Loss: 1.5751
Step 3943/10000, Loss: 1.6160


Training Progress:  39%|██████████████████████▉                                   | 3945/10000 [54:22<25:43,  3.92it/s]

Step 3944/10000, Loss: 1.6272
Step 3945/10000, Loss: 1.5348


Training Progress:  39%|██████████████████████▉                                   | 3947/10000 [54:22<21:29,  4.69it/s]

Step 3946/10000, Loss: 1.6528
Step 3947/10000, Loss: 1.7424


Training Progress:  39%|██████████████████████▉                                   | 3949/10000 [54:23<19:08,  5.27it/s]

Step 3948/10000, Loss: 1.7395
Step 3949/10000, Loss: 1.7194


Training Progress:  40%|██████████████████████▉                                   | 3951/10000 [54:23<18:18,  5.50it/s]

Step 3950/10000, Loss: 1.6649
Step 3951/10000, Loss: 1.6136


Training Progress:  40%|██████████████████████▉                                   | 3953/10000 [54:23<17:37,  5.72it/s]

Step 3952/10000, Loss: 1.7775
Step 3953/10000, Loss: 1.6131


Training Progress:  40%|██████████████████████▉                                   | 3955/10000 [54:24<17:25,  5.78it/s]

Step 3954/10000, Loss: 1.6454
Step 3955/10000, Loss: 1.7105


Training Progress:  40%|██████████████████████▉                                   | 3957/10000 [54:24<17:28,  5.77it/s]

Step 3956/10000, Loss: 1.6532
Step 3957/10000, Loss: 1.6730


Training Progress:  40%|██████████████████████▉                                   | 3959/10000 [54:24<17:09,  5.87it/s]

Step 3958/10000, Loss: 1.5617
Step 3959/10000, Loss: 1.7683


Training Progress:  40%|██████████████████████▉                                   | 3961/10000 [54:25<17:19,  5.81it/s]

Step 3960/10000, Loss: 1.7849
Step 3961/10000, Loss: 1.7872


Training Progress:  40%|██████████████████████▉                                   | 3963/10000 [54:25<17:05,  5.89it/s]

Step 3962/10000, Loss: 1.7589
Step 3963/10000, Loss: 1.6754


Training Progress:  40%|██████████████████████▉                                   | 3965/10000 [54:25<17:15,  5.83it/s]

Step 3964/10000, Loss: 1.7290
Step 3965/10000, Loss: 1.6980


Training Progress:  40%|███████████████████████                                   | 3967/10000 [54:26<16:54,  5.94it/s]

Step 3966/10000, Loss: 1.6076
Step 3967/10000, Loss: 1.7063


Training Progress:  40%|███████████████████████                                   | 3969/10000 [54:26<17:12,  5.84it/s]

Step 3968/10000, Loss: 1.5995
Step 3969/10000, Loss: 1.6468


Training Progress:  40%|███████████████████████                                   | 3971/10000 [54:26<16:52,  5.96it/s]

Step 3970/10000, Loss: 1.7336
Step 3971/10000, Loss: 1.7056


Training Progress:  40%|███████████████████████                                   | 3973/10000 [54:27<17:10,  5.85it/s]

Step 3972/10000, Loss: 1.6624
Step 3973/10000, Loss: 1.6163


Training Progress:  40%|███████████████████████                                   | 3975/10000 [54:27<17:18,  5.80it/s]

Step 3974/10000, Loss: 1.6983
Step 3975/10000, Loss: 1.7077


Training Progress:  40%|███████████████████████                                   | 3977/10000 [54:27<17:01,  5.89it/s]

Step 3976/10000, Loss: 1.6313
Step 3977/10000, Loss: 1.5539


Training Progress:  40%|███████████████████████                                   | 3979/10000 [54:28<17:13,  5.83it/s]

Step 3978/10000, Loss: 1.6995
Step 3979/10000, Loss: 1.6484


Training Progress:  40%|███████████████████████                                   | 3981/10000 [54:28<16:50,  5.96it/s]

Step 3980/10000, Loss: 1.6289
Step 3981/10000, Loss: 1.7857


Training Progress:  40%|███████████████████████                                   | 3983/10000 [54:28<17:08,  5.85it/s]

Step 3982/10000, Loss: 1.6857
Step 3983/10000, Loss: 1.6745


Training Progress:  40%|███████████████████████                                   | 3985/10000 [54:29<17:15,  5.81it/s]

Step 3984/10000, Loss: 1.5997
Step 3985/10000, Loss: 1.6712


Training Progress:  40%|███████████████████████                                   | 3987/10000 [54:29<16:58,  5.90it/s]

Step 3986/10000, Loss: 1.6490
Step 3987/10000, Loss: 1.5961


Training Progress:  40%|███████████████████████▏                                  | 3989/10000 [54:29<17:11,  5.83it/s]

Step 3988/10000, Loss: 1.5942
Step 3989/10000, Loss: 1.6503


Training Progress:  40%|███████████████████████▏                                  | 3991/10000 [54:30<16:49,  5.95it/s]

Step 3990/10000, Loss: 1.6432
Step 3991/10000, Loss: 1.6782


Training Progress:  40%|███████████████████████▏                                  | 3993/10000 [54:30<17:07,  5.85it/s]

Step 3992/10000, Loss: 1.7521
Step 3993/10000, Loss: 1.6936


Training Progress:  40%|███████████████████████▏                                  | 3995/10000 [54:30<16:46,  5.96it/s]

Step 3994/10000, Loss: 1.6248
Step 3995/10000, Loss: 1.6533


Training Progress:  40%|███████████████████████▏                                  | 3997/10000 [54:31<17:03,  5.86it/s]

Step 3996/10000, Loss: 1.5604
Step 3997/10000, Loss: 1.6740


Training Progress:  40%|███████████████████████▏                                  | 3999/10000 [54:31<17:14,  5.80it/s]

Step 3998/10000, Loss: 1.6332
Step 3999/10000, Loss: 1.5320
Step 4000/10000, Loss: 1.4504

Checkpoint saved: checkpoints\checkpoint_step4000_loss1.4504_20250117_134635.pt


Training Progress:  40%|██████████████████████                                 | 4000/10000 [55:09<19:19:56, 11.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4000_loss1.4504_20250117_134652.pt

New best loss: 1.4504


Training Progress:  40%|██████████████████████▍                                 | 4002/10000 [55:10<9:42:44,  5.83s/it]

Step 4001/10000, Loss: 1.5777
Step 4002/10000, Loss: 1.5504


Training Progress:  40%|██████████████████████▍                                 | 4004/10000 [55:10<4:54:19,  2.95s/it]

Step 4003/10000, Loss: 1.4743
Step 4004/10000, Loss: 1.4692


Training Progress:  40%|██████████████████████▍                                 | 4006/10000 [55:11<2:32:43,  1.53s/it]

Step 4005/10000, Loss: 1.5128
Step 4006/10000, Loss: 1.4733


Training Progress:  40%|██████████████████████▍                                 | 4008/10000 [55:11<1:23:12,  1.20it/s]

Step 4007/10000, Loss: 1.4945
Step 4008/10000, Loss: 1.7312


Training Progress:  40%|███████████████████████▎                                  | 4010/10000 [55:11<49:38,  2.01it/s]

Step 4009/10000, Loss: 1.6066
Step 4010/10000, Loss: 1.5348


Training Progress:  40%|███████████████████████▎                                  | 4012/10000 [55:12<32:42,  3.05it/s]

Step 4011/10000, Loss: 1.5352
Step 4012/10000, Loss: 1.5311


Training Progress:  40%|███████████████████████▎                                  | 4014/10000 [55:12<24:55,  4.00it/s]

Step 4013/10000, Loss: 1.5193
Step 4014/10000, Loss: 1.4853
Step 4015/10000, Loss: 1.4003


Training Progress:  40%|██████████████████████▍                                 | 4015/10000 [55:31<9:53:51,  5.95s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4015_loss1.4003_20250117_134716.pt

New best loss: 1.4003


Training Progress:  40%|██████████████████████▍                                 | 4017/10000 [55:32<5:06:35,  3.07s/it]

Step 4016/10000, Loss: 1.5008
Step 4017/10000, Loss: 1.6812


Training Progress:  40%|██████████████████████▌                                 | 4019/10000 [55:32<2:39:01,  1.60s/it]

Step 4018/10000, Loss: 1.6473
Step 4019/10000, Loss: 1.6961


Training Progress:  40%|██████████████████████▌                                 | 4021/10000 [55:33<1:26:13,  1.16it/s]

Step 4020/10000, Loss: 1.6036
Step 4021/10000, Loss: 1.6185


Training Progress:  40%|███████████████████████▎                                  | 4023/10000 [55:33<51:04,  1.95it/s]

Step 4022/10000, Loss: 1.6181
Step 4023/10000, Loss: 1.5929


Training Progress:  40%|███████████████████████▎                                  | 4025/10000 [55:33<33:22,  2.98it/s]

Step 4024/10000, Loss: 1.5335
Step 4025/10000, Loss: 1.5890


Training Progress:  40%|███████████████████████▎                                  | 4027/10000 [55:34<25:08,  3.96it/s]

Step 4026/10000, Loss: 1.5717
Step 4027/10000, Loss: 1.5208


Training Progress:  40%|███████████████████████▎                                  | 4029/10000 [55:34<20:42,  4.80it/s]

Step 4028/10000, Loss: 1.6275
Step 4029/10000, Loss: 1.7179


Training Progress:  40%|███████████████████████▍                                  | 4031/10000 [55:34<18:59,  5.24it/s]

Step 4030/10000, Loss: 1.7037
Step 4031/10000, Loss: 1.6701


Training Progress:  40%|███████████████████████▍                                  | 4033/10000 [55:35<18:06,  5.49it/s]

Step 4032/10000, Loss: 1.6356
Step 4033/10000, Loss: 1.5770


Training Progress:  40%|███████████████████████▍                                  | 4035/10000 [55:35<17:21,  5.73it/s]

Step 4034/10000, Loss: 1.7453
Step 4035/10000, Loss: 1.5705


Training Progress:  40%|███████████████████████▍                                  | 4037/10000 [55:35<17:19,  5.74it/s]

Step 4036/10000, Loss: 1.5898
Step 4037/10000, Loss: 1.6657


Training Progress:  40%|███████████████████████▍                                  | 4039/10000 [55:36<16:58,  5.85it/s]

Step 4038/10000, Loss: 1.6119
Step 4039/10000, Loss: 1.6365


Training Progress:  40%|███████████████████████▍                                  | 4041/10000 [55:36<17:08,  5.79it/s]

Step 4040/10000, Loss: 1.5614
Step 4041/10000, Loss: 1.7327


Training Progress:  40%|███████████████████████▍                                  | 4043/10000 [55:36<16:52,  5.88it/s]

Step 4042/10000, Loss: 1.7476
Step 4043/10000, Loss: 1.7230


Training Progress:  40%|███████████████████████▍                                  | 4045/10000 [55:37<17:05,  5.81it/s]

Step 4044/10000, Loss: 1.6978
Step 4045/10000, Loss: 1.6134


Training Progress:  40%|███████████████████████▍                                  | 4047/10000 [55:37<16:47,  5.91it/s]

Step 4046/10000, Loss: 1.7061
Step 4047/10000, Loss: 1.6753


Training Progress:  40%|███████████████████████▍                                  | 4049/10000 [55:37<17:01,  5.83it/s]

Step 4048/10000, Loss: 1.5732
Step 4049/10000, Loss: 1.6813


Training Progress:  41%|███████████████████████▍                                  | 4051/10000 [55:38<16:48,  5.90it/s]

Step 4050/10000, Loss: 1.5649
Step 4051/10000, Loss: 1.6120


Training Progress:  41%|███████████████████████▌                                  | 4053/10000 [55:38<17:02,  5.82it/s]

Step 4052/10000, Loss: 1.6606
Step 4053/10000, Loss: 1.6696


Training Progress:  41%|███████████████████████▌                                  | 4055/10000 [55:38<16:52,  5.87it/s]

Step 4054/10000, Loss: 1.6224
Step 4055/10000, Loss: 1.6005


Training Progress:  41%|███████████████████████▌                                  | 4057/10000 [55:39<17:04,  5.80it/s]

Step 4056/10000, Loss: 1.6546
Step 4057/10000, Loss: 1.6631


Training Progress:  41%|███████████████████████▌                                  | 4059/10000 [55:39<16:50,  5.88it/s]

Step 4058/10000, Loss: 1.5887
Step 4059/10000, Loss: 1.5049


Training Progress:  41%|███████████████████████▌                                  | 4061/10000 [55:40<17:01,  5.81it/s]

Step 4060/10000, Loss: 1.6480
Step 4061/10000, Loss: 1.6203


Training Progress:  41%|███████████████████████▌                                  | 4063/10000 [55:40<17:05,  5.79it/s]

Step 4062/10000, Loss: 1.5763
Step 4063/10000, Loss: 1.7368


Training Progress:  41%|███████████████████████▌                                  | 4065/10000 [55:40<16:57,  5.83it/s]

Step 4064/10000, Loss: 1.6202
Step 4065/10000, Loss: 1.6152


Training Progress:  41%|███████████████████████▌                                  | 4067/10000 [55:41<16:43,  5.91it/s]

Step 4066/10000, Loss: 1.5496
Step 4067/10000, Loss: 1.6478


Training Progress:  41%|███████████████████████▌                                  | 4069/10000 [55:41<16:57,  5.83it/s]

Step 4068/10000, Loss: 1.6120
Step 4069/10000, Loss: 1.6050


Training Progress:  41%|███████████████████████▌                                  | 4071/10000 [55:41<16:46,  5.89it/s]

Step 4070/10000, Loss: 1.5485
Step 4071/10000, Loss: 1.5782


Training Progress:  41%|███████████████████████▌                                  | 4073/10000 [55:42<16:59,  5.81it/s]

Step 4072/10000, Loss: 1.5837
Step 4073/10000, Loss: 1.6185


Training Progress:  41%|███████████████████████▋                                  | 4075/10000 [55:42<16:37,  5.94it/s]

Step 4074/10000, Loss: 1.7121
Step 4075/10000, Loss: 1.6327


Training Progress:  41%|███████████████████████▋                                  | 4077/10000 [55:42<16:55,  5.83it/s]

Step 4076/10000, Loss: 1.5483
Step 4077/10000, Loss: 1.5837


Training Progress:  41%|███████████████████████▋                                  | 4079/10000 [55:43<16:35,  5.95it/s]

Step 4078/10000, Loss: 1.5251
Step 4079/10000, Loss: 1.6573


Training Progress:  41%|███████████████████████▋                                  | 4081/10000 [55:43<16:51,  5.85it/s]

Step 4080/10000, Loss: 1.5918
Step 4081/10000, Loss: 1.5010


Training Progress:  41%|███████████████████████▋                                  | 4083/10000 [55:43<17:00,  5.80it/s]

Step 4082/10000, Loss: 1.4099
Step 4083/10000, Loss: 1.5719


Training Progress:  41%|███████████████████████▋                                  | 4085/10000 [55:44<16:45,  5.89it/s]

Step 4084/10000, Loss: 1.5153
Step 4085/10000, Loss: 1.4163


Training Progress:  41%|███████████████████████▋                                  | 4087/10000 [55:44<16:57,  5.81it/s]

Step 4086/10000, Loss: 1.4068
Step 4087/10000, Loss: 1.4153


Training Progress:  41%|███████████████████████▋                                  | 4089/10000 [55:44<16:44,  5.89it/s]

Step 4088/10000, Loss: 1.4404
Step 4089/10000, Loss: 1.4633


Training Progress:  41%|███████████████████████▋                                  | 4091/10000 [55:45<16:51,  5.84it/s]

Step 4090/10000, Loss: 1.6712
Step 4091/10000, Loss: 1.5516


Training Progress:  41%|███████████████████████▋                                  | 4093/10000 [55:45<16:33,  5.94it/s]

Step 4092/10000, Loss: 1.5015
Step 4093/10000, Loss: 1.4807


Training Progress:  41%|███████████████████████▊                                  | 4095/10000 [55:45<16:50,  5.84it/s]

Step 4094/10000, Loss: 1.4960
Step 4095/10000, Loss: 1.4302


Training Progress:  41%|███████████████████████▊                                  | 4096/10000 [55:45<16:54,  5.82it/s]

Step 4096/10000, Loss: 1.4364
Step 4097/10000, Loss: 1.3872


Training Progress:  41%|██████████████████████▉                                 | 4097/10000 [56:00<7:06:48,  4.34s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4097_loss1.3872_20250117_134750.pt

New best loss: 1.3872


Training Progress:  41%|██████████████████████▉                                 | 4099/10000 [56:00<3:44:24,  2.28s/it]

Step 4098/10000, Loss: 1.4322
Step 4099/10000, Loss: 1.6225


Training Progress:  41%|██████████████████████▉                                 | 4101/10000 [56:01<1:58:18,  1.20s/it]

Step 4100/10000, Loss: 1.5722
Step 4101/10000, Loss: 1.6484


Training Progress:  41%|██████████████████████▉                                 | 4103/10000 [56:01<1:06:43,  1.47it/s]

Step 4102/10000, Loss: 1.5571
Step 4103/10000, Loss: 1.5720


Training Progress:  41%|███████████████████████▊                                  | 4105/10000 [56:01<41:07,  2.39it/s]

Step 4104/10000, Loss: 1.5414
Step 4105/10000, Loss: 1.5392


Training Progress:  41%|███████████████████████▊                                  | 4107/10000 [56:02<28:48,  3.41it/s]

Step 4106/10000, Loss: 1.4668
Step 4107/10000, Loss: 1.5352


Training Progress:  41%|███████████████████████▊                                  | 4109/10000 [56:02<22:38,  4.34it/s]

Step 4108/10000, Loss: 1.5317
Step 4109/10000, Loss: 1.4754


Training Progress:  41%|███████████████████████▊                                  | 4111/10000 [56:02<19:23,  5.06it/s]

Step 4110/10000, Loss: 1.6425
Step 4111/10000, Loss: 1.6962


Training Progress:  41%|███████████████████████▊                                  | 4113/10000 [56:03<18:17,  5.36it/s]

Step 4112/10000, Loss: 1.6755
Step 4113/10000, Loss: 1.6417


Training Progress:  41%|███████████████████████▊                                  | 4115/10000 [56:03<17:24,  5.64it/s]

Step 4114/10000, Loss: 1.5646
Step 4115/10000, Loss: 1.5334


Training Progress:  41%|███████████████████████▉                                  | 4117/10000 [56:03<17:06,  5.73it/s]

Step 4116/10000, Loss: 1.7101
Step 4117/10000, Loss: 1.5357


Training Progress:  41%|███████████████████████▉                                  | 4119/10000 [56:04<16:57,  5.78it/s]

Step 4118/10000, Loss: 1.5322
Step 4119/10000, Loss: 1.6063


Training Progress:  41%|███████████████████████▉                                  | 4121/10000 [56:04<16:36,  5.90it/s]

Step 4120/10000, Loss: 1.5626
Step 4121/10000, Loss: 1.5666


Training Progress:  41%|███████████████████████▉                                  | 4123/10000 [56:04<16:53,  5.80it/s]

Step 4122/10000, Loss: 1.5109
Step 4123/10000, Loss: 1.6740


Training Progress:  41%|███████████████████████▉                                  | 4125/10000 [56:05<16:43,  5.85it/s]

Step 4124/10000, Loss: 1.6824
Step 4125/10000, Loss: 1.6828


Training Progress:  41%|███████████████████████▉                                  | 4127/10000 [56:05<16:37,  5.89it/s]

Step 4126/10000, Loss: 1.6777
Step 4127/10000, Loss: 1.5979


Training Progress:  41%|███████████████████████▉                                  | 4129/10000 [56:05<16:43,  5.85it/s]

Step 4128/10000, Loss: 1.6562
Step 4129/10000, Loss: 1.6468


Training Progress:  41%|███████████████████████▉                                  | 4131/10000 [56:06<16:24,  5.96it/s]

Step 4130/10000, Loss: 1.5369
Step 4131/10000, Loss: 1.6228


Training Progress:  41%|███████████████████████▉                                  | 4133/10000 [56:06<16:41,  5.86it/s]

Step 4132/10000, Loss: 1.5484
Step 4133/10000, Loss: 1.5183


Training Progress:  41%|███████████████████████▉                                  | 4135/10000 [56:06<16:51,  5.80it/s]

Step 4134/10000, Loss: 1.6075
Step 4135/10000, Loss: 1.6198


Training Progress:  41%|███████████████████████▉                                  | 4137/10000 [56:07<16:36,  5.88it/s]

Step 4136/10000, Loss: 1.5763
Step 4137/10000, Loss: 1.5383


Training Progress:  41%|████████████████████████                                  | 4139/10000 [56:07<16:45,  5.83it/s]

Step 4138/10000, Loss: 1.5800
Step 4139/10000, Loss: 1.6282


Training Progress:  41%|████████████████████████                                  | 4141/10000 [56:07<16:24,  5.95it/s]

Step 4140/10000, Loss: 1.5465
Step 4141/10000, Loss: 1.4526


Training Progress:  41%|████████████████████████                                  | 4143/10000 [56:08<16:42,  5.84it/s]

Step 4142/10000, Loss: 1.5583
Step 4143/10000, Loss: 1.5554


Training Progress:  41%|████████████████████████                                  | 4145/10000 [56:08<16:50,  5.79it/s]

Step 4144/10000, Loss: 1.5527
Step 4145/10000, Loss: 1.6281


Training Progress:  41%|████████████████████████                                  | 4147/10000 [56:08<16:33,  5.89it/s]

Step 4146/10000, Loss: 1.5499
Step 4147/10000, Loss: 1.5686


Training Progress:  41%|████████████████████████                                  | 4149/10000 [56:09<16:43,  5.83it/s]

Step 4148/10000, Loss: 1.5283
Step 4149/10000, Loss: 1.6038


Training Progress:  42%|████████████████████████                                  | 4151/10000 [56:09<16:24,  5.94it/s]

Step 4150/10000, Loss: 1.5388
Step 4151/10000, Loss: 1.5216


Training Progress:  42%|████████████████████████                                  | 4153/10000 [56:09<16:41,  5.84it/s]

Step 4152/10000, Loss: 1.5065
Step 4153/10000, Loss: 1.5310


Training Progress:  42%|████████████████████████                                  | 4155/10000 [56:10<16:46,  5.80it/s]

Step 4154/10000, Loss: 1.5578
Step 4155/10000, Loss: 1.5424


Training Progress:  42%|████████████████████████                                  | 4157/10000 [56:10<16:31,  5.89it/s]

Step 4156/10000, Loss: 1.6538
Step 4157/10000, Loss: 1.5723


Training Progress:  42%|████████████████████████                                  | 4159/10000 [56:10<16:46,  5.81it/s]

Step 4158/10000, Loss: 1.4933
Step 4159/10000, Loss: 1.5028


Training Progress:  42%|████████████████████████▏                                 | 4161/10000 [56:11<16:40,  5.83it/s]

Step 4160/10000, Loss: 1.4468
Step 4161/10000, Loss: 1.6010


Training Progress:  42%|████████████████████████▏                                 | 4163/10000 [56:11<16:32,  5.88it/s]

Step 4162/10000, Loss: 1.5187
Step 4163/10000, Loss: 1.4485
Step 4164/10000, Loss: 1.3589


Training Progress:  42%|███████████████████████▎                                | 4164/10000 [56:27<7:47:42,  4.81s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4164_loss1.3589_20250117_134815.pt

New best loss: 1.3589


Training Progress:  42%|███████████████████████▎                                | 4166/10000 [56:27<4:03:44,  2.51s/it]

Step 4165/10000, Loss: 1.4948
Step 4166/10000, Loss: 1.4703
Step 4167/10000, Loss: 1.3525


Training Progress:  42%|██████████████████████▉                                | 4167/10000 [56:49<13:33:07,  8.36s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4167_loss1.3525_20250117_134831.pt

New best loss: 1.3525


Training Progress:  42%|███████████████████████▎                                | 4169/10000 [56:50<6:51:27,  4.23s/it]

Step 4168/10000, Loss: 1.3549
Step 4169/10000, Loss: 1.3636


Training Progress:  42%|███████████████████████▎                                | 4171/10000 [56:50<3:29:41,  2.16s/it]

Step 4170/10000, Loss: 1.3795
Step 4171/10000, Loss: 1.3839


Training Progress:  42%|███████████████████████▎                                | 4173/10000 [56:51<1:51:16,  1.15s/it]

Step 4172/10000, Loss: 1.6274
Step 4173/10000, Loss: 1.4817


Training Progress:  42%|███████████████████████▍                                | 4175/10000 [56:51<1:02:41,  1.55it/s]

Step 4174/10000, Loss: 1.4429
Step 4175/10000, Loss: 1.4286


Training Progress:  42%|████████████████████████▏                                 | 4177/10000 [56:51<39:16,  2.47it/s]

Step 4176/10000, Loss: 1.4278
Step 4177/10000, Loss: 1.3721


Training Progress:  42%|████████████████████████▏                                 | 4178/10000 [56:52<32:32,  2.98it/s]

Step 4178/10000, Loss: 1.3742
Step 4179/10000, Loss: 1.3321


Training Progress:  42%|███████████████████████▍                                | 4179/10000 [57:11<9:34:50,  5.93s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4179_loss1.3321_20250117_134856.pt

New best loss: 1.3321


Training Progress:  42%|███████████████████████▍                                | 4181/10000 [57:11<4:57:02,  3.06s/it]

Step 4180/10000, Loss: 1.3813
Step 4181/10000, Loss: 1.5749


Training Progress:  42%|███████████████████████▍                                | 4183/10000 [57:12<2:33:54,  1.59s/it]

Step 4182/10000, Loss: 1.5374
Step 4183/10000, Loss: 1.5496


Training Progress:  42%|███████████████████████▍                                | 4185/10000 [57:12<1:23:53,  1.16it/s]

Step 4184/10000, Loss: 1.4765
Step 4185/10000, Loss: 1.4943


Training Progress:  42%|████████████████████████▎                                 | 4187/10000 [57:12<49:15,  1.97it/s]

Step 4186/10000, Loss: 1.5199
Step 4187/10000, Loss: 1.4415


Training Progress:  42%|████████████████████████▎                                 | 4189/10000 [57:13<32:45,  2.96it/s]

Step 4188/10000, Loss: 1.4135
Step 4189/10000, Loss: 1.4410


Training Progress:  42%|████████████████████████▎                                 | 4191/10000 [57:13<24:21,  3.97it/s]

Step 4190/10000, Loss: 1.4218
Step 4191/10000, Loss: 1.4246


Training Progress:  42%|████████████████████████▎                                 | 4193/10000 [57:13<20:33,  4.71it/s]

Step 4192/10000, Loss: 1.5236
Step 4193/10000, Loss: 1.6077


Training Progress:  42%|████████████████████████▎                                 | 4195/10000 [57:14<18:21,  5.27it/s]

Step 4194/10000, Loss: 1.6110
Step 4195/10000, Loss: 1.6106


Training Progress:  42%|████████████████████████▎                                 | 4197/10000 [57:14<17:36,  5.49it/s]

Step 4196/10000, Loss: 1.5571
Step 4197/10000, Loss: 1.4916


Training Progress:  42%|████████████████████████▎                                 | 4199/10000 [57:14<16:54,  5.72it/s]

Step 4198/10000, Loss: 1.6541
Step 4199/10000, Loss: 1.5281


Training Progress:  42%|████████████████████████▎                                 | 4201/10000 [57:15<16:55,  5.71it/s]

Step 4200/10000, Loss: 1.4662
Step 4201/10000, Loss: 1.5286


Training Progress:  42%|████████████████████████▍                                 | 4203/10000 [57:15<16:38,  5.81it/s]

Step 4202/10000, Loss: 1.4884
Step 4203/10000, Loss: 1.4965


Training Progress:  42%|████████████████████████▍                                 | 4205/10000 [57:15<16:27,  5.87it/s]

Step 4204/10000, Loss: 1.4411
Step 4205/10000, Loss: 1.5958


Training Progress:  42%|████████████████████████▍                                 | 4207/10000 [57:16<16:40,  5.79it/s]

Step 4206/10000, Loss: 1.5592
Step 4207/10000, Loss: 1.5932


Training Progress:  42%|████████████████████████▍                                 | 4209/10000 [57:16<16:27,  5.86it/s]

Step 4208/10000, Loss: 1.5978
Step 4209/10000, Loss: 1.5401


Training Progress:  42%|████████████████████████▍                                 | 4211/10000 [57:16<16:13,  5.94it/s]

Step 4210/10000, Loss: 1.5895
Step 4211/10000, Loss: 1.5808


Training Progress:  42%|████████████████████████▍                                 | 4213/10000 [57:17<16:32,  5.83it/s]

Step 4212/10000, Loss: 1.5012
Step 4213/10000, Loss: 1.5578


Training Progress:  42%|████████████████████████▍                                 | 4215/10000 [57:17<16:11,  5.96it/s]

Step 4214/10000, Loss: 1.4862
Step 4215/10000, Loss: 1.4739


Training Progress:  42%|████████████████████████▍                                 | 4217/10000 [57:17<16:29,  5.84it/s]

Step 4216/10000, Loss: 1.5510
Step 4217/10000, Loss: 1.5710


Training Progress:  42%|████████████████████████▍                                 | 4219/10000 [57:18<16:37,  5.79it/s]

Step 4218/10000, Loss: 1.5243
Step 4219/10000, Loss: 1.4949


Training Progress:  42%|████████████████████████▍                                 | 4221/10000 [57:18<16:17,  5.91it/s]

Step 4220/10000, Loss: 1.5148
Step 4221/10000, Loss: 1.5505


Training Progress:  42%|████████████████████████▍                                 | 4223/10000 [57:18<16:33,  5.82it/s]

Step 4222/10000, Loss: 1.5027
Step 4223/10000, Loss: 1.4260


Training Progress:  42%|████████████████████████▌                                 | 4225/10000 [57:19<16:18,  5.90it/s]

Step 4224/10000, Loss: 1.5879
Step 4225/10000, Loss: 1.5177


Training Progress:  42%|████████████████████████▌                                 | 4227/10000 [57:19<16:31,  5.82it/s]

Step 4226/10000, Loss: 1.5198
Step 4227/10000, Loss: 1.6158


Training Progress:  42%|████████████████████████▌                                 | 4229/10000 [57:19<16:20,  5.89it/s]

Step 4228/10000, Loss: 1.5148
Step 4229/10000, Loss: 1.5389


Training Progress:  42%|████████████████████████▌                                 | 4231/10000 [57:20<16:31,  5.82it/s]

Step 4230/10000, Loss: 1.4647
Step 4231/10000, Loss: 1.5597


Training Progress:  42%|████████████████████████▌                                 | 4233/10000 [57:20<16:18,  5.89it/s]

Step 4232/10000, Loss: 1.5147
Step 4233/10000, Loss: 1.5004


Training Progress:  42%|████████████████████████▌                                 | 4235/10000 [57:20<16:31,  5.81it/s]

Step 4234/10000, Loss: 1.4883
Step 4235/10000, Loss: 1.5142


Training Progress:  42%|████████████████████████▌                                 | 4237/10000 [57:21<16:18,  5.89it/s]

Step 4236/10000, Loss: 1.5070
Step 4237/10000, Loss: 1.5608


Training Progress:  42%|████████████████████████▌                                 | 4239/10000 [57:21<16:29,  5.82it/s]

Step 4238/10000, Loss: 1.6068
Step 4239/10000, Loss: 1.5052


Training Progress:  42%|████████████████████████▌                                 | 4241/10000 [57:21<16:06,  5.96it/s]

Step 4240/10000, Loss: 1.4418
Step 4241/10000, Loss: 1.4674


Training Progress:  42%|████████████████████████▌                                 | 4243/10000 [57:22<16:23,  5.85it/s]

Step 4242/10000, Loss: 1.3770
Step 4243/10000, Loss: 1.5462


Training Progress:  42%|████████████████████████▌                                 | 4245/10000 [57:22<16:30,  5.81it/s]

Step 4244/10000, Loss: 1.4664
Step 4245/10000, Loss: 1.4236
Step 4246/10000, Loss: 1.2976


Training Progress:  42%|███████████████████████▊                                | 4246/10000 [57:38<7:37:11,  4.77s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4246_loss1.2976_20250117_134926.pt

New best loss: 1.2976


Training Progress:  42%|███████████████████████▊                                | 4248/10000 [57:38<3:57:35,  2.48s/it]

Step 4247/10000, Loss: 1.4438
Step 4248/10000, Loss: 1.4283


Training Progress:  42%|███████████████████████▊                                | 4250/10000 [57:39<2:04:30,  1.30s/it]

Step 4249/10000, Loss: 1.3323
Step 4250/10000, Loss: 1.3032


Training Progress:  43%|███████████████████████▊                                | 4252/10000 [57:39<1:09:29,  1.38it/s]

Step 4251/10000, Loss: 1.3327
Step 4252/10000, Loss: 1.3515


Training Progress:  43%|████████████████████████▋                                 | 4254/10000 [57:39<42:04,  2.28it/s]

Step 4253/10000, Loss: 1.3475
Step 4254/10000, Loss: 1.5811


Training Progress:  43%|████████████████████████▋                                 | 4256/10000 [57:40<29:14,  3.27it/s]

Step 4255/10000, Loss: 1.4361
Step 4256/10000, Loss: 1.4161


Training Progress:  43%|████████████████████████▋                                 | 4258/10000 [57:40<22:16,  4.30it/s]

Step 4257/10000, Loss: 1.3553
Step 4258/10000, Loss: 1.3502


Training Progress:  43%|████████████████████████▋                                 | 4260/10000 [57:40<19:23,  4.93it/s]

Step 4259/10000, Loss: 1.3199
Step 4260/10000, Loss: 1.3070
Step 4261/10000, Loss: 1.2542


Training Progress:  43%|███████████████████████▊                                | 4261/10000 [57:59<9:15:08,  5.80s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4261_loss1.2542_20250117_134944.pt

New best loss: 1.2542


Training Progress:  43%|███████████████████████▊                                | 4263/10000 [58:00<4:46:56,  3.00s/it]

Step 4262/10000, Loss: 1.3092
Step 4263/10000, Loss: 1.4562


Training Progress:  43%|███████████████████████▉                                | 4265/10000 [58:00<2:28:54,  1.56s/it]

Step 4264/10000, Loss: 1.4459
Step 4265/10000, Loss: 1.4830


Training Progress:  43%|███████████████████████▉                                | 4267/10000 [58:00<1:21:05,  1.18it/s]

Step 4266/10000, Loss: 1.4287
Step 4267/10000, Loss: 1.4319


Training Progress:  43%|████████████████████████▊                                 | 4269/10000 [58:01<48:13,  1.98it/s]

Step 4268/10000, Loss: 1.4499
Step 4269/10000, Loss: 1.4374


Training Progress:  43%|████████████████████████▊                                 | 4271/10000 [58:01<31:47,  3.00it/s]

Step 4270/10000, Loss: 1.3953
Step 4271/10000, Loss: 1.4227


Training Progress:  43%|████████████████████████▊                                 | 4273/10000 [58:01<23:57,  3.98it/s]

Step 4272/10000, Loss: 1.4040
Step 4273/10000, Loss: 1.3695


Training Progress:  43%|████████████████████████▊                                 | 4275/10000 [58:02<19:58,  4.78it/s]

Step 4274/10000, Loss: 1.4478
Step 4275/10000, Loss: 1.5053


Training Progress:  43%|████████████████████████▊                                 | 4277/10000 [58:02<17:48,  5.35it/s]

Step 4276/10000, Loss: 1.5144
Step 4277/10000, Loss: 1.5523


Training Progress:  43%|████████████████████████▊                                 | 4279/10000 [58:03<17:11,  5.55it/s]

Step 4278/10000, Loss: 1.4993
Step 4279/10000, Loss: 1.4597


Training Progress:  43%|████████████████████████▊                                 | 4281/10000 [58:03<16:26,  5.80it/s]

Step 4280/10000, Loss: 1.6149
Step 4281/10000, Loss: 1.4895


Training Progress:  43%|████████████████████████▊                                 | 4283/10000 [58:03<16:37,  5.73it/s]

Step 4282/10000, Loss: 1.4892
Step 4283/10000, Loss: 1.5447


Training Progress:  43%|████████████████████████▊                                 | 4285/10000 [58:04<16:04,  5.93it/s]

Step 4284/10000, Loss: 1.4703
Step 4285/10000, Loss: 1.4767


Training Progress:  43%|████████████████████████▊                                 | 4287/10000 [58:04<16:21,  5.82it/s]

Step 4286/10000, Loss: 1.4033
Step 4287/10000, Loss: 1.5872


Training Progress:  43%|████████████████████████▉                                 | 4289/10000 [58:04<16:01,  5.94it/s]

Step 4288/10000, Loss: 1.5082
Step 4289/10000, Loss: 1.5400


Training Progress:  43%|████████████████████████▉                                 | 4291/10000 [58:05<16:18,  5.84it/s]

Step 4290/10000, Loss: 1.5381
Step 4291/10000, Loss: 1.4683


Training Progress:  43%|████████████████████████▉                                 | 4293/10000 [58:05<16:16,  5.85it/s]

Step 4292/10000, Loss: 1.5176
Step 4293/10000, Loss: 1.5268


Training Progress:  43%|████████████████████████▉                                 | 4295/10000 [58:05<16:15,  5.85it/s]

Step 4294/10000, Loss: 1.4635
Step 4295/10000, Loss: 1.5178


Training Progress:  43%|████████████████████████▉                                 | 4297/10000 [58:06<16:06,  5.90it/s]

Step 4296/10000, Loss: 1.4750
Step 4297/10000, Loss: 1.4343


Training Progress:  43%|████████████████████████▉                                 | 4299/10000 [58:06<16:21,  5.81it/s]

Step 4298/10000, Loss: 1.5005
Step 4299/10000, Loss: 1.4973


Training Progress:  43%|████████████████████████▉                                 | 4301/10000 [58:06<16:08,  5.89it/s]

Step 4300/10000, Loss: 1.4600
Step 4301/10000, Loss: 1.4192


Training Progress:  43%|████████████████████████▉                                 | 4303/10000 [58:07<16:20,  5.81it/s]

Step 4302/10000, Loss: 1.4608
Step 4303/10000, Loss: 1.4981


Training Progress:  43%|████████████████████████▉                                 | 4305/10000 [58:07<16:10,  5.87it/s]

Step 4304/10000, Loss: 1.4348
Step 4305/10000, Loss: 1.3677


Training Progress:  43%|████████████████████████▉                                 | 4307/10000 [58:07<15:53,  5.97it/s]

Step 4306/10000, Loss: 1.5012
Step 4307/10000, Loss: 1.4652


Training Progress:  43%|████████████████████████▉                                 | 4309/10000 [58:08<16:13,  5.85it/s]

Step 4308/10000, Loss: 1.4376
Step 4309/10000, Loss: 1.5239


Training Progress:  43%|█████████████████████████                                 | 4311/10000 [58:08<15:56,  5.95it/s]

Step 4310/10000, Loss: 1.4503
Step 4311/10000, Loss: 1.4550


Training Progress:  43%|█████████████████████████                                 | 4313/10000 [58:08<16:17,  5.82it/s]

Step 4312/10000, Loss: 1.4008
Step 4313/10000, Loss: 1.4987


Training Progress:  43%|█████████████████████████                                 | 4315/10000 [58:09<15:56,  5.94it/s]

Step 4314/10000, Loss: 1.4697
Step 4315/10000, Loss: 1.4310


Training Progress:  43%|█████████████████████████                                 | 4317/10000 [58:09<16:14,  5.83it/s]

Step 4316/10000, Loss: 1.4171
Step 4317/10000, Loss: 1.4731


Training Progress:  43%|█████████████████████████                                 | 4319/10000 [58:09<16:03,  5.89it/s]

Step 4318/10000, Loss: 1.4649
Step 4319/10000, Loss: 1.4954


Training Progress:  43%|█████████████████████████                                 | 4321/10000 [58:10<16:17,  5.81it/s]

Step 4320/10000, Loss: 1.5695
Step 4321/10000, Loss: 1.4491


Training Progress:  43%|█████████████████████████                                 | 4323/10000 [58:10<16:13,  5.83it/s]

Step 4322/10000, Loss: 1.3812
Step 4323/10000, Loss: 1.4312


Training Progress:  43%|█████████████████████████                                 | 4325/10000 [58:10<16:16,  5.81it/s]

Step 4324/10000, Loss: 1.3317
Step 4325/10000, Loss: 1.4779


Training Progress:  43%|█████████████████████████                                 | 4327/10000 [58:11<16:02,  5.89it/s]

Step 4326/10000, Loss: 1.4509
Step 4327/10000, Loss: 1.3593
Step 4328/10000, Loss: 1.2535


Training Progress:  43%|████████████████████████▏                               | 4328/10000 [58:25<7:00:39,  4.45s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4328_loss1.2535_20250117_135015.pt

New best loss: 1.2535


Training Progress:  43%|████████████████████████▏                               | 4330/10000 [58:26<3:40:41,  2.34s/it]

Step 4329/10000, Loss: 1.4061
Step 4330/10000, Loss: 1.3971


Training Progress:  43%|████████████████████████▎                               | 4331/10000 [58:26<2:39:24,  1.69s/it]

Step 4331/10000, Loss: 1.2879
Step 4332/10000, Loss: 1.2249


Training Progress:  43%|███████████████████████▊                               | 4332/10000 [58:47<11:36:45,  7.38s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4332_loss1.2249_20250117_135030.pt

New best loss: 1.2249


Training Progress:  43%|████████████████████████▎                               | 4334/10000 [58:47<5:53:56,  3.75s/it]

Step 4333/10000, Loss: 1.2336
Step 4334/10000, Loss: 1.2670


Training Progress:  43%|████████████████████████▎                               | 4336/10000 [58:48<3:01:54,  1.93s/it]

Step 4335/10000, Loss: 1.2755
Step 4336/10000, Loss: 1.4949


Training Progress:  43%|████████████████████████▎                               | 4338/10000 [58:48<1:36:55,  1.03s/it]

Step 4337/10000, Loss: 1.3660
Step 4338/10000, Loss: 1.3652


Training Progress:  43%|█████████████████████████▏                                | 4340/10000 [58:48<55:51,  1.69it/s]

Step 4339/10000, Loss: 1.2978
Step 4340/10000, Loss: 1.2880


Training Progress:  43%|█████████████████████████▏                                | 4342/10000 [58:49<35:18,  2.67it/s]

Step 4341/10000, Loss: 1.2618
Step 4342/10000, Loss: 1.2295
Step 4343/10000, Loss: 1.1950


Training Progress:  43%|████████████████████████▎                               | 4343/10000 [59:08<9:29:24,  6.04s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4343_loss1.1950_20250117_135053.pt

New best loss: 1.1950


Training Progress:  43%|████████████████████████▎                               | 4345/10000 [59:08<4:51:55,  3.10s/it]

Step 4344/10000, Loss: 1.2369
Step 4345/10000, Loss: 1.3836


Training Progress:  43%|████████████████████████▎                               | 4347/10000 [59:09<2:30:52,  1.60s/it]

Step 4346/10000, Loss: 1.3599
Step 4347/10000, Loss: 1.3932


Training Progress:  43%|████████████████████████▎                               | 4349/10000 [59:09<1:22:15,  1.15it/s]

Step 4348/10000, Loss: 1.3078
Step 4349/10000, Loss: 1.3156


Training Progress:  44%|█████████████████████████▏                                | 4351/10000 [59:09<48:37,  1.94it/s]

Step 4350/10000, Loss: 1.3447
Step 4351/10000, Loss: 1.3188


Training Progress:  44%|█████████████████████████▏                                | 4353/10000 [59:10<31:50,  2.96it/s]

Step 4352/10000, Loss: 1.2808
Step 4353/10000, Loss: 1.3501


Training Progress:  44%|█████████████████████████▎                                | 4355/10000 [59:10<23:57,  3.93it/s]

Step 4354/10000, Loss: 1.3555
Step 4355/10000, Loss: 1.3262


Training Progress:  44%|█████████████████████████▎                                | 4357/10000 [59:10<19:37,  4.79it/s]

Step 4356/10000, Loss: 1.4171
Step 4357/10000, Loss: 1.4721


Training Progress:  44%|█████████████████████████▎                                | 4359/10000 [59:11<17:57,  5.24it/s]

Step 4358/10000, Loss: 1.4851
Step 4359/10000, Loss: 1.4190


Training Progress:  44%|█████████████████████████▎                                | 4361/10000 [59:11<16:42,  5.63it/s]

Step 4360/10000, Loss: 1.3976
Step 4361/10000, Loss: 1.3581


Training Progress:  44%|█████████████████████████▎                                | 4363/10000 [59:11<16:40,  5.64it/s]

Step 4362/10000, Loss: 1.4752
Step 4363/10000, Loss: 1.3818


Training Progress:  44%|█████████████████████████▎                                | 4365/10000 [59:12<15:55,  5.90it/s]

Step 4364/10000, Loss: 1.3983
Step 4365/10000, Loss: 1.4331


Training Progress:  44%|█████████████████████████▎                                | 4367/10000 [59:12<16:07,  5.82it/s]

Step 4366/10000, Loss: 1.3987
Step 4367/10000, Loss: 1.4089


Training Progress:  44%|█████████████████████████▎                                | 4369/10000 [59:12<16:13,  5.78it/s]

Step 4368/10000, Loss: 1.3832
Step 4369/10000, Loss: 1.4927


Training Progress:  44%|█████████████████████████▎                                | 4371/10000 [59:13<15:55,  5.89it/s]

Step 4370/10000, Loss: 1.4910
Step 4371/10000, Loss: 1.5030


Training Progress:  44%|█████████████████████████▎                                | 4373/10000 [59:13<16:11,  5.79it/s]

Step 4372/10000, Loss: 1.4632
Step 4373/10000, Loss: 1.4238


Training Progress:  44%|█████████████████████████▍                                | 4375/10000 [59:13<15:44,  5.96it/s]

Step 4374/10000, Loss: 1.4611
Step 4375/10000, Loss: 1.4429


Training Progress:  44%|█████████████████████████▍                                | 4377/10000 [59:14<16:00,  5.86it/s]

Step 4376/10000, Loss: 1.3943
Step 4377/10000, Loss: 1.4686


Training Progress:  44%|█████████████████████████▍                                | 4379/10000 [59:14<16:08,  5.80it/s]

Step 4378/10000, Loss: 1.3938
Step 4379/10000, Loss: 1.3998


Training Progress:  44%|█████████████████████████▍                                | 4381/10000 [59:15<15:54,  5.89it/s]

Step 4380/10000, Loss: 1.4573
Step 4381/10000, Loss: 1.4426


Training Progress:  44%|█████████████████████████▍                                | 4383/10000 [59:15<16:04,  5.82it/s]

Step 4382/10000, Loss: 1.4473
Step 4383/10000, Loss: 1.3690


Training Progress:  44%|█████████████████████████▍                                | 4385/10000 [59:15<15:43,  5.95it/s]

Step 4384/10000, Loss: 1.4506
Step 4385/10000, Loss: 1.4448


Training Progress:  44%|█████████████████████████▍                                | 4387/10000 [59:16<15:58,  5.86it/s]

Step 4386/10000, Loss: 1.4064
Step 4387/10000, Loss: 1.3012


Training Progress:  44%|█████████████████████████▍                                | 4389/10000 [59:16<16:06,  5.81it/s]

Step 4388/10000, Loss: 1.4647
Step 4389/10000, Loss: 1.4212


Training Progress:  44%|█████████████████████████▍                                | 4391/10000 [59:16<15:51,  5.90it/s]

Step 4390/10000, Loss: 1.4027
Step 4391/10000, Loss: 1.4754


Training Progress:  44%|█████████████████████████▍                                | 4393/10000 [59:17<16:02,  5.83it/s]

Step 4392/10000, Loss: 1.4173
Step 4393/10000, Loss: 1.4163


Training Progress:  44%|█████████████████████████▍                                | 4395/10000 [59:17<15:42,  5.95it/s]

Step 4394/10000, Loss: 1.3375
Step 4395/10000, Loss: 1.4262


Training Progress:  44%|█████████████████████████▌                                | 4397/10000 [59:17<15:58,  5.85it/s]

Step 4396/10000, Loss: 1.3811
Step 4397/10000, Loss: 1.3332


Training Progress:  44%|█████████████████████████▌                                | 4399/10000 [59:18<16:04,  5.81it/s]

Step 4398/10000, Loss: 1.3514
Step 4399/10000, Loss: 1.3934


Training Progress:  44%|█████████████████████████▌                                | 4401/10000 [59:18<15:50,  5.89it/s]

Step 4400/10000, Loss: 1.3902
Step 4401/10000, Loss: 1.4408


Training Progress:  44%|█████████████████████████▌                                | 4403/10000 [59:18<16:00,  5.83it/s]

Step 4402/10000, Loss: 1.4963
Step 4403/10000, Loss: 1.3962


Training Progress:  44%|█████████████████████████▌                                | 4405/10000 [59:19<15:47,  5.91it/s]

Step 4404/10000, Loss: 1.3326
Step 4405/10000, Loss: 1.3710


Training Progress:  44%|█████████████████████████▌                                | 4407/10000 [59:19<15:59,  5.83it/s]

Step 4406/10000, Loss: 1.2775
Step 4407/10000, Loss: 1.4523


Training Progress:  44%|█████████████████████████▌                                | 4409/10000 [59:19<15:38,  5.96it/s]

Step 4408/10000, Loss: 1.3716
Step 4409/10000, Loss: 1.3204


Training Progress:  44%|█████████████████████████▌                                | 4411/10000 [59:20<15:49,  5.89it/s]

Step 4410/10000, Loss: 1.2026
Step 4411/10000, Loss: 1.4203


Training Progress:  44%|█████████████████████████▌                                | 4413/10000 [59:20<15:59,  5.82it/s]

Step 4412/10000, Loss: 1.3404
Step 4413/10000, Loss: 1.2488


Training Progress:  44%|█████████████████████████▌                                | 4415/10000 [59:20<15:46,  5.90it/s]

Step 4414/10000, Loss: 1.2444
Step 4415/10000, Loss: 1.2387


Training Progress:  44%|█████████████████████████▌                                | 4417/10000 [59:21<15:58,  5.83it/s]

Step 4416/10000, Loss: 1.2799
Step 4417/10000, Loss: 1.2976


Training Progress:  44%|█████████████████████████▋                                | 4419/10000 [59:21<15:37,  5.95it/s]

Step 4418/10000, Loss: 1.4731
Step 4419/10000, Loss: 1.3515


Training Progress:  44%|█████████████████████████▋                                | 4421/10000 [59:21<15:51,  5.86it/s]

Step 4420/10000, Loss: 1.3082
Step 4421/10000, Loss: 1.2258


Training Progress:  44%|█████████████████████████▋                                | 4423/10000 [59:22<16:00,  5.81it/s]

Step 4422/10000, Loss: 1.2763
Step 4423/10000, Loss: 1.2045


Training Progress:  44%|█████████████████████████▋                                | 4425/10000 [59:22<15:45,  5.90it/s]

Step 4424/10000, Loss: 1.2186
Step 4425/10000, Loss: 1.2075


Training Progress:  44%|█████████████████████████▋                                | 4427/10000 [59:22<15:56,  5.83it/s]

Step 4426/10000, Loss: 1.2516
Step 4427/10000, Loss: 1.3421


Training Progress:  44%|█████████████████████████▋                                | 4429/10000 [59:23<15:37,  5.95it/s]

Step 4428/10000, Loss: 1.3038
Step 4429/10000, Loss: 1.3366


Training Progress:  44%|█████████████████████████▋                                | 4431/10000 [59:23<15:51,  5.85it/s]

Step 4430/10000, Loss: 1.2716
Step 4431/10000, Loss: 1.3097


Training Progress:  44%|█████████████████████████▋                                | 4433/10000 [59:23<15:58,  5.81it/s]

Step 4432/10000, Loss: 1.3388
Step 4433/10000, Loss: 1.2786


Training Progress:  44%|█████████████████████████▋                                | 4435/10000 [59:24<15:35,  5.95it/s]

Step 4434/10000, Loss: 1.2423
Step 4435/10000, Loss: 1.2790


Training Progress:  44%|█████████████████████████▋                                | 4437/10000 [59:24<15:57,  5.81it/s]

Step 4436/10000, Loss: 1.2861
Step 4437/10000, Loss: 1.2249


Training Progress:  44%|█████████████████████████▋                                | 4439/10000 [59:24<15:57,  5.81it/s]

Step 4438/10000, Loss: 1.3315
Step 4439/10000, Loss: 1.3985


Training Progress:  44%|█████████████████████████▊                                | 4441/10000 [59:25<15:33,  5.95it/s]

Step 4440/10000, Loss: 1.4142
Step 4441/10000, Loss: 1.3641


Training Progress:  44%|█████████████████████████▊                                | 4443/10000 [59:25<15:49,  5.86it/s]

Step 4442/10000, Loss: 1.3700
Step 4443/10000, Loss: 1.2981


Training Progress:  44%|█████████████████████████▊                                | 4445/10000 [59:25<15:53,  5.83it/s]

Step 4444/10000, Loss: 1.4441
Step 4445/10000, Loss: 1.2993


Training Progress:  44%|█████████████████████████▊                                | 4447/10000 [59:26<15:58,  5.79it/s]

Step 4446/10000, Loss: 1.3253
Step 4447/10000, Loss: 1.3636


Training Progress:  44%|█████████████████████████▊                                | 4449/10000 [59:26<15:42,  5.89it/s]

Step 4448/10000, Loss: 1.3591
Step 4449/10000, Loss: 1.3553


Training Progress:  45%|█████████████████████████▊                                | 4451/10000 [59:26<15:53,  5.82it/s]

Step 4450/10000, Loss: 1.3033
Step 4451/10000, Loss: 1.4295


Training Progress:  45%|█████████████████████████▊                                | 4453/10000 [59:27<15:31,  5.95it/s]

Step 4452/10000, Loss: 1.4537
Step 4453/10000, Loss: 1.4287


Training Progress:  45%|█████████████████████████▊                                | 4455/10000 [59:27<15:46,  5.86it/s]

Step 4454/10000, Loss: 1.4172
Step 4455/10000, Loss: 1.3331


Training Progress:  45%|█████████████████████████▊                                | 4457/10000 [59:27<15:54,  5.81it/s]

Step 4456/10000, Loss: 1.3872
Step 4457/10000, Loss: 1.3790


Training Progress:  45%|█████████████████████████▊                                | 4459/10000 [59:28<15:56,  5.79it/s]

Step 4458/10000, Loss: 1.3396
Step 4459/10000, Loss: 1.4179


Training Progress:  45%|█████████████████████████▊                                | 4461/10000 [59:28<15:51,  5.82it/s]

Step 4460/10000, Loss: 1.3704
Step 4461/10000, Loss: 1.3302


Training Progress:  45%|█████████████████████████▉                                | 4463/10000 [59:29<15:49,  5.83it/s]

Step 4462/10000, Loss: 1.4025
Step 4463/10000, Loss: 1.3820


Training Progress:  45%|█████████████████████████▉                                | 4465/10000 [59:29<15:55,  5.79it/s]

Step 4464/10000, Loss: 1.3738
Step 4465/10000, Loss: 1.3500


Training Progress:  45%|█████████████████████████▉                                | 4467/10000 [59:29<15:40,  5.88it/s]

Step 4466/10000, Loss: 1.3953
Step 4467/10000, Loss: 1.4425


Training Progress:  45%|█████████████████████████▉                                | 4469/10000 [59:30<15:48,  5.83it/s]

Step 4468/10000, Loss: 1.3788
Step 4469/10000, Loss: 1.2965


Training Progress:  45%|█████████████████████████▉                                | 4471/10000 [59:30<15:54,  5.79it/s]

Step 4470/10000, Loss: 1.3973
Step 4471/10000, Loss: 1.3819


Training Progress:  45%|█████████████████████████▉                                | 4473/10000 [59:30<15:39,  5.88it/s]

Step 4472/10000, Loss: 1.3339
Step 4473/10000, Loss: 1.4316


Training Progress:  45%|█████████████████████████▉                                | 4475/10000 [59:31<15:48,  5.82it/s]

Step 4474/10000, Loss: 1.3631
Step 4475/10000, Loss: 1.3952


Training Progress:  45%|█████████████████████████▉                                | 4477/10000 [59:31<15:29,  5.94it/s]

Step 4476/10000, Loss: 1.3384
Step 4477/10000, Loss: 1.4214


Training Progress:  45%|█████████████████████████▉                                | 4479/10000 [59:31<15:42,  5.86it/s]

Step 4478/10000, Loss: 1.3601
Step 4479/10000, Loss: 1.3015


Training Progress:  45%|█████████████████████████▉                                | 4481/10000 [59:32<15:50,  5.81it/s]

Step 4480/10000, Loss: 1.2927
Step 4481/10000, Loss: 1.3433


Training Progress:  45%|██████████████████████████                                | 4483/10000 [59:32<15:53,  5.79it/s]

Step 4482/10000, Loss: 1.3136
Step 4483/10000, Loss: 1.3756


Training Progress:  45%|██████████████████████████                                | 4485/10000 [59:32<15:43,  5.85it/s]

Step 4484/10000, Loss: 1.4377
Step 4485/10000, Loss: 1.3572


Training Progress:  45%|██████████████████████████                                | 4487/10000 [59:33<15:47,  5.82it/s]

Step 4486/10000, Loss: 1.2744
Step 4487/10000, Loss: 1.3430


Training Progress:  45%|██████████████████████████                                | 4489/10000 [59:33<15:52,  5.78it/s]

Step 4488/10000, Loss: 1.2642
Step 4489/10000, Loss: 1.3829


Training Progress:  45%|██████████████████████████                                | 4491/10000 [59:33<15:28,  5.93it/s]

Step 4490/10000, Loss: 1.3313
Step 4491/10000, Loss: 1.2699
Step 4492/10000, Loss: 1.1309


Training Progress:  45%|█████████████████████████▏                              | 4492/10000 [59:48<6:45:34,  4.42s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4492_loss1.1309_20250117_135137.pt

New best loss: 1.1309


Training Progress:  45%|█████████████████████████▏                              | 4494/10000 [59:48<3:30:29,  2.29s/it]

Step 4493/10000, Loss: 1.3455
Step 4494/10000, Loss: 1.2776


Training Progress:  45%|█████████████████████████▏                              | 4496/10000 [59:49<1:50:52,  1.21s/it]

Step 4495/10000, Loss: 1.2001
Step 4496/10000, Loss: 1.1963


Training Progress:  45%|█████████████████████████▏                              | 4498/10000 [59:49<1:02:29,  1.47it/s]

Step 4497/10000, Loss: 1.2091
Step 4498/10000, Loss: 1.2236


Training Progress:  45%|██████████████████████████                                | 4500/10000 [59:49<38:20,  2.39it/s]

Step 4499/10000, Loss: 1.2354
Step 4500/10000, Loss: 1.3994


Training Progress:  45%|██████████████████████████                                | 4502/10000 [59:50<26:55,  3.40it/s]

Step 4501/10000, Loss: 1.3121
Step 4502/10000, Loss: 1.2546


Training Progress:  45%|██████████████████████████                                | 4504/10000 [59:50<21:20,  4.29it/s]

Step 4503/10000, Loss: 1.2023
Step 4504/10000, Loss: 1.2316


Training Progress:  45%|██████████████████████████▏                               | 4506/10000 [59:50<18:17,  5.01it/s]

Step 4505/10000, Loss: 1.1553
Step 4506/10000, Loss: 1.1638


Training Progress:  45%|██████████████████████████▏                               | 4508/10000 [59:51<17:06,  5.35it/s]

Step 4507/10000, Loss: 1.1648
Step 4508/10000, Loss: 1.2039


Training Progress:  45%|██████████████████████████▏                               | 4510/10000 [59:51<16:13,  5.64it/s]

Step 4509/10000, Loss: 1.2973
Step 4510/10000, Loss: 1.2751


Training Progress:  45%|██████████████████████████▏                               | 4512/10000 [59:51<16:05,  5.68it/s]

Step 4511/10000, Loss: 1.3069
Step 4512/10000, Loss: 1.2172


Training Progress:  45%|██████████████████████████▏                               | 4514/10000 [59:52<15:47,  5.79it/s]

Step 4513/10000, Loss: 1.2421
Step 4514/10000, Loss: 1.2362


Training Progress:  45%|██████████████████████████▏                               | 4516/10000 [59:52<15:52,  5.76it/s]

Step 4515/10000, Loss: 1.2277
Step 4516/10000, Loss: 1.2009


Training Progress:  45%|██████████████████████████▏                               | 4518/10000 [59:52<15:39,  5.84it/s]

Step 4517/10000, Loss: 1.1970
Step 4518/10000, Loss: 1.2173


Training Progress:  45%|██████████████████████████▏                               | 4520/10000 [59:53<15:49,  5.77it/s]

Step 4519/10000, Loss: 1.1660
Step 4520/10000, Loss: 1.2458


Training Progress:  45%|██████████████████████████▏                               | 4522/10000 [59:53<15:24,  5.93it/s]

Step 4521/10000, Loss: 1.3397
Step 4522/10000, Loss: 1.3451


Training Progress:  45%|██████████████████████████▏                               | 4524/10000 [59:53<15:39,  5.83it/s]

Step 4523/10000, Loss: 1.3303
Step 4524/10000, Loss: 1.2998


Training Progress:  45%|██████████████████████████▎                               | 4526/10000 [59:54<15:44,  5.79it/s]

Step 4525/10000, Loss: 1.2195
Step 4526/10000, Loss: 1.4070


Training Progress:  45%|██████████████████████████▎                               | 4528/10000 [59:54<15:38,  5.83it/s]

Step 4527/10000, Loss: 1.2358
Step 4528/10000, Loss: 1.2619


Training Progress:  45%|██████████████████████████▎                               | 4530/10000 [59:54<15:32,  5.87it/s]

Step 4529/10000, Loss: 1.3176
Step 4530/10000, Loss: 1.2642


Training Progress:  45%|██████████████████████████▎                               | 4532/10000 [59:55<15:40,  5.81it/s]

Step 4531/10000, Loss: 1.2883
Step 4532/10000, Loss: 1.1978


Training Progress:  45%|██████████████████████████▎                               | 4534/10000 [59:55<15:18,  5.95it/s]

Step 4533/10000, Loss: 1.3628
Step 4534/10000, Loss: 1.3721


Training Progress:  45%|██████████████████████████▎                               | 4536/10000 [59:55<15:34,  5.85it/s]

Step 4535/10000, Loss: 1.3722
Step 4536/10000, Loss: 1.3332


Training Progress:  45%|██████████████████████████▎                               | 4538/10000 [59:56<15:42,  5.80it/s]

Step 4537/10000, Loss: 1.3003
Step 4538/10000, Loss: 1.3751


Training Progress:  45%|██████████████████████████▎                               | 4540/10000 [59:56<15:18,  5.94it/s]

Step 4539/10000, Loss: 1.3198
Step 4540/10000, Loss: 1.2573


Training Progress:  45%|██████████████████████████▎                               | 4542/10000 [59:56<15:33,  5.85it/s]

Step 4541/10000, Loss: 1.3521
Step 4542/10000, Loss: 1.2822


Training Progress:  45%|██████████████████████████▎                               | 4544/10000 [59:57<15:42,  5.79it/s]

Step 4543/10000, Loss: 1.2470
Step 4544/10000, Loss: 1.3644


Training Progress:  45%|██████████████████████████▎                               | 4546/10000 [59:57<15:20,  5.92it/s]

Step 4545/10000, Loss: 1.3346
Step 4546/10000, Loss: 1.3092


Training Progress:  45%|██████████████████████████▍                               | 4548/10000 [59:57<15:34,  5.83it/s]

Step 4547/10000, Loss: 1.2420
Step 4548/10000, Loss: 1.3275


Training Progress:  46%|██████████████████████████▍                               | 4550/10000 [59:58<15:39,  5.80it/s]

Step 4549/10000, Loss: 1.3598
Step 4550/10000, Loss: 1.3192


Training Progress:  46%|██████████████████████████▍                               | 4552/10000 [59:58<15:30,  5.85it/s]

Step 4551/10000, Loss: 1.2140
Step 4552/10000, Loss: 1.3750


Training Progress:  46%|██████████████████████████▍                               | 4554/10000 [59:58<15:27,  5.87it/s]

Step 4553/10000, Loss: 1.3429
Step 4554/10000, Loss: 1.2879


Training Progress:  46%|██████████████████████████▍                               | 4556/10000 [59:59<15:36,  5.82it/s]

Step 4555/10000, Loss: 1.3626
Step 4556/10000, Loss: 1.2914


Training Progress:  46%|██████████████████████████▍                               | 4558/10000 [59:59<15:13,  5.95it/s]

Step 4557/10000, Loss: 1.3127
Step 4558/10000, Loss: 1.2673


Training Progress:  46%|██████████████████████████▍                               | 4560/10000 [59:59<15:23,  5.89it/s]

Step 4559/10000, Loss: 1.3379
Step 4560/10000, Loss: 1.3118


Training Progress:  46%|█████████████████████████▌                              | 4562/10000 [1:00:00<15:34,  5.82it/s]

Step 4561/10000, Loss: 1.3004
Step 4562/10000, Loss: 1.2737


Training Progress:  46%|█████████████████████████▌                              | 4564/10000 [1:00:00<15:14,  5.95it/s]

Step 4563/10000, Loss: 1.2927
Step 4564/10000, Loss: 1.2751


Training Progress:  46%|█████████████████████████▌                              | 4566/10000 [1:00:00<15:29,  5.85it/s]

Step 4565/10000, Loss: 1.3052
Step 4566/10000, Loss: 1.3778


Training Progress:  46%|█████████████████████████▌                              | 4568/10000 [1:00:01<15:37,  5.80it/s]

Step 4567/10000, Loss: 1.2923
Step 4568/10000, Loss: 1.2195


Training Progress:  46%|█████████████████████████▌                              | 4570/10000 [1:00:01<15:13,  5.94it/s]

Step 4569/10000, Loss: 1.3176
Step 4570/10000, Loss: 1.2157


Training Progress:  46%|█████████████████████████▌                              | 4572/10000 [1:00:02<15:28,  5.85it/s]

Step 4571/10000, Loss: 1.3463
Step 4572/10000, Loss: 1.3109


Training Progress:  46%|█████████████████████████▌                              | 4573/10000 [1:00:02<15:31,  5.83it/s]

Step 4573/10000, Loss: 1.2455
Step 4574/10000, Loss: 1.1222


Training Progress:  46%|████████████████████████▋                             | 4574/10000 [1:00:16<6:48:44,  4.52s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4574_loss1.1222_20250117_135206.pt

New best loss: 1.1222


Training Progress:  46%|████████████████████████▋                             | 4576/10000 [1:00:17<3:32:58,  2.36s/it]

Step 4575/10000, Loss: 1.2747
Step 4576/10000, Loss: 1.2312


Training Progress:  46%|████████████████████████▋                             | 4578/10000 [1:00:17<1:52:29,  1.24s/it]

Step 4577/10000, Loss: 1.1354
Step 4578/10000, Loss: 1.1722


Training Progress:  46%|████████████████████████▋                             | 4580/10000 [1:00:18<1:03:02,  1.43it/s]

Step 4579/10000, Loss: 1.1546
Step 4580/10000, Loss: 1.1972


Training Progress:  46%|█████████████████████████▋                              | 4582/10000 [1:00:18<38:36,  2.34it/s]

Step 4581/10000, Loss: 1.1913
Step 4582/10000, Loss: 1.3623


Training Progress:  46%|█████████████████████████▋                              | 4584/10000 [1:00:18<26:56,  3.35it/s]

Step 4583/10000, Loss: 1.2639
Step 4584/10000, Loss: 1.1881


Training Progress:  46%|█████████████████████████▋                              | 4586/10000 [1:00:19<20:46,  4.34it/s]

Step 4585/10000, Loss: 1.1821
Step 4586/10000, Loss: 1.1871


Training Progress:  46%|█████████████████████████▋                              | 4587/10000 [1:00:19<19:15,  4.69it/s]

Step 4587/10000, Loss: 1.1414
Step 4588/10000, Loss: 1.1220


Training Progress:  46%|████████████████████████▊                             | 4588/10000 [1:00:38<8:54:45,  5.93s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4588_loss1.1220_20250117_135223.pt

New best loss: 1.1220
Step 4589/10000, Loss: 1.1184


Training Progress:  46%|████████████████████████▎                            | 4589/10000 [1:01:00<15:53:15, 10.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4589_loss1.1184_20250117_135242.pt

New best loss: 1.1184


Training Progress:  46%|████████████████████████▊                             | 4591/10000 [1:01:00<8:00:39,  5.33s/it]

Step 4590/10000, Loss: 1.1591
Step 4591/10000, Loss: 1.2768


Training Progress:  46%|████████████████████████▊                             | 4593/10000 [1:01:00<4:03:09,  2.70s/it]

Step 4592/10000, Loss: 1.2054
Step 4593/10000, Loss: 1.2977


Training Progress:  46%|████████████████████████▊                             | 4595/10000 [1:01:01<2:07:05,  1.41s/it]

Step 4594/10000, Loss: 1.1969
Step 4595/10000, Loss: 1.1986


Training Progress:  46%|████████████████████████▊                             | 4597/10000 [1:01:01<1:09:57,  1.29it/s]

Step 4596/10000, Loss: 1.2348
Step 4597/10000, Loss: 1.2359


Training Progress:  46%|█████████████████████████▊                              | 4599/10000 [1:01:02<42:15,  2.13it/s]

Step 4598/10000, Loss: 1.1416
Step 4599/10000, Loss: 1.1741


Training Progress:  46%|█████████████████████████▊                              | 4600/10000 [1:01:02<33:50,  2.66it/s]

Step 4600/10000, Loss: 1.1586
Step 4601/10000, Loss: 1.1043


Training Progress:  46%|████████████████████████▍                            | 4601/10000 [1:01:24<10:18:28,  6.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4601_loss1.1043_20250117_135306.pt

New best loss: 1.1043


Training Progress:  46%|████████████████████████▊                             | 4603/10000 [1:01:24<5:15:57,  3.51s/it]

Step 4602/10000, Loss: 1.2114
Step 4603/10000, Loss: 1.2619


Training Progress:  46%|████████████████████████▊                             | 4605/10000 [1:01:25<2:42:45,  1.81s/it]

Step 4604/10000, Loss: 1.2852
Step 4605/10000, Loss: 1.2527


Training Progress:  46%|████████████████████████▉                             | 4607/10000 [1:01:25<1:27:24,  1.03it/s]

Step 4606/10000, Loss: 1.2378
Step 4607/10000, Loss: 1.1535


Training Progress:  46%|█████████████████████████▊                              | 4609/10000 [1:01:25<50:21,  1.78it/s]

Step 4608/10000, Loss: 1.3574
Step 4609/10000, Loss: 1.2145


Training Progress:  46%|█████████████████████████▊                              | 4611/10000 [1:01:26<32:40,  2.75it/s]

Step 4610/10000, Loss: 1.2073
Step 4611/10000, Loss: 1.2285


Training Progress:  46%|█████████████████████████▊                              | 4613/10000 [1:01:26<23:34,  3.81it/s]

Step 4612/10000, Loss: 1.2292
Step 4613/10000, Loss: 1.2224


Training Progress:  46%|█████████████████████████▊                              | 4615/10000 [1:01:26<19:32,  4.59it/s]

Step 4614/10000, Loss: 1.1685
Step 4615/10000, Loss: 1.3043


Training Progress:  46%|█████████████████████████▊                              | 4617/10000 [1:01:27<17:14,  5.20it/s]

Step 4616/10000, Loss: 1.3145
Step 4617/10000, Loss: 1.3011


Training Progress:  46%|█████████████████████████▊                              | 4619/10000 [1:01:27<16:25,  5.46it/s]

Step 4618/10000, Loss: 1.3363
Step 4619/10000, Loss: 1.2685


Training Progress:  46%|█████████████████████████▉                              | 4621/10000 [1:01:27<15:43,  5.70it/s]

Step 4620/10000, Loss: 1.2863
Step 4621/10000, Loss: 1.2799


Training Progress:  46%|█████████████████████████▉                              | 4623/10000 [1:01:28<15:38,  5.73it/s]

Step 4622/10000, Loss: 1.2259
Step 4623/10000, Loss: 1.3258


Training Progress:  46%|█████████████████████████▉                              | 4625/10000 [1:01:28<15:17,  5.86it/s]

Step 4624/10000, Loss: 1.2365
Step 4625/10000, Loss: 1.2130


Training Progress:  46%|█████████████████████████▉                              | 4627/10000 [1:01:28<15:26,  5.80it/s]

Step 4626/10000, Loss: 1.2684
Step 4627/10000, Loss: 1.2602


Training Progress:  46%|█████████████████████████▉                              | 4629/10000 [1:01:29<15:04,  5.94it/s]

Step 4628/10000, Loss: 1.2392
Step 4629/10000, Loss: 1.2160


Training Progress:  46%|█████████████████████████▉                              | 4631/10000 [1:01:29<15:18,  5.85it/s]

Step 4630/10000, Loss: 1.2886
Step 4631/10000, Loss: 1.2986


Training Progress:  46%|█████████████████████████▉                              | 4633/10000 [1:01:29<14:58,  5.97it/s]

Step 4632/10000, Loss: 1.2520
Step 4633/10000, Loss: 1.1712


Training Progress:  46%|█████████████████████████▉                              | 4635/10000 [1:01:30<15:13,  5.87it/s]

Step 4634/10000, Loss: 1.2898
Step 4635/10000, Loss: 1.2915


Training Progress:  46%|█████████████████████████▉                              | 4637/10000 [1:01:30<15:21,  5.82it/s]

Step 4636/10000, Loss: 1.2658
Step 4637/10000, Loss: 1.3729


Training Progress:  46%|█████████████████████████▉                              | 4639/10000 [1:01:30<15:09,  5.90it/s]

Step 4638/10000, Loss: 1.2778
Step 4639/10000, Loss: 1.2627


Training Progress:  46%|█████████████████████████▉                              | 4641/10000 [1:01:31<15:20,  5.82it/s]

Step 4640/10000, Loss: 1.1979
Step 4641/10000, Loss: 1.2933


Training Progress:  46%|██████████████████████████                              | 4643/10000 [1:01:31<15:04,  5.92it/s]

Step 4642/10000, Loss: 1.2679
Step 4643/10000, Loss: 1.2196


Training Progress:  46%|██████████████████████████                              | 4645/10000 [1:01:31<15:16,  5.84it/s]

Step 4644/10000, Loss: 1.2357
Step 4645/10000, Loss: 1.2567


Training Progress:  46%|██████████████████████████                              | 4647/10000 [1:01:32<14:57,  5.96it/s]

Step 4646/10000, Loss: 1.2587
Step 4647/10000, Loss: 1.3122


Training Progress:  46%|██████████████████████████                              | 4649/10000 [1:01:32<15:14,  5.85it/s]

Step 4648/10000, Loss: 1.3379
Step 4649/10000, Loss: 1.2322


Training Progress:  47%|██████████████████████████                              | 4651/10000 [1:01:32<15:20,  5.81it/s]

Step 4650/10000, Loss: 1.1369
Step 4651/10000, Loss: 1.2012


Training Progress:  47%|██████████████████████████                              | 4653/10000 [1:01:33<15:05,  5.90it/s]

Step 4652/10000, Loss: 1.1516
Step 4653/10000, Loss: 1.2578


Training Progress:  47%|██████████████████████████                              | 4655/10000 [1:01:33<15:18,  5.82it/s]

Step 4654/10000, Loss: 1.2297
Step 4655/10000, Loss: 1.1640
Step 4656/10000, Loss: 1.0829


Training Progress:  47%|█████████████████████████▏                            | 4656/10000 [1:01:50<7:29:22,  5.05s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4656_loss1.0829_20250117_135337.pt

New best loss: 1.0829


Training Progress:  47%|█████████████████████████▏                            | 4658/10000 [1:01:50<3:51:36,  2.60s/it]

Step 4657/10000, Loss: 1.2408
Step 4658/10000, Loss: 1.1781
Step 4659/10000, Loss: 1.0594


Training Progress:  47%|████████████████████████▋                            | 4659/10000 [1:02:12<12:14:06,  8.25s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4659_loss1.0594_20250117_135354.pt

New best loss: 1.0594


Training Progress:  47%|█████████████████████████▏                            | 4661/10000 [1:02:12<6:11:16,  4.17s/it]

Step 4660/10000, Loss: 1.1172
Step 4661/10000, Loss: 1.0804


Training Progress:  47%|█████████████████████████▏                            | 4663/10000 [1:02:12<3:09:47,  2.13s/it]

Step 4662/10000, Loss: 1.1385
Step 4663/10000, Loss: 1.1733


Training Progress:  47%|█████████████████████████▏                            | 4665/10000 [1:02:13<1:40:26,  1.13s/it]

Step 4664/10000, Loss: 1.2976
Step 4665/10000, Loss: 1.2620


Training Progress:  47%|██████████████████████████▏                             | 4667/10000 [1:02:13<57:04,  1.56it/s]

Step 4666/10000, Loss: 1.2072
Step 4667/10000, Loss: 1.1450


Training Progress:  47%|██████████████████████████▏                             | 4669/10000 [1:02:13<35:42,  2.49it/s]

Step 4668/10000, Loss: 1.1479
Step 4669/10000, Loss: 1.0680
Step 4670/10000, Loss: 1.0445


Training Progress:  47%|█████████████████████████▏                            | 4670/10000 [1:02:33<9:06:25,  6.15s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4670_loss1.0445_20250117_135418.pt

New best loss: 1.0445


Training Progress:  47%|█████████████████████████▏                            | 4672/10000 [1:02:34<4:41:02,  3.16s/it]

Step 4671/10000, Loss: 1.0730
Step 4672/10000, Loss: 1.0825


Training Progress:  47%|█████████████████████████▏                            | 4674/10000 [1:02:34<2:25:32,  1.64s/it]

Step 4673/10000, Loss: 1.2288
Step 4674/10000, Loss: 1.1946


Training Progress:  47%|█████████████████████████▎                            | 4676/10000 [1:02:34<1:18:45,  1.13it/s]

Step 4675/10000, Loss: 1.2830
Step 4676/10000, Loss: 1.1455


Training Progress:  47%|██████████████████████████▏                             | 4678/10000 [1:02:35<46:28,  1.91it/s]

Step 4677/10000, Loss: 1.1651
Step 4678/10000, Loss: 1.2020


Training Progress:  47%|██████████████████████████▏                             | 4680/10000 [1:02:35<30:13,  2.93it/s]

Step 4679/10000, Loss: 1.1253
Step 4680/10000, Loss: 1.0959


Training Progress:  47%|██████████████████████████▏                             | 4682/10000 [1:02:35<22:39,  3.91it/s]

Step 4681/10000, Loss: 1.1010
Step 4682/10000, Loss: 1.1340


Training Progress:  47%|██████████████████████████▏                             | 4684/10000 [1:02:36<18:41,  4.74it/s]

Step 4683/10000, Loss: 1.0887
Step 4684/10000, Loss: 1.1443


Training Progress:  47%|██████████████████████████▏                             | 4686/10000 [1:02:36<17:03,  5.19it/s]

Step 4685/10000, Loss: 1.1963
Step 4686/10000, Loss: 1.2087


Training Progress:  47%|██████████████████████████▎                             | 4688/10000 [1:02:36<15:55,  5.56it/s]

Step 4687/10000, Loss: 1.1728
Step 4688/10000, Loss: 1.1723


Training Progress:  47%|██████████████████████████▎                             | 4690/10000 [1:02:37<15:39,  5.65it/s]

Step 4689/10000, Loss: 1.0881
Step 4690/10000, Loss: 1.2712


Training Progress:  47%|██████████████████████████▎                             | 4692/10000 [1:02:37<15:14,  5.80it/s]

Step 4691/10000, Loss: 1.1616
Step 4692/10000, Loss: 1.1992


Training Progress:  47%|██████████████████████████▎                             | 4694/10000 [1:02:37<15:22,  5.75it/s]

Step 4693/10000, Loss: 1.2153
Step 4694/10000, Loss: 1.2045


Training Progress:  47%|██████████████████████████▎                             | 4696/10000 [1:02:38<15:08,  5.84it/s]

Step 4695/10000, Loss: 1.1613
Step 4696/10000, Loss: 1.1226


Training Progress:  47%|██████████████████████████▎                             | 4698/10000 [1:02:38<15:15,  5.79it/s]

Step 4697/10000, Loss: 1.2120
Step 4698/10000, Loss: 1.1867


Training Progress:  47%|██████████████████████████▎                             | 4700/10000 [1:02:38<14:56,  5.91it/s]

Step 4699/10000, Loss: 1.1902
Step 4700/10000, Loss: 1.1785


Training Progress:  47%|██████████████████████████▎                             | 4702/10000 [1:02:39<15:08,  5.83it/s]

Step 4701/10000, Loss: 1.1446
Step 4702/10000, Loss: 1.2049


Training Progress:  47%|██████████████████████████▎                             | 4704/10000 [1:02:39<14:59,  5.89it/s]

Step 4703/10000, Loss: 1.1950
Step 4704/10000, Loss: 1.1387


Training Progress:  47%|██████████████████████████▎                             | 4706/10000 [1:02:40<15:11,  5.81it/s]

Step 4705/10000, Loss: 1.2385
Step 4706/10000, Loss: 1.1618


Training Progress:  47%|██████████████████████████▎                             | 4708/10000 [1:02:40<14:58,  5.89it/s]

Step 4707/10000, Loss: 1.1563
Step 4708/10000, Loss: 1.2237


Training Progress:  47%|██████████████████████████▍                             | 4710/10000 [1:02:40<15:08,  5.82it/s]

Step 4709/10000, Loss: 1.1827
Step 4710/10000, Loss: 1.1641


Training Progress:  47%|██████████████████████████▍                             | 4712/10000 [1:02:41<14:58,  5.89it/s]

Step 4711/10000, Loss: 1.1343
Step 4712/10000, Loss: 1.2096


Training Progress:  47%|██████████████████████████▍                             | 4714/10000 [1:02:41<15:07,  5.82it/s]

Step 4713/10000, Loss: 1.2290
Step 4714/10000, Loss: 1.1776


Training Progress:  47%|██████████████████████████▍                             | 4716/10000 [1:02:41<14:48,  5.95it/s]

Step 4715/10000, Loss: 1.0644
Step 4716/10000, Loss: 1.2051


Training Progress:  47%|██████████████████████████▍                             | 4718/10000 [1:02:42<15:03,  5.85it/s]

Step 4717/10000, Loss: 1.2263
Step 4718/10000, Loss: 1.1970


Training Progress:  47%|██████████████████████████▍                             | 4720/10000 [1:02:42<15:11,  5.79it/s]

Step 4719/10000, Loss: 1.2800
Step 4720/10000, Loss: 1.2166


Training Progress:  47%|██████████████████████████▍                             | 4722/10000 [1:02:42<14:55,  5.89it/s]

Step 4721/10000, Loss: 1.1955
Step 4722/10000, Loss: 1.1129


Training Progress:  47%|██████████████████████████▍                             | 4724/10000 [1:02:43<15:06,  5.82it/s]

Step 4723/10000, Loss: 1.2076
Step 4724/10000, Loss: 1.1840


Training Progress:  47%|██████████████████████████▍                             | 4726/10000 [1:02:43<15:11,  5.79it/s]

Step 4725/10000, Loss: 1.1354
Step 4726/10000, Loss: 1.1326


Training Progress:  47%|██████████████████████████▍                             | 4728/10000 [1:02:43<15:02,  5.84it/s]

Step 4727/10000, Loss: 1.1873
Step 4728/10000, Loss: 1.2029


Training Progress:  47%|██████████████████████████▍                             | 4730/10000 [1:02:44<14:44,  5.96it/s]

Step 4729/10000, Loss: 1.2399
Step 4730/10000, Loss: 1.2544


Training Progress:  47%|██████████████████████████▍                             | 4732/10000 [1:02:44<15:00,  5.85it/s]

Step 4731/10000, Loss: 1.1753
Step 4732/10000, Loss: 1.0904


Training Progress:  47%|██████████████████████████▌                             | 4734/10000 [1:02:44<14:42,  5.97it/s]

Step 4733/10000, Loss: 1.1815
Step 4734/10000, Loss: 1.0556


Training Progress:  47%|██████████████████████████▌                             | 4736/10000 [1:02:45<15:07,  5.80it/s]

Step 4735/10000, Loss: 1.1567
Step 4736/10000, Loss: 1.1408


Training Progress:  47%|██████████████████████████▌                             | 4737/10000 [1:02:45<14:59,  5.85it/s]

Step 4737/10000, Loss: 1.0575
Step 4738/10000, Loss: 1.0096


Training Progress:  47%|█████████████████████████▌                            | 4738/10000 [1:03:00<6:48:57,  4.66s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4738_loss1.0096_20250117_135449.pt

New best loss: 1.0096


Training Progress:  47%|█████████████████████████▌                            | 4740/10000 [1:03:01<3:32:24,  2.42s/it]

Step 4739/10000, Loss: 1.1376
Step 4740/10000, Loss: 1.0802
Step 4741/10000, Loss: 1.0066


Training Progress:  47%|█████████████████████████▏                           | 4741/10000 [1:03:22<11:55:49,  8.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4741_loss1.0066_20250117_135505.pt

New best loss: 1.0066


Training Progress:  47%|█████████████████████████▌                            | 4742/10000 [1:03:23<8:32:40,  5.85s/it]

Step 4742/10000, Loss: 1.0253
Step 4743/10000, Loss: 1.0014


Training Progress:  47%|█████████████████████████▏                           | 4743/10000 [1:03:44<15:18:05, 10.48s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4743_loss1.0014_20250117_135527.pt

New best loss: 1.0014


Training Progress:  47%|█████████████████████████▌                            | 4745/10000 [1:03:44<7:43:38,  5.29s/it]

Step 4744/10000, Loss: 1.0268
Step 4745/10000, Loss: 1.0667


Training Progress:  47%|█████████████████████████▋                            | 4747/10000 [1:03:45<3:54:30,  2.68s/it]

Step 4746/10000, Loss: 1.1901
Step 4747/10000, Loss: 1.1235


Training Progress:  47%|█████████████████████████▋                            | 4749/10000 [1:03:45<2:02:41,  1.40s/it]

Step 4748/10000, Loss: 1.1005
Step 4749/10000, Loss: 1.0518


Training Progress:  48%|█████████████████████████▋                            | 4751/10000 [1:03:46<1:07:36,  1.29it/s]

Step 4750/10000, Loss: 1.0882
Step 4751/10000, Loss: 1.0057


Training Progress:  48%|██████████████████████████▌                             | 4753/10000 [1:03:46<40:31,  2.16it/s]

Step 4752/10000, Loss: 1.0242
Step 4753/10000, Loss: 1.0117


Training Progress:  48%|██████████████████████████▋                             | 4755/10000 [1:03:46<27:40,  3.16it/s]

Step 4754/10000, Loss: 1.0078
Step 4755/10000, Loss: 1.1020


Training Progress:  48%|██████████████████████████▋                             | 4757/10000 [1:03:47<21:01,  4.15it/s]

Step 4756/10000, Loss: 1.1221
Step 4757/10000, Loss: 1.1943


Training Progress:  48%|██████████████████████████▋                             | 4759/10000 [1:03:47<17:58,  4.86it/s]

Step 4758/10000, Loss: 1.0811
Step 4759/10000, Loss: 1.1059


Training Progress:  48%|██████████████████████████▋                             | 4761/10000 [1:03:47<16:23,  5.33it/s]

Step 4760/10000, Loss: 1.1378
Step 4761/10000, Loss: 1.0656


Training Progress:  48%|██████████████████████████▋                             | 4763/10000 [1:03:48<15:23,  5.67it/s]

Step 4762/10000, Loss: 1.0297
Step 4763/10000, Loss: 1.0562


Training Progress:  48%|██████████████████████████▋                             | 4764/10000 [1:03:48<15:23,  5.67it/s]

Step 4764/10000, Loss: 1.0763
Step 4765/10000, Loss: 0.9791


Training Progress:  48%|█████████████████████████▋                            | 4765/10000 [1:04:07<8:48:14,  6.05s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4765_loss0.9791_20250117_135552.pt

New best loss: 0.9791


Training Progress:  48%|█████████████████████████▋                            | 4767/10000 [1:04:08<4:31:05,  3.11s/it]

Step 4766/10000, Loss: 1.0686
Step 4767/10000, Loss: 1.1140


Training Progress:  48%|█████████████████████████▊                            | 4769/10000 [1:04:08<2:20:13,  1.61s/it]

Step 4768/10000, Loss: 1.1663
Step 4769/10000, Loss: 1.1345


Training Progress:  48%|█████████████████████████▊                            | 4771/10000 [1:04:09<1:16:02,  1.15it/s]

Step 4770/10000, Loss: 1.1124
Step 4771/10000, Loss: 1.0259


Training Progress:  48%|██████████████████████████▋                             | 4773/10000 [1:04:09<45:00,  1.94it/s]

Step 4772/10000, Loss: 1.2165
Step 4773/10000, Loss: 1.1215


Training Progress:  48%|██████████████████████████▋                             | 4775/10000 [1:04:09<29:27,  2.96it/s]

Step 4774/10000, Loss: 1.1168
Step 4775/10000, Loss: 1.1316


Training Progress:  48%|██████████████████████████▊                             | 4777/10000 [1:04:10<22:11,  3.92it/s]

Step 4776/10000, Loss: 1.1063
Step 4777/10000, Loss: 1.0796


Training Progress:  48%|██████████████████████████▊                             | 4779/10000 [1:04:10<18:20,  4.74it/s]

Step 4778/10000, Loss: 1.0582
Step 4779/10000, Loss: 1.1618


Training Progress:  48%|██████████████████████████▊                             | 4781/10000 [1:04:10<16:19,  5.33it/s]

Step 4780/10000, Loss: 1.1427
Step 4781/10000, Loss: 1.1653


Training Progress:  48%|██████████████████████████▊                             | 4783/10000 [1:04:11<15:43,  5.53it/s]

Step 4782/10000, Loss: 1.0911
Step 4783/10000, Loss: 1.1086


Training Progress:  48%|██████████████████████████▊                             | 4785/10000 [1:04:11<15:01,  5.78it/s]

Step 4784/10000, Loss: 1.1540
Step 4785/10000, Loss: 1.1175


Training Progress:  48%|██████████████████████████▊                             | 4787/10000 [1:04:11<15:13,  5.70it/s]

Step 4786/10000, Loss: 1.0835
Step 4787/10000, Loss: 1.1973


Training Progress:  48%|██████████████████████████▊                             | 4789/10000 [1:04:12<15:03,  5.77it/s]

Step 4788/10000, Loss: 1.1168
Step 4789/10000, Loss: 1.1145


Training Progress:  48%|██████████████████████████▊                             | 4791/10000 [1:04:12<15:01,  5.78it/s]

Step 4790/10000, Loss: 1.1674
Step 4791/10000, Loss: 1.1342


Training Progress:  48%|██████████████████████████▊                             | 4793/10000 [1:04:13<14:54,  5.82it/s]

Step 4792/10000, Loss: 1.0833
Step 4793/10000, Loss: 1.0566


Training Progress:  48%|██████████████████████████▊                             | 4795/10000 [1:04:13<14:43,  5.89it/s]

Step 4794/10000, Loss: 1.1175
Step 4795/10000, Loss: 1.1110


Training Progress:  48%|██████████████████████████▊                             | 4797/10000 [1:04:13<14:55,  5.81it/s]

Step 4796/10000, Loss: 1.1387
Step 4797/10000, Loss: 0.9999


Training Progress:  48%|██████████████████████████▊                             | 4799/10000 [1:04:14<14:45,  5.88it/s]

Step 4798/10000, Loss: 1.1186
Step 4799/10000, Loss: 1.1688


Training Progress:  48%|██████████████████████████▉                             | 4801/10000 [1:04:14<14:55,  5.81it/s]

Step 4800/10000, Loss: 1.1433
Step 4801/10000, Loss: 1.2162


Training Progress:  48%|██████████████████████████▉                             | 4803/10000 [1:04:14<14:43,  5.88it/s]

Step 4802/10000, Loss: 1.1215
Step 4803/10000, Loss: 1.1472


Training Progress:  48%|██████████████████████████▉                             | 4805/10000 [1:04:15<14:30,  5.97it/s]

Step 4804/10000, Loss: 1.0738
Step 4805/10000, Loss: 1.1160


Training Progress:  48%|██████████████████████████▉                             | 4807/10000 [1:04:15<14:53,  5.81it/s]

Step 4806/10000, Loss: 1.1329
Step 4807/10000, Loss: 1.0368


Training Progress:  48%|██████████████████████████▉                             | 4809/10000 [1:04:15<14:30,  5.96it/s]

Step 4808/10000, Loss: 1.0703
Step 4809/10000, Loss: 1.0965


Training Progress:  48%|██████████████████████████▉                             | 4811/10000 [1:04:16<14:49,  5.83it/s]

Step 4810/10000, Loss: 1.1085
Step 4811/10000, Loss: 1.1555


Training Progress:  48%|██████████████████████████▉                             | 4813/10000 [1:04:16<14:34,  5.93it/s]

Step 4812/10000, Loss: 1.2156
Step 4813/10000, Loss: 1.0614


Training Progress:  48%|██████████████████████████▉                             | 4815/10000 [1:04:16<14:49,  5.83it/s]

Step 4814/10000, Loss: 1.0128
Step 4815/10000, Loss: 1.1466


Training Progress:  48%|██████████████████████████▉                             | 4817/10000 [1:04:17<14:47,  5.84it/s]

Step 4816/10000, Loss: 1.0279
Step 4817/10000, Loss: 1.0937


Training Progress:  48%|██████████████████████████▉                             | 4818/10000 [1:04:17<14:44,  5.86it/s]

Step 4818/10000, Loss: 1.0996
Step 4819/10000, Loss: 0.9682


Training Progress:  48%|██████████████████████████                            | 4819/10000 [1:04:31<6:28:18,  4.50s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4819_loss0.9682_20250117_135621.pt

New best loss: 0.9682
Step 4820/10000, Loss: 0.9500


Training Progress:  48%|█████████████████████████▌                           | 4820/10000 [1:04:53<13:59:43,  9.73s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4820_loss0.9500_20250117_135636.pt

New best loss: 0.9500


Training Progress:  48%|██████████████████████████                            | 4822/10000 [1:04:54<7:03:08,  4.90s/it]

Step 4821/10000, Loss: 1.0441
Step 4822/10000, Loss: 1.0046
Step 4823/10000, Loss: 0.9297


Training Progress:  48%|█████████████████████████▌                           | 4823/10000 [1:05:15<13:58:38,  9.72s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4823_loss0.9297_20250117_135658.pt

New best loss: 0.9297


Training Progress:  48%|██████████████████████████                            | 4825/10000 [1:05:15<7:03:35,  4.91s/it]

Step 4824/10000, Loss: 0.9483
Step 4825/10000, Loss: 0.9445


Training Progress:  48%|██████████████████████████                            | 4827/10000 [1:05:16<3:34:40,  2.49s/it]

Step 4826/10000, Loss: 0.9499
Step 4827/10000, Loss: 0.9890


Training Progress:  48%|██████████████████████████                            | 4829/10000 [1:05:16<1:52:45,  1.31s/it]

Step 4828/10000, Loss: 1.1328
Step 4829/10000, Loss: 1.0341


Training Progress:  48%|██████████████████████████                            | 4831/10000 [1:05:16<1:02:52,  1.37it/s]

Step 4830/10000, Loss: 1.0157
Step 4831/10000, Loss: 0.9689


Training Progress:  48%|███████████████████████████                             | 4833/10000 [1:05:17<38:10,  2.26it/s]

Step 4832/10000, Loss: 0.9900
Step 4833/10000, Loss: 0.9335


Training Progress:  48%|███████████████████████████                             | 4835/10000 [1:05:17<25:54,  3.32it/s]

Step 4834/10000, Loss: 0.9476
Step 4835/10000, Loss: 0.9910


Training Progress:  48%|███████████████████████████                             | 4837/10000 [1:05:18<20:20,  4.23it/s]

Step 4836/10000, Loss: 0.9944
Step 4837/10000, Loss: 1.0422


Training Progress:  48%|███████████████████████████                             | 4839/10000 [1:05:18<17:22,  4.95it/s]

Step 4838/10000, Loss: 1.0621
Step 4839/10000, Loss: 1.0568


Training Progress:  48%|███████████████████████████                             | 4841/10000 [1:05:18<16:00,  5.37it/s]

Step 4840/10000, Loss: 0.9789
Step 4841/10000, Loss: 0.9952


Training Progress:  48%|███████████████████████████                             | 4843/10000 [1:05:19<15:26,  5.57it/s]

Step 4842/10000, Loss: 1.0432
Step 4843/10000, Loss: 1.0046


Training Progress:  48%|███████████████████████████▏                            | 4845/10000 [1:05:19<14:53,  5.77it/s]

Step 4844/10000, Loss: 0.9699
Step 4845/10000, Loss: 1.0186


Training Progress:  48%|███████████████████████████▏                            | 4847/10000 [1:05:19<14:53,  5.77it/s]

Step 4846/10000, Loss: 1.0506
Step 4847/10000, Loss: 0.9970


Training Progress:  48%|███████████████████████████▏                            | 4849/10000 [1:05:20<14:34,  5.89it/s]

Step 4848/10000, Loss: 1.0402
Step 4849/10000, Loss: 1.0625


Training Progress:  49%|███████████████████████████▏                            | 4851/10000 [1:05:20<14:40,  5.85it/s]

Step 4850/10000, Loss: 1.0918
Step 4851/10000, Loss: 1.0721


Training Progress:  49%|███████████████████████████▏                            | 4853/10000 [1:05:20<14:22,  5.97it/s]

Step 4852/10000, Loss: 1.0529
Step 4853/10000, Loss: 1.0034


Training Progress:  49%|███████████████████████████▏                            | 4855/10000 [1:05:21<14:39,  5.85it/s]

Step 4854/10000, Loss: 1.1281
Step 4855/10000, Loss: 1.0416


Training Progress:  49%|███████████████████████████▏                            | 4857/10000 [1:05:21<14:20,  5.98it/s]

Step 4856/10000, Loss: 1.0252
Step 4857/10000, Loss: 1.0505


Training Progress:  49%|███████████████████████████▏                            | 4859/10000 [1:05:21<14:39,  5.84it/s]

Step 4858/10000, Loss: 1.0479
Step 4859/10000, Loss: 1.0006


Training Progress:  49%|███████████████████████████▏                            | 4861/10000 [1:05:22<14:22,  5.96it/s]

Step 4860/10000, Loss: 0.9836
Step 4861/10000, Loss: 1.1151


Training Progress:  49%|███████████████████████████▏                            | 4863/10000 [1:05:22<14:39,  5.84it/s]

Step 4862/10000, Loss: 1.0748
Step 4863/10000, Loss: 1.0551


Training Progress:  49%|███████████████████████████▏                            | 4865/10000 [1:05:22<14:23,  5.95it/s]

Step 4864/10000, Loss: 1.0470
Step 4865/10000, Loss: 1.0485


Training Progress:  49%|███████████████████████████▎                            | 4867/10000 [1:05:23<14:39,  5.84it/s]

Step 4866/10000, Loss: 1.1130
Step 4867/10000, Loss: 1.0673


Training Progress:  49%|███████████████████████████▎                            | 4869/10000 [1:05:23<14:33,  5.88it/s]

Step 4868/10000, Loss: 1.0349
Step 4869/10000, Loss: 1.1238


Training Progress:  49%|███████████████████████████▎                            | 4871/10000 [1:05:23<14:42,  5.81it/s]

Step 4870/10000, Loss: 1.0445
Step 4871/10000, Loss: 1.0523


Training Progress:  49%|███████████████████████████▎                            | 4873/10000 [1:05:24<14:30,  5.89it/s]

Step 4872/10000, Loss: 1.0818
Step 4873/10000, Loss: 1.0964


Training Progress:  49%|███████████████████████████▎                            | 4875/10000 [1:05:24<14:42,  5.81it/s]

Step 4874/10000, Loss: 1.0657
Step 4875/10000, Loss: 1.0348


Training Progress:  49%|███████████████████████████▎                            | 4877/10000 [1:05:24<14:31,  5.88it/s]

Step 4876/10000, Loss: 1.0437
Step 4877/10000, Loss: 1.0759


Training Progress:  49%|███████████████████████████▎                            | 4879/10000 [1:05:25<14:27,  5.90it/s]

Step 4878/10000, Loss: 1.0493
Step 4879/10000, Loss: 0.9364


Training Progress:  49%|███████████████████████████▎                            | 4881/10000 [1:05:25<14:33,  5.86it/s]

Step 4880/10000, Loss: 1.0358
Step 4881/10000, Loss: 1.0931


Training Progress:  49%|███████████████████████████▎                            | 4883/10000 [1:05:25<14:19,  5.95it/s]

Step 4882/10000, Loss: 1.0521
Step 4883/10000, Loss: 1.1128


Training Progress:  49%|███████████████████████████▎                            | 4885/10000 [1:05:26<14:34,  5.85it/s]

Step 4884/10000, Loss: 1.0310
Step 4885/10000, Loss: 1.0630


Training Progress:  49%|███████████████████████████▎                            | 4887/10000 [1:05:26<14:18,  5.95it/s]

Step 4886/10000, Loss: 1.0406
Step 4887/10000, Loss: 1.1010


Training Progress:  49%|███████████████████████████▍                            | 4889/10000 [1:05:26<14:35,  5.84it/s]

Step 4888/10000, Loss: 1.1056
Step 4889/10000, Loss: 1.0189


Training Progress:  49%|███████████████████████████▍                            | 4891/10000 [1:05:27<14:18,  5.95it/s]

Step 4890/10000, Loss: 1.0082
Step 4891/10000, Loss: 1.0221


Training Progress:  49%|███████████████████████████▍                            | 4893/10000 [1:05:27<14:31,  5.86it/s]

Step 4892/10000, Loss: 1.0287
Step 4893/10000, Loss: 1.0604


Training Progress:  49%|███████████████████████████▍                            | 4895/10000 [1:05:27<14:17,  5.95it/s]

Step 4894/10000, Loss: 1.1016
Step 4895/10000, Loss: 0.9841
Step 4896/10000, Loss: 0.9293


Training Progress:  49%|██████████████████████████▍                           | 4896/10000 [1:05:44<7:27:03,  5.26s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4896_loss0.9293_20250117_135731.pt

New best loss: 0.9293


Training Progress:  49%|██████████████████████████▍                           | 4898/10000 [1:05:45<3:50:28,  2.71s/it]

Step 4897/10000, Loss: 1.0574
Step 4898/10000, Loss: 0.9674


Training Progress:  49%|██████████████████████████▍                           | 4900/10000 [1:05:45<2:00:09,  1.41s/it]

Step 4899/10000, Loss: 1.0396
Step 4900/10000, Loss: 1.0222
Step 4901/10000, Loss: 0.9263


Training Progress:  49%|█████████████████████████▉                           | 4901/10000 [1:06:06<10:14:47,  7.23s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4901_loss0.9263_20250117_135749.pt

New best loss: 0.9263
Step 4902/10000, Loss: 0.9009


Training Progress:  49%|█████████████████████████▉                           | 4902/10000 [1:06:28<16:37:52, 11.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4902_loss0.9009_20250117_135811.pt

New best loss: 0.9009


Training Progress:  49%|██████████████████████████▍                           | 4904/10000 [1:06:29<8:20:19,  5.89s/it]

Step 4903/10000, Loss: 1.0140
Step 4904/10000, Loss: 0.9546
Step 4905/10000, Loss: 0.8959


Training Progress:  49%|█████████████████████████▉                           | 4905/10000 [1:06:50<14:37:49, 10.34s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4905_loss0.8959_20250117_135833.pt

New best loss: 0.8959
Step 4906/10000, Loss: 0.8813


Training Progress:  49%|██████████████████████████                           | 4906/10000 [1:07:11<19:26:42, 13.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4906_loss0.8813_20250117_135854.pt

New best loss: 0.8813
Step 4907/10000, Loss: 0.8454


Training Progress:  49%|██████████████████████████                           | 4907/10000 [1:07:32<22:25:38, 15.85s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4907_loss0.8454_20250117_135916.pt

New best loss: 0.8454


Training Progress:  49%|██████████████████████████                           | 4909/10000 [1:07:33<11:11:10,  7.91s/it]

Step 4908/10000, Loss: 0.8988
Step 4909/10000, Loss: 0.9004


Training Progress:  49%|██████████████████████████▌                           | 4911/10000 [1:07:33<5:35:52,  3.96s/it]

Step 4910/10000, Loss: 1.0564
Step 4911/10000, Loss: 0.9710


Training Progress:  49%|██████████████████████████▌                           | 4913/10000 [1:07:34<2:51:56,  2.03s/it]

Step 4912/10000, Loss: 0.9681
Step 4913/10000, Loss: 0.9218


Training Progress:  49%|██████████████████████████▌                           | 4915/10000 [1:07:34<1:31:43,  1.08s/it]

Step 4914/10000, Loss: 0.9402
Step 4915/10000, Loss: 0.8859


Training Progress:  49%|███████████████████████████▌                            | 4917/10000 [1:07:34<52:08,  1.62it/s]

Step 4916/10000, Loss: 0.8760
Step 4917/10000, Loss: 0.8869


Training Progress:  49%|███████████████████████████▌                            | 4919/10000 [1:07:35<33:05,  2.56it/s]

Step 4918/10000, Loss: 0.9320
Step 4919/10000, Loss: 0.9667


Training Progress:  49%|███████████████████████████▌                            | 4921/10000 [1:07:35<23:25,  3.61it/s]

Step 4920/10000, Loss: 0.9857
Step 4921/10000, Loss: 1.0012


Training Progress:  49%|███████████████████████████▌                            | 4923/10000 [1:07:35<18:58,  4.46it/s]

Step 4922/10000, Loss: 0.9107
Step 4923/10000, Loss: 0.9348


Training Progress:  49%|███████████████████████████▌                            | 4925/10000 [1:07:36<16:30,  5.12it/s]

Step 4924/10000, Loss: 0.9694
Step 4925/10000, Loss: 0.9272


Training Progress:  49%|███████████████████████████▌                            | 4927/10000 [1:07:36<15:37,  5.41it/s]

Step 4926/10000, Loss: 0.9012
Step 4927/10000, Loss: 0.9552


Training Progress:  49%|███████████████████████████▌                            | 4929/10000 [1:07:36<14:44,  5.73it/s]

Step 4928/10000, Loss: 0.9389
Step 4929/10000, Loss: 0.9193


Training Progress:  49%|███████████████████████████▌                            | 4931/10000 [1:07:37<14:42,  5.75it/s]

Step 4930/10000, Loss: 1.0056
Step 4931/10000, Loss: 1.0344


Training Progress:  49%|███████████████████████████▌                            | 4933/10000 [1:07:37<14:17,  5.91it/s]

Step 4932/10000, Loss: 1.0862
Step 4933/10000, Loss: 1.0637


Training Progress:  49%|███████████████████████████▋                            | 4935/10000 [1:07:37<14:31,  5.81it/s]

Step 4934/10000, Loss: 1.0093
Step 4935/10000, Loss: 0.9212


Training Progress:  49%|███████████████████████████▋                            | 4937/10000 [1:07:38<14:35,  5.78it/s]

Step 4936/10000, Loss: 1.0738
Step 4937/10000, Loss: 0.9499


Training Progress:  49%|███████████████████████████▋                            | 4939/10000 [1:07:38<14:20,  5.88it/s]

Step 4938/10000, Loss: 0.9580
Step 4939/10000, Loss: 1.0113


Training Progress:  49%|███████████████████████████▋                            | 4941/10000 [1:07:38<14:30,  5.81it/s]

Step 4940/10000, Loss: 0.9831
Step 4941/10000, Loss: 0.9544


Training Progress:  49%|███████████████████████████▋                            | 4943/10000 [1:07:39<14:20,  5.87it/s]

Step 4942/10000, Loss: 0.9517
Step 4943/10000, Loss: 1.0583


Training Progress:  49%|███████████████████████████▋                            | 4945/10000 [1:07:39<14:11,  5.94it/s]

Step 4944/10000, Loss: 1.0233
Step 4945/10000, Loss: 1.0035


Training Progress:  49%|███████████████████████████▋                            | 4947/10000 [1:07:39<14:30,  5.80it/s]

Step 4946/10000, Loss: 0.9963
Step 4947/10000, Loss: 0.9683


Training Progress:  49%|███████████████████████████▋                            | 4949/10000 [1:07:40<14:27,  5.82it/s]

Step 4948/10000, Loss: 1.0009
Step 4949/10000, Loss: 0.9874


Training Progress:  50%|███████████████████████████▋                            | 4951/10000 [1:07:40<14:06,  5.97it/s]

Step 4950/10000, Loss: 0.9514
Step 4951/10000, Loss: 1.0322


Training Progress:  50%|███████████████████████████▋                            | 4953/10000 [1:07:40<14:27,  5.82it/s]

Step 4952/10000, Loss: 0.9824
Step 4953/10000, Loss: 0.9770


Training Progress:  50%|███████████████████████████▋                            | 4955/10000 [1:07:41<14:19,  5.87it/s]

Step 4954/10000, Loss: 1.0353
Step 4955/10000, Loss: 1.0070


Training Progress:  50%|███████████████████████████▊                            | 4957/10000 [1:07:41<14:08,  5.94it/s]

Step 4956/10000, Loss: 1.0026
Step 4957/10000, Loss: 0.9438


Training Progress:  50%|███████████████████████████▊                            | 4959/10000 [1:07:41<14:26,  5.82it/s]

Step 4958/10000, Loss: 1.0019
Step 4959/10000, Loss: 1.0224


Training Progress:  50%|███████████████████████████▊                            | 4961/10000 [1:07:42<14:19,  5.87it/s]

Step 4960/10000, Loss: 0.9901
Step 4961/10000, Loss: 0.8871


Training Progress:  50%|███████████████████████████▊                            | 4963/10000 [1:07:42<14:08,  5.93it/s]

Step 4962/10000, Loss: 0.9962
Step 4963/10000, Loss: 1.0169


Training Progress:  50%|███████████████████████████▊                            | 4965/10000 [1:07:42<14:26,  5.81it/s]

Step 4964/10000, Loss: 0.9884
Step 4965/10000, Loss: 1.0702


Training Progress:  50%|███████████████████████████▊                            | 4967/10000 [1:07:43<14:18,  5.86it/s]

Step 4966/10000, Loss: 0.9425
Step 4967/10000, Loss: 0.9973


Training Progress:  50%|███████████████████████████▊                            | 4969/10000 [1:07:43<14:15,  5.88it/s]

Step 4968/10000, Loss: 0.9529
Step 4969/10000, Loss: 1.0177


Training Progress:  50%|███████████████████████████▊                            | 4971/10000 [1:07:43<14:24,  5.82it/s]

Step 4970/10000, Loss: 1.0224
Step 4971/10000, Loss: 0.9459


Training Progress:  50%|███████████████████████████▊                            | 4973/10000 [1:07:44<14:09,  5.91it/s]

Step 4972/10000, Loss: 0.9733
Step 4973/10000, Loss: 0.9778


Training Progress:  50%|███████████████████████████▊                            | 4975/10000 [1:07:44<14:26,  5.80it/s]

Step 4974/10000, Loss: 0.9979
Step 4975/10000, Loss: 0.9814


Training Progress:  50%|███████████████████████████▊                            | 4977/10000 [1:07:44<14:15,  5.87it/s]

Step 4976/10000, Loss: 1.0751
Step 4977/10000, Loss: 0.9302


Training Progress:  50%|███████████████████████████▉                            | 4979/10000 [1:07:45<14:05,  5.94it/s]

Step 4978/10000, Loss: 0.8581
Step 4979/10000, Loss: 0.9704


Training Progress:  50%|███████████████████████████▉                            | 4981/10000 [1:07:45<14:00,  5.97it/s]

Step 4980/10000, Loss: 0.8687
Step 4981/10000, Loss: 0.9724


Training Progress:  50%|███████████████████████████▉                            | 4983/10000 [1:07:45<14:21,  5.82it/s]

Step 4982/10000, Loss: 0.9558
Step 4983/10000, Loss: 0.8896


Training Progress:  50%|███████████████████████████▉                            | 4985/10000 [1:07:46<14:15,  5.86it/s]

Step 4984/10000, Loss: 0.8480
Step 4985/10000, Loss: 0.9614


Training Progress:  50%|███████████████████████████▉                            | 4986/10000 [1:07:46<14:23,  5.81it/s]

Step 4986/10000, Loss: 0.9045
Step 4987/10000, Loss: 0.8387


Training Progress:  50%|██████████████████████████▉                           | 4987/10000 [1:08:02<6:56:18,  4.98s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4987_loss0.8387_20250117_135950.pt

New best loss: 0.8387
Step 4988/10000, Loss: 0.8285


Training Progress:  50%|██████████████████████████▍                          | 4988/10000 [1:08:24<14:04:00, 10.10s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4988_loss0.8285_20250117_140006.pt

New best loss: 0.8285
Step 4989/10000, Loss: 0.8183


Training Progress:  50%|██████████████████████████▍                          | 4989/10000 [1:08:46<18:45:34, 13.48s/it]


Checkpoint saved: checkpoints\best\checkpoint_step4989_loss0.8183_20250117_140028.pt

New best loss: 0.8183


Training Progress:  50%|██████████████████████████▉                           | 4991/10000 [1:08:46<9:24:00,  6.76s/it]

Step 4990/10000, Loss: 0.8390
Step 4991/10000, Loss: 0.8413


Training Progress:  50%|██████████████████████████▉                           | 4993/10000 [1:08:47<4:43:23,  3.40s/it]

Step 4992/10000, Loss: 0.9698
Step 4993/10000, Loss: 0.9243


Training Progress:  50%|██████████████████████████▉                           | 4995/10000 [1:08:47<2:26:13,  1.75s/it]

Step 4994/10000, Loss: 0.8800
Step 4995/10000, Loss: 0.8443


Training Progress:  50%|██████████████████████████▉                           | 4997/10000 [1:08:47<1:18:45,  1.06it/s]

Step 4996/10000, Loss: 0.8491
Step 4997/10000, Loss: 0.8281


Training Progress:  50%|███████████████████████████▉                            | 4999/10000 [1:08:48<45:58,  1.81it/s]

Step 4998/10000, Loss: 0.8237
Step 4999/10000, Loss: 0.8366
Step 5000/10000, Loss: 0.8379


Training Progress:  50%|███████████████████████████                           | 5000/10000 [1:09:09<9:30:45,  6.85s/it]


Checkpoint saved: checkpoints\checkpoint_step5000_loss0.8379_20250117_140052.pt


Training Progress:  50%|███████████████████████████                           | 5002/10000 [1:09:10<4:52:43,  3.51s/it]

Step 5001/10000, Loss: 0.9354
Step 5002/10000, Loss: 0.9130


Training Progress:  50%|███████████████████████████                           | 5004/10000 [1:09:10<2:30:30,  1.81s/it]

Step 5003/10000, Loss: 0.9360
Step 5004/10000, Loss: 0.8621


Training Progress:  50%|███████████████████████████                           | 5006/10000 [1:09:10<1:21:01,  1.03it/s]

Step 5005/10000, Loss: 0.8985
Step 5006/10000, Loss: 0.9170


Training Progress:  50%|████████████████████████████                            | 5008/10000 [1:09:11<46:43,  1.78it/s]

Step 5007/10000, Loss: 0.9137
Step 5008/10000, Loss: 0.8804


Training Progress:  50%|████████████████████████████                            | 5010/10000 [1:09:11<30:17,  2.75it/s]

Step 5009/10000, Loss: 0.8597
Step 5010/10000, Loss: 0.8619


Training Progress:  50%|████████████████████████████                            | 5012/10000 [1:09:11<21:47,  3.81it/s]

Step 5011/10000, Loss: 0.8263
Step 5012/10000, Loss: 0.9132


Training Progress:  50%|████████████████████████████                            | 5014/10000 [1:09:12<18:03,  4.60it/s]

Step 5013/10000, Loss: 0.9765
Step 5014/10000, Loss: 0.9978


Training Progress:  50%|████████████████████████████                            | 5016/10000 [1:09:12<15:49,  5.25it/s]

Step 5015/10000, Loss: 0.9891
Step 5016/10000, Loss: 0.9829


Training Progress:  50%|████████████████████████████                            | 5018/10000 [1:09:13<15:09,  5.48it/s]

Step 5017/10000, Loss: 0.9044
Step 5018/10000, Loss: 1.0277


Training Progress:  50%|████████████████████████████                            | 5020/10000 [1:09:13<14:23,  5.77it/s]

Step 5019/10000, Loss: 0.9205
Step 5020/10000, Loss: 0.9085


Training Progress:  50%|████████████████████████████                            | 5022/10000 [1:09:13<14:24,  5.76it/s]

Step 5021/10000, Loss: 0.8883
Step 5022/10000, Loss: 0.9297


Training Progress:  50%|████████████████████████████▏                           | 5024/10000 [1:09:14<14:00,  5.92it/s]

Step 5023/10000, Loss: 0.9128
Step 5024/10000, Loss: 0.9024


Training Progress:  50%|████████████████████████████▏                           | 5026/10000 [1:09:14<14:14,  5.82it/s]

Step 5025/10000, Loss: 0.9818
Step 5026/10000, Loss: 0.9453


Training Progress:  50%|████████████████████████████▏                           | 5028/10000 [1:09:14<14:08,  5.86it/s]

Step 5027/10000, Loss: 0.9243
Step 5028/10000, Loss: 0.9454


Training Progress:  50%|████████████████████████████▏                           | 5030/10000 [1:09:15<13:57,  5.93it/s]

Step 5029/10000, Loss: 0.9327
Step 5030/10000, Loss: 0.9605


Training Progress:  50%|████████████████████████████▏                           | 5032/10000 [1:09:15<14:18,  5.79it/s]

Step 5031/10000, Loss: 0.9383
Step 5032/10000, Loss: 0.8781


Training Progress:  50%|████████████████████████████▏                           | 5034/10000 [1:09:15<14:06,  5.86it/s]

Step 5033/10000, Loss: 0.9493
Step 5034/10000, Loss: 0.9118


Training Progress:  50%|████████████████████████████▏                           | 5036/10000 [1:09:16<13:56,  5.93it/s]

Step 5035/10000, Loss: 0.9180
Step 5036/10000, Loss: 0.9555


Training Progress:  50%|████████████████████████████▏                           | 5038/10000 [1:09:16<14:14,  5.81it/s]

Step 5037/10000, Loss: 0.9317
Step 5038/10000, Loss: 0.9262


Training Progress:  50%|████████████████████████████▏                           | 5040/10000 [1:09:16<14:07,  5.85it/s]

Step 5039/10000, Loss: 0.8791
Step 5040/10000, Loss: 0.9392


Training Progress:  50%|████████████████████████████▏                           | 5042/10000 [1:09:17<14:07,  5.85it/s]

Step 5041/10000, Loss: 0.9374
Step 5042/10000, Loss: 0.9209


Training Progress:  50%|████████████████████████████▏                           | 5044/10000 [1:09:17<14:11,  5.82it/s]

Step 5043/10000, Loss: 0.8289
Step 5044/10000, Loss: 0.9088


Training Progress:  50%|████████████████████████████▎                           | 5046/10000 [1:09:17<14:04,  5.87it/s]

Step 5045/10000, Loss: 0.9479
Step 5046/10000, Loss: 0.9206


Training Progress:  50%|████████████████████████████▎                           | 5048/10000 [1:09:18<13:54,  5.94it/s]

Step 5047/10000, Loss: 0.9903
Step 5048/10000, Loss: 0.9085


Training Progress:  50%|████████████████████████████▎                           | 5050/10000 [1:09:18<14:10,  5.82it/s]

Step 5049/10000, Loss: 0.9508
Step 5050/10000, Loss: 0.8880


Training Progress:  51%|████████████████████████████▎                           | 5052/10000 [1:09:18<14:04,  5.86it/s]

Step 5051/10000, Loss: 0.9656
Step 5052/10000, Loss: 0.9543


Training Progress:  51%|████████████████████████████▎                           | 5054/10000 [1:09:19<14:00,  5.88it/s]

Step 5053/10000, Loss: 0.8817
Step 5054/10000, Loss: 0.8898


Training Progress:  51%|████████████████████████████▎                           | 5056/10000 [1:09:19<13:52,  5.94it/s]

Step 5055/10000, Loss: 0.9110
Step 5056/10000, Loss: 0.9275


Training Progress:  51%|████████████████████████████▎                           | 5058/10000 [1:09:19<14:10,  5.81it/s]

Step 5057/10000, Loss: 0.9175
Step 5058/10000, Loss: 0.9962


Training Progress:  51%|████████████████████████████▎                           | 5060/10000 [1:09:20<14:02,  5.86it/s]

Step 5059/10000, Loss: 0.8925
Step 5060/10000, Loss: 0.8367


Training Progress:  51%|████████████████████████████▎                           | 5061/10000 [1:09:20<14:10,  5.81it/s]

Step 5061/10000, Loss: 0.9036
Step 5062/10000, Loss: 0.7995


Training Progress:  51%|███████████████████████████▎                          | 5062/10000 [1:09:36<6:49:19,  4.97s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5062_loss0.7995_20250117_140124.pt

New best loss: 0.7995


Training Progress:  51%|███████████████████████████▎                          | 5064/10000 [1:09:37<3:31:41,  2.57s/it]

Step 5063/10000, Loss: 0.8991
Step 5064/10000, Loss: 0.8673


Training Progress:  51%|███████████████████████████▎                          | 5065/10000 [1:09:37<2:32:04,  1.85s/it]

Step 5065/10000, Loss: 0.8123
Step 5066/10000, Loss: 0.7913


Training Progress:  51%|██████████████████████████▊                          | 5066/10000 [1:10:00<11:15:40,  8.22s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5066_loss0.7913_20250117_140141.pt

New best loss: 0.7913


Training Progress:  51%|███████████████████████████▎                          | 5068/10000 [1:10:00<5:41:47,  4.16s/it]

Step 5067/10000, Loss: 0.9337
Step 5068/10000, Loss: 0.8549


Training Progress:  51%|███████████████████████████▎                          | 5069/10000 [1:10:01<4:03:07,  2.96s/it]

Step 5069/10000, Loss: 0.8059
Step 5070/10000, Loss: 0.7705


Training Progress:  51%|██████████████████████████▊                          | 5070/10000 [1:10:20<10:44:50,  7.85s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5070_loss0.7705_20250117_140205.pt

New best loss: 0.7705
Step 5071/10000, Loss: 0.7656


Training Progress:  51%|██████████████████████████▉                          | 5071/10000 [1:10:43<16:52:18, 12.32s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5071_loss0.7656_20250117_140224.pt

New best loss: 0.7656


Training Progress:  51%|███████████████████████████▍                          | 5073/10000 [1:10:43<8:27:44,  6.18s/it]

Step 5072/10000, Loss: 0.7784
Step 5073/10000, Loss: 0.8232


Training Progress:  51%|███████████████████████████▍                          | 5075/10000 [1:10:44<4:15:31,  3.11s/it]

Step 5074/10000, Loss: 0.9198
Step 5075/10000, Loss: 0.8756


Training Progress:  51%|███████████████████████████▍                          | 5077/10000 [1:10:44<2:12:29,  1.61s/it]

Step 5076/10000, Loss: 0.8215
Step 5077/10000, Loss: 0.7952


Training Progress:  51%|███████████████████████████▍                          | 5078/10000 [1:10:44<1:36:39,  1.18s/it]

Step 5078/10000, Loss: 0.7812
Step 5079/10000, Loss: 0.7596


Training Progress:  51%|███████████████████████████▍                          | 5079/10000 [1:11:04<9:31:32,  6.97s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5079_loss0.7596_20250117_140248.pt

New best loss: 0.7596


Training Progress:  51%|███████████████████████████▍                          | 5081/10000 [1:11:05<4:52:39,  3.57s/it]

Step 5080/10000, Loss: 0.7733
Step 5081/10000, Loss: 0.7902


Training Progress:  51%|███████████████████████████▍                          | 5083/10000 [1:11:05<2:30:21,  1.83s/it]

Step 5082/10000, Loss: 0.7884
Step 5083/10000, Loss: 0.8768


Training Progress:  51%|███████████████████████████▍                          | 5085/10000 [1:11:06<1:20:48,  1.01it/s]

Step 5084/10000, Loss: 0.8727
Step 5085/10000, Loss: 0.8744


Training Progress:  51%|████████████████████████████▍                           | 5087/10000 [1:11:06<46:28,  1.76it/s]

Step 5086/10000, Loss: 0.7745
Step 5087/10000, Loss: 0.8242


Training Progress:  51%|████████████████████████████▍                           | 5089/10000 [1:11:07<30:02,  2.72it/s]

Step 5088/10000, Loss: 0.8500
Step 5089/10000, Loss: 0.8342


Training Progress:  51%|████████████████████████████▌                           | 5091/10000 [1:11:07<21:36,  3.79it/s]

Step 5090/10000, Loss: 0.8215
Step 5091/10000, Loss: 0.8379


Training Progress:  51%|████████████████████████████▌                           | 5093/10000 [1:11:07<17:50,  4.58it/s]

Step 5092/10000, Loss: 0.8340
Step 5093/10000, Loss: 0.8115


Training Progress:  51%|████████████████████████████▌                           | 5095/10000 [1:11:08<15:37,  5.23it/s]

Step 5094/10000, Loss: 0.8542
Step 5095/10000, Loss: 0.9113


Training Progress:  51%|████████████████████████████▌                           | 5097/10000 [1:11:08<14:54,  5.48it/s]

Step 5096/10000, Loss: 0.8972
Step 5097/10000, Loss: 0.8833


Training Progress:  51%|████████████████████████████▌                           | 5099/10000 [1:11:08<14:11,  5.75it/s]

Step 5098/10000, Loss: 0.9068
Step 5099/10000, Loss: 0.8197


Training Progress:  51%|████████████████████████████▌                           | 5101/10000 [1:11:09<14:10,  5.76it/s]

Step 5100/10000, Loss: 1.0071
Step 5101/10000, Loss: 0.8857


Training Progress:  51%|████████████████████████████▌                           | 5103/10000 [1:11:09<13:49,  5.90it/s]

Step 5102/10000, Loss: 0.8715
Step 5103/10000, Loss: 0.8390


Training Progress:  51%|████████████████████████████▌                           | 5105/10000 [1:11:09<14:02,  5.81it/s]

Step 5104/10000, Loss: 0.8802
Step 5105/10000, Loss: 0.8927


Training Progress:  51%|████████████████████████████▌                           | 5107/10000 [1:11:10<13:43,  5.95it/s]

Step 5106/10000, Loss: 0.8153
Step 5107/10000, Loss: 0.9089


Training Progress:  51%|████████████████████████████▌                           | 5109/10000 [1:11:10<13:57,  5.84it/s]

Step 5108/10000, Loss: 0.8754
Step 5109/10000, Loss: 0.8851


Training Progress:  51%|████████████████████████████▌                           | 5111/10000 [1:11:10<13:42,  5.94it/s]

Step 5110/10000, Loss: 0.8975
Step 5111/10000, Loss: 0.8954


Training Progress:  51%|████████████████████████████▋                           | 5113/10000 [1:11:11<13:58,  5.83it/s]

Step 5112/10000, Loss: 0.8776
Step 5113/10000, Loss: 0.8781


Training Progress:  51%|████████████████████████████▋                           | 5115/10000 [1:11:11<13:42,  5.94it/s]

Step 5114/10000, Loss: 0.8182
Step 5115/10000, Loss: 0.8936


Training Progress:  51%|████████████████████████████▋                           | 5117/10000 [1:11:11<13:57,  5.83it/s]

Step 5116/10000, Loss: 0.8140
Step 5117/10000, Loss: 0.8646


Training Progress:  51%|████████████████████████████▋                           | 5119/10000 [1:11:12<13:40,  5.95it/s]

Step 5118/10000, Loss: 0.8815
Step 5119/10000, Loss: 0.8743


Training Progress:  51%|████████████████████████████▋                           | 5121/10000 [1:11:12<13:57,  5.82it/s]

Step 5120/10000, Loss: 0.8513
Step 5121/10000, Loss: 0.8355


Training Progress:  51%|████████████████████████████▋                           | 5123/10000 [1:11:12<13:40,  5.94it/s]

Step 5122/10000, Loss: 0.8871
Step 5123/10000, Loss: 0.9170


Training Progress:  51%|████████████████████████████▋                           | 5125/10000 [1:11:13<13:56,  5.83it/s]

Step 5124/10000, Loss: 0.8988
Step 5125/10000, Loss: 0.7691


Training Progress:  51%|████████████████████████████▋                           | 5127/10000 [1:11:13<14:02,  5.78it/s]

Step 5126/10000, Loss: 0.8680
Step 5127/10000, Loss: 0.8609


Training Progress:  51%|████████████████████████████▋                           | 5129/10000 [1:11:13<13:55,  5.83it/s]

Step 5128/10000, Loss: 0.8553
Step 5129/10000, Loss: 0.9077


Training Progress:  51%|████████████████████████████▋                           | 5131/10000 [1:11:14<13:45,  5.90it/s]

Step 5130/10000, Loss: 0.8483
Step 5131/10000, Loss: 0.8639


Training Progress:  51%|████████████████████████████▋                           | 5133/10000 [1:11:14<13:58,  5.80it/s]

Step 5132/10000, Loss: 0.8256
Step 5133/10000, Loss: 0.8812


Training Progress:  51%|████████████████████████████▊                           | 5135/10000 [1:11:14<13:45,  5.89it/s]

Step 5134/10000, Loss: 0.8825
Step 5135/10000, Loss: 0.8230


Training Progress:  51%|████████████████████████████▊                           | 5137/10000 [1:11:15<13:58,  5.80it/s]

Step 5136/10000, Loss: 0.8643
Step 5137/10000, Loss: 0.8646


Training Progress:  51%|████████████████████████████▊                           | 5139/10000 [1:11:15<13:46,  5.88it/s]

Step 5138/10000, Loss: 0.8450
Step 5139/10000, Loss: 0.8416


Training Progress:  51%|████████████████████████████▊                           | 5141/10000 [1:11:15<13:57,  5.80it/s]

Step 5140/10000, Loss: 0.9236
Step 5141/10000, Loss: 0.8471


Training Progress:  51%|████████████████████████████▊                           | 5143/10000 [1:11:16<13:39,  5.93it/s]

Step 5142/10000, Loss: 0.7652
Step 5143/10000, Loss: 0.8734
Step 5144/10000, Loss: 0.7541


Training Progress:  51%|███████████████████████████▊                          | 5144/10000 [1:11:32<6:36:08,  4.89s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5144_loss0.7541_20250117_140320.pt

New best loss: 0.7541


Training Progress:  51%|███████████████████████████▊                          | 5146/10000 [1:11:32<3:23:49,  2.52s/it]

Step 5145/10000, Loss: 0.8331
Step 5146/10000, Loss: 0.7928
Step 5147/10000, Loss: 0.7340


Training Progress:  51%|███████████████████████████▎                         | 5147/10000 [1:11:54<11:03:16,  8.20s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5147_loss0.7340_20250117_140336.pt

New best loss: 0.7340
Step 5148/10000, Loss: 0.7036


Training Progress:  51%|███████████████████████████▎                         | 5148/10000 [1:12:16<16:58:59, 12.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5148_loss0.7036_20250117_140358.pt

New best loss: 0.7036


Training Progress:  52%|███████████████████████████▊                          | 5150/10000 [1:12:17<8:31:46,  6.33s/it]

Step 5149/10000, Loss: 0.8650
Step 5150/10000, Loss: 0.7759


Training Progress:  52%|███████████████████████████▊                          | 5152/10000 [1:12:17<4:17:21,  3.19s/it]

Step 5151/10000, Loss: 0.7292
Step 5152/10000, Loss: 0.7259


Training Progress:  52%|███████████████████████████▊                          | 5154/10000 [1:12:18<2:13:13,  1.65s/it]

Step 5153/10000, Loss: 0.7139
Step 5154/10000, Loss: 0.7537


Training Progress:  52%|███████████████████████████▊                          | 5156/10000 [1:12:18<1:12:09,  1.12it/s]

Step 5155/10000, Loss: 0.7586
Step 5156/10000, Loss: 0.8123


Training Progress:  52%|████████████████████████████▉                           | 5158/10000 [1:12:19<42:29,  1.90it/s]

Step 5157/10000, Loss: 0.8109
Step 5158/10000, Loss: 0.7545


Training Progress:  52%|████████████████████████████▉                           | 5160/10000 [1:12:19<27:42,  2.91it/s]

Step 5159/10000, Loss: 0.7109
Step 5160/10000, Loss: 0.7251
Step 5161/10000, Loss: 0.6740


Training Progress:  52%|███████████████████████████▊                          | 5161/10000 [1:12:37<7:44:38,  5.76s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5161_loss0.6740_20250117_140423.pt

New best loss: 0.6740


Training Progress:  52%|███████████████████████████▉                          | 5163/10000 [1:12:38<3:59:58,  2.98s/it]

Step 5162/10000, Loss: 0.6938
Step 5163/10000, Loss: 0.6995


Training Progress:  52%|███████████████████████████▉                          | 5165/10000 [1:12:38<2:04:39,  1.55s/it]

Step 5164/10000, Loss: 0.7262
Step 5165/10000, Loss: 0.7933


Training Progress:  52%|███████████████████████████▉                          | 5167/10000 [1:12:39<1:07:49,  1.19it/s]

Step 5166/10000, Loss: 0.7794
Step 5167/10000, Loss: 0.8208


Training Progress:  52%|████████████████████████████▉                           | 5169/10000 [1:12:39<40:21,  1.99it/s]

Step 5168/10000, Loss: 0.7222
Step 5169/10000, Loss: 0.7573


Training Progress:  52%|████████████████████████████▉                           | 5171/10000 [1:12:39<26:43,  3.01it/s]

Step 5170/10000, Loss: 0.8013
Step 5171/10000, Loss: 0.7670


Training Progress:  52%|████████████████████████████▉                           | 5173/10000 [1:12:40<20:06,  4.00it/s]

Step 5172/10000, Loss: 0.7241
Step 5173/10000, Loss: 0.7384


Training Progress:  52%|████████████████████████████▉                           | 5175/10000 [1:12:40<16:35,  4.85it/s]

Step 5174/10000, Loss: 0.7523
Step 5175/10000, Loss: 0.7164


Training Progress:  52%|████████████████████████████▉                           | 5177/10000 [1:12:40<15:14,  5.27it/s]

Step 5176/10000, Loss: 0.7716
Step 5177/10000, Loss: 0.8684


Training Progress:  52%|█████████████████████████████                           | 5179/10000 [1:12:41<14:11,  5.66it/s]

Step 5178/10000, Loss: 0.8524
Step 5179/10000, Loss: 0.8220


Training Progress:  52%|█████████████████████████████                           | 5181/10000 [1:12:41<14:08,  5.68it/s]

Step 5180/10000, Loss: 0.8279
Step 5181/10000, Loss: 0.7423


Training Progress:  52%|█████████████████████████████                           | 5183/10000 [1:12:41<13:59,  5.74it/s]

Step 5182/10000, Loss: 0.8748
Step 5183/10000, Loss: 0.7791


Training Progress:  52%|█████████████████████████████                           | 5185/10000 [1:12:42<13:42,  5.85it/s]

Step 5184/10000, Loss: 0.7848
Step 5185/10000, Loss: 0.7931


Training Progress:  52%|█████████████████████████████                           | 5187/10000 [1:12:42<13:49,  5.80it/s]

Step 5186/10000, Loss: 0.8002
Step 5187/10000, Loss: 0.8050


Training Progress:  52%|█████████████████████████████                           | 5189/10000 [1:12:42<13:37,  5.89it/s]

Step 5188/10000, Loss: 0.7556
Step 5189/10000, Loss: 0.8299


Training Progress:  52%|█████████████████████████████                           | 5191/10000 [1:12:43<13:46,  5.82it/s]

Step 5190/10000, Loss: 0.7695
Step 5191/10000, Loss: 0.7700


Training Progress:  52%|█████████████████████████████                           | 5193/10000 [1:12:43<13:35,  5.90it/s]

Step 5192/10000, Loss: 0.7715
Step 5193/10000, Loss: 0.7861


Training Progress:  52%|█████████████████████████████                           | 5195/10000 [1:12:43<13:45,  5.82it/s]

Step 5194/10000, Loss: 0.7772
Step 5195/10000, Loss: 0.8003


Training Progress:  52%|█████████████████████████████                           | 5197/10000 [1:12:44<13:33,  5.90it/s]

Step 5196/10000, Loss: 0.7348
Step 5197/10000, Loss: 0.8196


Training Progress:  52%|█████████████████████████████                           | 5199/10000 [1:12:44<13:43,  5.83it/s]

Step 5198/10000, Loss: 0.7899
Step 5199/10000, Loss: 0.7766


Training Progress:  52%|█████████████████████████████▏                          | 5201/10000 [1:12:44<13:41,  5.84it/s]

Step 5200/10000, Loss: 0.8076
Step 5201/10000, Loss: 0.7946


Training Progress:  52%|█████████████████████████████▏                          | 5203/10000 [1:12:45<13:36,  5.88it/s]

Step 5202/10000, Loss: 0.7632
Step 5203/10000, Loss: 0.7603


Training Progress:  52%|█████████████████████████████▏                          | 5205/10000 [1:12:45<13:22,  5.98it/s]

Step 5204/10000, Loss: 0.7577
Step 5205/10000, Loss: 0.8087


Training Progress:  52%|█████████████████████████████▏                          | 5207/10000 [1:12:45<13:36,  5.87it/s]

Step 5206/10000, Loss: 0.8077
Step 5207/10000, Loss: 0.7101


Training Progress:  52%|█████████████████████████████▏                          | 5209/10000 [1:12:46<13:44,  5.81it/s]

Step 5208/10000, Loss: 0.7766
Step 5209/10000, Loss: 0.8169


Training Progress:  52%|█████████████████████████████▏                          | 5211/10000 [1:12:46<13:42,  5.82it/s]

Step 5210/10000, Loss: 0.7894
Step 5211/10000, Loss: 0.8201


Training Progress:  52%|█████████████████████████████▏                          | 5213/10000 [1:12:46<13:39,  5.84it/s]

Step 5212/10000, Loss: 0.7514
Step 5213/10000, Loss: 0.7761


Training Progress:  52%|█████████████████████████████▏                          | 5215/10000 [1:12:47<13:27,  5.92it/s]

Step 5214/10000, Loss: 0.7623
Step 5215/10000, Loss: 0.8135


Training Progress:  52%|█████████████████████████████▏                          | 5217/10000 [1:12:47<13:40,  5.83it/s]

Step 5216/10000, Loss: 0.8233
Step 5217/10000, Loss: 0.7407


Training Progress:  52%|█████████████████████████████▏                          | 5219/10000 [1:12:47<13:22,  5.96it/s]

Step 5218/10000, Loss: 0.7767
Step 5219/10000, Loss: 0.7646


Training Progress:  52%|█████████████████████████████▏                          | 5221/10000 [1:12:48<13:40,  5.82it/s]

Step 5220/10000, Loss: 0.7472
Step 5221/10000, Loss: 0.7726


Training Progress:  52%|█████████████████████████████▏                          | 5223/10000 [1:12:48<13:41,  5.81it/s]

Step 5222/10000, Loss: 0.8545
Step 5223/10000, Loss: 0.7452


Training Progress:  52%|█████████████████████████████▎                          | 5225/10000 [1:12:48<13:28,  5.90it/s]

Step 5224/10000, Loss: 0.6964
Step 5225/10000, Loss: 0.7886


Training Progress:  52%|█████████████████████████████▎                          | 5227/10000 [1:12:49<13:40,  5.82it/s]

Step 5226/10000, Loss: 0.7132
Step 5227/10000, Loss: 0.7760


Training Progress:  52%|█████████████████████████████▎                          | 5229/10000 [1:12:49<13:28,  5.90it/s]

Step 5228/10000, Loss: 0.7700
Step 5229/10000, Loss: 0.6934
Step 5230/10000, Loss: 0.6470


Training Progress:  52%|████████████████████████████▏                         | 5230/10000 [1:13:04<6:04:46,  4.59s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5230_loss0.6470_20250117_140453.pt

New best loss: 0.6470


Training Progress:  52%|████████████████████████████▎                         | 5232/10000 [1:13:05<3:08:23,  2.37s/it]

Step 5231/10000, Loss: 0.7822
Step 5232/10000, Loss: 0.6914


Training Progress:  52%|████████████████████████████▎                         | 5234/10000 [1:13:05<1:38:58,  1.25s/it]

Step 5233/10000, Loss: 0.6502
Step 5234/10000, Loss: 0.6507


Training Progress:  52%|█████████████████████████████▎                          | 5236/10000 [1:13:05<55:31,  1.43it/s]

Step 5235/10000, Loss: 0.6507
Step 5236/10000, Loss: 0.6960


Training Progress:  52%|█████████████████████████████▎                          | 5238/10000 [1:13:06<33:52,  2.34it/s]

Step 5237/10000, Loss: 0.6896
Step 5238/10000, Loss: 0.7542


Training Progress:  52%|█████████████████████████████▎                          | 5240/10000 [1:13:06<23:40,  3.35it/s]

Step 5239/10000, Loss: 0.7028
Step 5240/10000, Loss: 0.6804


Training Progress:  52%|█████████████████████████████▎                          | 5241/10000 [1:13:06<20:43,  3.83it/s]

Step 5241/10000, Loss: 0.6579
Step 5242/10000, Loss: 0.6414


Training Progress:  52%|████████████████████████████▎                         | 5242/10000 [1:13:25<7:53:26,  5.97s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5242_loss0.6414_20250117_140510.pt

New best loss: 0.6414
Step 5243/10000, Loss: 0.6364


Training Progress:  52%|███████████████████████████▊                         | 5243/10000 [1:13:49<14:59:08, 11.34s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5243_loss0.6364_20250117_140530.pt

New best loss: 0.6364


Training Progress:  52%|███████████████████████████▊                         | 5244/10000 [1:13:50<10:40:56,  8.09s/it]

Step 5244/10000, Loss: 0.6398
Step 5245/10000, Loss: 0.6352


Training Progress:  52%|███████████████████████████▊                         | 5245/10000 [1:14:09<15:14:16, 11.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5245_loss0.6352_20250117_140554.pt

New best loss: 0.6352
Step 5246/10000, Loss: 0.6279


Training Progress:  52%|███████████████████████████▊                         | 5246/10000 [1:14:31<19:11:43, 14.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5246_loss0.6279_20250117_140614.pt

New best loss: 0.6279


Training Progress:  52%|████████████████████████████▎                         | 5248/10000 [1:14:31<9:35:51,  7.27s/it]

Step 5247/10000, Loss: 0.7046
Step 5248/10000, Loss: 0.6839


Training Progress:  52%|████████████████████████████▎                         | 5250/10000 [1:14:32<4:49:06,  3.65s/it]

Step 5249/10000, Loss: 0.7178
Step 5250/10000, Loss: 0.6654


Training Progress:  53%|████████████████████████████▎                         | 5252/10000 [1:14:32<2:28:36,  1.88s/it]

Step 5251/10000, Loss: 0.6668
Step 5252/10000, Loss: 0.7297


Training Progress:  53%|████████████████████████████▎                         | 5254/10000 [1:14:33<1:19:28,  1.00s/it]

Step 5253/10000, Loss: 0.6939
Step 5254/10000, Loss: 0.6452


Training Progress:  53%|█████████████████████████████▍                          | 5256/10000 [1:14:33<45:55,  1.72it/s]

Step 5255/10000, Loss: 0.6716
Step 5256/10000, Loss: 0.6735
Step 5257/10000, Loss: 0.6213


Training Progress:  53%|████████████████████████████▍                         | 5257/10000 [1:14:54<8:53:02,  6.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5257_loss0.6213_20250117_140637.pt

New best loss: 0.6213


Training Progress:  53%|████████████████████████████▍                         | 5259/10000 [1:14:54<4:30:36,  3.42s/it]

Step 5258/10000, Loss: 0.7108
Step 5259/10000, Loss: 0.7730


Training Progress:  53%|████████████████████████████▍                         | 5261/10000 [1:14:55<2:19:34,  1.77s/it]

Step 5260/10000, Loss: 0.7762
Step 5261/10000, Loss: 0.7743


Training Progress:  53%|████████████████████████████▍                         | 5263/10000 [1:14:55<1:15:06,  1.05it/s]

Step 5262/10000, Loss: 0.7763
Step 5263/10000, Loss: 0.6897


Training Progress:  53%|█████████████████████████████▍                          | 5265/10000 [1:14:56<43:49,  1.80it/s]

Step 5264/10000, Loss: 0.7826
Step 5265/10000, Loss: 0.7602


Training Progress:  53%|█████████████████████████████▍                          | 5267/10000 [1:14:56<28:21,  2.78it/s]

Step 5266/10000, Loss: 0.7288
Step 5267/10000, Loss: 0.6896


Training Progress:  53%|█████████████████████████████▌                          | 5269/10000 [1:14:56<20:25,  3.86it/s]

Step 5268/10000, Loss: 0.7480
Step 5269/10000, Loss: 0.7443


Training Progress:  53%|█████████████████████████████▌                          | 5271/10000 [1:14:57<17:02,  4.62it/s]

Step 5270/10000, Loss: 0.7059
Step 5271/10000, Loss: 0.7551


Training Progress:  53%|█████████████████████████████▌                          | 5273/10000 [1:14:57<14:59,  5.25it/s]

Step 5272/10000, Loss: 0.7221
Step 5273/10000, Loss: 0.7321


Training Progress:  53%|█████████████████████████████▌                          | 5275/10000 [1:14:57<14:21,  5.48it/s]

Step 5274/10000, Loss: 0.7222
Step 5275/10000, Loss: 0.6987


Training Progress:  53%|█████████████████████████████▌                          | 5277/10000 [1:14:58<13:56,  5.65it/s]

Step 5276/10000, Loss: 0.7206
Step 5277/10000, Loss: 0.6785


Training Progress:  53%|█████████████████████████████▌                          | 5279/10000 [1:14:58<13:21,  5.89it/s]

Step 5278/10000, Loss: 0.6699
Step 5279/10000, Loss: 0.7437


Training Progress:  53%|█████████████████████████████▌                          | 5281/10000 [1:14:58<13:32,  5.81it/s]

Step 5280/10000, Loss: 0.6908
Step 5281/10000, Loss: 0.6866


Training Progress:  53%|█████████████████████████████▌                          | 5283/10000 [1:14:59<13:14,  5.93it/s]

Step 5282/10000, Loss: 0.7265
Step 5283/10000, Loss: 0.7449


Training Progress:  53%|█████████████████████████████▌                          | 5285/10000 [1:14:59<13:28,  5.83it/s]

Step 5284/10000, Loss: 0.7069
Step 5285/10000, Loss: 0.6782


Training Progress:  53%|█████████████████████████████▌                          | 5287/10000 [1:14:59<13:27,  5.84it/s]

Step 5286/10000, Loss: 0.6991
Step 5287/10000, Loss: 0.7322


Training Progress:  53%|█████████████████████████████▌                          | 5289/10000 [1:15:00<13:28,  5.82it/s]

Step 5288/10000, Loss: 0.7238
Step 5289/10000, Loss: 0.6455


Training Progress:  53%|█████████████████████████████▋                          | 5291/10000 [1:15:00<13:23,  5.86it/s]

Step 5290/10000, Loss: 0.7031
Step 5291/10000, Loss: 0.7370


Training Progress:  53%|█████████████████████████████▋                          | 5293/10000 [1:15:00<13:19,  5.89it/s]

Step 5292/10000, Loss: 0.7280
Step 5293/10000, Loss: 0.7488


Training Progress:  53%|█████████████████████████████▋                          | 5295/10000 [1:15:01<13:10,  5.95it/s]

Step 5294/10000, Loss: 0.6938
Step 5295/10000, Loss: 0.6914


Training Progress:  53%|█████████████████████████████▋                          | 5297/10000 [1:15:01<13:28,  5.82it/s]

Step 5296/10000, Loss: 0.6688
Step 5297/10000, Loss: 0.7330


Training Progress:  53%|█████████████████████████████▋                          | 5299/10000 [1:15:01<13:33,  5.78it/s]

Step 5298/10000, Loss: 0.7162
Step 5299/10000, Loss: 0.6648


Training Progress:  53%|█████████████████████████████▋                          | 5301/10000 [1:15:02<13:09,  5.95it/s]

Step 5300/10000, Loss: 0.6949
Step 5301/10000, Loss: 0.6916


Training Progress:  53%|█████████████████████████████▋                          | 5303/10000 [1:15:02<13:27,  5.82it/s]

Step 5302/10000, Loss: 0.7042
Step 5303/10000, Loss: 0.7018


Training Progress:  53%|█████████████████████████████▋                          | 5305/10000 [1:15:02<13:20,  5.86it/s]

Step 5304/10000, Loss: 0.7568
Step 5305/10000, Loss: 0.6680


Training Progress:  53%|█████████████████████████████▋                          | 5307/10000 [1:15:03<13:25,  5.83it/s]

Step 5306/10000, Loss: 0.6434
Step 5307/10000, Loss: 0.7236
Step 5308/10000, Loss: 0.6203


Training Progress:  53%|████████████████████████████▋                         | 5308/10000 [1:15:19<6:41:25,  5.13s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5308_loss0.6203_20250117_140707.pt

New best loss: 0.6203


Training Progress:  53%|████████████████████████████▋                         | 5310/10000 [1:15:20<3:25:58,  2.64s/it]

Step 5309/10000, Loss: 0.6999
Step 5310/10000, Loss: 0.6870


Training Progress:  53%|████████████████████████████▋                         | 5311/10000 [1:15:20<2:28:13,  1.90s/it]

Step 5311/10000, Loss: 0.6547
Step 5312/10000, Loss: 0.5996


Training Progress:  53%|████████████████████████████▋                         | 5312/10000 [1:15:41<9:59:45,  7.68s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5312_loss0.5996_20250117_140724.pt

New best loss: 0.5996


Training Progress:  53%|████████████████████████████▋                         | 5314/10000 [1:15:42<5:05:53,  3.92s/it]

Step 5313/10000, Loss: 0.7049
Step 5314/10000, Loss: 0.6213
Step 5315/10000, Loss: 0.5646


Training Progress:  53%|████████████████████████████▏                        | 5315/10000 [1:16:05<12:42:44,  9.77s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5315_loss0.5646_20250117_140746.pt

New best loss: 0.5646


Training Progress:  53%|████████████████████████████▋                         | 5317/10000 [1:16:06<6:24:03,  4.92s/it]

Step 5316/10000, Loss: 0.5669
Step 5317/10000, Loss: 0.5711


Training Progress:  53%|████████████████████████████▋                         | 5319/10000 [1:16:06<3:15:08,  2.50s/it]

Step 5318/10000, Loss: 0.6022
Step 5319/10000, Loss: 0.6540


Training Progress:  53%|████████████████████████████▋                         | 5321/10000 [1:16:07<1:42:13,  1.31s/it]

Step 5320/10000, Loss: 0.7035
Step 5321/10000, Loss: 0.6875


Training Progress:  53%|█████████████████████████████▊                          | 5323/10000 [1:16:07<56:37,  1.38it/s]

Step 5322/10000, Loss: 0.6708
Step 5323/10000, Loss: 0.5973


Training Progress:  53%|█████████████████████████████▊                          | 5325/10000 [1:16:07<34:40,  2.25it/s]

Step 5324/10000, Loss: 0.6256
Step 5325/10000, Loss: 0.5771


Training Progress:  53%|█████████████████████████████▊                          | 5327/10000 [1:16:08<23:34,  3.30it/s]

Step 5326/10000, Loss: 0.5768
Step 5327/10000, Loss: 0.5945


Training Progress:  53%|█████████████████████████████▊                          | 5329/10000 [1:16:08<18:28,  4.21it/s]

Step 5328/10000, Loss: 0.5916
Step 5329/10000, Loss: 0.6306


Training Progress:  53%|█████████████████████████████▊                          | 5331/10000 [1:16:08<15:42,  4.95it/s]

Step 5330/10000, Loss: 0.6189
Step 5331/10000, Loss: 0.6499


Training Progress:  53%|█████████████████████████████▊                          | 5333/10000 [1:16:09<14:37,  5.32it/s]

Step 5332/10000, Loss: 0.5966
Step 5333/10000, Loss: 0.6108


Training Progress:  53%|█████████████████████████████▉                          | 5335/10000 [1:16:09<13:50,  5.62it/s]

Step 5334/10000, Loss: 0.6659
Step 5335/10000, Loss: 0.6373


Training Progress:  53%|█████████████████████████████▉                          | 5337/10000 [1:16:09<13:18,  5.84it/s]

Step 5336/10000, Loss: 0.5828
Step 5337/10000, Loss: 0.5962


Training Progress:  53%|█████████████████████████████▉                          | 5339/10000 [1:16:10<13:25,  5.78it/s]

Step 5338/10000, Loss: 0.6076
Step 5339/10000, Loss: 0.5855


Training Progress:  53%|█████████████████████████████▉                          | 5341/10000 [1:16:10<13:08,  5.91it/s]

Step 5340/10000, Loss: 0.6334
Step 5341/10000, Loss: 0.6669


Training Progress:  53%|█████████████████████████████▉                          | 5343/10000 [1:16:10<13:20,  5.81it/s]

Step 5342/10000, Loss: 0.6707
Step 5343/10000, Loss: 0.6612


Training Progress:  53%|█████████████████████████████▉                          | 5345/10000 [1:16:11<13:05,  5.92it/s]

Step 5344/10000, Loss: 0.6924
Step 5345/10000, Loss: 0.6492


Training Progress:  53%|█████████████████████████████▉                          | 5347/10000 [1:16:11<13:19,  5.82it/s]

Step 5346/10000, Loss: 0.7586
Step 5347/10000, Loss: 0.6746


Training Progress:  53%|█████████████████████████████▉                          | 5349/10000 [1:16:11<13:22,  5.80it/s]

Step 5348/10000, Loss: 0.6907
Step 5349/10000, Loss: 0.6165


Training Progress:  54%|█████████████████████████████▉                          | 5351/10000 [1:16:12<13:19,  5.82it/s]

Step 5350/10000, Loss: 0.6738
Step 5351/10000, Loss: 0.6243


Training Progress:  54%|█████████████████████████████▉                          | 5353/10000 [1:16:12<13:09,  5.88it/s]

Step 5352/10000, Loss: 0.6200
Step 5353/10000, Loss: 0.6889


Training Progress:  54%|█████████████████████████████▉                          | 5355/10000 [1:16:12<13:00,  5.95it/s]

Step 5354/10000, Loss: 0.6648
Step 5355/10000, Loss: 0.6722


Training Progress:  54%|█████████████████████████████▉                          | 5357/10000 [1:16:13<13:15,  5.84it/s]

Step 5356/10000, Loss: 0.6559
Step 5357/10000, Loss: 0.6404


Training Progress:  54%|██████████████████████████████                          | 5359/10000 [1:16:13<13:14,  5.84it/s]

Step 5358/10000, Loss: 0.6468
Step 5359/10000, Loss: 0.6178


Training Progress:  54%|██████████████████████████████                          | 5361/10000 [1:16:13<13:11,  5.86it/s]

Step 5360/10000, Loss: 0.6068
Step 5361/10000, Loss: 0.6635


Training Progress:  54%|██████████████████████████████                          | 5363/10000 [1:16:14<12:57,  5.96it/s]

Step 5362/10000, Loss: 0.6194
Step 5363/10000, Loss: 0.6358


Training Progress:  54%|██████████████████████████████                          | 5365/10000 [1:16:14<13:13,  5.84it/s]

Step 5364/10000, Loss: 0.6483
Step 5365/10000, Loss: 0.6247


Training Progress:  54%|██████████████████████████████                          | 5367/10000 [1:16:14<12:57,  5.96it/s]

Step 5366/10000, Loss: 0.6261
Step 5367/10000, Loss: 0.6079


Training Progress:  54%|██████████████████████████████                          | 5369/10000 [1:16:15<13:15,  5.82it/s]

Step 5368/10000, Loss: 0.6351
Step 5369/10000, Loss: 0.6467


Training Progress:  54%|██████████████████████████████                          | 5371/10000 [1:16:15<12:56,  5.96it/s]

Step 5370/10000, Loss: 0.6424
Step 5371/10000, Loss: 0.5949


Training Progress:  54%|██████████████████████████████                          | 5373/10000 [1:16:15<13:12,  5.84it/s]

Step 5372/10000, Loss: 0.6261
Step 5373/10000, Loss: 0.6537


Training Progress:  54%|██████████████████████████████                          | 5375/10000 [1:16:16<12:58,  5.94it/s]

Step 5374/10000, Loss: 0.6347
Step 5375/10000, Loss: 0.6860


Training Progress:  54%|██████████████████████████████                          | 5377/10000 [1:16:16<13:12,  5.83it/s]

Step 5376/10000, Loss: 0.5899
Step 5377/10000, Loss: 0.6422


Training Progress:  54%|██████████████████████████████                          | 5379/10000 [1:16:16<13:10,  5.84it/s]

Step 5378/10000, Loss: 0.6138
Step 5379/10000, Loss: 0.6419


Training Progress:  54%|██████████████████████████████▏                         | 5381/10000 [1:16:17<13:14,  5.82it/s]

Step 5380/10000, Loss: 0.6392
Step 5381/10000, Loss: 0.5747


Training Progress:  54%|██████████████████████████████▏                         | 5383/10000 [1:16:17<13:03,  5.89it/s]

Step 5382/10000, Loss: 0.6051
Step 5383/10000, Loss: 0.6005


Training Progress:  54%|██████████████████████████████▏                         | 5385/10000 [1:16:17<13:15,  5.80it/s]

Step 5384/10000, Loss: 0.5922
Step 5385/10000, Loss: 0.6208


Training Progress:  54%|██████████████████████████████▏                         | 5387/10000 [1:16:18<13:06,  5.87it/s]

Step 5386/10000, Loss: 0.6634
Step 5387/10000, Loss: 0.6069


Training Progress:  54%|██████████████████████████████▏                         | 5389/10000 [1:16:18<13:15,  5.79it/s]

Step 5388/10000, Loss: 0.5856
Step 5389/10000, Loss: 0.6432
Step 5390/10000, Loss: 0.5456


Training Progress:  54%|█████████████████████████████                         | 5390/10000 [1:16:34<6:03:13,  4.73s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5390_loss0.5456_20250117_140822.pt

New best loss: 0.5456


Training Progress:  54%|█████████████████████████████                         | 5392/10000 [1:16:34<3:08:16,  2.45s/it]

Step 5391/10000, Loss: 0.6001
Step 5392/10000, Loss: 0.5696


Training Progress:  54%|█████████████████████████████                         | 5393/10000 [1:16:34<2:15:24,  1.76s/it]

Step 5393/10000, Loss: 0.5553
Step 5394/10000, Loss: 0.5340


Training Progress:  54%|█████████████████████████████▏                        | 5394/10000 [1:16:54<9:13:53,  7.22s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5394_loss0.5340_20250117_140838.pt

New best loss: 0.5340


Training Progress:  54%|█████████████████████████████▏                        | 5396/10000 [1:16:55<4:40:40,  3.66s/it]

Step 5395/10000, Loss: 0.6241
Step 5396/10000, Loss: 0.5933


Training Progress:  54%|█████████████████████████████▏                        | 5398/10000 [1:16:55<2:23:54,  1.88s/it]

Step 5397/10000, Loss: 0.5431
Step 5398/10000, Loss: 0.5364
Step 5399/10000, Loss: 0.5293


Training Progress:  54%|█████████████████████████████▏                        | 5399/10000 [1:17:16<9:46:18,  7.65s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5399_loss0.5293_20250117_140859.pt

New best loss: 0.5293


Training Progress:  54%|█████████████████████████████▏                        | 5401/10000 [1:17:17<4:57:14,  3.88s/it]

Step 5400/10000, Loss: 0.5433
Step 5401/10000, Loss: 0.5543


Training Progress:  54%|█████████████████████████████▏                        | 5403/10000 [1:17:17<2:32:07,  1.99s/it]

Step 5402/10000, Loss: 0.6063
Step 5403/10000, Loss: 0.5817


Training Progress:  54%|█████████████████████████████▏                        | 5405/10000 [1:17:17<1:21:17,  1.06s/it]

Step 5404/10000, Loss: 0.5954
Step 5405/10000, Loss: 0.5592


Training Progress:  54%|██████████████████████████████▎                         | 5407/10000 [1:17:18<46:34,  1.64it/s]

Step 5406/10000, Loss: 0.5555
Step 5407/10000, Loss: 0.5393


Training Progress:  54%|██████████████████████████████▎                         | 5409/10000 [1:17:18<29:26,  2.60it/s]

Step 5408/10000, Loss: 0.5364
Step 5409/10000, Loss: 0.5413


Training Progress:  54%|██████████████████████████████▎                         | 5411/10000 [1:17:18<20:55,  3.65it/s]

Step 5410/10000, Loss: 0.5422
Step 5411/10000, Loss: 0.5930


Training Progress:  54%|██████████████████████████████▎                         | 5413/10000 [1:17:19<17:00,  4.49it/s]

Step 5412/10000, Loss: 0.5365
Step 5413/10000, Loss: 0.5730
Step 5414/10000, Loss: 0.5265


Training Progress:  54%|█████████████████████████████▏                        | 5414/10000 [1:17:39<7:51:40,  6.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5414_loss0.5265_20250117_140923.pt

New best loss: 0.5265
Step 5415/10000, Loss: 0.5097


Training Progress:  54%|████████████████████████████▋                        | 5415/10000 [1:18:00<13:45:32, 10.80s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5415_loss0.5097_20250117_140943.pt

New best loss: 0.5097


Training Progress:  54%|█████████████████████████████▎                        | 5417/10000 [1:18:01<6:56:09,  5.45s/it]

Step 5416/10000, Loss: 0.5517
Step 5417/10000, Loss: 0.5493


Training Progress:  54%|█████████████████████████████▎                        | 5419/10000 [1:18:01<3:30:33,  2.76s/it]

Step 5418/10000, Loss: 0.5215
Step 5419/10000, Loss: 0.5423


Training Progress:  54%|█████████████████████████████▎                        | 5421/10000 [1:18:02<1:49:37,  1.44s/it]

Step 5420/10000, Loss: 0.5617
Step 5421/10000, Loss: 0.5280


Training Progress:  54%|█████████████████████████████▎                        | 5423/10000 [1:18:02<1:00:26,  1.26it/s]

Step 5422/10000, Loss: 0.5685
Step 5423/10000, Loss: 0.6022


Training Progress:  54%|██████████████████████████████▍                         | 5425/10000 [1:18:02<36:07,  2.11it/s]

Step 5424/10000, Loss: 0.6156
Step 5425/10000, Loss: 0.6059


Training Progress:  54%|██████████████████████████████▍                         | 5427/10000 [1:18:03<24:05,  3.16it/s]

Step 5426/10000, Loss: 0.6147
Step 5427/10000, Loss: 0.5465


Training Progress:  54%|██████████████████████████████▍                         | 5429/10000 [1:18:03<18:34,  4.10it/s]

Step 5428/10000, Loss: 0.6666
Step 5429/10000, Loss: 0.6199


Training Progress:  54%|██████████████████████████████▍                         | 5431/10000 [1:18:03<15:43,  4.85it/s]

Step 5430/10000, Loss: 0.6370
Step 5431/10000, Loss: 0.5645


Training Progress:  54%|██████████████████████████████▍                         | 5433/10000 [1:18:04<14:19,  5.31it/s]

Step 5432/10000, Loss: 0.6157
Step 5433/10000, Loss: 0.5747


Training Progress:  54%|██████████████████████████████▍                         | 5435/10000 [1:18:04<13:26,  5.66it/s]

Step 5434/10000, Loss: 0.5476
Step 5435/10000, Loss: 0.6164


Training Progress:  54%|██████████████████████████████▍                         | 5437/10000 [1:18:04<13:18,  5.71it/s]

Step 5436/10000, Loss: 0.5619
Step 5437/10000, Loss: 0.5597


Training Progress:  54%|██████████████████████████████▍                         | 5439/10000 [1:18:05<12:55,  5.89it/s]

Step 5438/10000, Loss: 0.5684
Step 5439/10000, Loss: 0.5761


Training Progress:  54%|██████████████████████████████▍                         | 5441/10000 [1:18:05<13:05,  5.80it/s]

Step 5440/10000, Loss: 0.5998
Step 5441/10000, Loss: 0.5811


Training Progress:  54%|██████████████████████████████▍                         | 5443/10000 [1:18:05<12:55,  5.88it/s]

Step 5442/10000, Loss: 0.5379
Step 5443/10000, Loss: 0.6166


Training Progress:  54%|██████████████████████████████▍                         | 5445/10000 [1:18:06<13:03,  5.81it/s]

Step 5444/10000, Loss: 0.5710
Step 5445/10000, Loss: 0.5547


Training Progress:  54%|██████████████████████████████▌                         | 5447/10000 [1:18:06<12:52,  5.89it/s]

Step 5446/10000, Loss: 0.5727
Step 5447/10000, Loss: 0.5617


Training Progress:  54%|██████████████████████████████▌                         | 5449/10000 [1:18:06<13:01,  5.82it/s]

Step 5448/10000, Loss: 0.5579
Step 5449/10000, Loss: 0.5247


Training Progress:  55%|██████████████████████████████▌                         | 5451/10000 [1:18:07<12:55,  5.86it/s]

Step 5450/10000, Loss: 0.5659
Step 5451/10000, Loss: 0.5681


Training Progress:  55%|██████████████████████████████▌                         | 5453/10000 [1:18:07<12:56,  5.86it/s]

Step 5452/10000, Loss: 0.5648
Step 5453/10000, Loss: 0.5148


Training Progress:  55%|██████████████████████████████▌                         | 5455/10000 [1:18:07<12:49,  5.91it/s]

Step 5454/10000, Loss: 0.5661
Step 5455/10000, Loss: 0.6137


Training Progress:  55%|██████████████████████████████▌                         | 5457/10000 [1:18:08<13:02,  5.81it/s]

Step 5456/10000, Loss: 0.5867
Step 5457/10000, Loss: 0.5919


Training Progress:  55%|██████████████████████████████▌                         | 5459/10000 [1:18:08<12:51,  5.88it/s]

Step 5458/10000, Loss: 0.5528
Step 5459/10000, Loss: 0.5473


Training Progress:  55%|██████████████████████████████▌                         | 5461/10000 [1:18:09<12:57,  5.84it/s]

Step 5460/10000, Loss: 0.5353
Step 5461/10000, Loss: 0.6015


Training Progress:  55%|██████████████████████████████▌                         | 5463/10000 [1:18:09<12:53,  5.87it/s]

Step 5462/10000, Loss: 0.5678
Step 5463/10000, Loss: 0.5232


Training Progress:  55%|██████████████████████████████▌                         | 5465/10000 [1:18:09<12:39,  5.97it/s]

Step 5464/10000, Loss: 0.5470
Step 5465/10000, Loss: 0.5326


Training Progress:  55%|██████████████████████████████▌                         | 5467/10000 [1:18:10<12:56,  5.84it/s]

Step 5466/10000, Loss: 0.5497
Step 5467/10000, Loss: 0.5845


Training Progress:  55%|██████████████████████████████▋                         | 5469/10000 [1:18:10<12:42,  5.95it/s]

Step 5468/10000, Loss: 0.5883
Step 5469/10000, Loss: 0.5396


Training Progress:  55%|██████████████████████████████▋                         | 5471/10000 [1:18:10<12:57,  5.83it/s]

Step 5470/10000, Loss: 0.5267
Step 5471/10000, Loss: 0.5698
Step 5472/10000, Loss: 0.5086


Training Progress:  55%|█████████████████████████████▌                        | 5472/10000 [1:18:25<5:52:01,  4.66s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5472_loss0.5086_20250117_141014.pt

New best loss: 0.5086


Training Progress:  55%|█████████████████████████████▌                        | 5474/10000 [1:18:26<3:04:08,  2.44s/it]

Step 5473/10000, Loss: 0.5716
Step 5474/10000, Loss: 0.5375
Step 5475/10000, Loss: 0.4908


Training Progress:  55%|█████████████████████████████                        | 5475/10000 [1:18:49<10:55:43,  8.69s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5475_loss0.4908_20250117_141030.pt

New best loss: 0.4908
Step 5476/10000, Loss: 0.4648


Training Progress:  55%|█████████████████████████████                        | 5476/10000 [1:19:08<14:45:21, 11.74s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5476_loss0.4648_20250117_141054.pt

New best loss: 0.4648


Training Progress:  55%|█████████████████████████████▌                        | 5478/10000 [1:19:09<7:25:04,  5.91s/it]

Step 5477/10000, Loss: 0.5401
Step 5478/10000, Loss: 0.5194


Training Progress:  55%|█████████████████████████████▌                        | 5479/10000 [1:19:09<5:15:04,  4.18s/it]

Step 5479/10000, Loss: 0.4879
Step 5480/10000, Loss: 0.4593


Training Progress:  55%|█████████████████████████████                        | 5480/10000 [1:19:31<12:09:04,  9.68s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5480_loss0.4593_20250117_141113.pt

New best loss: 0.4593


Training Progress:  55%|█████████████████████████████▌                        | 5482/10000 [1:19:32<6:06:41,  4.87s/it]

Step 5481/10000, Loss: 0.4837
Step 5482/10000, Loss: 0.4837


Training Progress:  55%|█████████████████████████████▌                        | 5484/10000 [1:19:32<3:06:18,  2.48s/it]

Step 5483/10000, Loss: 0.5134
Step 5484/10000, Loss: 0.5631


Training Progress:  55%|█████████████████████████████▌                        | 5486/10000 [1:19:33<1:37:41,  1.30s/it]

Step 5485/10000, Loss: 0.5264
Step 5486/10000, Loss: 0.5368


Training Progress:  55%|██████████████████████████████▋                         | 5488/10000 [1:19:33<54:28,  1.38it/s]

Step 5487/10000, Loss: 0.4933
Step 5488/10000, Loss: 0.4955


Training Progress:  55%|██████████████████████████████▋                         | 5490/10000 [1:19:33<33:08,  2.27it/s]

Step 5489/10000, Loss: 0.4934
Step 5490/10000, Loss: 0.4757


Training Progress:  55%|██████████████████████████████▊                         | 5492/10000 [1:19:34<22:49,  3.29it/s]

Step 5491/10000, Loss: 0.5134
Step 5492/10000, Loss: 0.4780


Training Progress:  55%|██████████████████████████████▊                         | 5494/10000 [1:19:34<17:51,  4.20it/s]

Step 5493/10000, Loss: 0.5234
Step 5494/10000, Loss: 0.5104


Training Progress:  55%|██████████████████████████████▊                         | 5496/10000 [1:19:34<15:11,  4.94it/s]

Step 5495/10000, Loss: 0.5325
Step 5496/10000, Loss: 0.4914


Training Progress:  55%|██████████████████████████████▊                         | 5498/10000 [1:19:35<14:05,  5.32it/s]

Step 5497/10000, Loss: 0.4730
Step 5498/10000, Loss: 0.5123


Training Progress:  55%|██████████████████████████████▊                         | 5499/10000 [1:19:35<13:47,  5.44it/s]

Step 5499/10000, Loss: 0.5108
Step 5500/10000, Loss: 0.4410


Training Progress:  55%|█████████████████████████████▋                        | 5500/10000 [1:19:59<9:16:01,  7.41s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5500_loss0.4410_20250117_141139.pt

New best loss: 0.4410


Training Progress:  55%|█████████████████████████████▋                        | 5502/10000 [1:20:00<4:42:43,  3.77s/it]

Step 5501/10000, Loss: 0.4728
Step 5502/10000, Loss: 0.4994


Training Progress:  55%|█████████████████████████████▋                        | 5504/10000 [1:20:00<2:25:03,  1.94s/it]

Step 5503/10000, Loss: 0.4880
Step 5504/10000, Loss: 0.5245


Training Progress:  55%|█████████████████████████████▋                        | 5506/10000 [1:20:01<1:17:41,  1.04s/it]

Step 5505/10000, Loss: 0.5504
Step 5506/10000, Loss: 0.5784


Training Progress:  55%|██████████████████████████████▊                         | 5508/10000 [1:20:01<44:26,  1.68it/s]

Step 5507/10000, Loss: 0.5629
Step 5508/10000, Loss: 0.5593


Training Progress:  55%|██████████████████████████████▊                         | 5510/10000 [1:20:01<28:22,  2.64it/s]

Step 5509/10000, Loss: 0.5076
Step 5510/10000, Loss: 0.6129


Training Progress:  55%|██████████████████████████████▊                         | 5512/10000 [1:20:02<20:16,  3.69it/s]

Step 5511/10000, Loss: 0.5243
Step 5512/10000, Loss: 0.5224


Training Progress:  55%|██████████████████████████████▉                         | 5514/10000 [1:20:02<16:34,  4.51it/s]

Step 5513/10000, Loss: 0.5394
Step 5514/10000, Loss: 0.5485


Training Progress:  55%|██████████████████████████████▉                         | 5516/10000 [1:20:02<14:23,  5.19it/s]

Step 5515/10000, Loss: 0.5304
Step 5516/10000, Loss: 0.5222


Training Progress:  55%|██████████████████████████████▉                         | 5518/10000 [1:20:03<13:39,  5.47it/s]

Step 5517/10000, Loss: 0.5896
Step 5518/10000, Loss: 0.5555


Training Progress:  55%|██████████████████████████████▉                         | 5520/10000 [1:20:03<12:58,  5.76it/s]

Step 5519/10000, Loss: 0.5167
Step 5520/10000, Loss: 0.5254


Training Progress:  55%|██████████████████████████████▉                         | 5522/10000 [1:20:03<12:56,  5.77it/s]

Step 5521/10000, Loss: 0.5242
Step 5522/10000, Loss: 0.5511


Training Progress:  55%|██████████████████████████████▉                         | 5524/10000 [1:20:04<12:56,  5.77it/s]

Step 5523/10000, Loss: 0.5226
Step 5524/10000, Loss: 0.4999


Training Progress:  55%|██████████████████████████████▉                         | 5526/10000 [1:20:04<12:42,  5.87it/s]

Step 5525/10000, Loss: 0.5620
Step 5526/10000, Loss: 0.5102


Training Progress:  55%|██████████████████████████████▉                         | 5528/10000 [1:20:04<12:50,  5.81it/s]

Step 5527/10000, Loss: 0.5225
Step 5528/10000, Loss: 0.5383


Training Progress:  55%|██████████████████████████████▉                         | 5530/10000 [1:20:05<12:31,  5.94it/s]

Step 5529/10000, Loss: 0.5195
Step 5530/10000, Loss: 0.5054


Training Progress:  55%|██████████████████████████████▉                         | 5532/10000 [1:20:05<12:45,  5.84it/s]

Step 5531/10000, Loss: 0.4969
Step 5532/10000, Loss: 0.5091


Training Progress:  55%|██████████████████████████████▉                         | 5534/10000 [1:20:05<12:29,  5.96it/s]

Step 5533/10000, Loss: 0.5376
Step 5534/10000, Loss: 0.5124


Training Progress:  55%|███████████████████████████████                         | 5536/10000 [1:20:06<12:51,  5.79it/s]

Step 5535/10000, Loss: 0.4485
Step 5536/10000, Loss: 0.5280


Training Progress:  55%|███████████████████████████████                         | 5538/10000 [1:20:06<12:45,  5.83it/s]

Step 5537/10000, Loss: 0.5460
Step 5538/10000, Loss: 0.5205


Training Progress:  55%|███████████████████████████████                         | 5540/10000 [1:20:06<12:35,  5.90it/s]

Step 5539/10000, Loss: 0.5209
Step 5540/10000, Loss: 0.4896


Training Progress:  55%|███████████████████████████████                         | 5542/10000 [1:20:07<12:45,  5.83it/s]

Step 5541/10000, Loss: 0.5019
Step 5542/10000, Loss: 0.4874


Training Progress:  55%|███████████████████████████████                         | 5544/10000 [1:20:07<12:28,  5.95it/s]

Step 5543/10000, Loss: 0.5435
Step 5544/10000, Loss: 0.4982


Training Progress:  55%|███████████████████████████████                         | 5546/10000 [1:20:07<12:43,  5.83it/s]

Step 5545/10000, Loss: 0.4674
Step 5546/10000, Loss: 0.4590


Training Progress:  55%|███████████████████████████████                         | 5548/10000 [1:20:08<12:26,  5.96it/s]

Step 5547/10000, Loss: 0.4675
Step 5548/10000, Loss: 0.4855


Training Progress:  56%|███████████████████████████████                         | 5550/10000 [1:20:08<12:39,  5.86it/s]

Step 5549/10000, Loss: 0.4919
Step 5550/10000, Loss: 0.5195


Training Progress:  56%|███████████████████████████████                         | 5551/10000 [1:20:08<12:43,  5.83it/s]

Step 5551/10000, Loss: 0.4573
Step 5552/10000, Loss: 0.4363


Training Progress:  56%|█████████████████████████████▉                        | 5552/10000 [1:20:26<6:49:35,  5.52s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5552_loss0.4363_20250117_141212.pt

New best loss: 0.4363


Training Progress:  56%|█████████████████████████████▉                        | 5554/10000 [1:20:27<3:31:20,  2.85s/it]

Step 5553/10000, Loss: 0.5037
Step 5554/10000, Loss: 0.4377


Training Progress:  56%|██████████████████████████████                        | 5556/10000 [1:20:27<1:49:45,  1.48s/it]

Step 5555/10000, Loss: 0.5016
Step 5556/10000, Loss: 0.4938


Training Progress:  56%|██████████████████████████████                        | 5558/10000 [1:20:27<1:00:21,  1.23it/s]

Step 5557/10000, Loss: 0.4675
Step 5558/10000, Loss: 0.4481


Training Progress:  56%|███████████████████████████████▏                        | 5560/10000 [1:20:28<35:55,  2.06it/s]

Step 5559/10000, Loss: 0.4923
Step 5560/10000, Loss: 0.4543
Step 5561/10000, Loss: 0.4119


Training Progress:  56%|██████████████████████████████                        | 5561/10000 [1:20:47<7:26:52,  6.04s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5561_loss0.4119_20250117_141232.pt

New best loss: 0.4119


Training Progress:  56%|██████████████████████████████                        | 5562/10000 [1:20:47<5:21:51,  4.35s/it]

Step 5562/10000, Loss: 0.4129
Step 5563/10000, Loss: 0.4039


Training Progress:  56%|█████████████████████████████▍                       | 5563/10000 [1:21:09<11:38:19,  9.44s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5563_loss0.4039_20250117_141251.pt

New best loss: 0.4039


Training Progress:  56%|██████████████████████████████                        | 5565/10000 [1:21:09<5:52:59,  4.78s/it]

Step 5564/10000, Loss: 0.4457
Step 5565/10000, Loss: 0.4489


Training Progress:  56%|██████████████████████████████                        | 5567/10000 [1:21:10<2:59:26,  2.43s/it]

Step 5566/10000, Loss: 0.4703
Step 5567/10000, Loss: 0.4531


Training Progress:  56%|██████████████████████████████                        | 5569/10000 [1:21:10<1:34:12,  1.28s/it]

Step 5568/10000, Loss: 0.4644
Step 5569/10000, Loss: 0.4250


Training Progress:  56%|███████████████████████████████▏                        | 5571/10000 [1:21:10<52:42,  1.40it/s]

Step 5570/10000, Loss: 0.4410
Step 5571/10000, Loss: 0.4260


Training Progress:  56%|███████████████████████████████▏                        | 5573/10000 [1:21:11<32:07,  2.30it/s]

Step 5572/10000, Loss: 0.4472
Step 5573/10000, Loss: 0.4506


Training Progress:  56%|███████████████████████████████▏                        | 5575/10000 [1:21:11<22:16,  3.31it/s]

Step 5574/10000, Loss: 0.4407
Step 5575/10000, Loss: 0.4837


Training Progress:  56%|███████████████████████████████▏                        | 5577/10000 [1:21:11<17:08,  4.30it/s]

Step 5576/10000, Loss: 0.4746
Step 5577/10000, Loss: 0.4932


Training Progress:  56%|███████████████████████████████▏                        | 5579/10000 [1:21:12<14:57,  4.93it/s]

Step 5578/10000, Loss: 0.4549
Step 5579/10000, Loss: 0.4368


Training Progress:  56%|███████████████████████████████▎                        | 5581/10000 [1:21:12<13:38,  5.40it/s]

Step 5580/10000, Loss: 0.4567
Step 5581/10000, Loss: 0.4628


Training Progress:  56%|███████████████████████████████▎                        | 5583/10000 [1:21:12<13:14,  5.56it/s]

Step 5582/10000, Loss: 0.4247
Step 5583/10000, Loss: 0.4362


Training Progress:  56%|███████████████████████████████▎                        | 5585/10000 [1:21:13<12:47,  5.76it/s]

Step 5584/10000, Loss: 0.4290
Step 5585/10000, Loss: 0.4282


Training Progress:  56%|███████████████████████████████▎                        | 5587/10000 [1:21:13<12:48,  5.74it/s]

Step 5586/10000, Loss: 0.4719
Step 5587/10000, Loss: 0.4971


Training Progress:  56%|███████████████████████████████▎                        | 5589/10000 [1:21:13<12:33,  5.86it/s]

Step 5588/10000, Loss: 0.5149
Step 5589/10000, Loss: 0.4959


Training Progress:  56%|███████████████████████████████▎                        | 5591/10000 [1:21:14<12:41,  5.79it/s]

Step 5590/10000, Loss: 0.5179
Step 5591/10000, Loss: 0.4636


Training Progress:  56%|███████████████████████████████▎                        | 5593/10000 [1:21:14<12:43,  5.77it/s]

Step 5592/10000, Loss: 0.5355
Step 5593/10000, Loss: 0.4793


Training Progress:  56%|███████████████████████████████▎                        | 5595/10000 [1:21:14<12:35,  5.83it/s]

Step 5594/10000, Loss: 0.4809
Step 5595/10000, Loss: 0.4745


Training Progress:  56%|███████████████████████████████▎                        | 5597/10000 [1:21:15<12:25,  5.90it/s]

Step 5596/10000, Loss: 0.5008
Step 5597/10000, Loss: 0.4700


Training Progress:  56%|███████████████████████████████▎                        | 5599/10000 [1:21:15<12:38,  5.80it/s]

Step 5598/10000, Loss: 0.4900
Step 5599/10000, Loss: 0.5253


Training Progress:  56%|███████████████████████████████▎                        | 5601/10000 [1:21:15<12:27,  5.88it/s]

Step 5600/10000, Loss: 0.5086
Step 5601/10000, Loss: 0.4680


Training Progress:  56%|███████████████████████████████▍                        | 5603/10000 [1:21:16<12:37,  5.81it/s]

Step 5602/10000, Loss: 0.4691
Step 5603/10000, Loss: 0.4695


Training Progress:  56%|███████████████████████████████▍                        | 5605/10000 [1:21:16<12:27,  5.88it/s]

Step 5604/10000, Loss: 0.4714
Step 5605/10000, Loss: 0.4698


Training Progress:  56%|███████████████████████████████▍                        | 5607/10000 [1:21:16<12:16,  5.96it/s]

Step 5606/10000, Loss: 0.4287
Step 5607/10000, Loss: 0.5059


Training Progress:  56%|███████████████████████████████▍                        | 5609/10000 [1:21:17<12:30,  5.85it/s]

Step 5608/10000, Loss: 0.4656
Step 5609/10000, Loss: 0.4904


Training Progress:  56%|███████████████████████████████▍                        | 5611/10000 [1:21:17<12:18,  5.94it/s]

Step 5610/10000, Loss: 0.4941
Step 5611/10000, Loss: 0.4890


Training Progress:  56%|███████████████████████████████▍                        | 5613/10000 [1:21:17<12:34,  5.82it/s]

Step 5612/10000, Loss: 0.4571
Step 5613/10000, Loss: 0.4665


Training Progress:  56%|███████████████████████████████▍                        | 5615/10000 [1:21:18<12:17,  5.94it/s]

Step 5614/10000, Loss: 0.4465
Step 5615/10000, Loss: 0.4971


Training Progress:  56%|███████████████████████████████▍                        | 5616/10000 [1:21:18<12:26,  5.88it/s]

Step 5616/10000, Loss: 0.4514
Step 5617/10000, Loss: 0.4024


Training Progress:  56%|██████████████████████████████▎                       | 5617/10000 [1:21:36<6:36:00,  5.42s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5617_loss0.4024_20250117_141322.pt

New best loss: 0.4024


Training Progress:  56%|██████████████████████████████▎                       | 5619/10000 [1:21:36<3:24:30,  2.80s/it]

Step 5618/10000, Loss: 0.4615
Step 5619/10000, Loss: 0.4462


Training Progress:  56%|██████████████████████████████▎                       | 5621/10000 [1:21:37<1:46:37,  1.46s/it]

Step 5620/10000, Loss: 0.4522
Step 5621/10000, Loss: 0.4525


Training Progress:  56%|███████████████████████████████▍                        | 5623/10000 [1:21:37<58:26,  1.25it/s]

Step 5622/10000, Loss: 0.4437
Step 5623/10000, Loss: 0.4280


Training Progress:  56%|███████████████████████████████▌                        | 5625/10000 [1:21:37<34:48,  2.09it/s]

Step 5624/10000, Loss: 0.4725
Step 5625/10000, Loss: 0.5011


Training Progress:  56%|███████████████████████████████▌                        | 5627/10000 [1:21:38<23:32,  3.10it/s]

Step 5626/10000, Loss: 0.4625
Step 5627/10000, Loss: 0.4439


Training Progress:  56%|███████████████████████████████▌                        | 5629/10000 [1:21:38<17:55,  4.06it/s]

Step 5628/10000, Loss: 0.4233
Step 5629/10000, Loss: 0.4398


Training Progress:  56%|███████████████████████████████▌                        | 5631/10000 [1:21:38<15:08,  4.81it/s]

Step 5630/10000, Loss: 0.4469
Step 5631/10000, Loss: 0.4533


Training Progress:  56%|███████████████████████████████▌                        | 5633/10000 [1:21:39<13:38,  5.34it/s]

Step 5632/10000, Loss: 0.4727
Step 5633/10000, Loss: 0.4210
Step 5634/10000, Loss: 0.3940


Training Progress:  56%|██████████████████████████████▍                       | 5634/10000 [1:21:56<6:30:52,  5.37s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5634_loss0.3940_20250117_141343.pt

New best loss: 0.3940


Training Progress:  56%|██████████████████████████████▍                       | 5635/10000 [1:21:56<4:41:50,  3.87s/it]

Step 5635/10000, Loss: 0.4547
Step 5636/10000, Loss: 0.3896


Training Progress:  56%|█████████████████████████████▊                       | 5636/10000 [1:22:40<19:09:22, 15.80s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5636_loss0.3896_20250117_141400.pt

New best loss: 0.3896


Training Progress:  56%|██████████████████████████████▍                       | 5638/10000 [1:22:41<9:31:57,  7.87s/it]

Step 5637/10000, Loss: 0.4522
Step 5638/10000, Loss: 0.4228


Training Progress:  56%|██████████████████████████████▍                       | 5639/10000 [1:22:41<6:44:08,  5.56s/it]

Step 5639/10000, Loss: 0.3953
Step 5640/10000, Loss: 0.3681


Training Progress:  56%|█████████████████████████████▉                       | 5640/10000 [1:23:18<18:23:32, 15.19s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5640_loss0.3681_20250117_141445.pt

New best loss: 0.3681


Training Progress:  56%|██████████████████████████████▍                       | 5642/10000 [1:23:19<9:10:20,  7.58s/it]

Step 5641/10000, Loss: 0.4301
Step 5642/10000, Loss: 0.4184


Training Progress:  56%|██████████████████████████████▍                       | 5643/10000 [1:23:19<6:28:36,  5.35s/it]

Step 5643/10000, Loss: 0.3842
Step 5644/10000, Loss: 0.3543


Training Progress:  56%|█████████████████████████████▉                       | 5644/10000 [1:23:38<11:28:15,  9.48s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5644_loss0.3543_20250117_141523.pt

New best loss: 0.3543


Training Progress:  56%|██████████████████████████████▍                       | 5646/10000 [1:23:39<5:46:10,  4.77s/it]

Step 5645/10000, Loss: 0.3599
Step 5646/10000, Loss: 0.3717


Training Progress:  56%|██████████████████████████████▍                       | 5648/10000 [1:23:39<2:55:44,  2.42s/it]

Step 5647/10000, Loss: 0.3845
Step 5648/10000, Loss: 0.4025


Training Progress:  56%|██████████████████████████████▌                       | 5650/10000 [1:23:39<1:32:30,  1.28s/it]

Step 5649/10000, Loss: 0.4089
Step 5650/10000, Loss: 0.4154


Training Progress:  57%|███████████████████████████████▋                        | 5652/10000 [1:23:40<51:31,  1.41it/s]

Step 5651/10000, Loss: 0.3861
Step 5652/10000, Loss: 0.4025


Training Progress:  57%|███████████████████████████████▋                        | 5654/10000 [1:23:40<31:42,  2.28it/s]

Step 5653/10000, Loss: 0.3749
Step 5654/10000, Loss: 0.3871


Training Progress:  57%|███████████████████████████████▋                        | 5656/10000 [1:23:40<21:38,  3.35it/s]

Step 5655/10000, Loss: 0.3875
Step 5656/10000, Loss: 0.3776


Training Progress:  57%|███████████████████████████████▋                        | 5658/10000 [1:23:41<17:03,  4.24it/s]

Step 5657/10000, Loss: 0.4084
Step 5658/10000, Loss: 0.4167


Training Progress:  57%|███████████████████████████████▋                        | 5660/10000 [1:23:41<14:32,  4.98it/s]

Step 5659/10000, Loss: 0.4465
Step 5660/10000, Loss: 0.3991


Training Progress:  57%|███████████████████████████████▋                        | 5662/10000 [1:23:41<13:35,  5.32it/s]

Step 5661/10000, Loss: 0.4138
Step 5662/10000, Loss: 0.4160


Training Progress:  57%|███████████████████████████████▋                        | 5664/10000 [1:23:42<12:43,  5.68it/s]

Step 5663/10000, Loss: 0.4062
Step 5664/10000, Loss: 0.3938


Training Progress:  57%|███████████████████████████████▋                        | 5666/10000 [1:23:42<12:39,  5.70it/s]

Step 5665/10000, Loss: 0.3793
Step 5666/10000, Loss: 0.4002


Training Progress:  57%|███████████████████████████████▋                        | 5668/10000 [1:23:43<12:18,  5.86it/s]

Step 5667/10000, Loss: 0.3891
Step 5668/10000, Loss: 0.4166


Training Progress:  57%|███████████████████████████████▊                        | 5670/10000 [1:23:43<12:30,  5.77it/s]

Step 5669/10000, Loss: 0.4400
Step 5670/10000, Loss: 0.4714


Training Progress:  57%|███████████████████████████████▊                        | 5672/10000 [1:23:43<12:19,  5.85it/s]

Step 5671/10000, Loss: 0.4294
Step 5672/10000, Loss: 0.4345


Training Progress:  57%|███████████████████████████████▊                        | 5674/10000 [1:23:44<12:06,  5.95it/s]

Step 5673/10000, Loss: 0.3998
Step 5674/10000, Loss: 0.4651


Training Progress:  57%|███████████████████████████████▊                        | 5676/10000 [1:23:44<12:21,  5.84it/s]

Step 5675/10000, Loss: 0.4416
Step 5676/10000, Loss: 0.4605


Training Progress:  57%|███████████████████████████████▊                        | 5678/10000 [1:23:44<12:06,  5.95it/s]

Step 5677/10000, Loss: 0.4273
Step 5678/10000, Loss: 0.4275


Training Progress:  57%|███████████████████████████████▊                        | 5680/10000 [1:23:45<12:18,  5.85it/s]

Step 5679/10000, Loss: 0.4091
Step 5680/10000, Loss: 0.4259


Training Progress:  57%|███████████████████████████████▊                        | 5682/10000 [1:23:45<12:23,  5.81it/s]

Step 5681/10000, Loss: 0.4452
Step 5682/10000, Loss: 0.4319


Training Progress:  57%|███████████████████████████████▊                        | 5684/10000 [1:23:45<12:06,  5.94it/s]

Step 5683/10000, Loss: 0.4503
Step 5684/10000, Loss: 0.4143


Training Progress:  57%|███████████████████████████████▊                        | 5686/10000 [1:23:46<12:18,  5.84it/s]

Step 5685/10000, Loss: 0.4355
Step 5686/10000, Loss: 0.4068


Training Progress:  57%|███████████████████████████████▊                        | 5688/10000 [1:23:46<12:22,  5.81it/s]

Step 5687/10000, Loss: 0.3938
Step 5688/10000, Loss: 0.3986


Training Progress:  57%|███████████████████████████████▊                        | 5690/10000 [1:23:46<12:06,  5.94it/s]

Step 5689/10000, Loss: 0.4400
Step 5690/10000, Loss: 0.4347


Training Progress:  57%|███████████████████████████████▉                        | 5692/10000 [1:23:47<12:17,  5.84it/s]

Step 5691/10000, Loss: 0.4280
Step 5692/10000, Loss: 0.4382


Training Progress:  57%|███████████████████████████████▉                        | 5694/10000 [1:23:47<12:02,  5.96it/s]

Step 5693/10000, Loss: 0.4300
Step 5694/10000, Loss: 0.4341


Training Progress:  57%|███████████████████████████████▉                        | 5696/10000 [1:23:47<12:15,  5.85it/s]

Step 5695/10000, Loss: 0.4082
Step 5696/10000, Loss: 0.4162


Training Progress:  57%|███████████████████████████████▉                        | 5698/10000 [1:23:48<12:22,  5.80it/s]

Step 5697/10000, Loss: 0.4383
Step 5698/10000, Loss: 0.4054


Training Progress:  57%|███████████████████████████████▉                        | 5700/10000 [1:23:48<12:07,  5.91it/s]

Step 5699/10000, Loss: 0.3768
Step 5700/10000, Loss: 0.4165


Training Progress:  57%|███████████████████████████████▉                        | 5702/10000 [1:23:48<12:16,  5.83it/s]

Step 5701/10000, Loss: 0.4215
Step 5702/10000, Loss: 0.4270


Training Progress:  57%|███████████████████████████████▉                        | 5704/10000 [1:23:49<12:21,  5.79it/s]

Step 5703/10000, Loss: 0.4459
Step 5704/10000, Loss: 0.4160


Training Progress:  57%|███████████████████████████████▉                        | 5706/10000 [1:23:49<12:18,  5.82it/s]

Step 5705/10000, Loss: 0.4292
Step 5706/10000, Loss: 0.4324


Training Progress:  57%|███████████████████████████████▉                        | 5708/10000 [1:23:49<12:16,  5.83it/s]

Step 5707/10000, Loss: 0.4366
Step 5708/10000, Loss: 0.4173


Training Progress:  57%|███████████████████████████████▉                        | 5710/10000 [1:23:50<11:59,  5.96it/s]

Step 5709/10000, Loss: 0.4022
Step 5710/10000, Loss: 0.3870


Training Progress:  57%|███████████████████████████████▉                        | 5712/10000 [1:23:50<12:12,  5.86it/s]

Step 5711/10000, Loss: 0.3960
Step 5712/10000, Loss: 0.3966


Training Progress:  57%|███████████████████████████████▉                        | 5714/10000 [1:23:50<12:16,  5.82it/s]

Step 5713/10000, Loss: 0.3979
Step 5714/10000, Loss: 0.4385


Training Progress:  57%|████████████████████████████████                        | 5716/10000 [1:23:51<12:06,  5.90it/s]

Step 5715/10000, Loss: 0.3846
Step 5716/10000, Loss: 0.3661


Training Progress:  57%|████████████████████████████████                        | 5718/10000 [1:23:51<12:10,  5.86it/s]

Step 5717/10000, Loss: 0.4124
Step 5718/10000, Loss: 0.3704


Training Progress:  57%|████████████████████████████████                        | 5720/10000 [1:23:51<12:16,  5.81it/s]

Step 5719/10000, Loss: 0.4140
Step 5720/10000, Loss: 0.3851
Step 5721/10000, Loss: 0.3520


Training Progress:  57%|██████████████████████████████▉                       | 5721/10000 [1:24:07<5:39:06,  4.76s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5721_loss0.3520_20250117_141555.pt

New best loss: 0.3520


Training Progress:  57%|██████████████████████████████▉                       | 5723/10000 [1:24:07<2:54:50,  2.45s/it]

Step 5722/10000, Loss: 0.3523
Step 5723/10000, Loss: 0.3959


Training Progress:  57%|██████████████████████████████▉                       | 5724/10000 [1:24:08<2:06:04,  1.77s/it]

Step 5724/10000, Loss: 0.3706
Step 5725/10000, Loss: 0.3352


Training Progress:  57%|██████████████████████████████▉                       | 5725/10000 [1:24:29<9:03:43,  7.63s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5725_loss0.3352_20250117_141612.pt

New best loss: 0.3352
Step 5726/10000, Loss: 0.3164


Training Progress:  57%|██████████████████████████████▎                      | 5726/10000 [1:24:49<13:33:31, 11.42s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5726_loss0.3164_20250117_141633.pt

New best loss: 0.3164
Step 5727/10000, Loss: 0.3083


Training Progress:  57%|██████████████████████████████▎                      | 5727/10000 [1:25:14<18:15:48, 15.39s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5727_loss0.3083_20250117_141653.pt

New best loss: 0.3083


Training Progress:  57%|██████████████████████████████▉                       | 5729/10000 [1:25:14<9:06:29,  7.68s/it]

Step 5728/10000, Loss: 0.3493
Step 5729/10000, Loss: 0.3598


Training Progress:  57%|██████████████████████████████▉                       | 5731/10000 [1:25:15<4:34:00,  3.85s/it]

Step 5730/10000, Loss: 0.3818
Step 5731/10000, Loss: 0.3615


Training Progress:  57%|██████████████████████████████▉                       | 5733/10000 [1:25:15<2:20:19,  1.97s/it]

Step 5732/10000, Loss: 0.3828
Step 5733/10000, Loss: 0.3372


Training Progress:  57%|██████████████████████████████▉                       | 5735/10000 [1:25:15<1:15:03,  1.06s/it]

Step 5734/10000, Loss: 0.3560
Step 5735/10000, Loss: 0.3200


Training Progress:  57%|████████████████████████████████▏                       | 5737/10000 [1:25:16<42:52,  1.66it/s]

Step 5736/10000, Loss: 0.3403
Step 5737/10000, Loss: 0.3211


Training Progress:  57%|████████████████████████████████▏                       | 5739/10000 [1:25:16<26:59,  2.63it/s]

Step 5738/10000, Loss: 0.3396
Step 5739/10000, Loss: 0.3707


Training Progress:  57%|████████████████████████████████▏                       | 5741/10000 [1:25:16<19:32,  3.63it/s]

Step 5740/10000, Loss: 0.3784
Step 5741/10000, Loss: 0.3833


Training Progress:  57%|████████████████████████████████▏                       | 5743/10000 [1:25:17<15:38,  4.54it/s]

Step 5742/10000, Loss: 0.3583
Step 5743/10000, Loss: 0.3747


Training Progress:  57%|████████████████████████████████▏                       | 5745/10000 [1:25:17<13:57,  5.08it/s]

Step 5744/10000, Loss: 0.3829
Step 5745/10000, Loss: 0.3674


Training Progress:  57%|████████████████████████████████▏                       | 5747/10000 [1:25:17<12:52,  5.50it/s]

Step 5746/10000, Loss: 0.3394
Step 5747/10000, Loss: 0.3376


Training Progress:  57%|████████████████████████████████▏                       | 5749/10000 [1:25:18<12:36,  5.62it/s]

Step 5748/10000, Loss: 0.3520
Step 5749/10000, Loss: 0.3552


Training Progress:  58%|████████████████████████████████▏                       | 5751/10000 [1:25:18<12:14,  5.79it/s]

Step 5750/10000, Loss: 0.3795
Step 5751/10000, Loss: 0.3818


Training Progress:  58%|████████████████████████████████▏                       | 5753/10000 [1:25:18<12:16,  5.77it/s]

Step 5752/10000, Loss: 0.3957
Step 5753/10000, Loss: 0.4047


Training Progress:  58%|████████████████████████████████▏                       | 5755/10000 [1:25:19<12:10,  5.81it/s]

Step 5754/10000, Loss: 0.4082
Step 5755/10000, Loss: 0.3487


Training Progress:  58%|████████████████████████████████▏                       | 5757/10000 [1:25:19<12:11,  5.80it/s]

Step 5756/10000, Loss: 0.4087
Step 5757/10000, Loss: 0.3869


Training Progress:  58%|████████████████████████████████▎                       | 5759/10000 [1:25:19<12:04,  5.86it/s]

Step 5758/10000, Loss: 0.4108
Step 5759/10000, Loss: 0.3636


Training Progress:  58%|████████████████████████████████▎                       | 5761/10000 [1:25:20<12:00,  5.88it/s]

Step 5760/10000, Loss: 0.3873
Step 5761/10000, Loss: 0.3708


Training Progress:  58%|████████████████████████████████▎                       | 5763/10000 [1:25:20<11:53,  5.94it/s]

Step 5762/10000, Loss: 0.3820
Step 5763/10000, Loss: 0.4069


Training Progress:  58%|████████████████████████████████▎                       | 5765/10000 [1:25:20<12:08,  5.81it/s]

Step 5764/10000, Loss: 0.3826
Step 5765/10000, Loss: 0.3935


Training Progress:  58%|████████████████████████████████▎                       | 5767/10000 [1:25:21<12:00,  5.87it/s]

Step 5766/10000, Loss: 0.3821
Step 5767/10000, Loss: 0.3815


Training Progress:  58%|████████████████████████████████▎                       | 5769/10000 [1:25:21<11:51,  5.95it/s]

Step 5768/10000, Loss: 0.3883
Step 5769/10000, Loss: 0.3703


Training Progress:  58%|████████████████████████████████▎                       | 5771/10000 [1:25:21<12:08,  5.81it/s]

Step 5770/10000, Loss: 0.3318
Step 5771/10000, Loss: 0.3802


Training Progress:  58%|████████████████████████████████▎                       | 5773/10000 [1:25:22<12:00,  5.87it/s]

Step 5772/10000, Loss: 0.3512
Step 5773/10000, Loss: 0.3723


Training Progress:  58%|████████████████████████████████▎                       | 5775/10000 [1:25:22<11:58,  5.88it/s]

Step 5774/10000, Loss: 0.3788
Step 5775/10000, Loss: 0.3830


Training Progress:  58%|████████████████████████████████▎                       | 5777/10000 [1:25:23<12:05,  5.82it/s]

Step 5776/10000, Loss: 0.3563
Step 5777/10000, Loss: 0.3457


Training Progress:  58%|████████████████████████████████▎                       | 5779/10000 [1:25:23<12:04,  5.83it/s]

Step 5778/10000, Loss: 0.3660
Step 5779/10000, Loss: 0.4006


Training Progress:  58%|████████████████████████████████▎                       | 5781/10000 [1:25:23<11:47,  5.96it/s]

Step 5780/10000, Loss: 0.3742
Step 5781/10000, Loss: 0.3530


Training Progress:  58%|████████████████████████████████▍                       | 5783/10000 [1:25:24<12:04,  5.82it/s]

Step 5782/10000, Loss: 0.3738
Step 5783/10000, Loss: 0.3597


Training Progress:  58%|████████████████████████████████▍                       | 5785/10000 [1:25:24<11:59,  5.86it/s]

Step 5784/10000, Loss: 0.3681
Step 5785/10000, Loss: 0.3737


Training Progress:  58%|████████████████████████████████▍                       | 5787/10000 [1:25:24<11:55,  5.89it/s]

Step 5786/10000, Loss: 0.3507
Step 5787/10000, Loss: 0.3642


Training Progress:  58%|████████████████████████████████▍                       | 5789/10000 [1:25:25<11:47,  5.96it/s]

Step 5788/10000, Loss: 0.3830
Step 5789/10000, Loss: 0.3892


Training Progress:  58%|████████████████████████████████▍                       | 5791/10000 [1:25:25<12:04,  5.81it/s]

Step 5790/10000, Loss: 0.3660
Step 5791/10000, Loss: 0.3810


Training Progress:  58%|████████████████████████████████▍                       | 5793/10000 [1:25:25<11:52,  5.90it/s]

Step 5792/10000, Loss: 0.3613
Step 5793/10000, Loss: 0.3664


Training Progress:  58%|████████████████████████████████▍                       | 5795/10000 [1:25:26<11:49,  5.93it/s]

Step 5794/10000, Loss: 0.3510
Step 5795/10000, Loss: 0.3643


Training Progress:  58%|████████████████████████████████▍                       | 5797/10000 [1:25:26<12:01,  5.83it/s]

Step 5796/10000, Loss: 0.3753
Step 5797/10000, Loss: 0.3459
Step 5798/10000, Loss: 0.2901


Training Progress:  58%|███████████████████████████████▎                      | 5798/10000 [1:25:43<6:17:31,  5.39s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5798_loss0.2901_20250117_141730.pt

New best loss: 0.2901


Training Progress:  58%|███████████████████████████████▎                      | 5800/10000 [1:25:44<3:15:29,  2.79s/it]

Step 5799/10000, Loss: 0.3414
Step 5800/10000, Loss: 0.3213


Training Progress:  58%|███████████████████████████████▎                      | 5802/10000 [1:25:44<1:41:45,  1.45s/it]

Step 5801/10000, Loss: 0.3633
Step 5802/10000, Loss: 0.3581


Training Progress:  58%|████████████████████████████████▌                       | 5804/10000 [1:25:45<55:45,  1.25it/s]

Step 5803/10000, Loss: 0.3184
Step 5804/10000, Loss: 0.3111


Training Progress:  58%|████████████████████████████████▌                       | 5806/10000 [1:25:45<33:34,  2.08it/s]

Step 5805/10000, Loss: 0.3804
Step 5806/10000, Loss: 0.3314


Training Progress:  58%|████████████████████████████████▌                       | 5808/10000 [1:25:45<22:23,  3.12it/s]

Step 5807/10000, Loss: 0.3204
Step 5808/10000, Loss: 0.2931
Step 5809/10000, Loss: 0.2805


Training Progress:  58%|███████████████████████████████▎                      | 5809/10000 [1:26:06<7:17:11,  6.26s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5809_loss0.2805_20250117_141750.pt

New best loss: 0.2805


Training Progress:  58%|███████████████████████████████▍                      | 5811/10000 [1:26:06<3:42:55,  3.19s/it]

Step 5810/10000, Loss: 0.3454
Step 5811/10000, Loss: 0.3322


Training Progress:  58%|███████████████████████████████▍                      | 5813/10000 [1:26:06<1:55:01,  1.65s/it]

Step 5812/10000, Loss: 0.3218
Step 5813/10000, Loss: 0.3118


Training Progress:  58%|███████████████████████████████▍                      | 5815/10000 [1:26:07<1:02:31,  1.12it/s]

Step 5814/10000, Loss: 0.3291
Step 5815/10000, Loss: 0.2851


Training Progress:  58%|████████████████████████████████▌                       | 5817/10000 [1:26:07<36:28,  1.91it/s]

Step 5816/10000, Loss: 0.3020
Step 5817/10000, Loss: 0.2937


Training Progress:  58%|████████████████████████████████▌                       | 5819/10000 [1:26:07<24:03,  2.90it/s]

Step 5818/10000, Loss: 0.3028
Step 5819/10000, Loss: 0.2958


Training Progress:  58%|████████████████████████████████▌                       | 5821/10000 [1:26:08<17:38,  3.95it/s]

Step 5820/10000, Loss: 0.3104
Step 5821/10000, Loss: 0.3064


Training Progress:  58%|████████████████████████████████▌                       | 5823/10000 [1:26:08<14:49,  4.70it/s]

Step 5822/10000, Loss: 0.3123
Step 5823/10000, Loss: 0.3305


Training Progress:  58%|████████████████████████████████▌                       | 5825/10000 [1:26:08<13:16,  5.24it/s]

Step 5824/10000, Loss: 0.3126
Step 5825/10000, Loss: 0.3234


Training Progress:  58%|████████████████████████████████▋                       | 5827/10000 [1:26:09<12:36,  5.51it/s]

Step 5826/10000, Loss: 0.3452
Step 5827/10000, Loss: 0.3203


Training Progress:  58%|████████████████████████████████▋                       | 5829/10000 [1:26:09<12:01,  5.78it/s]

Step 5828/10000, Loss: 0.3039
Step 5829/10000, Loss: 0.3095


Training Progress:  58%|████████████████████████████████▋                       | 5831/10000 [1:26:10<12:03,  5.77it/s]

Step 5830/10000, Loss: 0.2974
Step 5831/10000, Loss: 0.3063


Training Progress:  58%|████████████████████████████████▋                       | 5833/10000 [1:26:10<11:44,  5.91it/s]

Step 5832/10000, Loss: 0.3220
Step 5833/10000, Loss: 0.3133


Training Progress:  58%|████████████████████████████████▋                       | 5835/10000 [1:26:10<11:55,  5.82it/s]

Step 5834/10000, Loss: 0.3382
Step 5835/10000, Loss: 0.3281


Training Progress:  58%|████████████████████████████████▋                       | 5837/10000 [1:26:11<11:39,  5.95it/s]

Step 5836/10000, Loss: 0.3548
Step 5837/10000, Loss: 0.3036


Training Progress:  58%|████████████████████████████████▋                       | 5839/10000 [1:26:11<11:53,  5.83it/s]

Step 5838/10000, Loss: 0.3610
Step 5839/10000, Loss: 0.3476


Training Progress:  58%|████████████████████████████████▋                       | 5841/10000 [1:26:11<11:45,  5.89it/s]

Step 5840/10000, Loss: 0.3391
Step 5841/10000, Loss: 0.3334


Training Progress:  58%|████████████████████████████████▋                       | 5843/10000 [1:26:12<11:54,  5.82it/s]

Step 5842/10000, Loss: 0.3576
Step 5843/10000, Loss: 0.3350


Training Progress:  58%|████████████████████████████████▋                       | 5845/10000 [1:26:12<11:47,  5.87it/s]

Step 5844/10000, Loss: 0.3493
Step 5845/10000, Loss: 0.3593


Training Progress:  58%|████████████████████████████████▋                       | 5847/10000 [1:26:12<11:56,  5.80it/s]

Step 5846/10000, Loss: 0.3368
Step 5847/10000, Loss: 0.3407


Training Progress:  58%|████████████████████████████████▊                       | 5849/10000 [1:26:13<11:46,  5.87it/s]

Step 5848/10000, Loss: 0.3423
Step 5849/10000, Loss: 0.3477


Training Progress:  59%|████████████████████████████████▊                       | 5851/10000 [1:26:13<11:35,  5.97it/s]

Step 5850/10000, Loss: 0.3347
Step 5851/10000, Loss: 0.3342


Training Progress:  59%|████████████████████████████████▊                       | 5853/10000 [1:26:13<11:47,  5.86it/s]

Step 5852/10000, Loss: 0.3211
Step 5853/10000, Loss: 0.3303


Training Progress:  59%|████████████████████████████████▊                       | 5855/10000 [1:26:14<11:36,  5.95it/s]

Step 5854/10000, Loss: 0.3266
Step 5855/10000, Loss: 0.3189


Training Progress:  59%|████████████████████████████████▊                       | 5857/10000 [1:26:14<11:47,  5.86it/s]

Step 5856/10000, Loss: 0.3370
Step 5857/10000, Loss: 0.3291


Training Progress:  59%|████████████████████████████████▊                       | 5859/10000 [1:26:14<11:33,  5.97it/s]

Step 5858/10000, Loss: 0.3182
Step 5859/10000, Loss: 0.3012


Training Progress:  59%|████████████████████████████████▊                       | 5861/10000 [1:26:15<11:46,  5.86it/s]

Step 5860/10000, Loss: 0.3040
Step 5861/10000, Loss: 0.3390


Training Progress:  59%|████████████████████████████████▊                       | 5863/10000 [1:26:15<11:53,  5.80it/s]

Step 5862/10000, Loss: 0.3067
Step 5863/10000, Loss: 0.3059


Training Progress:  59%|████████████████████████████████▊                       | 5865/10000 [1:26:15<11:43,  5.88it/s]

Step 5864/10000, Loss: 0.3200
Step 5865/10000, Loss: 0.3372


Training Progress:  59%|████████████████████████████████▊                       | 5867/10000 [1:26:16<11:53,  5.80it/s]

Step 5866/10000, Loss: 0.3284
Step 5867/10000, Loss: 0.3598


Training Progress:  59%|████████████████████████████████▊                       | 5869/10000 [1:26:16<11:39,  5.91it/s]

Step 5868/10000, Loss: 0.3135
Step 5869/10000, Loss: 0.3290


Training Progress:  59%|████████████████████████████████▉                       | 5871/10000 [1:26:16<11:48,  5.83it/s]

Step 5870/10000, Loss: 0.3363
Step 5871/10000, Loss: 0.3375


Training Progress:  59%|████████████████████████████████▉                       | 5873/10000 [1:26:17<11:33,  5.95it/s]

Step 5872/10000, Loss: 0.3217
Step 5873/10000, Loss: 0.3074


Training Progress:  59%|████████████████████████████████▉                       | 5875/10000 [1:26:17<11:45,  5.85it/s]

Step 5874/10000, Loss: 0.3047
Step 5875/10000, Loss: 0.3270


Training Progress:  59%|████████████████████████████████▉                       | 5877/10000 [1:26:17<11:50,  5.80it/s]

Step 5876/10000, Loss: 0.3166
Step 5877/10000, Loss: 0.3362


Training Progress:  59%|████████████████████████████████▉                       | 5879/10000 [1:26:18<11:51,  5.79it/s]

Step 5878/10000, Loss: 0.3522
Step 5879/10000, Loss: 0.3233
Step 5880/10000, Loss: 0.2733


Training Progress:  59%|███████████████████████████████▊                      | 5880/10000 [1:26:33<5:20:40,  4.67s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5880_loss0.2733_20250117_141822.pt

New best loss: 0.2733


Training Progress:  59%|███████████████████████████████▊                      | 5881/10000 [1:26:33<3:54:09,  3.41s/it]

Step 5881/10000, Loss: 0.3269
Step 5882/10000, Loss: 0.2733


Training Progress:  59%|███████████████████████████████▏                     | 5882/10000 [1:26:56<10:27:02,  9.14s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5882_loss0.2733_20250117_141837.pt

New best loss: 0.2733


Training Progress:  59%|███████████████████████████████▊                      | 5884/10000 [1:26:56<5:17:42,  4.63s/it]

Step 5883/10000, Loss: 0.3185
Step 5884/10000, Loss: 0.3105


Training Progress:  59%|███████████████████████████████▊                      | 5885/10000 [1:26:57<3:45:56,  3.29s/it]

Step 5885/10000, Loss: 0.2904
Step 5886/10000, Loss: 0.2706


Training Progress:  59%|███████████████████████████████▏                     | 5886/10000 [1:27:18<10:02:18,  8.78s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5886_loss0.2706_20250117_141901.pt

New best loss: 0.2706


Training Progress:  59%|███████████████████████████████▊                      | 5888/10000 [1:27:19<5:04:52,  4.45s/it]

Step 5887/10000, Loss: 0.3203
Step 5888/10000, Loss: 0.3079


Training Progress:  59%|███████████████████████████████▊                      | 5889/10000 [1:27:19<3:36:56,  3.17s/it]

Step 5889/10000, Loss: 0.3053
Step 5890/10000, Loss: 0.2671


Training Progress:  59%|███████████████████████████████▏                     | 5890/10000 [1:27:42<10:23:29,  9.10s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5890_loss0.2671_20250117_141923.pt

New best loss: 0.2671
Step 5891/10000, Loss: 0.2625


Training Progress:  59%|███████████████████████████████▏                     | 5891/10000 [1:28:05<15:01:03, 13.16s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5891_loss0.2625_20250117_141946.pt

New best loss: 0.2625


Training Progress:  59%|███████████████████████████████▊                      | 5893/10000 [1:28:05<7:31:39,  6.60s/it]

Step 5892/10000, Loss: 0.2888
Step 5893/10000, Loss: 0.2881


Training Progress:  59%|███████████████████████████████▊                      | 5895/10000 [1:28:06<3:47:15,  3.32s/it]

Step 5894/10000, Loss: 0.2825
Step 5895/10000, Loss: 0.2883


Training Progress:  59%|███████████████████████████████▊                      | 5896/10000 [1:28:06<2:42:18,  2.37s/it]

Step 5896/10000, Loss: 0.3068
Step 5897/10000, Loss: 0.2622


Training Progress:  59%|███████████████████████████████▊                      | 5897/10000 [1:28:26<8:56:24,  7.84s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5897_loss0.2622_20250117_142010.pt

New best loss: 0.2622


Training Progress:  59%|███████████████████████████████▊                      | 5898/10000 [1:28:27<6:25:41,  5.64s/it]

Step 5898/10000, Loss: 0.2642
Step 5899/10000, Loss: 0.2511


Training Progress:  59%|███████████████████████████████▎                     | 5899/10000 [1:28:48<11:32:36, 10.13s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5899_loss0.2511_20250117_142031.pt

New best loss: 0.2511
Step 5900/10000, Loss: 0.2492


Training Progress:  59%|███████████████████████████████▎                     | 5900/10000 [1:29:10<15:49:38, 13.90s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5900_loss0.2492_20250117_142052.pt

New best loss: 0.2492


Training Progress:  59%|███████████████████████████████▊                      | 5902/10000 [1:29:11<7:55:17,  6.96s/it]

Step 5901/10000, Loss: 0.2612
Step 5902/10000, Loss: 0.2657


Training Progress:  59%|███████████████████████████████▉                      | 5904/10000 [1:29:11<3:58:54,  3.50s/it]

Step 5903/10000, Loss: 0.2795
Step 5904/10000, Loss: 0.2655


Training Progress:  59%|███████████████████████████████▉                      | 5906/10000 [1:29:12<2:02:55,  1.80s/it]

Step 5905/10000, Loss: 0.2871
Step 5906/10000, Loss: 0.2672


Training Progress:  59%|███████████████████████████████▉                      | 5908/10000 [1:29:12<1:05:55,  1.03it/s]

Step 5907/10000, Loss: 0.2960
Step 5908/10000, Loss: 0.2999


Training Progress:  59%|█████████████████████████████████                       | 5910/10000 [1:29:12<38:25,  1.77it/s]

Step 5909/10000, Loss: 0.2743
Step 5910/10000, Loss: 0.2656


Training Progress:  59%|█████████████████████████████████                       | 5912/10000 [1:29:13<24:44,  2.75it/s]

Step 5911/10000, Loss: 0.2753
Step 5912/10000, Loss: 0.2758


Training Progress:  59%|█████████████████████████████████                       | 5914/10000 [1:29:13<17:58,  3.79it/s]

Step 5913/10000, Loss: 0.2772
Step 5914/10000, Loss: 0.2759


Training Progress:  59%|█████████████████████████████████▏                      | 5916/10000 [1:29:13<14:48,  4.60it/s]

Step 5915/10000, Loss: 0.2625
Step 5916/10000, Loss: 0.2817


Training Progress:  59%|█████████████████████████████████▏                      | 5918/10000 [1:29:14<13:20,  5.10it/s]

Step 5917/10000, Loss: 0.3024
Step 5918/10000, Loss: 0.2864


Training Progress:  59%|█████████████████████████████████▏                      | 5920/10000 [1:29:14<12:29,  5.45it/s]

Step 5919/10000, Loss: 0.2642
Step 5920/10000, Loss: 0.3461


Training Progress:  59%|█████████████████████████████████▏                      | 5922/10000 [1:29:14<12:05,  5.62it/s]

Step 5921/10000, Loss: 0.3239
Step 5922/10000, Loss: 0.3240


Training Progress:  59%|█████████████████████████████████▏                      | 5924/10000 [1:29:15<11:46,  5.77it/s]

Step 5923/10000, Loss: 0.3031
Step 5924/10000, Loss: 0.2986


Training Progress:  59%|█████████████████████████████████▏                      | 5926/10000 [1:29:15<11:49,  5.74it/s]

Step 5925/10000, Loss: 0.2785
Step 5926/10000, Loss: 0.2993


Training Progress:  59%|█████████████████████████████████▏                      | 5928/10000 [1:29:15<11:28,  5.91it/s]

Step 5927/10000, Loss: 0.3071
Step 5928/10000, Loss: 0.3021


Training Progress:  59%|█████████████████████████████████▏                      | 5930/10000 [1:29:16<11:42,  5.79it/s]

Step 5929/10000, Loss: 0.3110
Step 5930/10000, Loss: 0.2853


Training Progress:  59%|█████████████████████████████████▏                      | 5932/10000 [1:29:16<11:38,  5.82it/s]

Step 5931/10000, Loss: 0.2954
Step 5932/10000, Loss: 0.3038


Training Progress:  59%|█████████████████████████████████▏                      | 5934/10000 [1:29:16<11:38,  5.82it/s]

Step 5933/10000, Loss: 0.2932
Step 5934/10000, Loss: 0.2773


Training Progress:  59%|█████████████████████████████████▏                      | 5936/10000 [1:29:17<11:38,  5.82it/s]

Step 5935/10000, Loss: 0.3030
Step 5936/10000, Loss: 0.3182


Training Progress:  59%|█████████████████████████████████▎                      | 5938/10000 [1:29:17<11:35,  5.84it/s]

Step 5937/10000, Loss: 0.2841
Step 5938/10000, Loss: 0.3042


Training Progress:  59%|█████████████████████████████████▎                      | 5940/10000 [1:29:17<11:22,  5.95it/s]

Step 5939/10000, Loss: 0.2956
Step 5940/10000, Loss: 0.2643


Training Progress:  59%|█████████████████████████████████▎                      | 5942/10000 [1:29:18<11:40,  5.79it/s]

Step 5941/10000, Loss: 0.2708
Step 5942/10000, Loss: 0.2637


Training Progress:  59%|█████████████████████████████████▎                      | 5944/10000 [1:29:18<11:34,  5.84it/s]

Step 5943/10000, Loss: 0.2922
Step 5944/10000, Loss: 0.2737


Training Progress:  59%|█████████████████████████████████▎                      | 5946/10000 [1:29:18<11:25,  5.91it/s]

Step 5945/10000, Loss: 0.2710
Step 5946/10000, Loss: 0.2794


Training Progress:  59%|█████████████████████████████████▎                      | 5948/10000 [1:29:19<11:35,  5.82it/s]

Step 5947/10000, Loss: 0.2843
Step 5948/10000, Loss: 0.2881


Training Progress:  60%|█████████████████████████████████▎                      | 5950/10000 [1:29:19<11:43,  5.76it/s]

Step 5949/10000, Loss: 0.2918
Step 5950/10000, Loss: 0.2711


Training Progress:  60%|█████████████████████████████████▎                      | 5952/10000 [1:29:19<11:38,  5.79it/s]

Step 5951/10000, Loss: 0.2816
Step 5952/10000, Loss: 0.2711


Training Progress:  60%|█████████████████████████████████▎                      | 5954/10000 [1:29:20<11:43,  5.75it/s]

Step 5953/10000, Loss: 0.2827
Step 5954/10000, Loss: 0.2764


Training Progress:  60%|█████████████████████████████████▎                      | 5956/10000 [1:29:20<11:20,  5.94it/s]

Step 5955/10000, Loss: 0.2680
Step 5956/10000, Loss: 0.2915


Training Progress:  60%|█████████████████████████████████▎                      | 5958/10000 [1:29:20<11:24,  5.90it/s]

Step 5957/10000, Loss: 0.2957
Step 5958/10000, Loss: 0.2688


Training Progress:  60%|█████████████████████████████████▍                      | 5960/10000 [1:29:21<11:38,  5.78it/s]

Step 5959/10000, Loss: 0.2695
Step 5960/10000, Loss: 0.2759


Training Progress:  60%|█████████████████████████████████▍                      | 5961/10000 [1:29:21<11:40,  5.76it/s]

Step 5961/10000, Loss: 0.2644
Step 5962/10000, Loss: 0.2424


Training Progress:  60%|████████████████████████████████▏                     | 5962/10000 [1:29:37<5:29:33,  4.90s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5962_loss0.2424_20250117_142125.pt

New best loss: 0.2424


Training Progress:  60%|████████████████████████████████▏                     | 5964/10000 [1:29:37<2:50:33,  2.54s/it]

Step 5963/10000, Loss: 0.2918
Step 5964/10000, Loss: 0.2495


Training Progress:  60%|████████████████████████████████▏                     | 5966/10000 [1:29:38<1:29:21,  1.33s/it]

Step 5965/10000, Loss: 0.2843
Step 5966/10000, Loss: 0.2674


Training Progress:  60%|████████████████████████████████▏                     | 5967/10000 [1:29:38<1:05:56,  1.02it/s]

Step 5967/10000, Loss: 0.2590
Step 5968/10000, Loss: 0.2371


Training Progress:  60%|████████████████████████████████▏                     | 5968/10000 [1:29:59<7:41:40,  6.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5968_loss0.2371_20250117_142142.pt

New best loss: 0.2371


Training Progress:  60%|████████████████████████████████▏                     | 5970/10000 [1:29:59<3:56:29,  3.52s/it]

Step 5969/10000, Loss: 0.2561
Step 5970/10000, Loss: 0.2513


Training Progress:  60%|████████████████████████████████▏                     | 5972/10000 [1:30:00<2:01:43,  1.81s/it]

Step 5971/10000, Loss: 0.2549
Step 5972/10000, Loss: 0.2439


Training Progress:  60%|████████████████████████████████▎                     | 5974/10000 [1:30:00<1:05:38,  1.02it/s]

Step 5973/10000, Loss: 0.2594
Step 5974/10000, Loss: 0.2734


Training Progress:  60%|█████████████████████████████████▍                      | 5976/10000 [1:30:00<37:57,  1.77it/s]

Step 5975/10000, Loss: 0.2678
Step 5976/10000, Loss: 0.2488


Training Progress:  60%|█████████████████████████████████▍                      | 5978/10000 [1:30:01<24:35,  2.73it/s]

Step 5977/10000, Loss: 0.2686
Step 5978/10000, Loss: 0.2828
Step 5979/10000, Loss: 0.2300


Training Progress:  60%|████████████████████████████████▎                     | 5979/10000 [1:30:21<7:04:44,  6.34s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5979_loss0.2300_20250117_142205.pt

New best loss: 0.2300


Training Progress:  60%|████████████████████████████████▎                     | 5980/10000 [1:30:21<5:04:59,  4.55s/it]

Step 5980/10000, Loss: 0.2330
Step 5981/10000, Loss: 0.2101


Training Progress:  60%|███████████████████████████████▋                     | 5981/10000 [1:30:43<10:43:37,  9.61s/it]


Checkpoint saved: checkpoints\best\checkpoint_step5981_loss0.2101_20250117_142225.pt

New best loss: 0.2101


Training Progress:  60%|████████████████████████████████▎                     | 5983/10000 [1:30:43<5:24:02,  4.84s/it]

Step 5982/10000, Loss: 0.2196
Step 5983/10000, Loss: 0.2249


Training Progress:  60%|████████████████████████████████▎                     | 5985/10000 [1:30:44<2:44:34,  2.46s/it]

Step 5984/10000, Loss: 0.2267
Step 5985/10000, Loss: 0.2339


Training Progress:  60%|████████████████████████████████▎                     | 5987/10000 [1:30:44<1:26:35,  1.29s/it]

Step 5986/10000, Loss: 0.2317
Step 5987/10000, Loss: 0.2460


Training Progress:  60%|█████████████████████████████████▌                      | 5989/10000 [1:30:44<48:10,  1.39it/s]

Step 5988/10000, Loss: 0.2253
Step 5989/10000, Loss: 0.2521


Training Progress:  60%|█████████████████████████████████▌                      | 5991/10000 [1:30:45<29:33,  2.26it/s]

Step 5990/10000, Loss: 0.2358
Step 5991/10000, Loss: 0.2262


Training Progress:  60%|█████████████████████████████████▌                      | 5993/10000 [1:30:45<20:11,  3.31it/s]

Step 5992/10000, Loss: 0.2307
Step 5993/10000, Loss: 0.2352


Training Progress:  60%|█████████████████████████████████▌                      | 5995/10000 [1:30:45<15:52,  4.20it/s]

Step 5994/10000, Loss: 0.2308
Step 5995/10000, Loss: 0.2316


Training Progress:  60%|█████████████████████████████████▌                      | 5997/10000 [1:30:46<13:26,  4.96it/s]

Step 5996/10000, Loss: 0.2332
Step 5997/10000, Loss: 0.2471


Training Progress:  60%|█████████████████████████████████▌                      | 5999/10000 [1:30:46<12:32,  5.31it/s]

Step 5998/10000, Loss: 0.2654
Step 5999/10000, Loss: 0.2634
Step 6000/10000, Loss: 0.2526


Training Progress:  60%|████████████████████████████████▍                     | 6000/10000 [1:31:06<6:40:40,  6.01s/it]


Checkpoint saved: checkpoints\checkpoint_step6000_loss0.2526_20250117_142250.pt


Training Progress:  60%|████████████████████████████████▍                     | 6002/10000 [1:31:06<3:26:24,  3.10s/it]

Step 6001/10000, Loss: 0.2173
Step 6002/10000, Loss: 0.2667


Training Progress:  60%|████████████████████████████████▍                     | 6004/10000 [1:31:07<1:46:46,  1.60s/it]

Step 6003/10000, Loss: 0.2407
Step 6004/10000, Loss: 0.2576


Training Progress:  60%|█████████████████████████████████▋                      | 6006/10000 [1:31:07<58:17,  1.14it/s]

Step 6005/10000, Loss: 0.2382
Step 6006/10000, Loss: 0.2466


Training Progress:  60%|█████████████████████████████████▋                      | 6008/10000 [1:31:07<34:15,  1.94it/s]

Step 6007/10000, Loss: 0.2411
Step 6008/10000, Loss: 0.2380


Training Progress:  60%|█████████████████████████████████▋                      | 6010/10000 [1:31:08<22:42,  2.93it/s]

Step 6009/10000, Loss: 0.2617
Step 6010/10000, Loss: 0.2533


Training Progress:  60%|█████████████████████████████████▋                      | 6012/10000 [1:31:08<16:46,  3.96it/s]

Step 6011/10000, Loss: 0.2516
Step 6012/10000, Loss: 0.2521


Training Progress:  60%|█████████████████████████████████▋                      | 6014/10000 [1:31:08<14:12,  4.68it/s]

Step 6013/10000, Loss: 0.2739
Step 6014/10000, Loss: 0.2532


Training Progress:  60%|█████████████████████████████████▋                      | 6016/10000 [1:31:09<12:38,  5.26it/s]

Step 6015/10000, Loss: 0.2467
Step 6016/10000, Loss: 0.2408


Training Progress:  60%|█████████████████████████████████▋                      | 6018/10000 [1:31:09<12:00,  5.52it/s]

Step 6017/10000, Loss: 0.2552
Step 6018/10000, Loss: 0.2488


Training Progress:  60%|█████████████████████████████████▋                      | 6020/10000 [1:31:09<11:31,  5.76it/s]

Step 6019/10000, Loss: 0.2490
Step 6020/10000, Loss: 0.2414


Training Progress:  60%|█████████████████████████████████▋                      | 6022/10000 [1:31:10<11:34,  5.73it/s]

Step 6021/10000, Loss: 0.2584
Step 6022/10000, Loss: 0.2438


Training Progress:  60%|█████████████████████████████████▋                      | 6024/10000 [1:31:10<11:17,  5.87it/s]

Step 6023/10000, Loss: 0.2329
Step 6024/10000, Loss: 0.2451


Training Progress:  60%|█████████████████████████████████▋                      | 6026/10000 [1:31:10<11:22,  5.82it/s]

Step 6025/10000, Loss: 0.2612
Step 6026/10000, Loss: 0.2512


Training Progress:  60%|█████████████████████████████████▊                      | 6028/10000 [1:31:11<11:30,  5.75it/s]

Step 6027/10000, Loss: 0.2284
Step 6028/10000, Loss: 0.2373


Training Progress:  60%|█████████████████████████████████▊                      | 6030/10000 [1:31:11<11:21,  5.83it/s]

Step 6029/10000, Loss: 0.2491
Step 6030/10000, Loss: 0.2243


Training Progress:  60%|█████████████████████████████████▊                      | 6032/10000 [1:31:11<11:25,  5.79it/s]

Step 6031/10000, Loss: 0.2494
Step 6032/10000, Loss: 0.2194


Training Progress:  60%|█████████████████████████████████▊                      | 6034/10000 [1:31:12<11:13,  5.89it/s]

Step 6033/10000, Loss: 0.2281
Step 6034/10000, Loss: 0.2479


Training Progress:  60%|█████████████████████████████████▊                      | 6036/10000 [1:31:12<11:25,  5.78it/s]

Step 6035/10000, Loss: 0.2590
Step 6036/10000, Loss: 0.2420


Training Progress:  60%|█████████████████████████████████▊                      | 6038/10000 [1:31:12<11:25,  5.78it/s]

Step 6037/10000, Loss: 0.2499
Step 6038/10000, Loss: 0.2451


Training Progress:  60%|█████████████████████████████████▊                      | 6040/10000 [1:31:13<11:15,  5.86it/s]

Step 6039/10000, Loss: 0.2548
Step 6040/10000, Loss: 0.2418


Training Progress:  60%|█████████████████████████████████▊                      | 6042/10000 [1:31:13<11:26,  5.77it/s]

Step 6041/10000, Loss: 0.2451
Step 6042/10000, Loss: 0.2535


Training Progress:  60%|█████████████████████████████████▊                      | 6044/10000 [1:31:13<11:17,  5.84it/s]

Step 6043/10000, Loss: 0.2291
Step 6044/10000, Loss: 0.2226


Training Progress:  60%|█████████████████████████████████▊                      | 6045/10000 [1:31:14<11:09,  5.91it/s]

Step 6045/10000, Loss: 0.2329
Step 6046/10000, Loss: 0.1957


Training Progress:  60%|████████████████████████████████▋                     | 6046/10000 [1:31:29<5:03:25,  4.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6046_loss0.1957_20250117_142318.pt

New best loss: 0.1957


Training Progress:  60%|████████████████████████████████▋                     | 6048/10000 [1:31:29<2:39:02,  2.41s/it]

Step 6047/10000, Loss: 0.2176
Step 6048/10000, Loss: 0.2374


Training Progress:  60%|████████████████████████████████▋                     | 6050/10000 [1:31:30<1:23:31,  1.27s/it]

Step 6049/10000, Loss: 0.2257
Step 6050/10000, Loss: 0.2148


Training Progress:  61%|█████████████████████████████████▉                      | 6052/10000 [1:31:30<46:44,  1.41it/s]

Step 6051/10000, Loss: 0.2427
Step 6052/10000, Loss: 0.2203


Training Progress:  61%|█████████████████████████████████▉                      | 6054/10000 [1:31:30<28:37,  2.30it/s]

Step 6053/10000, Loss: 0.2384
Step 6054/10000, Loss: 0.1996
Step 6055/10000, Loss: 0.1938


Training Progress:  61%|████████████████████████████████▋                     | 6055/10000 [1:31:53<7:51:39,  7.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6055_loss0.1938_20250117_142334.pt

New best loss: 0.1938


Training Progress:  61%|████████████████████████████████▋                     | 6057/10000 [1:31:54<4:01:19,  3.67s/it]

Step 6056/10000, Loss: 0.2141
Step 6057/10000, Loss: 0.2147


Training Progress:  61%|████████████████████████████████▋                     | 6059/10000 [1:31:54<2:03:53,  1.89s/it]

Step 6058/10000, Loss: 0.2130
Step 6059/10000, Loss: 0.2247


Training Progress:  61%|████████████████████████████████▋                     | 6061/10000 [1:31:54<1:06:23,  1.01s/it]

Step 6060/10000, Loss: 0.2396
Step 6061/10000, Loss: 0.2166


Training Progress:  61%|█████████████████████████████████▉                      | 6062/10000 [1:31:55<49:49,  1.32it/s]

Step 6062/10000, Loss: 0.2043
Step 6063/10000, Loss: 0.1855


Training Progress:  61%|████████████████████████████████▋                     | 6063/10000 [1:32:15<7:22:42,  6.75s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6063_loss0.1855_20250117_142359.pt

New best loss: 0.1855


Training Progress:  61%|████████████████████████████████▊                     | 6065/10000 [1:32:16<3:46:22,  3.45s/it]

Step 6064/10000, Loss: 0.1898
Step 6065/10000, Loss: 0.1935


Training Progress:  61%|████████████████████████████████▊                     | 6067/10000 [1:32:16<1:56:25,  1.78s/it]

Step 6066/10000, Loss: 0.1987
Step 6067/10000, Loss: 0.1882


Training Progress:  61%|████████████████████████████████▊                     | 6069/10000 [1:32:17<1:02:48,  1.04it/s]

Step 6068/10000, Loss: 0.1886
Step 6069/10000, Loss: 0.2119
Step 6070/10000, Loss: 0.1832


Training Progress:  61%|████████████████████████████████▊                     | 6070/10000 [1:32:37<7:28:55,  6.85s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6070_loss0.1832_20250117_142421.pt

New best loss: 0.1832


Training Progress:  61%|████████████████████████████████▊                     | 6071/10000 [1:32:38<5:20:44,  4.90s/it]

Step 6071/10000, Loss: 0.2045
Step 6072/10000, Loss: 0.1828


Training Progress:  61%|████████████████████████████████▏                    | 6072/10000 [1:33:00<11:10:05, 10.24s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6072_loss0.1828_20250117_142442.pt

New best loss: 0.1828


Training Progress:  61%|████████████████████████████████▊                     | 6073/10000 [1:33:01<7:58:38,  7.31s/it]

Step 6073/10000, Loss: 0.2022
Step 6074/10000, Loss: 0.1796


Training Progress:  61%|████████████████████████████████▏                    | 6074/10000 [1:33:24<13:15:20, 12.15s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6074_loss0.1796_20250117_142505.pt

New best loss: 0.1796


Training Progress:  61%|████████████████████████████████▊                     | 6076/10000 [1:33:25<6:37:33,  6.08s/it]

Step 6075/10000, Loss: 0.2014
Step 6076/10000, Loss: 0.2179


Training Progress:  61%|████████████████████████████████▊                     | 6078/10000 [1:33:25<3:20:28,  3.07s/it]

Step 6077/10000, Loss: 0.1991
Step 6078/10000, Loss: 0.1970


Training Progress:  61%|████████████████████████████████▊                     | 6080/10000 [1:33:25<1:43:43,  1.59s/it]

Step 6079/10000, Loss: 0.2196
Step 6080/10000, Loss: 0.2363


Training Progress:  61%|██████████████████████████████████                      | 6082/10000 [1:33:26<56:39,  1.15it/s]

Step 6081/10000, Loss: 0.2222
Step 6082/10000, Loss: 0.2289


Training Progress:  61%|██████████████████████████████████                      | 6084/10000 [1:33:26<33:19,  1.96it/s]

Step 6083/10000, Loss: 0.1839
Step 6084/10000, Loss: 0.2123


Training Progress:  61%|██████████████████████████████████                      | 6086/10000 [1:33:27<22:00,  2.96it/s]

Step 6085/10000, Loss: 0.1962
Step 6086/10000, Loss: 0.2200


Training Progress:  61%|██████████████████████████████████                      | 6088/10000 [1:33:27<16:39,  3.91it/s]

Step 6087/10000, Loss: 0.1994
Step 6088/10000, Loss: 0.2293


Training Progress:  61%|██████████████████████████████████                      | 6090/10000 [1:33:27<13:40,  4.77it/s]

Step 6089/10000, Loss: 0.2023
Step 6090/10000, Loss: 0.2109


Training Progress:  61%|██████████████████████████████████                      | 6092/10000 [1:33:28<12:32,  5.19it/s]

Step 6091/10000, Loss: 0.2368
Step 6092/10000, Loss: 0.2321


Training Progress:  61%|██████████████████████████████████▏                     | 6094/10000 [1:33:28<11:58,  5.43it/s]

Step 6093/10000, Loss: 0.2142
Step 6094/10000, Loss: 0.2041


Training Progress:  61%|██████████████████████████████████▏                     | 6096/10000 [1:33:28<11:32,  5.64it/s]

Step 6095/10000, Loss: 0.2342
Step 6096/10000, Loss: 0.2065


Training Progress:  61%|██████████████████████████████████▏                     | 6098/10000 [1:33:29<11:23,  5.71it/s]

Step 6097/10000, Loss: 0.2000
Step 6098/10000, Loss: 0.1872


Training Progress:  61%|██████████████████████████████████▏                     | 6100/10000 [1:33:29<11:06,  5.85it/s]

Step 6099/10000, Loss: 0.2108
Step 6100/10000, Loss: 0.2114


Training Progress:  61%|██████████████████████████████████▏                     | 6102/10000 [1:33:29<11:18,  5.74it/s]

Step 6101/10000, Loss: 0.2084
Step 6102/10000, Loss: 0.2252


Training Progress:  61%|██████████████████████████████████▏                     | 6104/10000 [1:33:30<10:59,  5.91it/s]

Step 6103/10000, Loss: 0.2299
Step 6104/10000, Loss: 0.2146


Training Progress:  61%|██████████████████████████████████▏                     | 6106/10000 [1:33:30<11:04,  5.86it/s]

Step 6105/10000, Loss: 0.2051
Step 6106/10000, Loss: 0.2085


Training Progress:  61%|██████████████████████████████████▏                     | 6108/10000 [1:33:30<11:00,  5.89it/s]

Step 6107/10000, Loss: 0.2178
Step 6108/10000, Loss: 0.2131


Training Progress:  61%|██████████████████████████████████▏                     | 6110/10000 [1:33:31<11:16,  5.75it/s]

Step 6109/10000, Loss: 0.1988
Step 6110/10000, Loss: 0.2030


Training Progress:  61%|██████████████████████████████████▏                     | 6112/10000 [1:33:31<11:12,  5.78it/s]

Step 6111/10000, Loss: 0.2404
Step 6112/10000, Loss: 0.2051


Training Progress:  61%|██████████████████████████████████▏                     | 6114/10000 [1:33:31<11:02,  5.87it/s]

Step 6113/10000, Loss: 0.2055
Step 6114/10000, Loss: 0.1806


Training Progress:  61%|██████████████████████████████████▏                     | 6116/10000 [1:33:32<11:06,  5.83it/s]

Step 6115/10000, Loss: 0.1975
Step 6116/10000, Loss: 0.2050


Training Progress:  61%|██████████████████████████████████▎                     | 6118/10000 [1:33:32<10:53,  5.94it/s]

Step 6117/10000, Loss: 0.2103
Step 6118/10000, Loss: 0.2195


Training Progress:  61%|██████████████████████████████████▎                     | 6120/10000 [1:33:32<11:00,  5.87it/s]

Step 6119/10000, Loss: 0.2123
Step 6120/10000, Loss: 0.1930


Training Progress:  61%|██████████████████████████████████▎                     | 6122/10000 [1:33:33<11:12,  5.77it/s]

Step 6121/10000, Loss: 0.2341
Step 6122/10000, Loss: 0.2141


Training Progress:  61%|██████████████████████████████████▎                     | 6124/10000 [1:33:33<11:09,  5.79it/s]

Step 6123/10000, Loss: 0.2230
Step 6124/10000, Loss: 0.2570


Training Progress:  61%|██████████████████████████████████▎                     | 6126/10000 [1:33:33<11:07,  5.81it/s]

Step 6125/10000, Loss: 0.2173
Step 6126/10000, Loss: 0.1976


Training Progress:  61%|██████████████████████████████████▎                     | 6127/10000 [1:33:34<11:02,  5.84it/s]

Step 6127/10000, Loss: 0.2232
Step 6128/10000, Loss: 0.1773


Training Progress:  61%|█████████████████████████████████                     | 6128/10000 [1:33:51<5:55:11,  5.50s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6128_loss0.1773_20250117_142538.pt

New best loss: 0.1773


Training Progress:  61%|█████████████████████████████████                     | 6130/10000 [1:33:52<3:04:16,  2.86s/it]

Step 6129/10000, Loss: 0.1999
Step 6130/10000, Loss: 0.2024


Training Progress:  61%|█████████████████████████████████                     | 6132/10000 [1:33:53<1:35:53,  1.49s/it]

Step 6131/10000, Loss: 0.1909
Step 6132/10000, Loss: 0.1931


Training Progress:  61%|██████████████████████████████████▎                     | 6134/10000 [1:33:53<52:34,  1.23it/s]

Step 6133/10000, Loss: 0.2059
Step 6134/10000, Loss: 0.2028


Training Progress:  61%|██████████████████████████████████▎                     | 6136/10000 [1:33:53<31:23,  2.05it/s]

Step 6135/10000, Loss: 0.2088
Step 6136/10000, Loss: 0.1851


Training Progress:  61%|██████████████████████████████████▎                     | 6138/10000 [1:33:54<20:59,  3.07it/s]

Step 6137/10000, Loss: 0.1780
Step 6138/10000, Loss: 0.2025


Training Progress:  61%|██████████████████████████████████▍                     | 6140/10000 [1:33:54<15:53,  4.05it/s]

Step 6139/10000, Loss: 0.1837
Step 6140/10000, Loss: 0.1826


Training Progress:  61%|██████████████████████████████████▍                     | 6142/10000 [1:33:54<13:24,  4.80it/s]

Step 6141/10000, Loss: 0.2030
Step 6142/10000, Loss: 0.1993
Step 6143/10000, Loss: 0.1727


Training Progress:  61%|█████████████████████████████████▏                    | 6143/10000 [1:34:13<6:07:20,  5.71s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6143_loss0.1727_20250117_142558.pt

New best loss: 0.1727


Training Progress:  61%|█████████████████████████████████▏                    | 6144/10000 [1:34:13<4:25:05,  4.12s/it]

Step 6144/10000, Loss: 0.1827
Step 6145/10000, Loss: 0.1690


Training Progress:  61%|█████████████████████████████████▏                    | 6145/10000 [1:34:33<9:36:04,  8.97s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6145_loss0.1690_20250117_142617.pt

New best loss: 0.1690


Training Progress:  61%|█████████████████████████████████▏                    | 6147/10000 [1:34:34<4:52:29,  4.55s/it]

Step 6146/10000, Loss: 0.1715
Step 6147/10000, Loss: 0.1715


Training Progress:  61%|█████████████████████████████████▏                    | 6149/10000 [1:34:35<2:28:59,  2.32s/it]

Step 6148/10000, Loss: 0.1741
Step 6149/10000, Loss: 0.1779


Training Progress:  62%|█████████████████████████████████▏                    | 6151/10000 [1:34:35<1:18:25,  1.22s/it]

Step 6150/10000, Loss: 0.1843
Step 6151/10000, Loss: 0.1741
Step 6152/10000, Loss: 0.1683


Training Progress:  62%|█████████████████████████████████▏                    | 6152/10000 [1:34:57<8:03:15,  7.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6152_loss0.1683_20250117_142639.pt

New best loss: 0.1683


Training Progress:  62%|█████████████████████████████████▏                    | 6153/10000 [1:34:58<5:47:16,  5.42s/it]

Step 6153/10000, Loss: 0.1748
Step 6154/10000, Loss: 0.1641


Training Progress:  62%|████████████████████████████████▌                    | 6154/10000 [1:35:20<11:17:21, 10.57s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6154_loss0.1641_20250117_142702.pt

New best loss: 0.1641


Training Progress:  62%|█████████████████████████████████▏                    | 6155/10000 [1:35:21<8:01:53,  7.52s/it]

Step 6155/10000, Loss: 0.1777
Step 6156/10000, Loss: 0.1619


Training Progress:  62%|████████████████████████████████▋                    | 6156/10000 [1:35:41<12:13:43, 11.45s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6156_loss0.1619_20250117_142725.pt

New best loss: 0.1619


Training Progress:  62%|█████████████████████████████████▎                    | 6158/10000 [1:35:42<6:07:27,  5.74s/it]

Step 6157/10000, Loss: 0.1699
Step 6158/10000, Loss: 0.1927


Training Progress:  62%|█████████████████████████████████▎                    | 6159/10000 [1:35:42<4:20:39,  4.07s/it]

Step 6159/10000, Loss: 0.1665
Step 6160/10000, Loss: 0.1615


Training Progress:  62%|█████████████████████████████████▎                    | 6160/10000 [1:36:03<9:51:58,  9.25s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6160_loss0.1615_20250117_142746.pt

New best loss: 0.1615


Training Progress:  62%|█████████████████████████████████▎                    | 6162/10000 [1:36:04<5:00:14,  4.69s/it]

Step 6161/10000, Loss: 0.1835
Step 6162/10000, Loss: 0.2006


Training Progress:  62%|█████████████████████████████████▎                    | 6164/10000 [1:36:04<2:32:33,  2.39s/it]

Step 6163/10000, Loss: 0.1897
Step 6164/10000, Loss: 0.1909


Training Progress:  62%|█████████████████████████████████▎                    | 6166/10000 [1:36:05<1:20:21,  1.26s/it]

Step 6165/10000, Loss: 0.1705
Step 6166/10000, Loss: 0.1979


Training Progress:  62%|██████████████████████████████████▌                     | 6168/10000 [1:36:05<44:48,  1.43it/s]

Step 6167/10000, Loss: 0.1627
Step 6168/10000, Loss: 0.1769


Training Progress:  62%|██████████████████████████████████▌                     | 6170/10000 [1:36:05<27:40,  2.31it/s]

Step 6169/10000, Loss: 0.1687
Step 6170/10000, Loss: 0.1729
Step 6171/10000, Loss: 0.1604


Training Progress:  62%|█████████████████████████████████▎                    | 6171/10000 [1:36:26<6:53:20,  6.48s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6171_loss0.1604_20250117_142809.pt

New best loss: 0.1604


Training Progress:  62%|█████████████████████████████████▎                    | 6173/10000 [1:36:27<3:32:05,  3.33s/it]

Step 6172/10000, Loss: 0.1786
Step 6173/10000, Loss: 0.1810


Training Progress:  62%|█████████████████████████████████▎                    | 6174/10000 [1:36:27<2:31:39,  2.38s/it]

Step 6174/10000, Loss: 0.1792
Step 6175/10000, Loss: 0.1602


Training Progress:  62%|█████████████████████████████████▎                    | 6175/10000 [1:36:49<8:56:44,  8.42s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6175_loss0.1602_20250117_142831.pt

New best loss: 0.1602


Training Progress:  62%|█████████████████████████████████▎                    | 6177/10000 [1:36:50<4:30:40,  4.25s/it]

Step 6176/10000, Loss: 0.1689
Step 6177/10000, Loss: 0.1851


Training Progress:  62%|█████████████████████████████████▎                    | 6179/10000 [1:36:50<2:18:11,  2.17s/it]

Step 6178/10000, Loss: 0.1947
Step 6179/10000, Loss: 0.1786


Training Progress:  62%|█████████████████████████████████▍                    | 6181/10000 [1:36:50<1:13:11,  1.15s/it]

Step 6180/10000, Loss: 0.1682
Step 6181/10000, Loss: 0.1811


Training Progress:  62%|██████████████████████████████████▌                     | 6183/10000 [1:36:51<41:16,  1.54it/s]

Step 6182/10000, Loss: 0.1766
Step 6183/10000, Loss: 0.1672


Training Progress:  62%|██████████████████████████████████▋                     | 6185/10000 [1:36:51<25:54,  2.45it/s]

Step 6184/10000, Loss: 0.1643
Step 6185/10000, Loss: 0.1759
Step 6186/10000, Loss: 0.1484


Training Progress:  62%|█████████████████████████████████▍                    | 6186/10000 [1:37:12<7:01:23,  6.63s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6186_loss0.1484_20250117_142855.pt

New best loss: 0.1484


Training Progress:  62%|█████████████████████████████████▍                    | 6188/10000 [1:37:13<3:36:25,  3.41s/it]

Step 6187/10000, Loss: 0.1608
Step 6188/10000, Loss: 0.1751


Training Progress:  62%|█████████████████████████████████▍                    | 6190/10000 [1:37:13<1:51:35,  1.76s/it]

Step 6189/10000, Loss: 0.1825
Step 6190/10000, Loss: 0.1914


Training Progress:  62%|█████████████████████████████████▍                    | 6192/10000 [1:37:14<1:00:11,  1.05it/s]

Step 6191/10000, Loss: 0.1790
Step 6192/10000, Loss: 0.1891


Training Progress:  62%|██████████████████████████████████▋                     | 6194/10000 [1:37:14<35:00,  1.81it/s]

Step 6193/10000, Loss: 0.2016
Step 6194/10000, Loss: 0.1869


Training Progress:  62%|██████████████████████████████████▋                     | 6196/10000 [1:37:14<22:34,  2.81it/s]

Step 6195/10000, Loss: 0.1935
Step 6196/10000, Loss: 0.1728


Training Progress:  62%|██████████████████████████████████▋                     | 6198/10000 [1:37:15<16:40,  3.80it/s]

Step 6197/10000, Loss: 0.1792
Step 6198/10000, Loss: 0.1553


Training Progress:  62%|██████████████████████████████████▋                     | 6200/10000 [1:37:15<13:42,  4.62it/s]

Step 6199/10000, Loss: 0.1618
Step 6200/10000, Loss: 0.1687


Training Progress:  62%|██████████████████████████████████▋                     | 6202/10000 [1:37:15<12:16,  5.16it/s]

Step 6201/10000, Loss: 0.1677
Step 6202/10000, Loss: 0.1490


Training Progress:  62%|██████████████████████████████████▋                     | 6204/10000 [1:37:16<11:32,  5.48it/s]

Step 6203/10000, Loss: 0.1828
Step 6204/10000, Loss: 0.1818


Training Progress:  62%|██████████████████████████████████▊                     | 6206/10000 [1:37:16<11:09,  5.67it/s]

Step 6205/10000, Loss: 0.1877
Step 6206/10000, Loss: 0.2260


Training Progress:  62%|██████████████████████████████████▊                     | 6208/10000 [1:37:16<10:50,  5.83it/s]

Step 6207/10000, Loss: 0.1869
Step 6208/10000, Loss: 0.1574


Training Progress:  62%|██████████████████████████████████▊                     | 6210/10000 [1:37:17<10:53,  5.80it/s]

Step 6209/10000, Loss: 0.1920
Step 6210/10000, Loss: 0.1740


Training Progress:  62%|██████████████████████████████████▊                     | 6212/10000 [1:37:17<11:00,  5.73it/s]

Step 6211/10000, Loss: 0.1895
Step 6212/10000, Loss: 0.1757


Training Progress:  62%|██████████████████████████████████▊                     | 6214/10000 [1:37:17<10:44,  5.87it/s]

Step 6213/10000, Loss: 0.1799
Step 6214/10000, Loss: 0.1528


Training Progress:  62%|██████████████████████████████████▊                     | 6216/10000 [1:37:18<10:54,  5.78it/s]

Step 6215/10000, Loss: 0.1907
Step 6216/10000, Loss: 0.1736


Training Progress:  62%|██████████████████████████████████▊                     | 6218/10000 [1:37:18<10:43,  5.88it/s]

Step 6217/10000, Loss: 0.1730
Step 6218/10000, Loss: 0.1525


Training Progress:  62%|██████████████████████████████████▊                     | 6220/10000 [1:37:18<10:52,  5.79it/s]

Step 6219/10000, Loss: 0.1686
Step 6220/10000, Loss: 0.1788


Training Progress:  62%|██████████████████████████████████▊                     | 6222/10000 [1:37:19<10:41,  5.88it/s]

Step 6221/10000, Loss: 0.1567
Step 6222/10000, Loss: 0.1684


Training Progress:  62%|██████████████████████████████████▊                     | 6224/10000 [1:37:19<10:48,  5.82it/s]

Step 6223/10000, Loss: 0.1862
Step 6224/10000, Loss: 0.1787


Training Progress:  62%|██████████████████████████████████▊                     | 6226/10000 [1:37:19<10:54,  5.77it/s]

Step 6225/10000, Loss: 0.1561
Step 6226/10000, Loss: 0.1572


Training Progress:  62%|██████████████████████████████████▉                     | 6228/10000 [1:37:20<10:42,  5.87it/s]

Step 6227/10000, Loss: 0.1582
Step 6228/10000, Loss: 0.1670


Training Progress:  62%|██████████████████████████████████▉                     | 6230/10000 [1:37:20<10:52,  5.78it/s]

Step 6229/10000, Loss: 0.1609
Step 6230/10000, Loss: 0.1650


Training Progress:  62%|██████████████████████████████████▉                     | 6232/10000 [1:37:20<10:39,  5.89it/s]

Step 6231/10000, Loss: 0.1665
Step 6232/10000, Loss: 0.1633


Training Progress:  62%|██████████████████████████████████▉                     | 6234/10000 [1:37:21<10:46,  5.82it/s]

Step 6233/10000, Loss: 0.1583
Step 6234/10000, Loss: 0.1513
Step 6235/10000, Loss: 0.1470


Training Progress:  62%|█████████████████████████████████▋                    | 6235/10000 [1:37:39<5:55:03,  5.66s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6235_loss0.1470_20250117_142925.pt

New best loss: 0.1470
Step 6236/10000, Loss: 0.1437


Training Progress:  62%|█████████████████████████████████                    | 6236/10000 [1:38:01<10:51:18, 10.38s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6236_loss0.1437_20250117_142944.pt

New best loss: 0.1437


Training Progress:  62%|█████████████████████████████████▋                    | 6237/10000 [1:38:01<7:44:30,  7.41s/it]

Step 6237/10000, Loss: 0.1546
Step 6238/10000, Loss: 0.1384


Training Progress:  62%|█████████████████████████████████                    | 6238/10000 [1:38:21<11:39:58, 11.16s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6238_loss0.1384_20250117_143005.pt

New best loss: 0.1384


Training Progress:  62%|█████████████████████████████████▋                    | 6240/10000 [1:38:22<5:52:14,  5.62s/it]

Step 6239/10000, Loss: 0.1512
Step 6240/10000, Loss: 0.1497


Training Progress:  62%|█████████████████████████████████▋                    | 6242/10000 [1:38:22<2:57:57,  2.84s/it]

Step 6241/10000, Loss: 0.1401
Step 6242/10000, Loss: 0.1497


Training Progress:  62%|█████████████████████████████████▋                    | 6244/10000 [1:38:22<1:32:35,  1.48s/it]

Step 6243/10000, Loss: 0.1734
Step 6244/10000, Loss: 0.1801


Training Progress:  62%|██████████████████████████████████▉                     | 6246/10000 [1:38:23<50:49,  1.23it/s]

Step 6245/10000, Loss: 0.1740
Step 6246/10000, Loss: 0.1544


Training Progress:  62%|██████████████████████████████████▉                     | 6248/10000 [1:38:23<30:20,  2.06it/s]

Step 6247/10000, Loss: 0.1528
Step 6248/10000, Loss: 0.1962


Training Progress:  62%|███████████████████████████████████                     | 6250/10000 [1:38:23<20:22,  3.07it/s]

Step 6249/10000, Loss: 0.1608
Step 6250/10000, Loss: 0.1548


Training Progress:  63%|███████████████████████████████████                     | 6252/10000 [1:38:24<15:24,  4.06it/s]

Step 6251/10000, Loss: 0.1476
Step 6252/10000, Loss: 0.1496


Training Progress:  63%|███████████████████████████████████                     | 6254/10000 [1:38:24<13:00,  4.80it/s]

Step 6253/10000, Loss: 0.1469
Step 6254/10000, Loss: 0.1573


Training Progress:  63%|███████████████████████████████████                     | 6256/10000 [1:38:24<11:52,  5.25it/s]

Step 6255/10000, Loss: 0.1432
Step 6256/10000, Loss: 0.1493


Training Progress:  63%|███████████████████████████████████                     | 6258/10000 [1:38:25<11:14,  5.54it/s]

Step 6257/10000, Loss: 0.1401
Step 6258/10000, Loss: 0.1428


Training Progress:  63%|███████████████████████████████████                     | 6260/10000 [1:38:25<10:57,  5.69it/s]

Step 6259/10000, Loss: 0.1490
Step 6260/10000, Loss: 0.1547


Training Progress:  63%|███████████████████████████████████                     | 6261/10000 [1:38:25<10:52,  5.73it/s]

Step 6261/10000, Loss: 0.1510
Step 6262/10000, Loss: 0.1341


Training Progress:  63%|█████████████████████████████████▊                    | 6262/10000 [1:38:45<6:08:07,  5.91s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6262_loss0.1341_20250117_143029.pt

New best loss: 0.1341


Training Progress:  63%|█████████████████████████████████▊                    | 6264/10000 [1:38:45<3:08:01,  3.02s/it]

Step 6263/10000, Loss: 0.1606
Step 6264/10000, Loss: 0.1540


Training Progress:  63%|█████████████████████████████████▊                    | 6266/10000 [1:38:45<1:37:22,  1.56s/it]

Step 6265/10000, Loss: 0.1550
Step 6266/10000, Loss: 0.1702


Training Progress:  63%|███████████████████████████████████                     | 6268/10000 [1:38:46<52:58,  1.17it/s]

Step 6267/10000, Loss: 0.1493
Step 6268/10000, Loss: 0.1362


Training Progress:  63%|███████████████████████████████████                     | 6270/10000 [1:38:46<31:25,  1.98it/s]

Step 6269/10000, Loss: 0.1351
Step 6270/10000, Loss: 0.1355


Training Progress:  63%|███████████████████████████████████                     | 6272/10000 [1:38:46<20:57,  2.96it/s]

Step 6271/10000, Loss: 0.1500
Step 6272/10000, Loss: 0.1467


Training Progress:  63%|███████████████████████████████████▏                    | 6274/10000 [1:38:47<15:31,  4.00it/s]

Step 6273/10000, Loss: 0.1505
Step 6274/10000, Loss: 0.1441


Training Progress:  63%|███████████████████████████████████▏                    | 6276/10000 [1:38:47<13:10,  4.71it/s]

Step 6275/10000, Loss: 0.1484
Step 6276/10000, Loss: 0.1498


Training Progress:  63%|███████████████████████████████████▏                    | 6278/10000 [1:38:47<11:44,  5.29it/s]

Step 6277/10000, Loss: 0.1498
Step 6278/10000, Loss: 0.1471


Training Progress:  63%|███████████████████████████████████▏                    | 6280/10000 [1:38:48<11:14,  5.52it/s]

Step 6279/10000, Loss: 0.1537
Step 6280/10000, Loss: 0.1490


Training Progress:  63%|███████████████████████████████████▏                    | 6282/10000 [1:38:48<10:47,  5.74it/s]

Step 6281/10000, Loss: 0.1643
Step 6282/10000, Loss: 0.1534
Step 6283/10000, Loss: 0.1278


Training Progress:  63%|█████████████████████████████████▉                    | 6283/10000 [1:39:07<5:48:32,  5.63s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6283_loss0.1278_20250117_143052.pt

New best loss: 0.1278
Step 6284/10000, Loss: 0.1270


Training Progress:  63%|█████████████████████████████████▎                   | 6284/10000 [1:39:29<10:56:21, 10.60s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6284_loss0.1270_20250117_143111.pt

New best loss: 0.1270


Training Progress:  63%|█████████████████████████████████▉                    | 6286/10000 [1:39:29<5:29:47,  5.33s/it]

Step 6285/10000, Loss: 0.1379
Step 6286/10000, Loss: 0.1437


Training Progress:  63%|█████████████████████████████████▉                    | 6288/10000 [1:39:30<2:46:53,  2.70s/it]

Step 6287/10000, Loss: 0.1424
Step 6288/10000, Loss: 0.1681


Training Progress:  63%|█████████████████████████████████▉                    | 6290/10000 [1:39:30<1:27:07,  1.41s/it]

Step 6289/10000, Loss: 0.1528
Step 6290/10000, Loss: 0.1400


Training Progress:  63%|███████████████████████████████████▏                    | 6292/10000 [1:39:30<48:02,  1.29it/s]

Step 6291/10000, Loss: 0.1692
Step 6292/10000, Loss: 0.1463


Training Progress:  63%|███████████████████████████████████▏                    | 6294/10000 [1:39:31<28:56,  2.13it/s]

Step 6293/10000, Loss: 0.1618
Step 6294/10000, Loss: 0.1572


Training Progress:  63%|███████████████████████████████████▎                    | 6296/10000 [1:39:31<19:33,  3.16it/s]

Step 6295/10000, Loss: 0.1719
Step 6296/10000, Loss: 0.1432


Training Progress:  63%|███████████████████████████████████▎                    | 6298/10000 [1:39:31<14:57,  4.12it/s]

Step 6297/10000, Loss: 0.1685
Step 6298/10000, Loss: 0.1352


Training Progress:  63%|███████████████████████████████████▎                    | 6300/10000 [1:39:32<12:43,  4.84it/s]

Step 6299/10000, Loss: 0.1442
Step 6300/10000, Loss: 0.1350


Training Progress:  63%|███████████████████████████████████▎                    | 6302/10000 [1:39:32<11:35,  5.32it/s]

Step 6301/10000, Loss: 0.1319
Step 6302/10000, Loss: 0.1477


Training Progress:  63%|███████████████████████████████████▎                    | 6304/10000 [1:39:32<10:53,  5.65it/s]

Step 6303/10000, Loss: 0.1385
Step 6304/10000, Loss: 0.1609


Training Progress:  63%|███████████████████████████████████▎                    | 6306/10000 [1:39:33<10:52,  5.66it/s]

Step 6305/10000, Loss: 0.1789
Step 6306/10000, Loss: 0.1582


Training Progress:  63%|███████████████████████████████████▎                    | 6308/10000 [1:39:33<10:32,  5.83it/s]

Step 6307/10000, Loss: 0.1465
Step 6308/10000, Loss: 0.1360


Training Progress:  63%|███████████████████████████████████▎                    | 6310/10000 [1:39:33<10:36,  5.80it/s]

Step 6309/10000, Loss: 0.1394
Step 6310/10000, Loss: 0.1361


Training Progress:  63%|███████████████████████████████████▎                    | 6311/10000 [1:39:34<10:36,  5.80it/s]

Step 6311/10000, Loss: 0.1383
Step 6312/10000, Loss: 0.1215


Training Progress:  63%|██████████████████████████████████                    | 6312/10000 [1:39:50<5:11:59,  5.08s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6312_loss0.1215_20250117_143138.pt

New best loss: 0.1215


Training Progress:  63%|██████████████████████████████████                    | 6314/10000 [1:39:51<2:41:28,  2.63s/it]

Step 6313/10000, Loss: 0.1279
Step 6314/10000, Loss: 0.1372


Training Progress:  63%|██████████████████████████████████                    | 6316/10000 [1:39:51<1:24:25,  1.38s/it]

Step 6315/10000, Loss: 0.1471
Step 6316/10000, Loss: 0.1481


Training Progress:  63%|███████████████████████████████████▍                    | 6318/10000 [1:39:51<46:45,  1.31it/s]

Step 6317/10000, Loss: 0.1490
Step 6318/10000, Loss: 0.1336


Training Progress:  63%|███████████████████████████████████▍                    | 6319/10000 [1:39:52<35:49,  1.71it/s]

Step 6319/10000, Loss: 0.1451
Step 6320/10000, Loss: 0.1184


Training Progress:  63%|██████████████████████████████████▏                   | 6320/10000 [1:40:14<7:12:44,  7.06s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6320_loss0.1184_20250117_143156.pt

New best loss: 0.1184


Training Progress:  63%|██████████████████████████████████▏                   | 6322/10000 [1:40:14<3:40:14,  3.59s/it]

Step 6321/10000, Loss: 0.1286
Step 6322/10000, Loss: 0.1289
Step 6323/10000, Loss: 0.1133


Training Progress:  63%|██████████████████████████████████▏                   | 6323/10000 [1:40:35<9:00:03,  8.81s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6323_loss0.1133_20250117_143218.pt

New best loss: 0.1133


Training Progress:  63%|██████████████████████████████████▏                   | 6325/10000 [1:40:36<4:33:15,  4.46s/it]

Step 6324/10000, Loss: 0.1322
Step 6325/10000, Loss: 0.1502


Training Progress:  63%|██████████████████████████████████▏                   | 6327/10000 [1:40:36<2:19:11,  2.27s/it]

Step 6326/10000, Loss: 0.1684
Step 6327/10000, Loss: 0.1533


Training Progress:  63%|██████████████████████████████████▏                   | 6329/10000 [1:40:37<1:13:31,  1.20s/it]

Step 6328/10000, Loss: 0.1547
Step 6329/10000, Loss: 0.1432


Training Progress:  63%|███████████████████████████████████▍                    | 6331/10000 [1:40:37<41:20,  1.48it/s]

Step 6330/10000, Loss: 0.1671
Step 6331/10000, Loss: 0.1493


Training Progress:  63%|███████████████████████████████████▍                    | 6333/10000 [1:40:37<25:21,  2.41it/s]

Step 6332/10000, Loss: 0.1294
Step 6333/10000, Loss: 0.1347


Training Progress:  63%|███████████████████████████████████▍                    | 6335/10000 [1:40:38<17:56,  3.40it/s]

Step 6334/10000, Loss: 0.1369
Step 6335/10000, Loss: 0.1326


Training Progress:  63%|███████████████████████████████████▍                    | 6337/10000 [1:40:38<14:07,  4.32it/s]

Step 6336/10000, Loss: 0.1394
Step 6337/10000, Loss: 0.1435


Training Progress:  63%|███████████████████████████████████▍                    | 6339/10000 [1:40:38<12:16,  4.97it/s]

Step 6338/10000, Loss: 0.1286
Step 6339/10000, Loss: 0.1207


Training Progress:  63%|███████████████████████████████████▌                    | 6341/10000 [1:40:39<11:19,  5.38it/s]

Step 6340/10000, Loss: 0.1319
Step 6341/10000, Loss: 0.1231


Training Progress:  63%|███████████████████████████████████▌                    | 6343/10000 [1:40:39<10:52,  5.60it/s]

Step 6342/10000, Loss: 0.1498
Step 6343/10000, Loss: 0.1239


Training Progress:  63%|███████████████████████████████████▌                    | 6345/10000 [1:40:39<10:39,  5.71it/s]

Step 6344/10000, Loss: 0.1168
Step 6345/10000, Loss: 0.1361


Training Progress:  63%|███████████████████████████████████▌                    | 6347/10000 [1:40:40<10:31,  5.78it/s]

Step 6346/10000, Loss: 0.1299
Step 6347/10000, Loss: 0.1391


Training Progress:  63%|███████████████████████████████████▌                    | 6349/10000 [1:40:40<10:30,  5.79it/s]

Step 6348/10000, Loss: 0.1307
Step 6349/10000, Loss: 0.1351


Training Progress:  64%|███████████████████████████████████▌                    | 6351/10000 [1:40:40<10:30,  5.79it/s]

Step 6350/10000, Loss: 0.1194
Step 6351/10000, Loss: 0.1240


Training Progress:  64%|███████████████████████████████████▌                    | 6353/10000 [1:40:41<10:28,  5.80it/s]

Step 6352/10000, Loss: 0.1347
Step 6353/10000, Loss: 0.1200


Training Progress:  64%|███████████████████████████████████▌                    | 6355/10000 [1:40:41<10:27,  5.81it/s]

Step 6354/10000, Loss: 0.1265
Step 6355/10000, Loss: 0.1174


Training Progress:  64%|███████████████████████████████████▌                    | 6357/10000 [1:40:41<10:23,  5.84it/s]

Step 6356/10000, Loss: 0.1196
Step 6357/10000, Loss: 0.1196


Training Progress:  64%|███████████████████████████████████▌                    | 6359/10000 [1:40:42<10:26,  5.81it/s]

Step 6358/10000, Loss: 0.1253
Step 6359/10000, Loss: 0.1255


Training Progress:  64%|███████████████████████████████████▌                    | 6361/10000 [1:40:42<10:25,  5.82it/s]

Step 6360/10000, Loss: 0.1160
Step 6361/10000, Loss: 0.1330


Training Progress:  64%|███████████████████████████████████▋                    | 6363/10000 [1:40:42<10:24,  5.82it/s]

Step 6362/10000, Loss: 0.1397
Step 6363/10000, Loss: 0.1355


Training Progress:  64%|███████████████████████████████████▋                    | 6365/10000 [1:40:43<10:24,  5.82it/s]

Step 6364/10000, Loss: 0.1263
Step 6365/10000, Loss: 0.1180


Training Progress:  64%|███████████████████████████████████▋                    | 6367/10000 [1:40:43<10:23,  5.83it/s]

Step 6366/10000, Loss: 0.1212
Step 6367/10000, Loss: 0.1269


Training Progress:  64%|███████████████████████████████████▋                    | 6369/10000 [1:40:43<10:11,  5.94it/s]

Step 6368/10000, Loss: 0.1267
Step 6369/10000, Loss: 0.1278


Training Progress:  64%|███████████████████████████████████▋                    | 6371/10000 [1:40:44<10:25,  5.81it/s]

Step 6370/10000, Loss: 0.1324
Step 6371/10000, Loss: 0.1178
Step 6372/10000, Loss: 0.1054


Training Progress:  64%|██████████████████████████████████▍                   | 6372/10000 [1:40:59<4:45:46,  4.73s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6372_loss0.1054_20250117_143248.pt

New best loss: 0.1054


Training Progress:  64%|██████████████████████████████████▍                   | 6374/10000 [1:41:00<2:27:53,  2.45s/it]

Step 6373/10000, Loss: 0.1274
Step 6374/10000, Loss: 0.1304


Training Progress:  64%|██████████████████████████████████▍                   | 6376/10000 [1:41:00<1:17:45,  1.29s/it]

Step 6375/10000, Loss: 0.1355
Step 6376/10000, Loss: 0.1347


Training Progress:  64%|███████████████████████████████████▋                    | 6378/10000 [1:41:00<43:21,  1.39it/s]

Step 6377/10000, Loss: 0.1366
Step 6378/10000, Loss: 0.1294


Training Progress:  64%|███████████████████████████████████▋                    | 6380/10000 [1:41:01<26:29,  2.28it/s]

Step 6379/10000, Loss: 0.1536
Step 6380/10000, Loss: 0.1184


Training Progress:  64%|███████████████████████████████████▋                    | 6382/10000 [1:41:01<18:14,  3.30it/s]

Step 6381/10000, Loss: 0.1318
Step 6382/10000, Loss: 0.1184


Training Progress:  64%|███████████████████████████████████▊                    | 6384/10000 [1:41:01<14:11,  4.25it/s]

Step 6383/10000, Loss: 0.1179
Step 6384/10000, Loss: 0.1166


Training Progress:  64%|███████████████████████████████████▊                    | 6386/10000 [1:41:02<12:06,  4.98it/s]

Step 6385/10000, Loss: 0.1244
Step 6386/10000, Loss: 0.1288


Training Progress:  64%|███████████████████████████████████▊                    | 6388/10000 [1:41:02<11:19,  5.31it/s]

Step 6387/10000, Loss: 0.1280
Step 6388/10000, Loss: 0.1330


Training Progress:  64%|███████████████████████████████████▊                    | 6390/10000 [1:41:02<10:38,  5.65it/s]

Step 6389/10000, Loss: 0.1373
Step 6390/10000, Loss: 0.1315


Training Progress:  64%|███████████████████████████████████▊                    | 6392/10000 [1:41:03<10:37,  5.66it/s]

Step 6391/10000, Loss: 0.1246
Step 6392/10000, Loss: 0.1167


Training Progress:  64%|███████████████████████████████████▊                    | 6394/10000 [1:41:03<10:17,  5.84it/s]

Step 6393/10000, Loss: 0.1312
Step 6394/10000, Loss: 0.1118


Training Progress:  64%|███████████████████████████████████▊                    | 6396/10000 [1:41:03<10:25,  5.76it/s]

Step 6395/10000, Loss: 0.1190
Step 6396/10000, Loss: 0.1362


Training Progress:  64%|███████████████████████████████████▊                    | 6398/10000 [1:41:04<10:12,  5.88it/s]

Step 6397/10000, Loss: 0.1258
Step 6398/10000, Loss: 0.1219


Training Progress:  64%|███████████████████████████████████▊                    | 6400/10000 [1:41:04<10:17,  5.83it/s]

Step 6399/10000, Loss: 0.1249
Step 6400/10000, Loss: 0.1173


Training Progress:  64%|███████████████████████████████████▊                    | 6402/10000 [1:41:04<10:25,  5.75it/s]

Step 6401/10000, Loss: 0.1289
Step 6402/10000, Loss: 0.1114


Training Progress:  64%|███████████████████████████████████▊                    | 6404/10000 [1:41:05<10:12,  5.88it/s]

Step 6403/10000, Loss: 0.1191
Step 6404/10000, Loss: 0.1222


Training Progress:  64%|███████████████████████████████████▊                    | 6406/10000 [1:41:05<10:21,  5.79it/s]

Step 6405/10000, Loss: 0.1176
Step 6406/10000, Loss: 0.1143


Training Progress:  64%|███████████████████████████████████▉                    | 6408/10000 [1:41:05<10:13,  5.86it/s]

Step 6407/10000, Loss: 0.1307
Step 6408/10000, Loss: 0.1355


Training Progress:  64%|███████████████████████████████████▉                    | 6410/10000 [1:41:06<10:15,  5.83it/s]

Step 6409/10000, Loss: 0.1301
Step 6410/10000, Loss: 0.1295


Training Progress:  64%|███████████████████████████████████▉                    | 6412/10000 [1:41:06<10:22,  5.77it/s]

Step 6411/10000, Loss: 0.1152
Step 6412/10000, Loss: 0.1528


Training Progress:  64%|███████████████████████████████████▉                    | 6414/10000 [1:41:07<10:09,  5.88it/s]

Step 6413/10000, Loss: 0.1258
Step 6414/10000, Loss: 0.1242


Training Progress:  64%|███████████████████████████████████▉                    | 6416/10000 [1:41:07<10:18,  5.79it/s]

Step 6415/10000, Loss: 0.1229
Step 6416/10000, Loss: 0.1306


Training Progress:  64%|███████████████████████████████████▉                    | 6418/10000 [1:41:07<10:20,  5.78it/s]

Step 6417/10000, Loss: 0.1205
Step 6418/10000, Loss: 0.1068


Training Progress:  64%|███████████████████████████████████▉                    | 6420/10000 [1:41:08<10:17,  5.80it/s]

Step 6419/10000, Loss: 0.1272
Step 6420/10000, Loss: 0.1316


Training Progress:  64%|███████████████████████████████████▉                    | 6422/10000 [1:41:08<10:10,  5.86it/s]

Step 6421/10000, Loss: 0.1095
Step 6422/10000, Loss: 0.1244


Training Progress:  64%|███████████████████████████████████▉                    | 6424/10000 [1:41:08<10:15,  5.81it/s]

Step 6423/10000, Loss: 0.1174
Step 6424/10000, Loss: 0.1277


Training Progress:  64%|███████████████████████████████████▉                    | 6426/10000 [1:41:09<10:18,  5.77it/s]

Step 6425/10000, Loss: 0.1180
Step 6426/10000, Loss: 0.1119


Training Progress:  64%|███████████████████████████████████▉                    | 6428/10000 [1:41:09<10:07,  5.88it/s]

Step 6427/10000, Loss: 0.1125
Step 6428/10000, Loss: 0.1241


Training Progress:  64%|████████████████████████████████████                    | 6430/10000 [1:41:09<10:18,  5.77it/s]

Step 6429/10000, Loss: 0.1177
Step 6430/10000, Loss: 0.1222


Training Progress:  64%|████████████████████████████████████                    | 6431/10000 [1:41:09<10:10,  5.85it/s]

Step 6431/10000, Loss: 0.1126
Step 6432/10000, Loss: 0.1024


Training Progress:  64%|██████████████████████████████████▋                   | 6432/10000 [1:41:24<4:25:39,  4.47s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6432_loss0.1024_20250117_143313.pt

New best loss: 0.1024


Training Progress:  64%|██████████████████████████████████▋                   | 6434/10000 [1:41:24<2:17:43,  2.32s/it]

Step 6433/10000, Loss: 0.1035
Step 6434/10000, Loss: 0.1206


Training Progress:  64%|██████████████████████████████████▊                   | 6436/10000 [1:41:25<1:12:40,  1.22s/it]

Step 6435/10000, Loss: 0.1129
Step 6436/10000, Loss: 0.1216


Training Progress:  64%|████████████████████████████████████                    | 6438/10000 [1:41:25<40:43,  1.46it/s]

Step 6437/10000, Loss: 0.1079
Step 6438/10000, Loss: 0.1115


Training Progress:  64%|████████████████████████████████████                    | 6440/10000 [1:41:25<25:14,  2.35it/s]

Step 6439/10000, Loss: 0.1069
Step 6440/10000, Loss: 0.1067


Training Progress:  64%|████████████████████████████████████                    | 6442/10000 [1:41:26<17:25,  3.40it/s]

Step 6441/10000, Loss: 0.1100
Step 6442/10000, Loss: 0.1088


Training Progress:  64%|████████████████████████████████████                    | 6444/10000 [1:41:26<13:47,  4.30it/s]

Step 6443/10000, Loss: 0.1161
Step 6444/10000, Loss: 0.1063


Training Progress:  64%|████████████████████████████████████                    | 6446/10000 [1:41:27<11:49,  5.01it/s]

Step 6445/10000, Loss: 0.1090
Step 6446/10000, Loss: 0.1132
Step 6447/10000, Loss: 0.0952


Training Progress:  64%|██████████████████████████████████▊                   | 6447/10000 [1:41:48<6:25:13,  6.51s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6447_loss0.0952_20250117_143331.pt

New best loss: 0.0952


Training Progress:  64%|██████████████████████████████████▊                   | 6449/10000 [1:41:48<3:16:06,  3.31s/it]

Step 6448/10000, Loss: 0.0987
Step 6449/10000, Loss: 0.1046


Training Progress:  65%|██████████████████████████████████▊                   | 6451/10000 [1:41:49<1:41:21,  1.71s/it]

Step 6450/10000, Loss: 0.1338
Step 6451/10000, Loss: 0.1065


Training Progress:  65%|████████████████████████████████████▏                   | 6453/10000 [1:41:49<54:37,  1.08it/s]

Step 6452/10000, Loss: 0.1135
Step 6453/10000, Loss: 0.1040


Training Progress:  65%|████████████████████████████████████▏                   | 6455/10000 [1:41:49<32:03,  1.84it/s]

Step 6454/10000, Loss: 0.0999
Step 6455/10000, Loss: 0.1120


Training Progress:  65%|████████████████████████████████████▏                   | 6457/10000 [1:41:50<20:40,  2.86it/s]

Step 6456/10000, Loss: 0.1110
Step 6457/10000, Loss: 0.1132


Training Progress:  65%|████████████████████████████████████▏                   | 6459/10000 [1:41:50<15:20,  3.85it/s]

Step 6458/10000, Loss: 0.1058
Step 6459/10000, Loss: 0.1136


Training Progress:  65%|████████████████████████████████████▏                   | 6461/10000 [1:41:50<12:31,  4.71it/s]

Step 6460/10000, Loss: 0.1097
Step 6461/10000, Loss: 0.1224


Training Progress:  65%|████████████████████████████████████▏                   | 6463/10000 [1:41:51<11:23,  5.17it/s]

Step 6462/10000, Loss: 0.1101
Step 6463/10000, Loss: 0.1145


Training Progress:  65%|████████████████████████████████████▏                   | 6465/10000 [1:41:51<10:33,  5.58it/s]

Step 6464/10000, Loss: 0.1041
Step 6465/10000, Loss: 0.0998


Training Progress:  65%|████████████████████████████████████▏                   | 6467/10000 [1:41:51<10:22,  5.67it/s]

Step 6466/10000, Loss: 0.1098
Step 6467/10000, Loss: 0.1009


Training Progress:  65%|████████████████████████████████████▏                   | 6469/10000 [1:41:52<10:21,  5.68it/s]

Step 6468/10000, Loss: 0.1041
Step 6469/10000, Loss: 0.0980


Training Progress:  65%|████████████████████████████████████▏                   | 6471/10000 [1:41:52<10:06,  5.82it/s]

Step 6470/10000, Loss: 0.1170
Step 6471/10000, Loss: 0.0986


Training Progress:  65%|████████████████████████████████████▏                   | 6473/10000 [1:41:52<10:11,  5.77it/s]

Step 6472/10000, Loss: 0.1109
Step 6473/10000, Loss: 0.1086


Training Progress:  65%|████████████████████████████████████▎                   | 6475/10000 [1:41:53<10:01,  5.86it/s]

Step 6474/10000, Loss: 0.1080
Step 6475/10000, Loss: 0.1149


Training Progress:  65%|████████████████████████████████████▎                   | 6477/10000 [1:41:53<10:10,  5.77it/s]

Step 6476/10000, Loss: 0.1100
Step 6477/10000, Loss: 0.1142


Training Progress:  65%|████████████████████████████████████▎                   | 6479/10000 [1:41:53<09:56,  5.90it/s]

Step 6478/10000, Loss: 0.1301
Step 6479/10000, Loss: 0.1174


Training Progress:  65%|████████████████████████████████████▎                   | 6481/10000 [1:41:54<10:07,  5.79it/s]

Step 6480/10000, Loss: 0.1018
Step 6481/10000, Loss: 0.1061
Step 6482/10000, Loss: 0.0904


Training Progress:  65%|███████████████████████████████████                   | 6482/10000 [1:42:11<5:05:04,  5.20s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6482_loss0.0904_20250117_143358.pt

New best loss: 0.0904


Training Progress:  65%|███████████████████████████████████                   | 6484/10000 [1:42:11<2:37:19,  2.68s/it]

Step 6483/10000, Loss: 0.1063
Step 6484/10000, Loss: 0.0996


Training Progress:  65%|███████████████████████████████████                   | 6486/10000 [1:42:12<1:22:10,  1.40s/it]

Step 6485/10000, Loss: 0.1065
Step 6486/10000, Loss: 0.1074


Training Progress:  65%|████████████████████████████████████▎                   | 6488/10000 [1:42:12<45:21,  1.29it/s]

Step 6487/10000, Loss: 0.1091
Step 6488/10000, Loss: 0.1025


Training Progress:  65%|████████████████████████████████████▎                   | 6490/10000 [1:42:12<27:17,  2.14it/s]

Step 6489/10000, Loss: 0.1170
Step 6490/10000, Loss: 0.1257


Training Progress:  65%|████████████████████████████████████▎                   | 6492/10000 [1:42:13<18:28,  3.17it/s]

Step 6491/10000, Loss: 0.1193
Step 6492/10000, Loss: 0.1219


Training Progress:  65%|████████████████████████████████████▎                   | 6494/10000 [1:42:13<14:09,  4.13it/s]

Step 6493/10000, Loss: 0.1138
Step 6494/10000, Loss: 0.1370


Training Progress:  65%|████████████████████████████████████▍                   | 6496/10000 [1:42:13<11:58,  4.88it/s]

Step 6495/10000, Loss: 0.1027
Step 6496/10000, Loss: 0.0946


Training Progress:  65%|████████████████████████████████████▍                   | 6498/10000 [1:42:14<11:03,  5.28it/s]

Step 6497/10000, Loss: 0.1096
Step 6498/10000, Loss: 0.1242


Training Progress:  65%|████████████████████████████████████▍                   | 6500/10000 [1:42:14<10:22,  5.62it/s]

Step 6499/10000, Loss: 0.1232
Step 6500/10000, Loss: 0.0959


Training Progress:  65%|████████████████████████████████████▍                   | 6502/10000 [1:42:14<10:19,  5.65it/s]

Step 6501/10000, Loss: 0.1171
Step 6502/10000, Loss: 0.1042


Training Progress:  65%|████████████████████████████████████▍                   | 6504/10000 [1:42:15<10:00,  5.82it/s]

Step 6503/10000, Loss: 0.1013
Step 6504/10000, Loss: 0.1063


Training Progress:  65%|████████████████████████████████████▍                   | 6506/10000 [1:42:15<10:06,  5.76it/s]

Step 6505/10000, Loss: 0.1129
Step 6506/10000, Loss: 0.1155


Training Progress:  65%|████████████████████████████████████▍                   | 6508/10000 [1:42:15<09:55,  5.87it/s]

Step 6507/10000, Loss: 0.1031
Step 6508/10000, Loss: 0.0959


Training Progress:  65%|████████████████████████████████████▍                   | 6510/10000 [1:42:16<10:04,  5.78it/s]

Step 6509/10000, Loss: 0.1019
Step 6510/10000, Loss: 0.1125


Training Progress:  65%|████████████████████████████████████▍                   | 6512/10000 [1:42:16<10:08,  5.73it/s]

Step 6511/10000, Loss: 0.1135
Step 6512/10000, Loss: 0.1076


Training Progress:  65%|████████████████████████████████████▍                   | 6514/10000 [1:42:16<09:54,  5.87it/s]

Step 6513/10000, Loss: 0.0988
Step 6514/10000, Loss: 0.0950
Step 6515/10000, Loss: 0.0884


Training Progress:  65%|███████████████████████████████████▏                  | 6515/10000 [1:42:33<5:00:20,  5.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6515_loss0.0884_20250117_143420.pt

New best loss: 0.0884


Training Progress:  65%|███████████████████████████████████▏                  | 6517/10000 [1:42:34<2:35:30,  2.68s/it]

Step 6516/10000, Loss: 0.0960
Step 6517/10000, Loss: 0.1006


Training Progress:  65%|███████████████████████████████████▏                  | 6518/10000 [1:42:34<1:51:44,  1.93s/it]

Step 6518/10000, Loss: 0.0927
Step 6519/10000, Loss: 0.0876


Training Progress:  65%|███████████████████████████████████▏                  | 6519/10000 [1:42:56<7:32:38,  7.80s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6519_loss0.0876_20250117_143438.pt

New best loss: 0.0876


Training Progress:  65%|███████████████████████████████████▏                  | 6521/10000 [1:42:56<3:48:45,  3.95s/it]

Step 6520/10000, Loss: 0.0948
Step 6521/10000, Loss: 0.0925


Training Progress:  65%|███████████████████████████████████▏                  | 6523/10000 [1:42:56<1:57:17,  2.02s/it]

Step 6522/10000, Loss: 0.0889
Step 6523/10000, Loss: 0.0972
Step 6524/10000, Loss: 0.0856


Training Progress:  65%|███████████████████████████████████▏                  | 6524/10000 [1:43:19<7:53:49,  8.18s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6524_loss0.0856_20250117_143500.pt

New best loss: 0.0856


Training Progress:  65%|███████████████████████████████████▏                  | 6526/10000 [1:43:19<3:58:55,  4.13s/it]

Step 6525/10000, Loss: 0.1028
Step 6526/10000, Loss: 0.0961


Training Progress:  65%|███████████████████████████████████▎                  | 6528/10000 [1:43:20<2:02:13,  2.11s/it]

Step 6527/10000, Loss: 0.0984
Step 6528/10000, Loss: 0.0916
Step 6529/10000, Loss: 0.0834


Training Progress:  65%|███████████████████████████████████▎                  | 6529/10000 [1:43:43<8:16:17,  8.58s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6529_loss0.0834_20250117_143524.pt

New best loss: 0.0834


Training Progress:  65%|███████████████████████████████████▎                  | 6531/10000 [1:43:44<4:11:07,  4.34s/it]

Step 6530/10000, Loss: 0.0906
Step 6531/10000, Loss: 0.0960


Training Progress:  65%|███████████████████████████████████▎                  | 6533/10000 [1:43:44<2:08:00,  2.22s/it]

Step 6532/10000, Loss: 0.1103
Step 6533/10000, Loss: 0.0865


Training Progress:  65%|███████████████████████████████████▎                  | 6535/10000 [1:43:45<1:07:30,  1.17s/it]

Step 6534/10000, Loss: 0.0969
Step 6535/10000, Loss: 0.0834


Training Progress:  65%|████████████████████████████████████▌                   | 6537/10000 [1:43:45<38:08,  1.51it/s]

Step 6536/10000, Loss: 0.0859
Step 6537/10000, Loss: 0.0982


Training Progress:  65%|████████████████████████████████████▌                   | 6539/10000 [1:43:45<23:32,  2.45it/s]

Step 6538/10000, Loss: 0.0965
Step 6539/10000, Loss: 0.0924
Step 6540/10000, Loss: 0.0809


Training Progress:  65%|███████████████████████████████████▎                  | 6540/10000 [1:44:05<5:58:37,  6.22s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6540_loss0.0809_20250117_143549.pt

New best loss: 0.0809


Training Progress:  65%|███████████████████████████████████▎                  | 6542/10000 [1:44:06<3:04:20,  3.20s/it]

Step 6541/10000, Loss: 0.0893
Step 6542/10000, Loss: 0.0902


Training Progress:  65%|███████████████████████████████████▎                  | 6544/10000 [1:44:06<1:35:15,  1.65s/it]

Step 6543/10000, Loss: 0.1083
Step 6544/10000, Loss: 0.0969


Training Progress:  65%|████████████████████████████████████▋                   | 6546/10000 [1:44:06<51:40,  1.11it/s]

Step 6545/10000, Loss: 0.1050
Step 6546/10000, Loss: 0.0878


Training Progress:  65%|████████████████████████████████████▋                   | 6548/10000 [1:44:07<30:12,  1.90it/s]

Step 6547/10000, Loss: 0.0849
Step 6548/10000, Loss: 0.0907


Training Progress:  66%|████████████████████████████████████▋                   | 6550/10000 [1:44:07<19:51,  2.90it/s]

Step 6549/10000, Loss: 0.0929
Step 6550/10000, Loss: 0.0932


Training Progress:  66%|████████████████████████████████████▋                   | 6552/10000 [1:44:08<14:35,  3.94it/s]

Step 6551/10000, Loss: 0.0823
Step 6552/10000, Loss: 0.1053


Training Progress:  66%|████████████████████████████████████▋                   | 6554/10000 [1:44:08<12:13,  4.70it/s]

Step 6553/10000, Loss: 0.0815
Step 6554/10000, Loss: 0.0909


Training Progress:  66%|████████████████████████████████████▋                   | 6556/10000 [1:44:08<10:51,  5.29it/s]

Step 6555/10000, Loss: 0.0830
Step 6556/10000, Loss: 0.0921


Training Progress:  66%|████████████████████████████████████▋                   | 6558/10000 [1:44:09<10:22,  5.53it/s]

Step 6557/10000, Loss: 0.0995
Step 6558/10000, Loss: 0.1006


Training Progress:  66%|████████████████████████████████████▋                   | 6560/10000 [1:44:09<09:58,  5.75it/s]

Step 6559/10000, Loss: 0.1100
Step 6560/10000, Loss: 0.1185


Training Progress:  66%|████████████████████████████████████▋                   | 6562/10000 [1:44:09<10:04,  5.69it/s]

Step 6561/10000, Loss: 0.1221
Step 6562/10000, Loss: 0.1078


Training Progress:  66%|████████████████████████████████████▊                   | 6564/10000 [1:44:10<09:58,  5.74it/s]

Step 6563/10000, Loss: 0.1132
Step 6564/10000, Loss: 0.0900


Training Progress:  66%|████████████████████████████████████▊                   | 6566/10000 [1:44:10<09:47,  5.84it/s]

Step 6565/10000, Loss: 0.0978
Step 6566/10000, Loss: 0.0922


Training Progress:  66%|████████████████████████████████████▊                   | 6568/10000 [1:44:10<09:50,  5.81it/s]

Step 6567/10000, Loss: 0.0856
Step 6568/10000, Loss: 0.0905


Training Progress:  66%|████████████████████████████████████▊                   | 6570/10000 [1:44:11<09:43,  5.87it/s]

Step 6569/10000, Loss: 0.0862
Step 6570/10000, Loss: 0.0940


Training Progress:  66%|████████████████████████████████████▊                   | 6572/10000 [1:44:11<09:53,  5.77it/s]

Step 6571/10000, Loss: 0.1031
Step 6572/10000, Loss: 0.1107


Training Progress:  66%|████████████████████████████████████▊                   | 6574/10000 [1:44:11<09:45,  5.85it/s]

Step 6573/10000, Loss: 0.1087
Step 6574/10000, Loss: 0.1146


Training Progress:  66%|████████████████████████████████████▊                   | 6576/10000 [1:44:12<09:49,  5.81it/s]

Step 6575/10000, Loss: 0.1158
Step 6576/10000, Loss: 0.1339


Training Progress:  66%|████████████████████████████████████▊                   | 6578/10000 [1:44:12<09:39,  5.91it/s]

Step 6577/10000, Loss: 0.1137
Step 6578/10000, Loss: 0.1101


Training Progress:  66%|████████████████████████████████████▊                   | 6580/10000 [1:44:12<09:46,  5.83it/s]

Step 6579/10000, Loss: 0.1149
Step 6580/10000, Loss: 0.1031


Training Progress:  66%|████████████████████████████████████▊                   | 6582/10000 [1:44:13<09:49,  5.80it/s]

Step 6581/10000, Loss: 0.1028
Step 6582/10000, Loss: 0.0956


Training Progress:  66%|████████████████████████████████████▊                   | 6584/10000 [1:44:13<09:42,  5.86it/s]

Step 6583/10000, Loss: 0.1060
Step 6584/10000, Loss: 0.1010


Training Progress:  66%|████████████████████████████████████▉                   | 6586/10000 [1:44:13<09:50,  5.78it/s]

Step 6585/10000, Loss: 0.0927
Step 6586/10000, Loss: 0.0998


Training Progress:  66%|████████████████████████████████████▉                   | 6588/10000 [1:44:14<09:39,  5.89it/s]

Step 6587/10000, Loss: 0.0996
Step 6588/10000, Loss: 0.1049


Training Progress:  66%|████████████████████████████████████▉                   | 6590/10000 [1:44:14<09:46,  5.81it/s]

Step 6589/10000, Loss: 0.1014
Step 6590/10000, Loss: 0.0897


Training Progress:  66%|████████████████████████████████████▉                   | 6592/10000 [1:44:14<09:37,  5.90it/s]

Step 6591/10000, Loss: 0.0905
Step 6592/10000, Loss: 0.1108


Training Progress:  66%|████████████████████████████████████▉                   | 6594/10000 [1:44:15<09:48,  5.79it/s]

Step 6593/10000, Loss: 0.0942
Step 6594/10000, Loss: 0.0965


Training Progress:  66%|████████████████████████████████████▉                   | 6596/10000 [1:44:15<09:37,  5.89it/s]

Step 6595/10000, Loss: 0.0957
Step 6596/10000, Loss: 0.0854
Step 6597/10000, Loss: 0.0791


Training Progress:  66%|███████████████████████████████████▌                  | 6597/10000 [1:44:32<4:54:08,  5.19s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6597_loss0.0791_20250117_143619.pt

New best loss: 0.0791


Training Progress:  66%|███████████████████████████████████▋                  | 6599/10000 [1:44:33<2:32:51,  2.70s/it]

Step 6598/10000, Loss: 0.0878
Step 6599/10000, Loss: 0.0885


Training Progress:  66%|███████████████████████████████████▋                  | 6601/10000 [1:44:33<1:19:37,  1.41s/it]

Step 6600/10000, Loss: 0.0869
Step 6601/10000, Loss: 0.0880


Training Progress:  66%|████████████████████████████████████▉                   | 6603/10000 [1:44:33<44:08,  1.28it/s]

Step 6602/10000, Loss: 0.0883
Step 6603/10000, Loss: 0.0868


Training Progress:  66%|████████████████████████████████████▉                   | 6605/10000 [1:44:34<26:23,  2.14it/s]

Step 6604/10000, Loss: 0.0860
Step 6605/10000, Loss: 0.0885


Training Progress:  66%|████████████████████████████████████▉                   | 6607/10000 [1:44:34<18:04,  3.13it/s]

Step 6606/10000, Loss: 0.0814
Step 6607/10000, Loss: 0.1052


Training Progress:  66%|█████████████████████████████████████                   | 6609/10000 [1:44:34<13:37,  4.15it/s]

Step 6608/10000, Loss: 0.0919
Step 6609/10000, Loss: 0.0802


Training Progress:  66%|█████████████████████████████████████                   | 6611/10000 [1:44:35<11:38,  4.85it/s]

Step 6610/10000, Loss: 0.0816
Step 6611/10000, Loss: 0.0815


Training Progress:  66%|█████████████████████████████████████                   | 6613/10000 [1:44:35<10:29,  5.38it/s]

Step 6612/10000, Loss: 0.0800
Step 6613/10000, Loss: 0.0843


Training Progress:  66%|█████████████████████████████████████                   | 6614/10000 [1:44:35<10:28,  5.39it/s]

Step 6614/10000, Loss: 0.0959
Step 6615/10000, Loss: 0.0775


Training Progress:  66%|███████████████████████████████████▋                  | 6615/10000 [1:44:54<5:33:26,  5.91s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6615_loss0.0775_20250117_143639.pt

New best loss: 0.0775


Training Progress:  66%|███████████████████████████████████▋                  | 6617/10000 [1:44:55<2:50:43,  3.03s/it]

Step 6616/10000, Loss: 0.0876
Step 6617/10000, Loss: 0.0786


Training Progress:  66%|███████████████████████████████████▋                  | 6619/10000 [1:44:55<1:28:23,  1.57s/it]

Step 6618/10000, Loss: 0.0788
Step 6619/10000, Loss: 0.0894


Training Progress:  66%|███████████████████████████████████▋                  | 6620/10000 [1:44:56<1:04:58,  1.15s/it]

Step 6620/10000, Loss: 0.0791
Step 6621/10000, Loss: 0.0742


Training Progress:  66%|███████████████████████████████████▊                  | 6621/10000 [1:45:16<6:37:32,  7.06s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6621_loss0.0742_20250117_143700.pt

New best loss: 0.0742


Training Progress:  66%|███████████████████████████████████▊                  | 6623/10000 [1:45:17<3:23:07,  3.61s/it]

Step 6622/10000, Loss: 0.0756
Step 6623/10000, Loss: 0.0858


Training Progress:  66%|███████████████████████████████████▊                  | 6625/10000 [1:45:17<1:44:12,  1.85s/it]

Step 6624/10000, Loss: 0.0749
Step 6625/10000, Loss: 0.0942


Training Progress:  66%|█████████████████████████████████████                   | 6627/10000 [1:45:18<56:05,  1.00it/s]

Step 6626/10000, Loss: 0.0818
Step 6627/10000, Loss: 0.0913


Training Progress:  66%|█████████████████████████████████████                   | 6628/10000 [1:45:18<42:04,  1.34it/s]

Step 6628/10000, Loss: 0.0773
Step 6629/10000, Loss: 0.0706


Training Progress:  66%|███████████████████████████████████▊                  | 6629/10000 [1:45:41<6:55:21,  7.39s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6629_loss0.0706_20250117_143722.pt

New best loss: 0.0706


Training Progress:  66%|███████████████████████████████████▊                  | 6631/10000 [1:45:41<3:31:51,  3.77s/it]

Step 6630/10000, Loss: 0.0785
Step 6631/10000, Loss: 0.0778


Training Progress:  66%|███████████████████████████████████▊                  | 6633/10000 [1:45:42<1:48:31,  1.93s/it]

Step 6632/10000, Loss: 0.0911
Step 6633/10000, Loss: 0.0792


Training Progress:  66%|█████████████████████████████████████▏                  | 6635/10000 [1:45:42<58:06,  1.04s/it]

Step 6634/10000, Loss: 0.0858
Step 6635/10000, Loss: 0.0757


Training Progress:  66%|█████████████████████████████████████▏                  | 6637/10000 [1:45:42<33:14,  1.69it/s]

Step 6636/10000, Loss: 0.0922
Step 6637/10000, Loss: 0.0726


Training Progress:  66%|█████████████████████████████████████▏                  | 6639/10000 [1:45:43<21:11,  2.64it/s]

Step 6638/10000, Loss: 0.0738
Step 6639/10000, Loss: 0.0750
Step 6640/10000, Loss: 0.0653


Training Progress:  66%|███████████████████████████████████▊                  | 6640/10000 [1:46:02<5:38:05,  6.04s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6640_loss0.0653_20250117_143747.pt

New best loss: 0.0653


Training Progress:  66%|███████████████████████████████████▊                  | 6642/10000 [1:46:03<2:53:18,  3.10s/it]

Step 6641/10000, Loss: 0.0786
Step 6642/10000, Loss: 0.0992


Training Progress:  66%|███████████████████████████████████▉                  | 6644/10000 [1:46:03<1:29:44,  1.60s/it]

Step 6643/10000, Loss: 0.0863
Step 6644/10000, Loss: 0.0801


Training Progress:  66%|█████████████████████████████████████▏                  | 6646/10000 [1:46:03<48:50,  1.14it/s]

Step 6645/10000, Loss: 0.0925
Step 6646/10000, Loss: 0.0838


Training Progress:  66%|█████████████████████████████████████▏                  | 6648/10000 [1:46:04<28:48,  1.94it/s]

Step 6647/10000, Loss: 0.1033
Step 6648/10000, Loss: 0.0898


Training Progress:  66%|█████████████████████████████████████▏                  | 6650/10000 [1:46:04<18:58,  2.94it/s]

Step 6649/10000, Loss: 0.0857
Step 6650/10000, Loss: 0.0903


Training Progress:  67%|█████████████████████████████████████▎                  | 6652/10000 [1:46:04<14:08,  3.95it/s]

Step 6651/10000, Loss: 0.0835
Step 6652/10000, Loss: 0.0802


Training Progress:  67%|█████████████████████████████████████▎                  | 6654/10000 [1:46:05<11:48,  4.73it/s]

Step 6653/10000, Loss: 0.0904
Step 6654/10000, Loss: 0.0845


Training Progress:  67%|█████████████████████████████████████▎                  | 6656/10000 [1:46:05<10:37,  5.25it/s]

Step 6655/10000, Loss: 0.0849
Step 6656/10000, Loss: 0.0804


Training Progress:  67%|█████████████████████████████████████▎                  | 6658/10000 [1:46:05<10:03,  5.54it/s]

Step 6657/10000, Loss: 0.0791
Step 6658/10000, Loss: 0.1055


Training Progress:  67%|█████████████████████████████████████▎                  | 6660/10000 [1:46:06<09:47,  5.69it/s]

Step 6659/10000, Loss: 0.0824
Step 6660/10000, Loss: 0.1025


Training Progress:  67%|█████████████████████████████████████▎                  | 6662/10000 [1:46:06<09:40,  5.75it/s]

Step 6661/10000, Loss: 0.1040
Step 6662/10000, Loss: 0.1043


Training Progress:  67%|█████████████████████████████████████▎                  | 6664/10000 [1:46:06<09:36,  5.79it/s]

Step 6663/10000, Loss: 0.0979
Step 6664/10000, Loss: 0.1064


Training Progress:  67%|█████████████████████████████████████▎                  | 6666/10000 [1:46:07<09:32,  5.82it/s]

Step 6665/10000, Loss: 0.1044
Step 6666/10000, Loss: 0.0901


Training Progress:  67%|█████████████████████████████████████▎                  | 6668/10000 [1:46:07<09:32,  5.82it/s]

Step 6667/10000, Loss: 0.0825
Step 6668/10000, Loss: 0.0774


Training Progress:  67%|█████████████████████████████████████▎                  | 6670/10000 [1:46:07<09:31,  5.83it/s]

Step 6669/10000, Loss: 0.0851
Step 6670/10000, Loss: 0.0821


Training Progress:  67%|█████████████████████████████████████▎                  | 6672/10000 [1:46:08<09:29,  5.85it/s]

Step 6671/10000, Loss: 0.0793
Step 6672/10000, Loss: 0.0804


Training Progress:  67%|█████████████████████████████████████▎                  | 6674/10000 [1:46:08<09:28,  5.85it/s]

Step 6673/10000, Loss: 0.0798
Step 6674/10000, Loss: 0.0896


Training Progress:  67%|█████████████████████████████████████▍                  | 6676/10000 [1:46:08<09:25,  5.88it/s]

Step 6675/10000, Loss: 0.0975
Step 6676/10000, Loss: 0.1057


Training Progress:  67%|█████████████████████████████████████▍                  | 6678/10000 [1:46:09<09:29,  5.83it/s]

Step 6677/10000, Loss: 0.0898
Step 6678/10000, Loss: 0.0858


Training Progress:  67%|█████████████████████████████████████▍                  | 6680/10000 [1:46:09<09:28,  5.84it/s]

Step 6679/10000, Loss: 0.0787
Step 6680/10000, Loss: 0.0864


Training Progress:  67%|█████████████████████████████████████▍                  | 6682/10000 [1:46:09<09:28,  5.84it/s]

Step 6681/10000, Loss: 0.0939
Step 6682/10000, Loss: 0.0841


Training Progress:  67%|█████████████████████████████████████▍                  | 6684/10000 [1:46:10<09:22,  5.89it/s]

Step 6683/10000, Loss: 0.0721
Step 6684/10000, Loss: 0.0796


Training Progress:  67%|█████████████████████████████████████▍                  | 6686/10000 [1:46:10<09:28,  5.83it/s]

Step 6685/10000, Loss: 0.0821
Step 6686/10000, Loss: 0.0753


Training Progress:  67%|█████████████████████████████████████▍                  | 6688/10000 [1:46:10<09:27,  5.84it/s]

Step 6687/10000, Loss: 0.0780
Step 6688/10000, Loss: 0.0774


Training Progress:  67%|█████████████████████████████████████▍                  | 6690/10000 [1:46:11<09:12,  5.99it/s]

Step 6689/10000, Loss: 0.0854
Step 6690/10000, Loss: 0.0820


Training Progress:  67%|█████████████████████████████████████▍                  | 6692/10000 [1:46:11<09:22,  5.88it/s]

Step 6691/10000, Loss: 0.0776
Step 6692/10000, Loss: 0.0778


Training Progress:  67%|█████████████████████████████████████▍                  | 6694/10000 [1:46:11<09:32,  5.78it/s]

Step 6693/10000, Loss: 0.0715
Step 6694/10000, Loss: 0.0715


Training Progress:  67%|█████████████████████████████████████▍                  | 6696/10000 [1:46:12<09:20,  5.89it/s]

Step 6695/10000, Loss: 0.0766
Step 6696/10000, Loss: 0.0812


Training Progress:  67%|█████████████████████████████████████▌                  | 6698/10000 [1:46:12<09:32,  5.77it/s]

Step 6697/10000, Loss: 0.0707
Step 6698/10000, Loss: 0.0874


Training Progress:  67%|█████████████████████████████████████▌                  | 6700/10000 [1:46:12<09:19,  5.89it/s]

Step 6699/10000, Loss: 0.0745
Step 6700/10000, Loss: 0.0711


Training Progress:  67%|█████████████████████████████████████▌                  | 6702/10000 [1:46:13<09:29,  5.79it/s]

Step 6701/10000, Loss: 0.0813
Step 6702/10000, Loss: 0.0836


Training Progress:  67%|█████████████████████████████████████▌                  | 6704/10000 [1:46:13<09:18,  5.90it/s]

Step 6703/10000, Loss: 0.0702
Step 6704/10000, Loss: 0.0679


Training Progress:  67%|█████████████████████████████████████▌                  | 6706/10000 [1:46:14<09:24,  5.83it/s]

Step 6705/10000, Loss: 0.0753
Step 6706/10000, Loss: 0.0673


Training Progress:  67%|█████████████████████████████████████▌                  | 6708/10000 [1:46:14<09:17,  5.91it/s]

Step 6707/10000, Loss: 0.0859
Step 6708/10000, Loss: 0.0675


Training Progress:  67%|█████████████████████████████████████▌                  | 6710/10000 [1:46:14<09:31,  5.75it/s]

Step 6709/10000, Loss: 0.0803
Step 6710/10000, Loss: 0.0680
Step 6711/10000, Loss: 0.0605


Training Progress:  67%|████████████████████████████████████▏                 | 6711/10000 [1:46:27<3:43:53,  4.08s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6711_loss0.0605_20250117_143818.pt

New best loss: 0.0605


Training Progress:  67%|████████████████████████████████████▎                 | 6713/10000 [1:46:28<1:56:42,  2.13s/it]

Step 6712/10000, Loss: 0.0730
Step 6713/10000, Loss: 0.0683


Training Progress:  67%|████████████████████████████████████▎                 | 6715/10000 [1:46:28<1:01:57,  1.13s/it]

Step 6714/10000, Loss: 0.0768
Step 6715/10000, Loss: 0.0625


Training Progress:  67%|█████████████████████████████████████▌                  | 6717/10000 [1:46:29<34:59,  1.56it/s]

Step 6716/10000, Loss: 0.0754
Step 6717/10000, Loss: 0.0625


Training Progress:  67%|█████████████████████████████████████▋                  | 6719/10000 [1:46:29<21:58,  2.49it/s]

Step 6718/10000, Loss: 0.0752
Step 6719/10000, Loss: 0.0686
Step 6720/10000, Loss: 0.0588


Training Progress:  67%|████████████████████████████████████▎                 | 6720/10000 [1:46:52<6:29:50,  7.13s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6720_loss0.0588_20250117_143833.pt

New best loss: 0.0588


Training Progress:  67%|████████████████████████████████████▎                 | 6721/10000 [1:46:52<4:38:42,  5.10s/it]

Step 6721/10000, Loss: 0.0619
Step 6722/10000, Loss: 0.0567


Training Progress:  67%|████████████████████████████████████▎                 | 6722/10000 [1:47:13<8:56:45,  9.82s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6722_loss0.0567_20250117_143856.pt

New best loss: 0.0567


Training Progress:  67%|████████████████████████████████████▎                 | 6724/10000 [1:47:14<4:29:44,  4.94s/it]

Step 6723/10000, Loss: 0.0651
Step 6724/10000, Loss: 0.0764


Training Progress:  67%|████████████████████████████████████▎                 | 6726/10000 [1:47:14<2:16:50,  2.51s/it]

Step 6725/10000, Loss: 0.0655
Step 6726/10000, Loss: 0.0648


Training Progress:  67%|████████████████████████████████████▎                 | 6728/10000 [1:47:14<1:11:50,  1.32s/it]

Step 6727/10000, Loss: 0.0758
Step 6728/10000, Loss: 0.0669


Training Progress:  67%|█████████████████████████████████████▋                  | 6730/10000 [1:47:15<39:48,  1.37it/s]

Step 6729/10000, Loss: 0.0829
Step 6730/10000, Loss: 0.0654


Training Progress:  67%|█████████████████████████████████████▋                  | 6732/10000 [1:47:15<24:23,  2.23it/s]

Step 6731/10000, Loss: 0.0726
Step 6732/10000, Loss: 0.0744


Training Progress:  67%|█████████████████████████████████████▋                  | 6734/10000 [1:47:15<16:36,  3.28it/s]

Step 6733/10000, Loss: 0.0674
Step 6734/10000, Loss: 0.0712


Training Progress:  67%|█████████████████████████████████████▋                  | 6736/10000 [1:47:16<12:59,  4.18it/s]

Step 6735/10000, Loss: 0.0777
Step 6736/10000, Loss: 0.0841


Training Progress:  67%|█████████████████████████████████████▋                  | 6738/10000 [1:47:16<10:58,  4.95it/s]

Step 6737/10000, Loss: 0.0804
Step 6738/10000, Loss: 0.0738


Training Progress:  67%|█████████████████████████████████████▋                  | 6740/10000 [1:47:16<10:15,  5.29it/s]

Step 6739/10000, Loss: 0.0654
Step 6740/10000, Loss: 0.0853


Training Progress:  67%|█████████████████████████████████████▊                  | 6742/10000 [1:47:17<09:49,  5.53it/s]

Step 6741/10000, Loss: 0.0642
Step 6742/10000, Loss: 0.0738


Training Progress:  67%|█████████████████████████████████████▊                  | 6744/10000 [1:47:17<09:32,  5.69it/s]

Step 6743/10000, Loss: 0.0831
Step 6744/10000, Loss: 0.0788


Training Progress:  67%|█████████████████████████████████████▊                  | 6746/10000 [1:47:17<09:12,  5.89it/s]

Step 6745/10000, Loss: 0.0783
Step 6746/10000, Loss: 0.0758


Training Progress:  67%|█████████████████████████████████████▊                  | 6748/10000 [1:47:18<09:24,  5.77it/s]

Step 6747/10000, Loss: 0.0771
Step 6748/10000, Loss: 0.0856


Training Progress:  68%|█████████████████████████████████████▊                  | 6750/10000 [1:47:18<09:19,  5.81it/s]

Step 6749/10000, Loss: 0.0798
Step 6750/10000, Loss: 0.0817


Training Progress:  68%|█████████████████████████████████████▊                  | 6752/10000 [1:47:18<09:14,  5.86it/s]

Step 6751/10000, Loss: 0.0852
Step 6752/10000, Loss: 0.0879


Training Progress:  68%|█████████████████████████████████████▊                  | 6754/10000 [1:47:19<09:23,  5.76it/s]

Step 6753/10000, Loss: 0.0847
Step 6754/10000, Loss: 0.0749


Training Progress:  68%|█████████████████████████████████████▊                  | 6756/10000 [1:47:19<09:11,  5.89it/s]

Step 6755/10000, Loss: 0.0701
Step 6756/10000, Loss: 0.0779


Training Progress:  68%|█████████████████████████████████████▊                  | 6758/10000 [1:47:19<09:17,  5.82it/s]

Step 6757/10000, Loss: 0.0814
Step 6758/10000, Loss: 0.0834


Training Progress:  68%|█████████████████████████████████████▊                  | 6760/10000 [1:47:20<09:06,  5.93it/s]

Step 6759/10000, Loss: 0.0687
Step 6760/10000, Loss: 0.0741


Training Progress:  68%|█████████████████████████████████████▊                  | 6762/10000 [1:47:20<09:20,  5.78it/s]

Step 6761/10000, Loss: 0.0727
Step 6762/10000, Loss: 0.0755


Training Progress:  68%|█████████████████████████████████████▉                  | 6764/10000 [1:47:20<09:16,  5.81it/s]

Step 6763/10000, Loss: 0.0853
Step 6764/10000, Loss: 0.0761


Training Progress:  68%|█████████████████████████████████████▉                  | 6766/10000 [1:47:21<09:16,  5.81it/s]

Step 6765/10000, Loss: 0.0757
Step 6766/10000, Loss: 0.0752


Training Progress:  68%|█████████████████████████████████████▉                  | 6768/10000 [1:47:21<09:06,  5.92it/s]

Step 6767/10000, Loss: 0.0731
Step 6768/10000, Loss: 0.0890


Training Progress:  68%|█████████████████████████████████████▉                  | 6770/10000 [1:47:21<09:17,  5.79it/s]

Step 6769/10000, Loss: 0.0822
Step 6770/10000, Loss: 0.0828


Training Progress:  68%|█████████████████████████████████████▉                  | 6772/10000 [1:47:22<09:05,  5.91it/s]

Step 6771/10000, Loss: 0.0737
Step 6772/10000, Loss: 0.0873


Training Progress:  68%|█████████████████████████████████████▉                  | 6774/10000 [1:47:22<09:16,  5.79it/s]

Step 6773/10000, Loss: 0.0722
Step 6774/10000, Loss: 0.0715


Training Progress:  68%|█████████████████████████████████████▉                  | 6776/10000 [1:47:22<09:07,  5.89it/s]

Step 6775/10000, Loss: 0.0675
Step 6776/10000, Loss: 0.0691


Training Progress:  68%|█████████████████████████████████████▉                  | 6778/10000 [1:47:23<09:12,  5.83it/s]

Step 6777/10000, Loss: 0.0718
Step 6778/10000, Loss: 0.0672


Training Progress:  68%|█████████████████████████████████████▉                  | 6780/10000 [1:47:23<09:03,  5.92it/s]

Step 6779/10000, Loss: 0.0656
Step 6780/10000, Loss: 0.0742


Training Progress:  68%|█████████████████████████████████████▉                  | 6782/10000 [1:47:23<09:10,  5.85it/s]

Step 6781/10000, Loss: 0.0674
Step 6782/10000, Loss: 0.0576


Training Progress:  68%|█████████████████████████████████████▉                  | 6784/10000 [1:47:24<09:18,  5.76it/s]

Step 6783/10000, Loss: 0.0750
Step 6784/10000, Loss: 0.0669


Training Progress:  68%|██████████████████████████████████████                  | 6786/10000 [1:47:24<09:06,  5.88it/s]

Step 6785/10000, Loss: 0.0657
Step 6786/10000, Loss: 0.0640


Training Progress:  68%|██████████████████████████████████████                  | 6788/10000 [1:47:25<09:16,  5.77it/s]

Step 6787/10000, Loss: 0.0693
Step 6788/10000, Loss: 0.0659


Training Progress:  68%|██████████████████████████████████████                  | 6790/10000 [1:47:25<09:05,  5.89it/s]

Step 6789/10000, Loss: 0.0775
Step 6790/10000, Loss: 0.0616


Training Progress:  68%|██████████████████████████████████████                  | 6792/10000 [1:47:25<09:15,  5.78it/s]

Step 6791/10000, Loss: 0.0773
Step 6792/10000, Loss: 0.0673


Training Progress:  68%|██████████████████████████████████████                  | 6794/10000 [1:47:26<09:04,  5.89it/s]

Step 6793/10000, Loss: 0.0634
Step 6794/10000, Loss: 0.0739


Training Progress:  68%|██████████████████████████████████████                  | 6796/10000 [1:47:26<09:11,  5.81it/s]

Step 6795/10000, Loss: 0.0619
Step 6796/10000, Loss: 0.0697


Training Progress:  68%|██████████████████████████████████████                  | 6798/10000 [1:47:26<09:15,  5.76it/s]

Step 6797/10000, Loss: 0.0570
Step 6798/10000, Loss: 0.0643
Step 6799/10000, Loss: 0.0554


Training Progress:  68%|████████████████████████████████████▋                 | 6799/10000 [1:47:40<3:52:00,  4.35s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6799_loss0.0554_20250117_143930.pt

New best loss: 0.0554


Training Progress:  68%|████████████████████████████████████▋                 | 6801/10000 [1:47:41<2:01:21,  2.28s/it]

Step 6800/10000, Loss: 0.0566
Step 6801/10000, Loss: 0.0584


Training Progress:  68%|████████████████████████████████████▋                 | 6803/10000 [1:47:41<1:04:09,  1.20s/it]

Step 6802/10000, Loss: 0.0586
Step 6803/10000, Loss: 0.0623


Training Progress:  68%|██████████████████████████████████████                  | 6805/10000 [1:47:42<35:55,  1.48it/s]

Step 6804/10000, Loss: 0.0565
Step 6805/10000, Loss: 0.0588


Training Progress:  68%|██████████████████████████████████████                  | 6807/10000 [1:47:42<22:17,  2.39it/s]

Step 6806/10000, Loss: 0.0687
Step 6807/10000, Loss: 0.0583


Training Progress:  68%|██████████████████████████████████████▏                 | 6809/10000 [1:47:42<15:40,  3.39it/s]

Step 6808/10000, Loss: 0.0609
Step 6809/10000, Loss: 0.0611


Training Progress:  68%|██████████████████████████████████████▏                 | 6811/10000 [1:47:43<12:12,  4.36it/s]

Step 6810/10000, Loss: 0.0635
Step 6811/10000, Loss: 0.0648


Training Progress:  68%|██████████████████████████████████████▏                 | 6813/10000 [1:47:43<10:45,  4.94it/s]

Step 6812/10000, Loss: 0.0563
Step 6813/10000, Loss: 0.0564


Training Progress:  68%|██████████████████████████████████████▏                 | 6815/10000 [1:47:43<09:45,  5.44it/s]

Step 6814/10000, Loss: 0.0658
Step 6815/10000, Loss: 0.0595


Training Progress:  68%|██████████████████████████████████████▏                 | 6817/10000 [1:47:44<09:33,  5.55it/s]

Step 6816/10000, Loss: 0.0598
Step 6817/10000, Loss: 0.0676


Training Progress:  68%|██████████████████████████████████████▏                 | 6819/10000 [1:47:44<09:10,  5.78it/s]

Step 6818/10000, Loss: 0.0669
Step 6819/10000, Loss: 0.0810


Training Progress:  68%|██████████████████████████████████████▏                 | 6820/10000 [1:47:44<09:06,  5.82it/s]

Step 6820/10000, Loss: 0.0811
Step 6821/10000, Loss: 0.0523


Training Progress:  68%|████████████████████████████████████▊                 | 6821/10000 [1:48:03<5:02:12,  5.70s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6821_loss0.0523_20250117_143948.pt

New best loss: 0.0523


Training Progress:  68%|████████████████████████████████████▊                 | 6823/10000 [1:48:03<2:34:53,  2.93s/it]

Step 6822/10000, Loss: 0.0769
Step 6823/10000, Loss: 0.0652


Training Progress:  68%|████████████████████████████████████▊                 | 6825/10000 [1:48:04<1:20:20,  1.52s/it]

Step 6824/10000, Loss: 0.0663
Step 6825/10000, Loss: 0.0623


Training Progress:  68%|██████████████████████████████████████▏                 | 6827/10000 [1:48:04<44:05,  1.20it/s]

Step 6826/10000, Loss: 0.0677
Step 6827/10000, Loss: 0.0750


Training Progress:  68%|██████████████████████████████████████▏                 | 6829/10000 [1:48:04<26:03,  2.03it/s]

Step 6828/10000, Loss: 0.0657
Step 6829/10000, Loss: 0.0541


Training Progress:  68%|██████████████████████████████████████▎                 | 6831/10000 [1:48:05<17:29,  3.02it/s]

Step 6830/10000, Loss: 0.0589
Step 6831/10000, Loss: 0.0669


Training Progress:  68%|██████████████████████████████████████▎                 | 6833/10000 [1:48:05<13:02,  4.05it/s]

Step 6832/10000, Loss: 0.0565
Step 6833/10000, Loss: 0.0636


Training Progress:  68%|██████████████████████████████████████▎                 | 6835/10000 [1:48:05<11:04,  4.76it/s]

Step 6834/10000, Loss: 0.0688
Step 6835/10000, Loss: 0.0709


Training Progress:  68%|██████████████████████████████████████▎                 | 6837/10000 [1:48:06<09:54,  5.32it/s]

Step 6836/10000, Loss: 0.0641
Step 6837/10000, Loss: 0.0644


Training Progress:  68%|██████████████████████████████████████▎                 | 6839/10000 [1:48:06<09:30,  5.54it/s]

Step 6838/10000, Loss: 0.0693
Step 6839/10000, Loss: 0.0755


Training Progress:  68%|██████████████████████████████████████▎                 | 6841/10000 [1:48:06<09:08,  5.76it/s]

Step 6840/10000, Loss: 0.0731
Step 6841/10000, Loss: 0.0711


Training Progress:  68%|██████████████████████████████████████▎                 | 6843/10000 [1:48:07<09:10,  5.73it/s]

Step 6842/10000, Loss: 0.0582
Step 6843/10000, Loss: 0.0624


Training Progress:  68%|██████████████████████████████████████▎                 | 6845/10000 [1:48:07<08:57,  5.87it/s]

Step 6844/10000, Loss: 0.0671
Step 6845/10000, Loss: 0.0699


Training Progress:  68%|██████████████████████████████████████▎                 | 6847/10000 [1:48:07<09:08,  5.75it/s]

Step 6846/10000, Loss: 0.0653
Step 6847/10000, Loss: 0.0698


Training Progress:  68%|██████████████████████████████████████▎                 | 6849/10000 [1:48:08<08:55,  5.89it/s]

Step 6848/10000, Loss: 0.0678
Step 6849/10000, Loss: 0.0600


Training Progress:  69%|██████████████████████████████████████▎                 | 6851/10000 [1:48:08<09:04,  5.78it/s]

Step 6850/10000, Loss: 0.0564
Step 6851/10000, Loss: 0.0685


Training Progress:  69%|██████████████████████████████████████▍                 | 6853/10000 [1:48:08<08:52,  5.91it/s]

Step 6852/10000, Loss: 0.0653
Step 6853/10000, Loss: 0.0643


Training Progress:  69%|██████████████████████████████████████▍                 | 6855/10000 [1:48:09<08:59,  5.83it/s]

Step 6854/10000, Loss: 0.0803
Step 6855/10000, Loss: 0.0571


Training Progress:  69%|██████████████████████████████████████▍                 | 6857/10000 [1:48:09<08:52,  5.91it/s]

Step 6856/10000, Loss: 0.0612
Step 6857/10000, Loss: 0.0588


Training Progress:  69%|██████████████████████████████████████▍                 | 6859/10000 [1:48:09<08:57,  5.84it/s]

Step 6858/10000, Loss: 0.0541
Step 6859/10000, Loss: 0.0643


Training Progress:  69%|██████████████████████████████████████▍                 | 6861/10000 [1:48:10<08:51,  5.91it/s]

Step 6860/10000, Loss: 0.0550
Step 6861/10000, Loss: 0.0560


Training Progress:  69%|██████████████████████████████████████▍                 | 6863/10000 [1:48:10<08:55,  5.86it/s]

Step 6862/10000, Loss: 0.0657
Step 6863/10000, Loss: 0.0556


Training Progress:  69%|██████████████████████████████████████▍                 | 6865/10000 [1:48:11<08:49,  5.92it/s]

Step 6864/10000, Loss: 0.0577
Step 6865/10000, Loss: 0.0608


Training Progress:  69%|██████████████████████████████████████▍                 | 6867/10000 [1:48:11<08:55,  5.85it/s]

Step 6866/10000, Loss: 0.0597
Step 6867/10000, Loss: 0.0589
Step 6868/10000, Loss: 0.0517


Training Progress:  69%|█████████████████████████████████████                 | 6868/10000 [1:48:26<4:04:26,  4.68s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6868_loss0.0517_20250117_144015.pt

New best loss: 0.0517


Training Progress:  69%|█████████████████████████████████████                 | 6870/10000 [1:48:27<2:07:34,  2.45s/it]

Step 6869/10000, Loss: 0.0543
Step 6870/10000, Loss: 0.0571


Training Progress:  69%|█████████████████████████████████████                 | 6871/10000 [1:48:27<1:31:54,  1.76s/it]

Step 6871/10000, Loss: 0.0656
Step 6872/10000, Loss: 0.0509


Training Progress:  69%|█████████████████████████████████████                 | 6872/10000 [1:48:48<6:38:19,  7.64s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6872_loss0.0509_20250117_144031.pt

New best loss: 0.0509


Training Progress:  69%|█████████████████████████████████████                 | 6874/10000 [1:48:49<3:23:10,  3.90s/it]

Step 6873/10000, Loss: 0.0645
Step 6874/10000, Loss: 0.0538


Training Progress:  69%|█████████████████████████████████████▏                | 6876/10000 [1:48:49<1:43:59,  2.00s/it]

Step 6875/10000, Loss: 0.0510
Step 6876/10000, Loss: 0.0587


Training Progress:  69%|██████████████████████████████████████▌                 | 6878/10000 [1:48:50<55:28,  1.07s/it]

Step 6877/10000, Loss: 0.0583
Step 6878/10000, Loss: 0.0583


Training Progress:  69%|██████████████████████████████████████▌                 | 6880/10000 [1:48:50<31:47,  1.64it/s]

Step 6879/10000, Loss: 0.0537
Step 6880/10000, Loss: 0.0607


Training Progress:  69%|██████████████████████████████████████▌                 | 6881/10000 [1:48:50<24:50,  2.09it/s]

Step 6881/10000, Loss: 0.0524
Step 6882/10000, Loss: 0.0498


Training Progress:  69%|█████████████████████████████████████▏                | 6882/10000 [1:49:10<5:32:09,  6.39s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6882_loss0.0498_20250117_144054.pt

New best loss: 0.0498


Training Progress:  69%|█████████████████████████████████████▏                | 6883/10000 [1:49:11<3:59:08,  4.60s/it]

Step 6883/10000, Loss: 0.0554
Step 6884/10000, Loss: 0.0466


Training Progress:  69%|█████████████████████████████████████▏                | 6884/10000 [1:49:32<8:21:58,  9.67s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6884_loss0.0466_20250117_144115.pt

New best loss: 0.0466


Training Progress:  69%|█████████████████████████████████████▏                | 6886/10000 [1:49:33<4:13:51,  4.89s/it]

Step 6885/10000, Loss: 0.0495
Step 6886/10000, Loss: 0.0492


Training Progress:  69%|█████████████████████████████████████▏                | 6888/10000 [1:49:33<2:08:55,  2.49s/it]

Step 6887/10000, Loss: 0.0522
Step 6888/10000, Loss: 0.0573


Training Progress:  69%|█████████████████████████████████████▏                | 6890/10000 [1:49:34<1:07:34,  1.30s/it]

Step 6889/10000, Loss: 0.0503
Step 6890/10000, Loss: 0.0553


Training Progress:  69%|██████████████████████████████████████▌                 | 6892/10000 [1:49:34<37:40,  1.37it/s]

Step 6891/10000, Loss: 0.0532
Step 6892/10000, Loss: 0.0536


Training Progress:  69%|██████████████████████████████████████▌                 | 6894/10000 [1:49:34<22:56,  2.26it/s]

Step 6893/10000, Loss: 0.0568
Step 6894/10000, Loss: 0.0562


Training Progress:  69%|██████████████████████████████████████▌                 | 6896/10000 [1:49:35<15:40,  3.30it/s]

Step 6895/10000, Loss: 0.0513
Step 6896/10000, Loss: 0.0560


Training Progress:  69%|██████████████████████████████████████▋                 | 6898/10000 [1:49:35<12:11,  4.24it/s]

Step 6897/10000, Loss: 0.0523
Step 6898/10000, Loss: 0.0511


Training Progress:  69%|██████████████████████████████████████▋                 | 6900/10000 [1:49:35<10:24,  4.96it/s]

Step 6899/10000, Loss: 0.0640
Step 6900/10000, Loss: 0.0546


Training Progress:  69%|██████████████████████████████████████▋                 | 6902/10000 [1:49:36<09:38,  5.36it/s]

Step 6901/10000, Loss: 0.1185
Step 6902/10000, Loss: 0.0636


Training Progress:  69%|██████████████████████████████████████▋                 | 6904/10000 [1:49:36<09:21,  5.52it/s]

Step 6903/10000, Loss: 0.0523
Step 6904/10000, Loss: 0.0711


Training Progress:  69%|██████████████████████████████████████▋                 | 6906/10000 [1:49:36<08:57,  5.75it/s]

Step 6905/10000, Loss: 0.0557
Step 6906/10000, Loss: 0.0556


Training Progress:  69%|██████████████████████████████████████▋                 | 6908/10000 [1:49:37<09:04,  5.68it/s]

Step 6907/10000, Loss: 0.0592
Step 6908/10000, Loss: 0.0590


Training Progress:  69%|██████████████████████████████████████▋                 | 6910/10000 [1:49:37<08:46,  5.87it/s]

Step 6909/10000, Loss: 0.0532
Step 6910/10000, Loss: 0.0534


Training Progress:  69%|██████████████████████████████████████▋                 | 6912/10000 [1:49:37<08:54,  5.78it/s]

Step 6911/10000, Loss: 0.0477
Step 6912/10000, Loss: 0.0556


Training Progress:  69%|██████████████████████████████████████▋                 | 6914/10000 [1:49:38<08:45,  5.88it/s]

Step 6913/10000, Loss: 0.0574
Step 6914/10000, Loss: 0.0498


Training Progress:  69%|██████████████████████████████████████▋                 | 6916/10000 [1:49:38<08:49,  5.82it/s]

Step 6915/10000, Loss: 0.0538
Step 6916/10000, Loss: 0.0558


Training Progress:  69%|██████████████████████████████████████▋                 | 6918/10000 [1:49:38<08:54,  5.77it/s]

Step 6917/10000, Loss: 0.0537
Step 6918/10000, Loss: 0.0503


Training Progress:  69%|██████████████████████████████████████▊                 | 6920/10000 [1:49:39<08:43,  5.88it/s]

Step 6919/10000, Loss: 0.0519
Step 6920/10000, Loss: 0.0571


Training Progress:  69%|██████████████████████████████████████▊                 | 6922/10000 [1:49:39<08:52,  5.78it/s]

Step 6921/10000, Loss: 0.0574
Step 6922/10000, Loss: 0.0719


Training Progress:  69%|██████████████████████████████████████▊                 | 6924/10000 [1:49:39<08:40,  5.91it/s]

Step 6923/10000, Loss: 0.0567
Step 6924/10000, Loss: 0.0529


Training Progress:  69%|██████████████████████████████████████▊                 | 6926/10000 [1:49:40<08:51,  5.78it/s]

Step 6925/10000, Loss: 0.0485
Step 6926/10000, Loss: 0.0563


Training Progress:  69%|██████████████████████████████████████▊                 | 6928/10000 [1:49:40<08:40,  5.90it/s]

Step 6927/10000, Loss: 0.0526
Step 6928/10000, Loss: 0.0572


Training Progress:  69%|██████████████████████████████████████▊                 | 6930/10000 [1:49:40<08:49,  5.79it/s]

Step 6929/10000, Loss: 0.0624
Step 6930/10000, Loss: 0.0581


Training Progress:  69%|██████████████████████████████████████▊                 | 6932/10000 [1:49:41<08:37,  5.93it/s]

Step 6931/10000, Loss: 0.0533
Step 6932/10000, Loss: 0.0470


Training Progress:  69%|██████████████████████████████████████▊                 | 6934/10000 [1:49:41<08:44,  5.84it/s]

Step 6933/10000, Loss: 0.0609
Step 6934/10000, Loss: 0.0537


Training Progress:  69%|██████████████████████████████████████▊                 | 6936/10000 [1:49:41<08:52,  5.76it/s]

Step 6935/10000, Loss: 0.0545
Step 6936/10000, Loss: 0.0586


Training Progress:  69%|██████████████████████████████████████▊                 | 6938/10000 [1:49:42<08:40,  5.89it/s]

Step 6937/10000, Loss: 0.0518
Step 6938/10000, Loss: 0.0586


Training Progress:  69%|██████████████████████████████████████▊                 | 6939/10000 [1:49:42<08:38,  5.90it/s]

Step 6939/10000, Loss: 0.0553
Step 6940/10000, Loss: 0.0451


Training Progress:  69%|█████████████████████████████████████▍                | 6940/10000 [1:49:57<3:51:29,  4.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6940_loss0.0451_20250117_144146.pt

New best loss: 0.0451


Training Progress:  69%|█████████████████████████████████████▍                | 6942/10000 [1:49:57<2:00:47,  2.37s/it]

Step 6941/10000, Loss: 0.0589
Step 6942/10000, Loss: 0.0539


Training Progress:  69%|█████████████████████████████████████▍                | 6944/10000 [1:49:58<1:03:27,  1.25s/it]

Step 6943/10000, Loss: 0.0451
Step 6944/10000, Loss: 0.0563


Training Progress:  69%|██████████████████████████████████████▉                 | 6946/10000 [1:49:58<35:38,  1.43it/s]

Step 6945/10000, Loss: 0.0486
Step 6946/10000, Loss: 0.0524


Training Progress:  69%|██████████████████████████████████████▉                 | 6948/10000 [1:49:58<21:50,  2.33it/s]

Step 6947/10000, Loss: 0.0540
Step 6948/10000, Loss: 0.0564


Training Progress:  70%|██████████████████████████████████████▉                 | 6950/10000 [1:49:59<15:13,  3.34it/s]

Step 6949/10000, Loss: 0.0497
Step 6950/10000, Loss: 0.0463


Training Progress:  70%|██████████████████████████████████████▉                 | 6952/10000 [1:49:59<11:47,  4.31it/s]

Step 6951/10000, Loss: 0.0516
Step 6952/10000, Loss: 0.0545


Training Progress:  70%|██████████████████████████████████████▉                 | 6954/10000 [1:49:59<10:14,  4.95it/s]

Step 6953/10000, Loss: 0.0541
Step 6954/10000, Loss: 0.0452


Training Progress:  70%|██████████████████████████████████████▉                 | 6956/10000 [1:50:00<09:19,  5.44it/s]

Step 6955/10000, Loss: 0.0595
Step 6956/10000, Loss: 0.0523
Step 6957/10000, Loss: 0.0446


Training Progress:  70%|█████████████████████████████████████▌                | 6957/10000 [1:50:19<4:59:19,  5.90s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6957_loss0.0446_20250117_144204.pt

New best loss: 0.0446


Training Progress:  70%|█████████████████████████████████████▌                | 6959/10000 [1:50:20<2:33:20,  3.03s/it]

Step 6958/10000, Loss: 0.0475
Step 6959/10000, Loss: 0.0510


Training Progress:  70%|█████████████████████████████████████▌                | 6961/10000 [1:50:20<1:19:38,  1.57s/it]

Step 6960/10000, Loss: 0.0513
Step 6961/10000, Loss: 0.0492


Training Progress:  70%|██████████████████████████████████████▉                 | 6963/10000 [1:50:20<43:17,  1.17it/s]

Step 6962/10000, Loss: 0.0543
Step 6963/10000, Loss: 0.0499


Training Progress:  70%|███████████████████████████████████████                 | 6965/10000 [1:50:21<25:43,  1.97it/s]

Step 6964/10000, Loss: 0.0453
Step 6965/10000, Loss: 0.0506


Training Progress:  70%|███████████████████████████████████████                 | 6967/10000 [1:50:21<16:55,  2.99it/s]

Step 6966/10000, Loss: 0.0467
Step 6967/10000, Loss: 0.0456


Training Progress:  70%|███████████████████████████████████████                 | 6969/10000 [1:50:21<12:50,  3.93it/s]

Step 6968/10000, Loss: 0.0455
Step 6969/10000, Loss: 0.0518


Training Progress:  70%|███████████████████████████████████████                 | 6971/10000 [1:50:22<10:32,  4.79it/s]

Step 6970/10000, Loss: 0.0505
Step 6971/10000, Loss: 0.0451


Training Progress:  70%|███████████████████████████████████████                 | 6973/10000 [1:50:22<09:36,  5.25it/s]

Step 6972/10000, Loss: 0.0495
Step 6973/10000, Loss: 0.0482
Step 6974/10000, Loss: 0.0435


Training Progress:  70%|█████████████████████████████████████▋                | 6974/10000 [1:50:42<5:04:13,  6.03s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6974_loss0.0435_20250117_144226.pt

New best loss: 0.0435


Training Progress:  70%|█████████████████████████████████████▋                | 6976/10000 [1:50:42<2:35:23,  3.08s/it]

Step 6975/10000, Loss: 0.0483
Step 6976/10000, Loss: 0.0507


Training Progress:  70%|█████████████████████████████████████▋                | 6978/10000 [1:50:42<1:20:21,  1.60s/it]

Step 6977/10000, Loss: 0.0490
Step 6978/10000, Loss: 0.0512


Training Progress:  70%|███████████████████████████████████████                 | 6979/10000 [1:50:43<59:00,  1.17s/it]

Step 6979/10000, Loss: 0.0531
Step 6980/10000, Loss: 0.0421


Training Progress:  70%|█████████████████████████████████████▋                | 6980/10000 [1:51:05<6:24:29,  7.64s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6980_loss0.0421_20250117_144247.pt

New best loss: 0.0421


Training Progress:  70%|█████████████████████████████████████▋                | 6982/10000 [1:51:06<3:14:50,  3.87s/it]

Step 6981/10000, Loss: 0.0505
Step 6982/10000, Loss: 0.0430


Training Progress:  70%|█████████████████████████████████████▋                | 6984/10000 [1:51:06<1:39:49,  1.99s/it]

Step 6983/10000, Loss: 0.0535
Step 6984/10000, Loss: 0.0519


Training Progress:  70%|███████████████████████████████████████                 | 6986/10000 [1:51:07<53:16,  1.06s/it]

Step 6985/10000, Loss: 0.0440
Step 6986/10000, Loss: 0.0587
Step 6987/10000, Loss: 0.0420


Training Progress:  70%|█████████████████████████████████████▋                | 6987/10000 [1:51:28<5:54:26,  7.06s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6987_loss0.0420_20250117_144311.pt

New best loss: 0.0420


Training Progress:  70%|█████████████████████████████████████▋                | 6989/10000 [1:51:28<3:01:14,  3.61s/it]

Step 6988/10000, Loss: 0.0469
Step 6989/10000, Loss: 0.0508


Training Progress:  70%|█████████████████████████████████████▊                | 6991/10000 [1:51:29<1:32:58,  1.85s/it]

Step 6990/10000, Loss: 0.0496
Step 6991/10000, Loss: 0.0498
Step 6992/10000, Loss: 0.0379


Training Progress:  70%|█████████████████████████████████████▊                | 6992/10000 [1:51:49<6:08:25,  7.35s/it]


Checkpoint saved: checkpoints\best\checkpoint_step6992_loss0.0379_20250117_144333.pt

New best loss: 0.0379


Training Progress:  70%|█████████████████████████████████████▊                | 6994/10000 [1:51:49<3:08:27,  3.76s/it]

Step 6993/10000, Loss: 0.0400
Step 6994/10000, Loss: 0.0497


Training Progress:  70%|█████████████████████████████████████▊                | 6996/10000 [1:51:50<1:36:39,  1.93s/it]

Step 6995/10000, Loss: 0.0441
Step 6996/10000, Loss: 0.0402


Training Progress:  70%|███████████████████████████████████████▏                | 6998/10000 [1:51:50<51:40,  1.03s/it]

Step 6997/10000, Loss: 0.0501
Step 6998/10000, Loss: 0.0455


Training Progress:  70%|███████████████████████████████████████▏                | 6999/10000 [1:51:50<38:45,  1.29it/s]

Step 6999/10000, Loss: 0.0455
Step 7000/10000, Loss: 0.0401


Training Progress:  70%|█████████████████████████████████████▊                | 7000/10000 [1:52:14<6:19:44,  7.59s/it]


Checkpoint saved: checkpoints\checkpoint_step7000_loss0.0401_20250117_144354.pt


Training Progress:  70%|█████████████████████████████████████▊                | 7002/10000 [1:52:14<3:13:15,  3.87s/it]

Step 7001/10000, Loss: 0.0444
Step 7002/10000, Loss: 0.0473


Training Progress:  70%|█████████████████████████████████████▊                | 7004/10000 [1:52:15<1:39:02,  1.98s/it]

Step 7003/10000, Loss: 0.0478
Step 7004/10000, Loss: 0.0450


Training Progress:  70%|███████████████████████████████████████▏                | 7006/10000 [1:52:15<52:44,  1.06s/it]

Step 7005/10000, Loss: 0.0501
Step 7006/10000, Loss: 0.0506


Training Progress:  70%|███████████████████████████████████████▏                | 7008/10000 [1:52:15<30:13,  1.65it/s]

Step 7007/10000, Loss: 0.0432
Step 7008/10000, Loss: 0.0443


Training Progress:  70%|███████████████████████████████████████▎                | 7010/10000 [1:52:16<19:06,  2.61it/s]

Step 7009/10000, Loss: 0.0450
Step 7010/10000, Loss: 0.0569


Training Progress:  70%|███████████████████████████████████████▎                | 7012/10000 [1:52:16<13:42,  3.63it/s]

Step 7011/10000, Loss: 0.0537
Step 7012/10000, Loss: 0.0475


Training Progress:  70%|███████████████████████████████████████▎                | 7013/10000 [1:52:16<12:06,  4.11it/s]

Step 7013/10000, Loss: 0.0391
Step 7014/10000, Loss: 0.0373


Training Progress:  70%|█████████████████████████████████████▉                | 7014/10000 [1:52:35<4:54:06,  5.91s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7014_loss0.0373_20250117_144420.pt

New best loss: 0.0373


Training Progress:  70%|█████████████████████████████████████▉                | 7016/10000 [1:52:36<2:30:46,  3.03s/it]

Step 7015/10000, Loss: 0.0545
Step 7016/10000, Loss: 0.0408


Training Progress:  70%|█████████████████████████████████████▉                | 7018/10000 [1:52:36<1:18:00,  1.57s/it]

Step 7017/10000, Loss: 0.0418
Step 7018/10000, Loss: 0.0445


Training Progress:  70%|███████████████████████████████████████▎                | 7020/10000 [1:52:37<42:40,  1.16it/s]

Step 7019/10000, Loss: 0.0408
Step 7020/10000, Loss: 0.0490


Training Progress:  70%|███████████████████████████████████████▎                | 7022/10000 [1:52:37<25:07,  1.98it/s]

Step 7021/10000, Loss: 0.0471
Step 7022/10000, Loss: 0.0419


Training Progress:  70%|███████████████████████████████████████▎                | 7024/10000 [1:52:37<16:42,  2.97it/s]

Step 7023/10000, Loss: 0.0477
Step 7024/10000, Loss: 0.0440


Training Progress:  70%|███████████████████████████████████████▎                | 7026/10000 [1:52:38<12:23,  4.00it/s]

Step 7025/10000, Loss: 0.0400
Step 7026/10000, Loss: 0.0512


Training Progress:  70%|███████████████████████████████████████▎                | 7028/10000 [1:52:38<10:26,  4.74it/s]

Step 7027/10000, Loss: 0.0394
Step 7028/10000, Loss: 0.0448


Training Progress:  70%|███████████████████████████████████████▎                | 7030/10000 [1:52:38<09:33,  5.18it/s]

Step 7029/10000, Loss: 0.0475
Step 7030/10000, Loss: 0.0460


Training Progress:  70%|███████████████████████████████████████▍                | 7032/10000 [1:52:39<08:51,  5.58it/s]

Step 7031/10000, Loss: 0.0432
Step 7032/10000, Loss: 0.0442


Training Progress:  70%|███████████████████████████████████████▍                | 7034/10000 [1:52:39<08:47,  5.62it/s]

Step 7033/10000, Loss: 0.0521
Step 7034/10000, Loss: 0.0459


Training Progress:  70%|███████████████████████████████████████▍                | 7036/10000 [1:52:39<08:30,  5.81it/s]

Step 7035/10000, Loss: 0.0448
Step 7036/10000, Loss: 0.0376


Training Progress:  70%|███████████████████████████████████████▍                | 7038/10000 [1:52:40<08:31,  5.79it/s]

Step 7037/10000, Loss: 0.0465
Step 7038/10000, Loss: 0.0486


Training Progress:  70%|███████████████████████████████████████▍                | 7040/10000 [1:52:40<08:22,  5.89it/s]

Step 7039/10000, Loss: 0.0446
Step 7040/10000, Loss: 0.0397


Training Progress:  70%|███████████████████████████████████████▍                | 7042/10000 [1:52:41<08:31,  5.78it/s]

Step 7041/10000, Loss: 0.0438
Step 7042/10000, Loss: 0.0438


Training Progress:  70%|███████████████████████████████████████▍                | 7044/10000 [1:52:41<08:22,  5.89it/s]

Step 7043/10000, Loss: 0.0398
Step 7044/10000, Loss: 0.0464


Training Progress:  70%|███████████████████████████████████████▍                | 7046/10000 [1:52:41<08:25,  5.84it/s]

Step 7045/10000, Loss: 0.0378
Step 7046/10000, Loss: 0.0398


Training Progress:  70%|███████████████████████████████████████▍                | 7048/10000 [1:52:42<08:33,  5.75it/s]

Step 7047/10000, Loss: 0.0442
Step 7048/10000, Loss: 0.0398


Training Progress:  70%|███████████████████████████████████████▍                | 7049/10000 [1:52:42<08:27,  5.81it/s]

Step 7049/10000, Loss: 0.0387
Step 7050/10000, Loss: 0.0343


Training Progress:  70%|██████████████████████████████████████                | 7050/10000 [1:52:58<4:05:01,  4.98s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7050_loss0.0343_20250117_144446.pt

New best loss: 0.0343


Training Progress:  71%|██████████████████████████████████████                | 7052/10000 [1:52:58<2:06:15,  2.57s/it]

Step 7051/10000, Loss: 0.0399
Step 7052/10000, Loss: 0.0455


Training Progress:  71%|██████████████████████████████████████                | 7054/10000 [1:52:59<1:06:08,  1.35s/it]

Step 7053/10000, Loss: 0.0416
Step 7054/10000, Loss: 0.0409


Training Progress:  71%|███████████████████████████████████████▌                | 7056/10000 [1:52:59<36:39,  1.34it/s]

Step 7055/10000, Loss: 0.0483
Step 7056/10000, Loss: 0.0426


Training Progress:  71%|███████████████████████████████████████▌                | 7058/10000 [1:52:59<22:13,  2.21it/s]

Step 7057/10000, Loss: 0.0438
Step 7058/10000, Loss: 0.0497


Training Progress:  71%|███████████████████████████████████████▌                | 7060/10000 [1:53:00<15:09,  3.23it/s]

Step 7059/10000, Loss: 0.0418
Step 7060/10000, Loss: 0.0455


Training Progress:  71%|███████████████████████████████████████▌                | 7062/10000 [1:53:00<11:41,  4.19it/s]

Step 7061/10000, Loss: 0.0433
Step 7062/10000, Loss: 0.0387


Training Progress:  71%|███████████████████████████████████████▌                | 7064/10000 [1:53:01<09:58,  4.91it/s]

Step 7063/10000, Loss: 0.0453
Step 7064/10000, Loss: 0.0396


Training Progress:  71%|███████████████████████████████████████▌                | 7066/10000 [1:53:01<09:08,  5.35it/s]

Step 7065/10000, Loss: 0.0624
Step 7066/10000, Loss: 0.0443


Training Progress:  71%|███████████████████████████████████████▌                | 7068/10000 [1:53:01<08:44,  5.59it/s]

Step 7067/10000, Loss: 0.0369
Step 7068/10000, Loss: 0.0479


Training Progress:  71%|███████████████████████████████████████▌                | 7070/10000 [1:53:02<08:31,  5.72it/s]

Step 7069/10000, Loss: 0.0367
Step 7070/10000, Loss: 0.0385


Training Progress:  71%|███████████████████████████████████████▌                | 7072/10000 [1:53:02<08:26,  5.78it/s]

Step 7071/10000, Loss: 0.0455
Step 7072/10000, Loss: 0.0440


Training Progress:  71%|███████████████████████████████████████▌                | 7074/10000 [1:53:02<08:23,  5.81it/s]

Step 7073/10000, Loss: 0.0450
Step 7074/10000, Loss: 0.0376


Training Progress:  71%|███████████████████████████████████████▋                | 7076/10000 [1:53:03<08:11,  5.94it/s]

Step 7075/10000, Loss: 0.0391
Step 7076/10000, Loss: 0.0476


Training Progress:  71%|███████████████████████████████████████▋                | 7078/10000 [1:53:03<08:22,  5.81it/s]

Step 7077/10000, Loss: 0.0429
Step 7078/10000, Loss: 0.0381


Training Progress:  71%|███████████████████████████████████████▋                | 7080/10000 [1:53:03<08:16,  5.88it/s]

Step 7079/10000, Loss: 0.0412
Step 7080/10000, Loss: 0.0427


Training Progress:  71%|███████████████████████████████████████▋                | 7082/10000 [1:53:04<08:18,  5.85it/s]

Step 7081/10000, Loss: 0.0386
Step 7082/10000, Loss: 0.0385


Training Progress:  71%|███████████████████████████████████████▋                | 7084/10000 [1:53:04<08:12,  5.93it/s]

Step 7083/10000, Loss: 0.0353
Step 7084/10000, Loss: 0.0465


Training Progress:  71%|███████████████████████████████████████▋                | 7086/10000 [1:53:04<08:23,  5.79it/s]

Step 7085/10000, Loss: 0.0404
Step 7086/10000, Loss: 0.0634


Training Progress:  71%|███████████████████████████████████████▋                | 7088/10000 [1:53:05<08:19,  5.83it/s]

Step 7087/10000, Loss: 0.0399
Step 7088/10000, Loss: 0.0418
Step 7089/10000, Loss: 0.0341


Training Progress:  71%|██████████████████████████████████████▎               | 7089/10000 [1:53:23<4:26:42,  5.50s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7089_loss0.0341_20250117_144509.pt

New best loss: 0.0341


Training Progress:  71%|██████████████████████████████████████▎               | 7091/10000 [1:53:23<2:16:38,  2.82s/it]

Step 7090/10000, Loss: 0.0429
Step 7091/10000, Loss: 0.0419


Training Progress:  71%|██████████████████████████████████████▎               | 7093/10000 [1:53:23<1:11:01,  1.47s/it]

Step 7092/10000, Loss: 0.0546
Step 7093/10000, Loss: 0.0511


Training Progress:  71%|███████████████████████████████████████▋                | 7095/10000 [1:53:24<39:07,  1.24it/s]

Step 7094/10000, Loss: 0.0413
Step 7095/10000, Loss: 0.0348


Training Progress:  71%|███████████████████████████████████████▋                | 7097/10000 [1:53:24<23:15,  2.08it/s]

Step 7096/10000, Loss: 0.0368
Step 7097/10000, Loss: 0.0408
Step 7098/10000, Loss: 0.0335


Training Progress:  71%|██████████████████████████████████████▎               | 7098/10000 [1:53:44<5:05:22,  6.31s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7098_loss0.0335_20250117_144528.pt

New best loss: 0.0335


Training Progress:  71%|██████████████████████████████████████▎               | 7100/10000 [1:53:45<2:36:43,  3.24s/it]

Step 7099/10000, Loss: 0.0382
Step 7100/10000, Loss: 0.0372


Training Progress:  71%|██████████████████████████████████████▎               | 7102/10000 [1:53:45<1:21:00,  1.68s/it]

Step 7101/10000, Loss: 0.0393
Step 7102/10000, Loss: 0.0437


Training Progress:  71%|███████████████████████████████████████▊                | 7104/10000 [1:53:45<43:59,  1.10it/s]

Step 7103/10000, Loss: 0.0371
Step 7104/10000, Loss: 0.0352


Training Progress:  71%|███████████████████████████████████████▊                | 7106/10000 [1:53:46<25:38,  1.88it/s]

Step 7105/10000, Loss: 0.0408
Step 7106/10000, Loss: 0.0451


Training Progress:  71%|███████████████████████████████████████▊                | 7108/10000 [1:53:46<16:53,  2.85it/s]

Step 7107/10000, Loss: 0.0339
Step 7108/10000, Loss: 0.0432


Training Progress:  71%|███████████████████████████████████████▊                | 7110/10000 [1:53:46<12:21,  3.90it/s]

Step 7109/10000, Loss: 0.0362
Step 7110/10000, Loss: 0.0394


Training Progress:  71%|███████████████████████████████████████▊                | 7112/10000 [1:53:47<10:22,  4.64it/s]

Step 7111/10000, Loss: 0.0403
Step 7112/10000, Loss: 0.0420


Training Progress:  71%|███████████████████████████████████████▊                | 7114/10000 [1:53:47<09:10,  5.24it/s]

Step 7113/10000, Loss: 0.0356
Step 7114/10000, Loss: 0.0389


Training Progress:  71%|███████████████████████████████████████▊                | 7116/10000 [1:53:47<08:45,  5.49it/s]

Step 7115/10000, Loss: 0.0476
Step 7116/10000, Loss: 0.0435


Training Progress:  71%|███████████████████████████████████████▊                | 7117/10000 [1:53:48<08:33,  5.62it/s]

Step 7117/10000, Loss: 0.0472
Step 7118/10000, Loss: 0.0327


Training Progress:  71%|██████████████████████████████████████▍               | 7118/10000 [1:54:05<4:22:47,  5.47s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7118_loss0.0327_20250117_144552.pt

New best loss: 0.0327


Training Progress:  71%|██████████████████████████████████████▍               | 7120/10000 [1:54:06<2:16:03,  2.83s/it]

Step 7119/10000, Loss: 0.0469
Step 7120/10000, Loss: 0.0427


Training Progress:  71%|██████████████████████████████████████▍               | 7122/10000 [1:54:06<1:10:48,  1.48s/it]

Step 7121/10000, Loss: 0.0333
Step 7122/10000, Loss: 0.0330


Training Progress:  71%|███████████████████████████████████████▉                | 7124/10000 [1:54:07<38:51,  1.23it/s]

Step 7123/10000, Loss: 0.0391
Step 7124/10000, Loss: 0.0387


Training Progress:  71%|███████████████████████████████████████▉                | 7126/10000 [1:54:07<23:12,  2.06it/s]

Step 7125/10000, Loss: 0.0369
Step 7126/10000, Loss: 0.0444


Training Progress:  71%|███████████████████████████████████████▉                | 7128/10000 [1:54:07<15:30,  3.09it/s]

Step 7127/10000, Loss: 0.0341
Step 7128/10000, Loss: 0.0363


Training Progress:  71%|███████████████████████████████████████▉                | 7130/10000 [1:54:08<11:46,  4.06it/s]

Step 7129/10000, Loss: 0.0358
Step 7130/10000, Loss: 0.0383


Training Progress:  71%|███████████████████████████████████████▉                | 7131/10000 [1:54:08<10:40,  4.48it/s]

Step 7131/10000, Loss: 0.0369
Step 7132/10000, Loss: 0.0319


Training Progress:  71%|██████████████████████████████████████▌               | 7132/10000 [1:54:29<5:11:48,  6.52s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7132_loss0.0319_20250117_144612.pt

New best loss: 0.0319


Training Progress:  71%|██████████████████████████████████████▌               | 7134/10000 [1:54:30<2:40:15,  3.35s/it]

Step 7133/10000, Loss: 0.0395
Step 7134/10000, Loss: 0.0430


Training Progress:  71%|██████████████████████████████████████▌               | 7136/10000 [1:54:30<1:22:36,  1.73s/it]

Step 7135/10000, Loss: 0.0365
Step 7136/10000, Loss: 0.0363


Training Progress:  71%|███████████████████████████████████████▉                | 7138/10000 [1:54:31<44:37,  1.07it/s]

Step 7137/10000, Loss: 0.0400
Step 7138/10000, Loss: 0.0351


Training Progress:  71%|███████████████████████████████████████▉                | 7140/10000 [1:54:31<25:53,  1.84it/s]

Step 7139/10000, Loss: 0.0431
Step 7140/10000, Loss: 0.0418


Training Progress:  71%|███████████████████████████████████████▉                | 7142/10000 [1:54:31<16:56,  2.81it/s]

Step 7141/10000, Loss: 0.0389
Step 7142/10000, Loss: 0.0467


Training Progress:  71%|████████████████████████████████████████                | 7144/10000 [1:54:32<12:17,  3.87it/s]

Step 7143/10000, Loss: 0.0382
Step 7144/10000, Loss: 0.0377


Training Progress:  71%|████████████████████████████████████████                | 7146/10000 [1:54:32<10:13,  4.65it/s]

Step 7145/10000, Loss: 0.0445
Step 7146/10000, Loss: 0.0345


Training Progress:  71%|████████████████████████████████████████                | 7148/10000 [1:54:32<09:14,  5.14it/s]

Step 7147/10000, Loss: 0.0746
Step 7148/10000, Loss: 0.0393


Training Progress:  72%|████████████████████████████████████████                | 7150/10000 [1:54:33<08:40,  5.48it/s]

Step 7149/10000, Loss: 0.0330
Step 7150/10000, Loss: 0.0467


Training Progress:  72%|████████████████████████████████████████                | 7152/10000 [1:54:33<08:12,  5.78it/s]

Step 7151/10000, Loss: 0.0330
Step 7152/10000, Loss: 0.0331


Training Progress:  72%|████████████████████████████████████████                | 7154/10000 [1:54:33<08:16,  5.73it/s]

Step 7153/10000, Loss: 0.0412
Step 7154/10000, Loss: 0.0412


Training Progress:  72%|████████████████████████████████████████                | 7155/10000 [1:54:33<08:06,  5.84it/s]

Step 7155/10000, Loss: 0.0364
Step 7156/10000, Loss: 0.0318


Training Progress:  72%|██████████████████████████████████████▋               | 7156/10000 [1:54:54<4:52:37,  6.17s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7156_loss0.0318_20250117_144637.pt

New best loss: 0.0318


Training Progress:  72%|██████████████████████████████████████▋               | 7158/10000 [1:54:54<2:30:36,  3.18s/it]

Step 7157/10000, Loss: 0.0338
Step 7158/10000, Loss: 0.0374


Training Progress:  72%|██████████████████████████████████████▋               | 7160/10000 [1:54:55<1:17:52,  1.65s/it]

Step 7159/10000, Loss: 0.0392
Step 7160/10000, Loss: 0.0363


Training Progress:  72%|████████████████████████████████████████                | 7162/10000 [1:54:55<42:08,  1.12it/s]

Step 7161/10000, Loss: 0.0453
Step 7162/10000, Loss: 0.0371


Training Progress:  72%|████████████████████████████████████████                | 7164/10000 [1:54:55<24:43,  1.91it/s]

Step 7163/10000, Loss: 0.0403
Step 7164/10000, Loss: 0.0376


Training Progress:  72%|████████████████████████████████████████▏               | 7166/10000 [1:54:56<16:13,  2.91it/s]

Step 7165/10000, Loss: 0.0357
Step 7166/10000, Loss: 0.0444


Training Progress:  72%|████████████████████████████████████████▏               | 7168/10000 [1:54:56<12:08,  3.89it/s]

Step 7167/10000, Loss: 0.0381
Step 7168/10000, Loss: 0.0518


Training Progress:  72%|████████████████████████████████████████▏               | 7170/10000 [1:54:56<09:59,  4.72it/s]

Step 7169/10000, Loss: 0.0361
Step 7170/10000, Loss: 0.0406


Training Progress:  72%|████████████████████████████████████████▏               | 7172/10000 [1:54:57<09:05,  5.18it/s]

Step 7171/10000, Loss: 0.0344
Step 7172/10000, Loss: 0.0357


Training Progress:  72%|████████████████████████████████████████▏               | 7174/10000 [1:54:57<08:27,  5.57it/s]

Step 7173/10000, Loss: 0.0367
Step 7174/10000, Loss: 0.0480


Training Progress:  72%|████████████████████████████████████████▏               | 7176/10000 [1:54:57<08:23,  5.61it/s]

Step 7175/10000, Loss: 0.0436
Step 7176/10000, Loss: 0.0339
Step 7177/10000, Loss: 0.0315


Training Progress:  72%|██████████████████████████████████████▊               | 7177/10000 [1:55:13<3:44:20,  4.77s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7177_loss0.0315_20250117_144701.pt

New best loss: 0.0315


Training Progress:  72%|██████████████████████████████████████▊               | 7179/10000 [1:55:13<1:56:44,  2.48s/it]

Step 7178/10000, Loss: 0.0355
Step 7179/10000, Loss: 0.0376
Step 7180/10000, Loss: 0.0291


Training Progress:  72%|██████████████████████████████████████▊               | 7180/10000 [1:55:38<7:04:38,  9.03s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7180_loss0.0291_20250117_144717.pt

New best loss: 0.0291


Training Progress:  72%|██████████████████████████████████████▊               | 7182/10000 [1:55:38<3:35:10,  4.58s/it]

Step 7181/10000, Loss: 0.0365
Step 7182/10000, Loss: 0.0309


Training Progress:  72%|██████████████████████████████████████▊               | 7184/10000 [1:55:39<1:49:24,  2.33s/it]

Step 7183/10000, Loss: 0.0338
Step 7184/10000, Loss: 0.0365


Training Progress:  72%|████████████████████████████████████████▏               | 7186/10000 [1:55:39<57:47,  1.23s/it]

Step 7185/10000, Loss: 0.0339
Step 7186/10000, Loss: 0.0302


Training Progress:  72%|████████████████████████████████████████▎               | 7188/10000 [1:55:39<32:16,  1.45it/s]

Step 7187/10000, Loss: 0.0379
Step 7188/10000, Loss: 0.0347


Training Progress:  72%|████████████████████████████████████████▎               | 7190/10000 [1:55:40<19:45,  2.37it/s]

Step 7189/10000, Loss: 0.0326
Step 7190/10000, Loss: 0.0406


Training Progress:  72%|████████████████████████████████████████▎               | 7192/10000 [1:55:40<13:48,  3.39it/s]

Step 7191/10000, Loss: 0.0354
Step 7192/10000, Loss: 0.0355


Training Progress:  72%|████████████████████████████████████████▎               | 7194/10000 [1:55:40<10:56,  4.27it/s]

Step 7193/10000, Loss: 0.0402
Step 7194/10000, Loss: 0.0396


Training Progress:  72%|████████████████████████████████████████▎               | 7196/10000 [1:55:41<09:19,  5.01it/s]

Step 7195/10000, Loss: 0.0332
Step 7196/10000, Loss: 0.0316


Training Progress:  72%|████████████████████████████████████████▎               | 7198/10000 [1:55:41<08:45,  5.33it/s]

Step 7197/10000, Loss: 0.0445
Step 7198/10000, Loss: 0.0378


Training Progress:  72%|████████████████████████████████████████▎               | 7200/10000 [1:55:41<08:14,  5.66it/s]

Step 7199/10000, Loss: 0.0441
Step 7200/10000, Loss: 0.0328


Training Progress:  72%|████████████████████████████████████████▎               | 7202/10000 [1:55:42<08:13,  5.67it/s]

Step 7201/10000, Loss: 0.0423
Step 7202/10000, Loss: 0.0377


Training Progress:  72%|████████████████████████████████████████▎               | 7204/10000 [1:55:42<07:59,  5.83it/s]

Step 7203/10000, Loss: 0.0310
Step 7204/10000, Loss: 0.0323


Training Progress:  72%|████████████████████████████████████████▎               | 7206/10000 [1:55:43<08:06,  5.74it/s]

Step 7205/10000, Loss: 0.0353
Step 7206/10000, Loss: 0.0373


Training Progress:  72%|████████████████████████████████████████▎               | 7208/10000 [1:55:43<07:55,  5.87it/s]

Step 7207/10000, Loss: 0.0342
Step 7208/10000, Loss: 0.0383


Training Progress:  72%|████████████████████████████████████████▍               | 7210/10000 [1:55:43<08:02,  5.78it/s]

Step 7209/10000, Loss: 0.0310
Step 7210/10000, Loss: 0.0339


Training Progress:  72%|████████████████████████████████████████▍               | 7212/10000 [1:55:44<07:53,  5.88it/s]

Step 7211/10000, Loss: 0.0360
Step 7212/10000, Loss: 0.0351


Training Progress:  72%|████████████████████████████████████████▍               | 7213/10000 [1:55:44<08:01,  5.79it/s]

Step 7213/10000, Loss: 0.0333
Step 7214/10000, Loss: 0.0262


Training Progress:  72%|██████████████████████████████████████▉               | 7214/10000 [1:56:01<4:09:09,  5.37s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7214_loss0.0262_20250117_144748.pt

New best loss: 0.0262


Training Progress:  72%|██████████████████████████████████████▉               | 7216/10000 [1:56:02<2:08:39,  2.77s/it]

Step 7215/10000, Loss: 0.0355
Step 7216/10000, Loss: 0.0428


Training Progress:  72%|██████████████████████████████████████▉               | 7218/10000 [1:56:02<1:06:55,  1.44s/it]

Step 7217/10000, Loss: 0.0372
Step 7218/10000, Loss: 0.0350


Training Progress:  72%|████████████████████████████████████████▍               | 7220/10000 [1:56:02<36:56,  1.25it/s]

Step 7219/10000, Loss: 0.0381
Step 7220/10000, Loss: 0.0335


Training Progress:  72%|████████████████████████████████████████▍               | 7222/10000 [1:56:03<22:01,  2.10it/s]

Step 7221/10000, Loss: 0.0394
Step 7222/10000, Loss: 0.0412


Training Progress:  72%|████████████████████████████████████████▍               | 7224/10000 [1:56:03<14:56,  3.10it/s]

Step 7223/10000, Loss: 0.0336
Step 7224/10000, Loss: 0.0408


Training Progress:  72%|████████████████████████████████████████▍               | 7226/10000 [1:56:04<11:15,  4.11it/s]

Step 7225/10000, Loss: 0.0337
Step 7226/10000, Loss: 0.0332


Training Progress:  72%|████████████████████████████████████████▍               | 7228/10000 [1:56:04<09:35,  4.82it/s]

Step 7227/10000, Loss: 0.0390
Step 7228/10000, Loss: 0.0347


Training Progress:  72%|████████████████████████████████████████▍               | 7230/10000 [1:56:04<08:37,  5.35it/s]

Step 7229/10000, Loss: 0.0415
Step 7230/10000, Loss: 0.0368


Training Progress:  72%|████████████████████████████████████████▍               | 7232/10000 [1:56:05<08:17,  5.56it/s]

Step 7231/10000, Loss: 0.0294
Step 7232/10000, Loss: 0.0420


Training Progress:  72%|████████████████████████████████████████▌               | 7234/10000 [1:56:05<07:59,  5.76it/s]

Step 7233/10000, Loss: 0.0319
Step 7234/10000, Loss: 0.0308


Training Progress:  72%|████████████████████████████████████████▌               | 7236/10000 [1:56:05<08:04,  5.70it/s]

Step 7235/10000, Loss: 0.0375
Step 7236/10000, Loss: 0.0435


Training Progress:  72%|████████████████████████████████████████▌               | 7238/10000 [1:56:06<08:01,  5.74it/s]

Step 7237/10000, Loss: 0.0363
Step 7238/10000, Loss: 0.0303


Training Progress:  72%|████████████████████████████████████████▌               | 7240/10000 [1:56:06<07:50,  5.87it/s]

Step 7239/10000, Loss: 0.0322
Step 7240/10000, Loss: 0.0373


Training Progress:  72%|████████████████████████████████████████▌               | 7242/10000 [1:56:06<07:56,  5.79it/s]

Step 7241/10000, Loss: 0.0364
Step 7242/10000, Loss: 0.0369


Training Progress:  72%|████████████████████████████████████████▌               | 7244/10000 [1:56:07<07:45,  5.91it/s]

Step 7243/10000, Loss: 0.0392
Step 7244/10000, Loss: 0.0350


Training Progress:  72%|████████████████████████████████████████▌               | 7246/10000 [1:56:07<07:55,  5.79it/s]

Step 7245/10000, Loss: 0.0329
Step 7246/10000, Loss: 0.0344


Training Progress:  72%|████████████████████████████████████████▌               | 7248/10000 [1:56:07<07:48,  5.87it/s]

Step 7247/10000, Loss: 0.0299
Step 7248/10000, Loss: 0.0413


Training Progress:  72%|████████████████████████████████████████▌               | 7250/10000 [1:56:08<07:50,  5.84it/s]

Step 7249/10000, Loss: 0.0343
Step 7250/10000, Loss: 0.0317


Training Progress:  73%|████████████████████████████████████████▌               | 7252/10000 [1:56:08<07:44,  5.92it/s]

Step 7251/10000, Loss: 0.0354
Step 7252/10000, Loss: 0.0329


Training Progress:  73%|████████████████████████████████████████▌               | 7254/10000 [1:56:08<07:54,  5.79it/s]

Step 7253/10000, Loss: 0.0331
Step 7254/10000, Loss: 0.0350


Training Progress:  73%|████████████████████████████████████████▋               | 7256/10000 [1:56:09<07:44,  5.90it/s]

Step 7255/10000, Loss: 0.0355
Step 7256/10000, Loss: 0.0413


Training Progress:  73%|████████████████████████████████████████▋               | 7258/10000 [1:56:09<07:49,  5.84it/s]

Step 7257/10000, Loss: 0.0379
Step 7258/10000, Loss: 0.0337


Training Progress:  73%|████████████████████████████████████████▋               | 7260/10000 [1:56:09<07:55,  5.77it/s]

Step 7259/10000, Loss: 0.0300
Step 7260/10000, Loss: 0.0303


Training Progress:  73%|████████████████████████████████████████▋               | 7261/10000 [1:56:10<07:52,  5.80it/s]

Step 7261/10000, Loss: 0.0348
Step 7262/10000, Loss: 0.0250


Training Progress:  73%|███████████████████████████████████████▏              | 7262/10000 [1:56:26<3:47:58,  5.00s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7262_loss0.0250_20250117_144814.pt

New best loss: 0.0250


Training Progress:  73%|███████████████████████████████████████▏              | 7264/10000 [1:56:26<1:58:25,  2.60s/it]

Step 7263/10000, Loss: 0.0321
Step 7264/10000, Loss: 0.0307


Training Progress:  73%|███████████████████████████████████████▏              | 7266/10000 [1:56:27<1:01:59,  1.36s/it]

Step 7265/10000, Loss: 0.0305
Step 7266/10000, Loss: 0.0378


Training Progress:  73%|████████████████████████████████████████▋               | 7268/10000 [1:56:27<34:20,  1.33it/s]

Step 7267/10000, Loss: 0.0306
Step 7268/10000, Loss: 0.0291


Training Progress:  73%|████████████████████████████████████████▋               | 7270/10000 [1:56:27<20:41,  2.20it/s]

Step 7269/10000, Loss: 0.0347
Step 7270/10000, Loss: 0.0326


Training Progress:  73%|████████████████████████████████████████▋               | 7272/10000 [1:56:28<14:08,  3.21it/s]

Step 7271/10000, Loss: 0.0265
Step 7272/10000, Loss: 0.0356


Training Progress:  73%|████████████████████████████████████████▋               | 7274/10000 [1:56:28<10:58,  4.14it/s]

Step 7273/10000, Loss: 0.0285
Step 7274/10000, Loss: 0.0323


Training Progress:  73%|████████████████████████████████████████▋               | 7276/10000 [1:56:28<09:12,  4.93it/s]

Step 7275/10000, Loss: 0.0349
Step 7276/10000, Loss: 0.0370


Training Progress:  73%|████████████████████████████████████████▊               | 7278/10000 [1:56:29<08:35,  5.28it/s]

Step 7277/10000, Loss: 0.0320
Step 7278/10000, Loss: 0.0288


Training Progress:  73%|████████████████████████████████████████▊               | 7280/10000 [1:56:29<08:13,  5.51it/s]

Step 7279/10000, Loss: 0.0446
Step 7280/10000, Loss: 0.0383


Training Progress:  73%|████████████████████████████████████████▊               | 7282/10000 [1:56:29<07:54,  5.73it/s]

Step 7281/10000, Loss: 0.0431
Step 7282/10000, Loss: 0.0298


Training Progress:  73%|████████████████████████████████████████▊               | 7284/10000 [1:56:30<07:44,  5.84it/s]

Step 7283/10000, Loss: 0.0435
Step 7284/10000, Loss: 0.0342


Training Progress:  73%|████████████████████████████████████████▊               | 7286/10000 [1:56:30<07:45,  5.83it/s]

Step 7285/10000, Loss: 0.0267
Step 7286/10000, Loss: 0.0314


Training Progress:  73%|████████████████████████████████████████▊               | 7288/10000 [1:56:31<07:53,  5.73it/s]

Step 7287/10000, Loss: 0.0378
Step 7288/10000, Loss: 0.0328


Training Progress:  73%|████████████████████████████████████████▊               | 7290/10000 [1:56:31<07:41,  5.87it/s]

Step 7289/10000, Loss: 0.0317
Step 7290/10000, Loss: 0.0392


Training Progress:  73%|████████████████████████████████████████▊               | 7292/10000 [1:56:31<07:49,  5.77it/s]

Step 7291/10000, Loss: 0.0305
Step 7292/10000, Loss: 0.0322


Training Progress:  73%|████████████████████████████████████████▊               | 7294/10000 [1:56:32<07:40,  5.88it/s]

Step 7293/10000, Loss: 0.0325
Step 7294/10000, Loss: 0.0332


Training Progress:  73%|████████████████████████████████████████▊               | 7296/10000 [1:56:32<07:47,  5.79it/s]

Step 7295/10000, Loss: 0.0303
Step 7296/10000, Loss: 0.0301


Training Progress:  73%|████████████████████████████████████████▊               | 7298/10000 [1:56:32<07:48,  5.76it/s]

Step 7297/10000, Loss: 0.0314
Step 7298/10000, Loss: 0.0404


Training Progress:  73%|████████████████████████████████████████▉               | 7300/10000 [1:56:33<07:40,  5.87it/s]

Step 7299/10000, Loss: 0.0315
Step 7300/10000, Loss: 0.0337


Training Progress:  73%|████████████████████████████████████████▉               | 7302/10000 [1:56:33<07:47,  5.77it/s]

Step 7301/10000, Loss: 0.0361
Step 7302/10000, Loss: 0.0285


Training Progress:  73%|████████████████████████████████████████▉               | 7304/10000 [1:56:33<07:40,  5.85it/s]

Step 7303/10000, Loss: 0.0403
Step 7304/10000, Loss: 0.0354


Training Progress:  73%|████████████████████████████████████████▉               | 7306/10000 [1:56:34<07:46,  5.78it/s]

Step 7305/10000, Loss: 0.0330
Step 7306/10000, Loss: 0.0393


Training Progress:  73%|████████████████████████████████████████▉               | 7308/10000 [1:56:34<07:43,  5.81it/s]

Step 7307/10000, Loss: 0.0359
Step 7308/10000, Loss: 0.0313


Training Progress:  73%|████████████████████████████████████████▉               | 7310/10000 [1:56:34<07:41,  5.82it/s]

Step 7309/10000, Loss: 0.0379
Step 7310/10000, Loss: 0.0310


Training Progress:  73%|████████████████████████████████████████▉               | 7312/10000 [1:56:35<07:42,  5.81it/s]

Step 7311/10000, Loss: 0.0351
Step 7312/10000, Loss: 0.0324


Training Progress:  73%|████████████████████████████████████████▉               | 7314/10000 [1:56:35<07:41,  5.83it/s]

Step 7313/10000, Loss: 0.0277
Step 7314/10000, Loss: 0.0385


Training Progress:  73%|████████████████████████████████████████▉               | 7316/10000 [1:56:35<07:41,  5.81it/s]

Step 7315/10000, Loss: 0.0324
Step 7316/10000, Loss: 0.0322


Training Progress:  73%|████████████████████████████████████████▉               | 7318/10000 [1:56:36<07:40,  5.83it/s]

Step 7317/10000, Loss: 0.0330
Step 7318/10000, Loss: 0.0384


Training Progress:  73%|████████████████████████████████████████▉               | 7320/10000 [1:56:36<07:41,  5.81it/s]

Step 7319/10000, Loss: 0.0308
Step 7320/10000, Loss: 0.0289


Training Progress:  73%|█████████████████████████████████████████               | 7322/10000 [1:56:36<07:35,  5.88it/s]

Step 7321/10000, Loss: 0.0327
Step 7322/10000, Loss: 0.0336


Training Progress:  73%|█████████████████████████████████████████               | 7324/10000 [1:56:37<07:37,  5.84it/s]

Step 7323/10000, Loss: 0.0389
Step 7324/10000, Loss: 0.0547


Training Progress:  73%|█████████████████████████████████████████               | 7326/10000 [1:56:37<07:32,  5.91it/s]

Step 7325/10000, Loss: 0.0433
Step 7326/10000, Loss: 0.0402


Training Progress:  73%|█████████████████████████████████████████               | 7328/10000 [1:56:37<07:37,  5.84it/s]

Step 7327/10000, Loss: 0.0313
Step 7328/10000, Loss: 0.0306


Training Progress:  73%|█████████████████████████████████████████               | 7330/10000 [1:56:38<07:43,  5.76it/s]

Step 7329/10000, Loss: 0.0338
Step 7330/10000, Loss: 0.0409


Training Progress:  73%|█████████████████████████████████████████               | 7332/10000 [1:56:38<07:35,  5.86it/s]

Step 7331/10000, Loss: 0.0349
Step 7332/10000, Loss: 0.0305


Training Progress:  73%|█████████████████████████████████████████               | 7334/10000 [1:56:38<07:38,  5.82it/s]

Step 7333/10000, Loss: 0.0355
Step 7334/10000, Loss: 0.0322


Training Progress:  73%|█████████████████████████████████████████               | 7336/10000 [1:56:39<07:31,  5.90it/s]

Step 7335/10000, Loss: 0.0320
Step 7336/10000, Loss: 0.0332


Training Progress:  73%|█████████████████████████████████████████               | 7338/10000 [1:56:39<07:38,  5.80it/s]

Step 7337/10000, Loss: 0.0337
Step 7338/10000, Loss: 0.0360


Training Progress:  73%|█████████████████████████████████████████               | 7340/10000 [1:56:39<07:42,  5.75it/s]

Step 7339/10000, Loss: 0.0356
Step 7340/10000, Loss: 0.0314


Training Progress:  73%|█████████████████████████████████████████               | 7342/10000 [1:56:40<07:32,  5.87it/s]

Step 7341/10000, Loss: 0.0304
Step 7342/10000, Loss: 0.0291


Training Progress:  73%|█████████████████████████████████████████               | 7343/10000 [1:56:40<07:30,  5.90it/s]

Step 7343/10000, Loss: 0.0346
Step 7344/10000, Loss: 0.0250


Training Progress:  73%|███████████████████████████████████████▋              | 7344/10000 [1:56:57<3:47:40,  5.14s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7344_loss0.0250_20250117_144844.pt

New best loss: 0.0250


Training Progress:  73%|███████████████████████████████████████▋              | 7346/10000 [1:56:57<1:58:26,  2.68s/it]

Step 7345/10000, Loss: 0.0316
Step 7346/10000, Loss: 0.0457


Training Progress:  73%|███████████████████████████████████████▋              | 7348/10000 [1:56:58<1:01:44,  1.40s/it]

Step 7347/10000, Loss: 0.0359
Step 7348/10000, Loss: 0.0347


Training Progress:  74%|█████████████████████████████████████████▏              | 7350/10000 [1:56:58<34:10,  1.29it/s]

Step 7349/10000, Loss: 0.0301
Step 7350/10000, Loss: 0.0287


Training Progress:  74%|█████████████████████████████████████████▏              | 7352/10000 [1:56:58<20:27,  2.16it/s]

Step 7351/10000, Loss: 0.0321
Step 7352/10000, Loss: 0.0293


Training Progress:  74%|█████████████████████████████████████████▏              | 7354/10000 [1:56:59<13:59,  3.15it/s]

Step 7353/10000, Loss: 0.0279
Step 7354/10000, Loss: 0.0327


Training Progress:  74%|█████████████████████████████████████████▏              | 7356/10000 [1:56:59<10:41,  4.12it/s]

Step 7355/10000, Loss: 0.0274
Step 7356/10000, Loss: 0.0299


Training Progress:  74%|█████████████████████████████████████████▏              | 7358/10000 [1:56:59<09:03,  4.87it/s]

Step 7357/10000, Loss: 0.0338
Step 7358/10000, Loss: 0.0337


Training Progress:  74%|█████████████████████████████████████████▏              | 7360/10000 [1:57:00<08:10,  5.38it/s]

Step 7359/10000, Loss: 0.0292
Step 7360/10000, Loss: 0.0360


Training Progress:  74%|█████████████████████████████████████████▏              | 7362/10000 [1:57:00<07:53,  5.58it/s]

Step 7361/10000, Loss: 0.0548
Step 7362/10000, Loss: 0.0351


Training Progress:  74%|█████████████████████████████████████████▏              | 7364/10000 [1:57:00<07:47,  5.64it/s]

Step 7363/10000, Loss: 0.0518
Step 7364/10000, Loss: 0.0353


Training Progress:  74%|█████████████████████████████████████████▏              | 7366/10000 [1:57:01<07:32,  5.82it/s]

Step 7365/10000, Loss: 0.0809
Step 7366/10000, Loss: 0.0347


Training Progress:  74%|█████████████████████████████████████████▎              | 7368/10000 [1:57:01<07:38,  5.74it/s]

Step 7367/10000, Loss: 0.0293
Step 7368/10000, Loss: 0.0316


Training Progress:  74%|█████████████████████████████████████████▎              | 7370/10000 [1:57:01<07:34,  5.79it/s]

Step 7369/10000, Loss: 0.0383
Step 7370/10000, Loss: 0.0374


Training Progress:  74%|█████████████████████████████████████████▎              | 7372/10000 [1:57:02<07:30,  5.84it/s]

Step 7371/10000, Loss: 0.0353
Step 7372/10000, Loss: 0.0420


Training Progress:  74%|█████████████████████████████████████████▎              | 7374/10000 [1:57:02<07:23,  5.92it/s]

Step 7373/10000, Loss: 0.0398
Step 7374/10000, Loss: 0.0335


Training Progress:  74%|█████████████████████████████████████████▎              | 7376/10000 [1:57:02<07:28,  5.85it/s]

Step 7375/10000, Loss: 0.0357
Step 7376/10000, Loss: 0.0317


Training Progress:  74%|█████████████████████████████████████████▎              | 7378/10000 [1:57:03<07:33,  5.78it/s]

Step 7377/10000, Loss: 0.0308
Step 7378/10000, Loss: 0.0276


Training Progress:  74%|█████████████████████████████████████████▎              | 7380/10000 [1:57:03<07:25,  5.89it/s]

Step 7379/10000, Loss: 0.0328
Step 7380/10000, Loss: 0.0378


Training Progress:  74%|█████████████████████████████████████████▎              | 7382/10000 [1:57:04<07:33,  5.78it/s]

Step 7381/10000, Loss: 0.0309
Step 7382/10000, Loss: 0.0324


Training Progress:  74%|█████████████████████████████████████████▎              | 7384/10000 [1:57:04<07:30,  5.81it/s]

Step 7383/10000, Loss: 0.0370
Step 7384/10000, Loss: 0.0309


Training Progress:  74%|█████████████████████████████████████████▎              | 7386/10000 [1:57:04<07:35,  5.74it/s]

Step 7385/10000, Loss: 0.0368
Step 7386/10000, Loss: 0.0347


Training Progress:  74%|█████████████████████████████████████████▎              | 7388/10000 [1:57:05<07:21,  5.91it/s]

Step 7387/10000, Loss: 0.0312
Step 7388/10000, Loss: 0.0346


Training Progress:  74%|█████████████████████████████████████████▍              | 7390/10000 [1:57:05<07:26,  5.85it/s]

Step 7389/10000, Loss: 0.0331
Step 7390/10000, Loss: 0.0335


Training Progress:  74%|█████████████████████████████████████████▍              | 7392/10000 [1:57:05<07:31,  5.78it/s]

Step 7391/10000, Loss: 0.0373
Step 7392/10000, Loss: 0.0327


Training Progress:  74%|█████████████████████████████████████████▍              | 7394/10000 [1:57:06<07:22,  5.88it/s]

Step 7393/10000, Loss: 0.0300
Step 7394/10000, Loss: 0.0315


Training Progress:  74%|█████████████████████████████████████████▍              | 7396/10000 [1:57:06<07:30,  5.78it/s]

Step 7395/10000, Loss: 0.0275
Step 7396/10000, Loss: 0.0688


Training Progress:  74%|█████████████████████████████████████████▍              | 7398/10000 [1:57:06<07:28,  5.80it/s]

Step 7397/10000, Loss: 0.0677
Step 7398/10000, Loss: 0.0328


Training Progress:  74%|█████████████████████████████████████████▍              | 7400/10000 [1:57:07<07:31,  5.76it/s]

Step 7399/10000, Loss: 0.0345
Step 7400/10000, Loss: 0.0370


Training Progress:  74%|█████████████████████████████████████████▍              | 7402/10000 [1:57:07<07:19,  5.90it/s]

Step 7401/10000, Loss: 0.0288
Step 7402/10000, Loss: 0.0300


Training Progress:  74%|█████████████████████████████████████████▍              | 7404/10000 [1:57:07<07:26,  5.81it/s]

Step 7403/10000, Loss: 0.0307
Step 7404/10000, Loss: 0.0367


Training Progress:  74%|█████████████████████████████████████████▍              | 7406/10000 [1:57:08<07:16,  5.94it/s]

Step 7405/10000, Loss: 0.0318
Step 7406/10000, Loss: 0.0385


Training Progress:  74%|█████████████████████████████████████████▍              | 7408/10000 [1:57:08<07:22,  5.85it/s]

Step 7407/10000, Loss: 0.0362
Step 7408/10000, Loss: 0.0436


Training Progress:  74%|█████████████████████████████████████████▍              | 7410/10000 [1:57:08<07:28,  5.77it/s]

Step 7409/10000, Loss: 0.0404
Step 7410/10000, Loss: 0.0330


Training Progress:  74%|█████████████████████████████████████████▌              | 7412/10000 [1:57:09<07:27,  5.79it/s]

Step 7411/10000, Loss: 0.0283
Step 7412/10000, Loss: 0.0362


Training Progress:  74%|█████████████████████████████████████████▌              | 7414/10000 [1:57:09<07:26,  5.79it/s]

Step 7413/10000, Loss: 0.0352
Step 7414/10000, Loss: 0.0294


Training Progress:  74%|█████████████████████████████████████████▌              | 7416/10000 [1:57:09<07:26,  5.79it/s]

Step 7415/10000, Loss: 0.0335
Step 7416/10000, Loss: 0.0268


Training Progress:  74%|█████████████████████████████████████████▌              | 7418/10000 [1:57:10<07:23,  5.83it/s]

Step 7417/10000, Loss: 0.0281
Step 7418/10000, Loss: 0.0323


Training Progress:  74%|█████████████████████████████████████████▌              | 7420/10000 [1:57:10<07:23,  5.82it/s]

Step 7419/10000, Loss: 0.0316
Step 7420/10000, Loss: 0.0347


Training Progress:  74%|█████████████████████████████████████████▌              | 7422/10000 [1:57:10<07:21,  5.84it/s]

Step 7421/10000, Loss: 0.0310
Step 7422/10000, Loss: 0.0326


Training Progress:  74%|█████████████████████████████████████████▌              | 7424/10000 [1:57:11<07:20,  5.84it/s]

Step 7423/10000, Loss: 0.0283
Step 7424/10000, Loss: 0.0282


Training Progress:  74%|█████████████████████████████████████████▌              | 7425/10000 [1:57:11<07:21,  5.84it/s]

Step 7425/10000, Loss: 0.0354
Step 7426/10000, Loss: 0.0247


Training Progress:  74%|████████████████████████████████████████              | 7426/10000 [1:57:27<3:29:06,  4.87s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7426_loss0.0247_20250117_144915.pt

New best loss: 0.0247


Training Progress:  74%|████████████████████████████████████████              | 7428/10000 [1:57:27<1:48:36,  2.53s/it]

Step 7427/10000, Loss: 0.0316
Step 7428/10000, Loss: 0.0315


Training Progress:  74%|█████████████████████████████████████████▌              | 7430/10000 [1:57:28<57:00,  1.33s/it]

Step 7429/10000, Loss: 0.0307
Step 7430/10000, Loss: 0.0370


Training Progress:  74%|█████████████████████████████████████████▌              | 7432/10000 [1:57:28<31:33,  1.36it/s]

Step 7431/10000, Loss: 0.0312
Step 7432/10000, Loss: 0.0299


Training Progress:  74%|█████████████████████████████████████████▋              | 7434/10000 [1:57:28<19:13,  2.22it/s]

Step 7433/10000, Loss: 0.0298
Step 7434/10000, Loss: 0.0310
Step 7435/10000, Loss: 0.0230


Training Progress:  74%|████████████████████████████████████████▏             | 7435/10000 [1:57:50<4:46:15,  6.70s/it]


Checkpoint saved: checkpoints\best\checkpoint_step7435_loss0.0230_20250117_144932.pt

New best loss: 0.0230


Training Progress:  74%|████████████████████████████████████████▏             | 7437/10000 [1:57:50<2:27:04,  3.44s/it]

Step 7436/10000, Loss: 0.0312
Step 7437/10000, Loss: 0.0274


Training Progress:  74%|████████████████████████████████████████▏             | 7439/10000 [1:57:51<1:15:44,  1.77s/it]

Step 7438/10000, Loss: 0.0289
Step 7439/10000, Loss: 0.0306


Training Progress:  74%|█████████████████████████████████████████▋              | 7441/10000 [1:57:51<40:48,  1.05it/s]

Step 7440/10000, Loss: 0.0329
Step 7441/10000, Loss: 0.0270


Training Progress:  74%|█████████████████████████████████████████▋              | 7443/10000 [1:57:51<23:41,  1.80it/s]

Step 7442/10000, Loss: 0.0409
Step 7443/10000, Loss: 0.0516


Training Progress:  74%|█████████████████████████████████████████▋              | 7445/10000 [1:57:52<15:10,  2.81it/s]

Step 7444/10000, Loss: 0.0355
Step 7445/10000, Loss: 0.0414


Training Progress:  74%|█████████████████████████████████████████▋              | 7447/10000 [1:57:52<11:13,  3.79it/s]

Step 7446/10000, Loss: 0.0323
Step 7447/10000, Loss: 0.0618


Training Progress:  74%|█████████████████████████████████████████▋              | 7449/10000 [1:57:52<09:06,  4.67it/s]

Step 7448/10000, Loss: 0.0359
Step 7449/10000, Loss: 0.0288


Training Progress:  75%|█████████████████████████████████████████▋              | 7451/10000 [1:57:53<08:12,  5.18it/s]

Step 7450/10000, Loss: 0.0315
Step 7451/10000, Loss: 0.0333


Training Progress:  75%|█████████████████████████████████████████▋              | 7453/10000 [1:57:53<07:46,  5.46it/s]

Step 7452/10000, Loss: 0.0310
Step 7453/10000, Loss: 0.0309


Training Progress:  75%|█████████████████████████████████████████▋              | 7455/10000 [1:57:53<07:27,  5.69it/s]

Step 7454/10000, Loss: 0.0419
Step 7455/10000, Loss: 0.0305


Training Progress:  75%|█████████████████████████████████████████▊              | 7457/10000 [1:57:54<07:26,  5.70it/s]

Step 7456/10000, Loss: 0.0277
Step 7457/10000, Loss: 0.0351


Training Progress:  75%|█████████████████████████████████████████▊              | 7459/10000 [1:57:54<07:18,  5.80it/s]

Step 7458/10000, Loss: 0.0327
Step 7459/10000, Loss: 0.0289


Training Progress:  75%|█████████████████████████████████████████▊              | 7461/10000 [1:57:54<07:16,  5.82it/s]

Step 7460/10000, Loss: 0.0298
Step 7461/10000, Loss: 0.0346


Training Progress:  75%|█████████████████████████████████████████▊              | 7463/10000 [1:57:55<07:09,  5.90it/s]

Step 7462/10000, Loss: 0.0392
Step 7463/10000, Loss: 0.0290


Training Progress:  75%|█████████████████████████████████████████▊              | 7465/10000 [1:57:55<07:18,  5.78it/s]

Step 7464/10000, Loss: 0.0341
Step 7465/10000, Loss: 0.0365


Training Progress:  75%|█████████████████████████████████████████▊              | 7467/10000 [1:57:56<07:15,  5.81it/s]

Step 7466/10000, Loss: 0.0281
Step 7467/10000, Loss: 0.0365


Training Progress:  75%|█████████████████████████████████████████▊              | 7469/10000 [1:57:56<07:16,  5.80it/s]

Step 7468/10000, Loss: 0.0320
Step 7469/10000, Loss: 0.0309


Training Progress:  75%|█████████████████████████████████████████▊              | 7471/10000 [1:57:56<07:13,  5.83it/s]

Step 7470/10000, Loss: 0.0341
Step 7471/10000, Loss: 0.0306


Training Progress:  75%|█████████████████████████████████████████▊              | 7473/10000 [1:57:57<07:13,  5.82it/s]

Step 7472/10000, Loss: 0.0317
Step 7473/10000, Loss: 0.0353


Training Progress:  75%|█████████████████████████████████████████▊              | 7475/10000 [1:57:57<07:13,  5.83it/s]

Step 7474/10000, Loss: 0.0276
Step 7475/10000, Loss: 0.0291


Training Progress:  75%|█████████████████████████████████████████▊              | 7477/10000 [1:57:57<07:13,  5.81it/s]

Step 7476/10000, Loss: 0.0296
Step 7477/10000, Loss: 0.0271


Training Progress:  75%|█████████████████████████████████████████▉              | 7479/10000 [1:57:58<07:07,  5.89it/s]

Step 7478/10000, Loss: 0.0351
Step 7479/10000, Loss: 0.0293


Training Progress:  75%|█████████████████████████████████████████▉              | 7481/10000 [1:57:58<07:15,  5.79it/s]

Step 7480/10000, Loss: 0.0298
Step 7481/10000, Loss: 0.0326


Training Progress:  75%|█████████████████████████████████████████▉              | 7483/10000 [1:57:58<07:09,  5.86it/s]

Step 7482/10000, Loss: 0.0324
Step 7483/10000, Loss: 0.0320


Training Progress:  75%|█████████████████████████████████████████▉              | 7485/10000 [1:57:59<07:14,  5.79it/s]

Step 7484/10000, Loss: 0.0271
Step 7485/10000, Loss: 0.0301


Training Progress:  75%|█████████████████████████████████████████▉              | 7487/10000 [1:57:59<07:05,  5.91it/s]

Step 7486/10000, Loss: 0.0329
Step 7487/10000, Loss: 0.0316


Training Progress:  75%|█████████████████████████████████████████▉              | 7489/10000 [1:57:59<07:05,  5.91it/s]

Step 7488/10000, Loss: 0.0454
Step 7489/10000, Loss: 0.0356


Training Progress:  75%|█████████████████████████████████████████▉              | 7491/10000 [1:58:00<07:08,  5.86it/s]

Step 7490/10000, Loss: 0.0401
Step 7491/10000, Loss: 0.0337


Training Progress:  75%|█████████████████████████████████████████▉              | 7493/10000 [1:58:00<07:14,  5.77it/s]

Step 7492/10000, Loss: 0.0293
Step 7493/10000, Loss: 0.0294


Training Progress:  75%|█████████████████████████████████████████▉              | 7495/10000 [1:58:00<07:05,  5.88it/s]

Step 7494/10000, Loss: 0.0358
Step 7495/10000, Loss: 0.0306


Training Progress:  75%|█████████████████████████████████████████▉              | 7497/10000 [1:58:01<07:09,  5.83it/s]

Step 7496/10000, Loss: 0.0293
Step 7497/10000, Loss: 0.0313


Training Progress:  75%|█████████████████████████████████████████▉              | 7499/10000 [1:58:01<07:02,  5.92it/s]

Step 7498/10000, Loss: 0.0278
Step 7499/10000, Loss: 0.0280


Training Progress:  75%|██████████████████████████████████████████              | 7501/10000 [1:58:01<07:07,  5.85it/s]

Step 7500/10000, Loss: 0.0278
Step 7501/10000, Loss: 0.0297


Training Progress:  75%|██████████████████████████████████████████              | 7503/10000 [1:58:02<07:13,  5.76it/s]

Step 7502/10000, Loss: 0.0319
Step 7503/10000, Loss: 0.0314


Training Progress:  75%|██████████████████████████████████████████              | 7505/10000 [1:58:02<07:07,  5.84it/s]

Step 7504/10000, Loss: 0.0316
Step 7505/10000, Loss: 0.0281


Training Progress:  75%|██████████████████████████████████████████              | 7507/10000 [1:58:02<07:10,  5.79it/s]

Step 7506/10000, Loss: 0.0284
Step 7507/10000, Loss: 0.0328


Training Progress:  75%|██████████████████████████████████████████              | 7509/10000 [1:58:03<07:03,  5.89it/s]

Step 7508/10000, Loss: 0.0253
Step 7509/10000, Loss: 0.0284


Training Progress:  75%|██████████████████████████████████████████              | 7511/10000 [1:58:03<07:14,  5.73it/s]

Step 7510/10000, Loss: 0.0271
Step 7511/10000, Loss: 0.0297


Training Progress:  75%|██████████████████████████████████████████              | 7513/10000 [1:58:03<07:11,  5.77it/s]

Step 7512/10000, Loss: 0.0333
Step 7513/10000, Loss: 0.0260


Training Progress:  75%|██████████████████████████████████████████              | 7515/10000 [1:58:04<07:01,  5.89it/s]

Step 7514/10000, Loss: 0.0255
Step 7515/10000, Loss: 0.0316


Training Progress:  75%|██████████████████████████████████████████              | 7517/10000 [1:58:04<07:09,  5.78it/s]

Step 7516/10000, Loss: 0.0297
Step 7517/10000, Loss: 0.0272


Training Progress:  75%|██████████████████████████████████████████              | 7519/10000 [1:58:04<07:05,  5.83it/s]

Step 7518/10000, Loss: 0.0306
Step 7519/10000, Loss: 0.0292


Training Progress:  75%|██████████████████████████████████████████              | 7521/10000 [1:58:05<07:04,  5.84it/s]

Step 7520/10000, Loss: 0.0299
Step 7521/10000, Loss: 0.0311


Training Progress:  75%|██████████████████████████████████████████▏             | 7523/10000 [1:58:05<06:59,  5.90it/s]

Step 7522/10000, Loss: 0.0321
Step 7523/10000, Loss: 0.0280


Training Progress:  75%|██████████████████████████████████████████▏             | 7525/10000 [1:58:05<07:02,  5.85it/s]

Step 7524/10000, Loss: 0.0377
Step 7525/10000, Loss: 0.0452


Training Progress:  75%|██████████████████████████████████████████▏             | 7527/10000 [1:58:06<07:08,  5.77it/s]

Step 7526/10000, Loss: 0.0338
Step 7527/10000, Loss: 0.0543


Training Progress:  75%|██████████████████████████████████████████▏             | 7529/10000 [1:58:06<07:00,  5.88it/s]

Step 7528/10000, Loss: 0.0330
Step 7529/10000, Loss: 0.0518


Training Progress:  75%|██████████████████████████████████████████▏             | 7531/10000 [1:58:06<07:07,  5.78it/s]

Step 7530/10000, Loss: 0.0344
Step 7531/10000, Loss: 0.0309


Training Progress:  75%|██████████████████████████████████████████▏             | 7533/10000 [1:58:07<07:04,  5.82it/s]

Step 7532/10000, Loss: 0.0355
Step 7533/10000, Loss: 0.0346


Training Progress:  75%|██████████████████████████████████████████▏             | 7535/10000 [1:58:07<07:01,  5.85it/s]

Step 7534/10000, Loss: 0.0341
Step 7535/10000, Loss: 0.0299


Training Progress:  75%|██████████████████████████████████████████▏             | 7537/10000 [1:58:08<07:07,  5.76it/s]

Step 7536/10000, Loss: 0.0409
Step 7537/10000, Loss: 0.0249


Training Progress:  75%|██████████████████████████████████████████▏             | 7539/10000 [1:58:08<06:58,  5.88it/s]

Step 7538/10000, Loss: 0.0268
Step 7539/10000, Loss: 0.0325


Training Progress:  75%|██████████████████████████████████████████▏             | 7541/10000 [1:58:08<07:06,  5.77it/s]

Step 7540/10000, Loss: 0.0317
Step 7541/10000, Loss: 0.0307


Training Progress:  75%|██████████████████████████████████████████▏             | 7543/10000 [1:58:09<06:57,  5.89it/s]

Step 7542/10000, Loss: 0.0273
Step 7543/10000, Loss: 0.0334


Training Progress:  75%|██████████████████████████████████████████▎             | 7545/10000 [1:58:09<07:03,  5.79it/s]

Step 7544/10000, Loss: 0.0355
Step 7545/10000, Loss: 0.0302


Training Progress:  75%|██████████████████████████████████████████▎             | 7547/10000 [1:58:09<07:04,  5.78it/s]

Step 7546/10000, Loss: 0.0294
Step 7547/10000, Loss: 0.0359


Training Progress:  75%|██████████████████████████████████████████▎             | 7549/10000 [1:58:10<06:57,  5.88it/s]

Step 7548/10000, Loss: 0.0310
Step 7549/10000, Loss: 0.0349


Training Progress:  76%|██████████████████████████████████████████▎             | 7551/10000 [1:58:10<07:03,  5.78it/s]

Step 7550/10000, Loss: 0.0343
Step 7551/10000, Loss: 0.0345


Training Progress:  76%|██████████████████████████████████████████▎             | 7553/10000 [1:58:10<06:58,  5.84it/s]

Step 7552/10000, Loss: 0.0335
Step 7553/10000, Loss: 0.0338


Training Progress:  76%|██████████████████████████████████████████▎             | 7555/10000 [1:58:11<06:59,  5.83it/s]

Step 7554/10000, Loss: 0.0306
Step 7555/10000, Loss: 0.0331


Training Progress:  76%|██████████████████████████████████████████▎             | 7557/10000 [1:58:11<06:53,  5.91it/s]

Step 7556/10000, Loss: 0.0291
Step 7557/10000, Loss: 0.0305


Training Progress:  76%|██████████████████████████████████████████▎             | 7559/10000 [1:58:11<07:01,  5.80it/s]

Step 7558/10000, Loss: 0.0324
Step 7559/10000, Loss: 0.0252


Training Progress:  76%|██████████████████████████████████████████▎             | 7561/10000 [1:58:12<07:04,  5.74it/s]

Step 7560/10000, Loss: 0.0350
Step 7561/10000, Loss: 0.0266


Training Progress:  76%|██████████████████████████████████████████▎             | 7563/10000 [1:58:12<06:55,  5.87it/s]

Step 7562/10000, Loss: 0.0284
Step 7563/10000, Loss: 0.0326


Training Progress:  76%|██████████████████████████████████████████▎             | 7565/10000 [1:58:12<07:01,  5.77it/s]

Step 7564/10000, Loss: 0.0342
Step 7565/10000, Loss: 0.0324


Training Progress:  76%|██████████████████████████████████████████▍             | 7567/10000 [1:58:13<06:53,  5.88it/s]

Step 7566/10000, Loss: 0.0257
Step 7567/10000, Loss: 0.0296


Training Progress:  76%|██████████████████████████████████████████▍             | 7569/10000 [1:58:13<06:56,  5.83it/s]

Step 7568/10000, Loss: 0.0335
Step 7569/10000, Loss: 0.0291


Training Progress:  76%|██████████████████████████████████████████▍             | 7571/10000 [1:58:13<07:02,  5.75it/s]

Step 7570/10000, Loss: 0.0415
Step 7571/10000, Loss: 0.0309


Training Progress:  76%|██████████████████████████████████████████▍             | 7573/10000 [1:58:14<06:53,  5.87it/s]

Step 7572/10000, Loss: 0.0382
Step 7573/10000, Loss: 0.0362


Training Progress:  76%|██████████████████████████████████████████▍             | 7575/10000 [1:58:14<06:56,  5.82it/s]

Step 7574/10000, Loss: 0.0320
Step 7575/10000, Loss: 0.0257


Training Progress:  76%|██████████████████████████████████████████▍             | 7577/10000 [1:58:14<06:59,  5.78it/s]

Step 7576/10000, Loss: 0.0360
Step 7577/10000, Loss: 0.0298


Training Progress:  76%|██████████████████████████████████████████▍             | 7579/10000 [1:58:15<06:57,  5.80it/s]

Step 7578/10000, Loss: 0.0281
Step 7579/10000, Loss: 0.0304


Training Progress:  76%|██████████████████████████████████████████▍             | 7581/10000 [1:58:15<06:57,  5.80it/s]

Step 7580/10000, Loss: 0.0269
Step 7581/10000, Loss: 0.0271


Training Progress:  76%|██████████████████████████████████████████▍             | 7583/10000 [1:58:15<06:49,  5.91it/s]

Step 7582/10000, Loss: 0.0303
Step 7583/10000, Loss: 0.0298


Training Progress:  76%|██████████████████████████████████████████▍             | 7585/10000 [1:58:16<06:53,  5.85it/s]

Step 7584/10000, Loss: 0.0352
Step 7585/10000, Loss: 0.0283


Training Progress:  76%|██████████████████████████████████████████▍             | 7587/10000 [1:58:16<06:57,  5.79it/s]

Step 7586/10000, Loss: 0.0295
Step 7587/10000, Loss: 0.0296


Training Progress:  76%|██████████████████████████████████████████▍             | 7589/10000 [1:58:16<06:49,  5.88it/s]

Step 7588/10000, Loss: 0.0304
Step 7589/10000, Loss: 0.0316


Training Progress:  76%|██████████████████████████████████████████▌             | 7591/10000 [1:58:17<06:52,  5.84it/s]

Step 7590/10000, Loss: 0.0231
Step 7591/10000, Loss: 0.0287


Training Progress:  76%|██████████████████████████████████████████▌             | 7593/10000 [1:58:17<06:54,  5.80it/s]

Step 7592/10000, Loss: 0.0400
Step 7593/10000, Loss: 0.0299


Training Progress:  76%|██████████████████████████████████████████▌             | 7595/10000 [1:58:17<06:50,  5.86it/s]

Step 7594/10000, Loss: 0.0349
Step 7595/10000, Loss: 0.0289


Training Progress:  76%|██████████████████████████████████████████▌             | 7597/10000 [1:58:18<06:56,  5.76it/s]

Step 7596/10000, Loss: 0.0256
Step 7597/10000, Loss: 0.0292


Training Progress:  76%|██████████████████████████████████████████▌             | 7599/10000 [1:58:18<06:48,  5.88it/s]

Step 7598/10000, Loss: 0.0256
Step 7599/10000, Loss: 0.0264


Training Progress:  76%|██████████████████████████████████████████▌             | 7601/10000 [1:58:19<06:51,  5.83it/s]

Step 7600/10000, Loss: 0.0295
Step 7601/10000, Loss: 0.0265


Training Progress:  76%|██████████████████████████████████████████▌             | 7603/10000 [1:58:19<06:56,  5.75it/s]

Step 7602/10000, Loss: 0.0281
Step 7603/10000, Loss: 0.0303


Training Progress:  76%|██████████████████████████████████████████▌             | 7605/10000 [1:58:19<06:50,  5.83it/s]

Step 7604/10000, Loss: 0.0316
Step 7605/10000, Loss: 0.0298


Training Progress:  76%|██████████████████████████████████████████▌             | 7607/10000 [1:58:20<06:54,  5.78it/s]

Step 7606/10000, Loss: 0.0302
Step 7607/10000, Loss: 0.0400


Training Progress:  76%|██████████████████████████████████████████▌             | 7609/10000 [1:58:20<06:57,  5.73it/s]

Step 7608/10000, Loss: 0.0330
Step 7609/10000, Loss: 0.0416


Training Progress:  76%|██████████████████████████████████████████▌             | 7611/10000 [1:58:20<06:53,  5.78it/s]

Step 7610/10000, Loss: 0.0305
Step 7611/10000, Loss: 0.0459


Training Progress:  76%|██████████████████████████████████████████▋             | 7613/10000 [1:58:21<06:52,  5.79it/s]

Step 7612/10000, Loss: 0.0380
Step 7613/10000, Loss: 0.0287


Training Progress:  76%|██████████████████████████████████████████▋             | 7615/10000 [1:58:21<06:44,  5.89it/s]

Step 7614/10000, Loss: 0.0370
Step 7615/10000, Loss: 0.0360


Training Progress:  76%|██████████████████████████████████████████▋             | 7617/10000 [1:58:21<06:51,  5.80it/s]

Step 7616/10000, Loss: 0.0354
Step 7617/10000, Loss: 0.0319


Training Progress:  76%|██████████████████████████████████████████▋             | 7619/10000 [1:58:22<06:50,  5.80it/s]

Step 7618/10000, Loss: 0.0446
Step 7619/10000, Loss: 0.0303


Training Progress:  76%|██████████████████████████████████████████▋             | 7621/10000 [1:58:22<06:43,  5.90it/s]

Step 7620/10000, Loss: 0.0291
Step 7621/10000, Loss: 0.0326


Training Progress:  76%|██████████████████████████████████████████▋             | 7623/10000 [1:58:22<06:48,  5.81it/s]

Step 7622/10000, Loss: 0.0314
Step 7623/10000, Loss: 0.0300


Training Progress:  76%|██████████████████████████████████████████▋             | 7625/10000 [1:58:23<06:52,  5.75it/s]

Step 7624/10000, Loss: 0.0328
Step 7625/10000, Loss: 0.0317


Training Progress:  76%|██████████████████████████████████████████▋             | 7627/10000 [1:58:23<06:43,  5.88it/s]

Step 7626/10000, Loss: 0.0359
Step 7627/10000, Loss: 0.0329


Training Progress:  76%|██████████████████████████████████████████▋             | 7629/10000 [1:58:23<06:47,  5.82it/s]

Step 7628/10000, Loss: 0.0284
Step 7629/10000, Loss: 0.0363


Training Progress:  76%|██████████████████████████████████████████▋             | 7631/10000 [1:58:24<06:51,  5.76it/s]

Step 7630/10000, Loss: 0.0286
Step 7631/10000, Loss: 0.0381


Training Progress:  76%|██████████████████████████████████████████▋             | 7633/10000 [1:58:24<06:41,  5.90it/s]

Step 7632/10000, Loss: 0.0336
Step 7633/10000, Loss: 0.0353


Training Progress:  76%|██████████████████████████████████████████▊             | 7635/10000 [1:58:24<06:47,  5.80it/s]

Step 7634/10000, Loss: 0.0342
Step 7635/10000, Loss: 0.0312


Training Progress:  76%|██████████████████████████████████████████▊             | 7637/10000 [1:58:25<06:50,  5.76it/s]

Step 7636/10000, Loss: 0.0328
Step 7637/10000, Loss: 0.0353


Training Progress:  76%|██████████████████████████████████████████▊             | 7639/10000 [1:58:25<06:48,  5.79it/s]

Step 7638/10000, Loss: 0.0300
Step 7639/10000, Loss: 0.0325


Training Progress:  76%|██████████████████████████████████████████▊             | 7641/10000 [1:58:25<06:46,  5.80it/s]

Step 7640/10000, Loss: 0.0337
Step 7641/10000, Loss: 0.0263


Training Progress:  76%|██████████████████████████████████████████▊             | 7643/10000 [1:58:26<06:46,  5.79it/s]

Step 7642/10000, Loss: 0.0337
Step 7643/10000, Loss: 0.0296


Training Progress:  76%|██████████████████████████████████████████▊             | 7645/10000 [1:58:26<06:46,  5.80it/s]

Step 7644/10000, Loss: 0.0255
Step 7645/10000, Loss: 0.0336


Training Progress:  76%|██████████████████████████████████████████▊             | 7647/10000 [1:58:26<06:47,  5.77it/s]

Step 7646/10000, Loss: 0.0309
Step 7647/10000, Loss: 0.0315


Training Progress:  76%|██████████████████████████████████████████▊             | 7649/10000 [1:58:27<06:48,  5.76it/s]

Step 7648/10000, Loss: 0.0241
Step 7649/10000, Loss: 0.0265


Training Progress:  77%|██████████████████████████████████████████▊             | 7651/10000 [1:58:27<06:43,  5.82it/s]

Step 7650/10000, Loss: 0.0289
Step 7651/10000, Loss: 0.0265


Training Progress:  77%|██████████████████████████████████████████▊             | 7653/10000 [1:58:27<06:49,  5.73it/s]

Step 7652/10000, Loss: 0.0407
Step 7653/10000, Loss: 0.0333


Training Progress:  77%|██████████████████████████████████████████▊             | 7655/10000 [1:58:28<06:45,  5.78it/s]

Step 7654/10000, Loss: 0.0366
Step 7655/10000, Loss: 0.0456


Training Progress:  77%|██████████████████████████████████████████▉             | 7657/10000 [1:58:28<06:44,  5.79it/s]

Step 7656/10000, Loss: 0.0307
Step 7657/10000, Loss: 0.0275


Training Progress:  77%|██████████████████████████████████████████▉             | 7659/10000 [1:58:29<06:47,  5.75it/s]

Step 7658/10000, Loss: 0.0410
Step 7659/10000, Loss: 0.0325


Training Progress:  77%|██████████████████████████████████████████▉             | 7661/10000 [1:58:29<06:40,  5.84it/s]

Step 7660/10000, Loss: 0.0280
Step 7661/10000, Loss: 0.0323


Training Progress:  77%|██████████████████████████████████████████▉             | 7663/10000 [1:58:29<06:45,  5.76it/s]

Step 7662/10000, Loss: 0.0279
Step 7663/10000, Loss: 0.0304


Training Progress:  77%|██████████████████████████████████████████▉             | 7665/10000 [1:58:30<06:38,  5.86it/s]

Step 7664/10000, Loss: 0.0288
Step 7665/10000, Loss: 0.0282


Training Progress:  77%|██████████████████████████████████████████▉             | 7667/10000 [1:58:30<06:49,  5.70it/s]

Step 7666/10000, Loss: 0.0337
Step 7667/10000, Loss: 0.0341


Training Progress:  77%|██████████████████████████████████████████▉             | 7669/10000 [1:58:30<06:45,  5.75it/s]

Step 7668/10000, Loss: 0.0308
Step 7669/10000, Loss: 0.0299


Training Progress:  77%|██████████████████████████████████████████▉             | 7671/10000 [1:58:31<06:33,  5.92it/s]

Step 7670/10000, Loss: 0.0268
Step 7671/10000, Loss: 0.0337


Training Progress:  77%|██████████████████████████████████████████▉             | 7673/10000 [1:58:31<06:44,  5.75it/s]

Step 7672/10000, Loss: 0.0269
Step 7673/10000, Loss: 0.0290


Training Progress:  77%|██████████████████████████████████████████▉             | 7675/10000 [1:58:31<06:43,  5.77it/s]

Step 7674/10000, Loss: 0.0360
Step 7675/10000, Loss: 0.0284


Training Progress:  77%|██████████████████████████████████████████▉             | 7677/10000 [1:58:32<06:45,  5.73it/s]

Step 7676/10000, Loss: 0.0312
Step 7677/10000, Loss: 0.0246


Training Progress:  77%|███████████████████████████████████████████             | 7679/10000 [1:58:32<06:44,  5.74it/s]

Step 7678/10000, Loss: 0.0256
Step 7679/10000, Loss: 0.0323


Training Progress:  77%|███████████████████████████████████████████             | 7681/10000 [1:58:32<06:42,  5.76it/s]

Step 7680/10000, Loss: 0.0272
Step 7681/10000, Loss: 0.0258


Training Progress:  77%|███████████████████████████████████████████             | 7683/10000 [1:58:33<06:39,  5.80it/s]

Step 7682/10000, Loss: 0.0313
Step 7683/10000, Loss: 0.0250


Training Progress:  77%|███████████████████████████████████████████             | 7685/10000 [1:58:33<06:39,  5.79it/s]

Step 7684/10000, Loss: 0.0284
Step 7685/10000, Loss: 0.0284


Training Progress:  77%|███████████████████████████████████████████             | 7687/10000 [1:58:33<06:40,  5.78it/s]

Step 7686/10000, Loss: 0.0351
Step 7687/10000, Loss: 0.0264


Training Progress:  77%|███████████████████████████████████████████             | 7689/10000 [1:58:34<06:34,  5.86it/s]

Step 7688/10000, Loss: 0.0310
Step 7689/10000, Loss: 0.0440


Training Progress:  77%|███████████████████████████████████████████             | 7691/10000 [1:58:34<06:43,  5.72it/s]

Step 7690/10000, Loss: 0.0306
Step 7691/10000, Loss: 0.0524


Training Progress:  77%|███████████████████████████████████████████             | 7693/10000 [1:58:34<06:41,  5.75it/s]

Step 7692/10000, Loss: 0.0352
Step 7693/10000, Loss: 0.0692


Training Progress:  77%|███████████████████████████████████████████             | 7695/10000 [1:58:35<06:40,  5.76it/s]

Step 7694/10000, Loss: 0.0408
Step 7695/10000, Loss: 0.0298


Training Progress:  77%|███████████████████████████████████████████             | 7697/10000 [1:58:35<06:31,  5.88it/s]

Step 7696/10000, Loss: 0.0315
Step 7697/10000, Loss: 0.0378


Training Progress:  77%|███████████████████████████████████████████             | 7699/10000 [1:58:35<06:37,  5.79it/s]

Step 7698/10000, Loss: 0.0371
Step 7699/10000, Loss: 0.0307


Training Progress:  77%|███████████████████████████████████████████▏            | 7701/10000 [1:58:36<06:40,  5.74it/s]

Step 7700/10000, Loss: 0.0452
Step 7701/10000, Loss: 0.0323


Training Progress:  77%|███████████████████████████████████████████▏            | 7703/10000 [1:58:36<06:30,  5.88it/s]

Step 7702/10000, Loss: 0.0303
Step 7703/10000, Loss: 0.0374


Training Progress:  77%|███████████████████████████████████████████▏            | 7705/10000 [1:58:36<06:34,  5.82it/s]

Step 7704/10000, Loss: 0.0356
Step 7705/10000, Loss: 0.0392


Training Progress:  77%|███████████████████████████████████████████▏            | 7707/10000 [1:58:37<06:42,  5.69it/s]

Step 7706/10000, Loss: 0.0306
Step 7707/10000, Loss: 0.0359


Training Progress:  77%|███████████████████████████████████████████▏            | 7709/10000 [1:58:37<06:36,  5.78it/s]

Step 7708/10000, Loss: 0.0394
Step 7709/10000, Loss: 0.0330


Training Progress:  77%|███████████████████████████████████████████▏            | 7711/10000 [1:58:38<06:36,  5.78it/s]

Step 7710/10000, Loss: 0.0303
Step 7711/10000, Loss: 0.0365


Training Progress:  77%|███████████████████████████████████████████▏            | 7713/10000 [1:58:38<06:31,  5.84it/s]

Step 7712/10000, Loss: 0.0301
Step 7713/10000, Loss: 0.0334


Training Progress:  77%|███████████████████████████████████████████▏            | 7715/10000 [1:58:38<06:31,  5.83it/s]

Step 7714/10000, Loss: 0.0347
Step 7715/10000, Loss: 0.0342


Training Progress:  77%|███████████████████████████████████████████▏            | 7717/10000 [1:58:39<06:32,  5.81it/s]

Step 7716/10000, Loss: 0.0347
Step 7717/10000, Loss: 0.0340


Training Progress:  77%|███████████████████████████████████████████▏            | 7719/10000 [1:58:39<06:40,  5.70it/s]

Step 7718/10000, Loss: 0.0319
Step 7719/10000, Loss: 0.0379


Training Progress:  77%|███████████████████████████████████████████▏            | 7721/10000 [1:58:39<06:30,  5.83it/s]

Step 7720/10000, Loss: 0.0296
Step 7721/10000, Loss: 0.0317


Training Progress:  77%|███████████████████████████████████████████▏            | 7723/10000 [1:58:40<06:29,  5.85it/s]

Step 7722/10000, Loss: 0.0384
Step 7723/10000, Loss: 0.0312


Training Progress:  77%|███████████████████████████████████████████▎            | 7725/10000 [1:58:40<06:39,  5.70it/s]

Step 7724/10000, Loss: 0.0391
Step 7725/10000, Loss: 0.0294


Training Progress:  77%|███████████████████████████████████████████▎            | 7727/10000 [1:58:40<06:36,  5.73it/s]

Step 7726/10000, Loss: 0.0310
Step 7727/10000, Loss: 0.0330


Training Progress:  77%|███████████████████████████████████████████▎            | 7729/10000 [1:58:41<06:38,  5.70it/s]

Step 7728/10000, Loss: 0.0375
Step 7729/10000, Loss: 0.0294


Training Progress:  77%|███████████████████████████████████████████▎            | 7731/10000 [1:58:41<06:34,  5.75it/s]

Step 7730/10000, Loss: 0.0309
Step 7731/10000, Loss: 0.0260


Training Progress:  77%|███████████████████████████████████████████▎            | 7733/10000 [1:58:41<06:33,  5.76it/s]

Step 7732/10000, Loss: 0.0359
Step 7733/10000, Loss: 0.0277


Training Progress:  77%|███████████████████████████████████████████▎            | 7735/10000 [1:58:42<06:26,  5.85it/s]

Step 7734/10000, Loss: 0.0317
Step 7735/10000, Loss: 0.0386


Training Progress:  77%|███████████████████████████████████████████▎            | 7737/10000 [1:58:42<06:33,  5.75it/s]

Step 7736/10000, Loss: 0.0345
Step 7737/10000, Loss: 0.0306


Training Progress:  77%|███████████████████████████████████████████▎            | 7739/10000 [1:58:42<06:29,  5.80it/s]

Step 7738/10000, Loss: 0.0369
Step 7739/10000, Loss: 0.0275


Training Progress:  77%|███████████████████████████████████████████▎            | 7741/10000 [1:58:43<06:35,  5.72it/s]

Step 7740/10000, Loss: 0.0468
Step 7741/10000, Loss: 0.0326


Training Progress:  77%|███████████████████████████████████████████▎            | 7743/10000 [1:58:43<06:31,  5.76it/s]

Step 7742/10000, Loss: 0.0276
Step 7743/10000, Loss: 0.0294


Training Progress:  77%|███████████████████████████████████████████▎            | 7745/10000 [1:58:43<06:32,  5.74it/s]

Step 7744/10000, Loss: 0.0308
Step 7745/10000, Loss: 0.0285


Training Progress:  77%|███████████████████████████████████████████▍            | 7747/10000 [1:58:44<06:32,  5.74it/s]

Step 7746/10000, Loss: 0.0334
Step 7747/10000, Loss: 0.0333


Training Progress:  77%|███████████████████████████████████████████▍            | 7749/10000 [1:58:44<06:32,  5.74it/s]

Step 7748/10000, Loss: 0.0364
Step 7749/10000, Loss: 0.0322


Training Progress:  78%|███████████████████████████████████████████▍            | 7751/10000 [1:58:44<06:33,  5.71it/s]

Step 7750/10000, Loss: 0.0336
Step 7751/10000, Loss: 0.0267


Training Progress:  78%|███████████████████████████████████████████▍            | 7753/10000 [1:58:45<06:31,  5.74it/s]

Step 7752/10000, Loss: 0.0253
Step 7753/10000, Loss: 0.0332


Training Progress:  78%|███████████████████████████████████████████▍            | 7755/10000 [1:58:45<06:32,  5.72it/s]

Step 7754/10000, Loss: 0.0239
Step 7755/10000, Loss: 0.0292


Training Progress:  78%|███████████████████████████████████████████▍            | 7757/10000 [1:58:45<06:31,  5.72it/s]

Step 7756/10000, Loss: 0.0305
Step 7757/10000, Loss: 0.0297


Training Progress:  78%|███████████████████████████████████████████▍            | 7759/10000 [1:58:46<06:28,  5.77it/s]

Step 7758/10000, Loss: 0.0337
Step 7759/10000, Loss: 0.0283


Training Progress:  78%|███████████████████████████████████████████▍            | 7761/10000 [1:58:46<06:28,  5.77it/s]

Step 7760/10000, Loss: 0.0264
Step 7761/10000, Loss: 0.0269


Training Progress:  78%|███████████████████████████████████████████▍            | 7763/10000 [1:58:47<06:27,  5.78it/s]

Step 7762/10000, Loss: 0.0289
Step 7763/10000, Loss: 0.0250


Training Progress:  78%|███████████████████████████████████████████▍            | 7765/10000 [1:58:47<06:25,  5.79it/s]

Step 7764/10000, Loss: 0.0314
Step 7765/10000, Loss: 0.0251


Training Progress:  78%|███████████████████████████████████████████▍            | 7767/10000 [1:58:47<06:23,  5.82it/s]

Step 7766/10000, Loss: 0.0278
Step 7767/10000, Loss: 0.0308


Training Progress:  78%|███████████████████████████████████████████▌            | 7769/10000 [1:58:48<06:27,  5.76it/s]

Step 7768/10000, Loss: 0.0346
Step 7769/10000, Loss: 0.0282


Training Progress:  78%|███████████████████████████████████████████▌            | 7771/10000 [1:58:48<06:23,  5.82it/s]

Step 7770/10000, Loss: 0.0329
Step 7771/10000, Loss: 0.0364


Training Progress:  78%|███████████████████████████████████████████▌            | 7773/10000 [1:58:48<06:24,  5.79it/s]

Step 7772/10000, Loss: 0.0344
Step 7773/10000, Loss: 0.0379


Training Progress:  78%|███████████████████████████████████████████▌            | 7775/10000 [1:58:49<06:25,  5.77it/s]

Step 7774/10000, Loss: 0.0325
Step 7775/10000, Loss: 0.0737


Training Progress:  78%|███████████████████████████████████████████▌            | 7777/10000 [1:58:49<06:24,  5.78it/s]

Step 7776/10000, Loss: 0.0371
Step 7777/10000, Loss: 0.0258


Training Progress:  78%|███████████████████████████████████████████▌            | 7779/10000 [1:58:49<06:27,  5.73it/s]

Step 7778/10000, Loss: 0.0309
Step 7779/10000, Loss: 0.0340


Training Progress:  78%|███████████████████████████████████████████▌            | 7781/10000 [1:58:50<06:20,  5.84it/s]

Step 7780/10000, Loss: 0.0328
Step 7781/10000, Loss: 0.0273


Training Progress:  78%|███████████████████████████████████████████▌            | 7783/10000 [1:58:50<06:26,  5.74it/s]

Step 7782/10000, Loss: 0.0349
Step 7783/10000, Loss: 0.0354


Training Progress:  78%|███████████████████████████████████████████▌            | 7785/10000 [1:58:50<06:23,  5.77it/s]

Step 7784/10000, Loss: 0.0325
Step 7785/10000, Loss: 0.0336


Training Progress:  78%|███████████████████████████████████████████▌            | 7787/10000 [1:58:51<06:23,  5.77it/s]

Step 7786/10000, Loss: 0.0321
Step 7787/10000, Loss: 0.0288


Training Progress:  78%|███████████████████████████████████████████▌            | 7789/10000 [1:58:51<06:15,  5.89it/s]

Step 7788/10000, Loss: 0.0283
Step 7789/10000, Loss: 0.0349


Training Progress:  78%|███████████████████████████████████████████▋            | 7791/10000 [1:58:51<06:24,  5.75it/s]

Step 7790/10000, Loss: 0.0385
Step 7791/10000, Loss: 0.0300


Training Progress:  78%|███████████████████████████████████████████▋            | 7793/10000 [1:58:52<06:21,  5.78it/s]

Step 7792/10000, Loss: 0.0349
Step 7793/10000, Loss: 0.0391


Training Progress:  78%|███████████████████████████████████████████▋            | 7795/10000 [1:58:52<06:22,  5.76it/s]

Step 7794/10000, Loss: 0.0290
Step 7795/10000, Loss: 0.0390


Training Progress:  78%|███████████████████████████████████████████▋            | 7797/10000 [1:58:52<06:21,  5.78it/s]

Step 7796/10000, Loss: 0.0352
Step 7797/10000, Loss: 0.0311


Training Progress:  78%|███████████████████████████████████████████▋            | 7799/10000 [1:58:53<06:25,  5.71it/s]

Step 7798/10000, Loss: 0.0321
Step 7799/10000, Loss: 0.0328


Training Progress:  78%|███████████████████████████████████████████▋            | 7801/10000 [1:58:53<06:26,  5.68it/s]

Step 7800/10000, Loss: 0.0306
Step 7801/10000, Loss: 0.0348


Training Progress:  78%|███████████████████████████████████████████▋            | 7803/10000 [1:58:53<06:23,  5.73it/s]

Step 7802/10000, Loss: 0.0313
Step 7803/10000, Loss: 0.0338


Training Progress:  78%|███████████████████████████████████████████▋            | 7805/10000 [1:58:54<06:25,  5.70it/s]

Step 7804/10000, Loss: 0.0331
Step 7805/10000, Loss: 0.0293


Training Progress:  78%|███████████████████████████████████████████▋            | 7807/10000 [1:58:54<06:22,  5.74it/s]

Step 7806/10000, Loss: 0.0390
Step 7807/10000, Loss: 0.0286


Training Progress:  78%|███████████████████████████████████████████▋            | 7809/10000 [1:58:54<06:18,  5.78it/s]

Step 7808/10000, Loss: 0.0299
Step 7809/10000, Loss: 0.0356


Training Progress:  78%|███████████████████████████████████████████▋            | 7811/10000 [1:58:55<06:20,  5.76it/s]

Step 7810/10000, Loss: 0.0356
Step 7811/10000, Loss: 0.0348


Training Progress:  78%|███████████████████████████████████████████▊            | 7813/10000 [1:58:55<06:18,  5.77it/s]

Step 7812/10000, Loss: 0.0263
Step 7813/10000, Loss: 0.0298


Training Progress:  78%|███████████████████████████████████████████▊            | 7815/10000 [1:58:56<06:13,  5.86it/s]

Step 7814/10000, Loss: 0.0351
Step 7815/10000, Loss: 0.0316


Training Progress:  78%|███████████████████████████████████████████▊            | 7817/10000 [1:58:56<06:20,  5.73it/s]

Step 7816/10000, Loss: 0.0411
Step 7817/10000, Loss: 0.0406


Training Progress:  78%|███████████████████████████████████████████▊            | 7819/10000 [1:58:56<06:18,  5.76it/s]

Step 7818/10000, Loss: 0.0366
Step 7819/10000, Loss: 0.0355


Training Progress:  78%|███████████████████████████████████████████▊            | 7821/10000 [1:58:57<06:15,  5.80it/s]

Step 7820/10000, Loss: 0.0389
Step 7821/10000, Loss: 0.0285


Training Progress:  78%|███████████████████████████████████████████▊            | 7823/10000 [1:58:57<06:18,  5.75it/s]

Step 7822/10000, Loss: 0.0391
Step 7823/10000, Loss: 0.0338


Training Progress:  78%|███████████████████████████████████████████▊            | 7825/10000 [1:58:57<06:16,  5.77it/s]

Step 7824/10000, Loss: 0.0276
Step 7825/10000, Loss: 0.0294


Training Progress:  78%|███████████████████████████████████████████▊            | 7827/10000 [1:58:58<06:13,  5.82it/s]

Step 7826/10000, Loss: 0.0287
Step 7827/10000, Loss: 0.0277


Training Progress:  78%|███████████████████████████████████████████▊            | 7829/10000 [1:58:58<06:19,  5.71it/s]

Step 7828/10000, Loss: 0.0275
Step 7829/10000, Loss: 0.0263


Training Progress:  78%|███████████████████████████████████████████▊            | 7831/10000 [1:58:58<06:11,  5.83it/s]

Step 7830/10000, Loss: 0.0314
Step 7831/10000, Loss: 0.0318


Training Progress:  78%|███████████████████████████████████████████▊            | 7833/10000 [1:58:59<06:16,  5.75it/s]

Step 7832/10000, Loss: 0.0267
Step 7833/10000, Loss: 0.0271


Training Progress:  78%|███████████████████████████████████████████▉            | 7835/10000 [1:58:59<06:09,  5.85it/s]

Step 7834/10000, Loss: 0.0251
Step 7835/10000, Loss: 0.0328


Training Progress:  78%|███████████████████████████████████████████▉            | 7837/10000 [1:58:59<06:10,  5.84it/s]

Step 7836/10000, Loss: 0.0257
Step 7837/10000, Loss: 0.0328


Training Progress:  78%|███████████████████████████████████████████▉            | 7839/10000 [1:59:00<06:16,  5.74it/s]

Step 7838/10000, Loss: 0.0256
Step 7839/10000, Loss: 0.0293


Training Progress:  78%|███████████████████████████████████████████▉            | 7841/10000 [1:59:00<06:17,  5.71it/s]

Step 7840/10000, Loss: 0.0319
Step 7841/10000, Loss: 0.0271


Training Progress:  78%|███████████████████████████████████████████▉            | 7843/10000 [1:59:00<06:19,  5.68it/s]

Step 7842/10000, Loss: 0.0251
Step 7843/10000, Loss: 0.0326


Training Progress:  78%|███████████████████████████████████████████▉            | 7845/10000 [1:59:01<06:16,  5.73it/s]

Step 7844/10000, Loss: 0.0280
Step 7845/10000, Loss: 0.0243


Training Progress:  78%|███████████████████████████████████████████▉            | 7847/10000 [1:59:01<06:17,  5.70it/s]

Step 7846/10000, Loss: 0.0329
Step 7847/10000, Loss: 0.0252


Training Progress:  78%|███████████████████████████████████████████▉            | 7849/10000 [1:59:01<06:10,  5.81it/s]

Step 7848/10000, Loss: 0.0307
Step 7849/10000, Loss: 0.0282


Training Progress:  79%|███████████████████████████████████████████▉            | 7851/10000 [1:59:02<06:17,  5.69it/s]

Step 7850/10000, Loss: 0.0347
Step 7851/10000, Loss: 0.0288


Training Progress:  79%|███████████████████████████████████████████▉            | 7853/10000 [1:59:02<06:14,  5.74it/s]

Step 7852/10000, Loss: 0.0244
Step 7853/10000, Loss: 0.0397


Training Progress:  79%|███████████████████████████████████████████▉            | 7855/10000 [1:59:02<06:15,  5.71it/s]

Step 7854/10000, Loss: 0.0318
Step 7855/10000, Loss: 0.0414


Training Progress:  79%|███████████████████████████████████████████▉            | 7857/10000 [1:59:03<06:11,  5.77it/s]

Step 7856/10000, Loss: 0.0291
Step 7857/10000, Loss: 0.0545


Training Progress:  79%|████████████████████████████████████████████            | 7859/10000 [1:59:03<06:13,  5.73it/s]

Step 7858/10000, Loss: 0.0375
Step 7859/10000, Loss: 0.0366


Training Progress:  79%|████████████████████████████████████████████            | 7861/10000 [1:59:04<06:10,  5.77it/s]

Step 7860/10000, Loss: 0.0316
Step 7861/10000, Loss: 0.0318


Training Progress:  79%|████████████████████████████████████████████            | 7863/10000 [1:59:04<06:02,  5.89it/s]

Step 7862/10000, Loss: 0.0310
Step 7863/10000, Loss: 0.0275


Training Progress:  79%|████████████████████████████████████████████            | 7865/10000 [1:59:04<06:06,  5.83it/s]

Step 7864/10000, Loss: 0.0391
Step 7865/10000, Loss: 0.0260


Training Progress:  79%|████████████████████████████████████████████            | 7867/10000 [1:59:05<06:15,  5.68it/s]

Step 7866/10000, Loss: 0.0343
Step 7867/10000, Loss: 0.0340


Training Progress:  79%|████████████████████████████████████████████            | 7869/10000 [1:59:05<06:12,  5.72it/s]

Step 7868/10000, Loss: 0.0317
Step 7869/10000, Loss: 0.0303


Training Progress:  79%|████████████████████████████████████████████            | 7871/10000 [1:59:05<06:10,  5.75it/s]

Step 7870/10000, Loss: 0.0247
Step 7871/10000, Loss: 0.0338


Training Progress:  79%|████████████████████████████████████████████            | 7873/10000 [1:59:06<06:08,  5.78it/s]

Step 7872/10000, Loss: 0.0353
Step 7873/10000, Loss: 0.0303


Training Progress:  79%|████████████████████████████████████████████            | 7875/10000 [1:59:06<06:09,  5.75it/s]

Step 7874/10000, Loss: 0.0313
Step 7875/10000, Loss: 0.0377


Training Progress:  79%|████████████████████████████████████████████            | 7877/10000 [1:59:06<06:03,  5.85it/s]

Step 7876/10000, Loss: 0.0293
Step 7877/10000, Loss: 0.0350


Training Progress:  79%|████████████████████████████████████████████            | 7879/10000 [1:59:07<06:02,  5.85it/s]

Step 7878/10000, Loss: 0.0337
Step 7879/10000, Loss: 0.0351


Training Progress:  79%|████████████████████████████████████████████▏           | 7881/10000 [1:59:07<06:07,  5.76it/s]

Step 7880/10000, Loss: 0.0355
Step 7881/10000, Loss: 0.0317


Training Progress:  79%|████████████████████████████████████████████▏           | 7883/10000 [1:59:07<06:07,  5.77it/s]

Step 7882/10000, Loss: 0.0327
Step 7883/10000, Loss: 0.0390


Training Progress:  79%|████████████████████████████████████████████▏           | 7885/10000 [1:59:08<06:02,  5.83it/s]

Step 7884/10000, Loss: 0.0308
Step 7885/10000, Loss: 0.0287


Training Progress:  79%|████████████████████████████████████████████▏           | 7887/10000 [1:59:08<06:01,  5.84it/s]

Step 7886/10000, Loss: 0.0288
Step 7887/10000, Loss: 0.0279


Training Progress:  79%|████████████████████████████████████████████▏           | 7889/10000 [1:59:08<06:03,  5.81it/s]

Step 7888/10000, Loss: 0.0348
Step 7889/10000, Loss: 0.0260


Training Progress:  79%|████████████████████████████████████████████▏           | 7891/10000 [1:59:09<06:07,  5.74it/s]

Step 7890/10000, Loss: 0.0291
Step 7891/10000, Loss: 0.0360


Training Progress:  79%|████████████████████████████████████████████▏           | 7893/10000 [1:59:09<06:08,  5.72it/s]

Step 7892/10000, Loss: 0.0360
Step 7893/10000, Loss: 0.0293


Training Progress:  79%|████████████████████████████████████████████▏           | 7895/10000 [1:59:09<05:58,  5.87it/s]

Step 7894/10000, Loss: 0.0312
Step 7895/10000, Loss: 0.0320


Training Progress:  79%|████████████████████████████████████████████▏           | 7897/10000 [1:59:10<06:01,  5.81it/s]

Step 7896/10000, Loss: 0.0358
Step 7897/10000, Loss: 0.0292


Training Progress:  79%|████████████████████████████████████████████▏           | 7899/10000 [1:59:10<06:05,  5.74it/s]

Step 7898/10000, Loss: 0.0382
Step 7899/10000, Loss: 0.0464


Training Progress:  79%|████████████████████████████████████████████▏           | 7901/10000 [1:59:10<06:06,  5.73it/s]

Step 7900/10000, Loss: 0.0449
Step 7901/10000, Loss: 0.0366


Training Progress:  79%|████████████████████████████████████████████▎           | 7903/10000 [1:59:11<06:02,  5.78it/s]

Step 7902/10000, Loss: 0.0347
Step 7903/10000, Loss: 0.0319


Training Progress:  79%|████████████████████████████████████████████▎           | 7905/10000 [1:59:11<06:02,  5.78it/s]

Step 7904/10000, Loss: 0.0449
Step 7905/10000, Loss: 0.0330


Training Progress:  79%|████████████████████████████████████████████▎           | 7907/10000 [1:59:11<06:05,  5.72it/s]

Step 7906/10000, Loss: 0.0343
Step 7907/10000, Loss: 0.0316


Training Progress:  79%|████████████████████████████████████████████▎           | 7909/10000 [1:59:12<06:06,  5.71it/s]

Step 7908/10000, Loss: 0.0314
Step 7909/10000, Loss: 0.0298


Training Progress:  79%|████████████████████████████████████████████▎           | 7911/10000 [1:59:12<05:56,  5.86it/s]

Step 7910/10000, Loss: 0.0304
Step 7911/10000, Loss: 0.0318


Training Progress:  79%|████████████████████████████████████████████▎           | 7913/10000 [1:59:13<05:59,  5.81it/s]

Step 7912/10000, Loss: 0.0348
Step 7913/10000, Loss: 0.0295


Training Progress:  79%|████████████████████████████████████████████▎           | 7915/10000 [1:59:13<06:02,  5.75it/s]

Step 7914/10000, Loss: 0.0298
Step 7915/10000, Loss: 0.0274


Training Progress:  79%|████████████████████████████████████████████▎           | 7917/10000 [1:59:13<06:04,  5.72it/s]

Step 7916/10000, Loss: 0.0283
Step 7917/10000, Loss: 0.0322


Training Progress:  79%|████████████████████████████████████████████▎           | 7919/10000 [1:59:14<05:54,  5.86it/s]

Step 7918/10000, Loss: 0.0257
Step 7919/10000, Loss: 0.0295


Training Progress:  79%|████████████████████████████████████████████▎           | 7921/10000 [1:59:14<05:57,  5.81it/s]

Step 7920/10000, Loss: 0.0262
Step 7921/10000, Loss: 0.0301


Training Progress:  79%|████████████████████████████████████████████▎           | 7923/10000 [1:59:14<06:03,  5.71it/s]

Step 7922/10000, Loss: 0.0332
Step 7923/10000, Loss: 0.0305


Training Progress:  79%|████████████████████████████████████████████▍           | 7925/10000 [1:59:15<06:02,  5.72it/s]

Step 7924/10000, Loss: 0.0286
Step 7925/10000, Loss: 0.0316


Training Progress:  79%|████████████████████████████████████████████▍           | 7927/10000 [1:59:15<05:53,  5.86it/s]

Step 7926/10000, Loss: 0.0351
Step 7927/10000, Loss: 0.0270


Training Progress:  79%|████████████████████████████████████████████▍           | 7929/10000 [1:59:15<05:57,  5.79it/s]

Step 7928/10000, Loss: 0.0326
Step 7929/10000, Loss: 0.0258


Training Progress:  79%|████████████████████████████████████████████▍           | 7931/10000 [1:59:16<06:01,  5.72it/s]

Step 7930/10000, Loss: 0.0296
Step 7931/10000, Loss: 0.0318


Training Progress:  79%|████████████████████████████████████████████▍           | 7933/10000 [1:59:16<06:01,  5.73it/s]

Step 7932/10000, Loss: 0.0361
Step 7933/10000, Loss: 0.0299


Training Progress:  79%|████████████████████████████████████████████▍           | 7935/10000 [1:59:16<05:53,  5.85it/s]

Step 7934/10000, Loss: 0.0249
Step 7935/10000, Loss: 0.0357


Training Progress:  79%|████████████████████████████████████████████▍           | 7937/10000 [1:59:17<05:54,  5.82it/s]

Step 7936/10000, Loss: 0.0308
Step 7937/10000, Loss: 0.0381


Training Progress:  79%|████████████████████████████████████████████▍           | 7939/10000 [1:59:17<05:58,  5.74it/s]

Step 7938/10000, Loss: 0.0314
Step 7939/10000, Loss: 0.0489


Training Progress:  79%|████████████████████████████████████████████▍           | 7941/10000 [1:59:17<06:00,  5.70it/s]

Step 7940/10000, Loss: 0.0387
Step 7941/10000, Loss: 0.0414


Training Progress:  79%|████████████████████████████████████████████▍           | 7943/10000 [1:59:18<05:53,  5.81it/s]

Step 7942/10000, Loss: 0.0402
Step 7943/10000, Loss: 0.0346


Training Progress:  79%|████████████████████████████████████████████▍           | 7945/10000 [1:59:18<05:52,  5.82it/s]

Step 7944/10000, Loss: 0.0298
Step 7945/10000, Loss: 0.0252


Training Progress:  79%|████████████████████████████████████████████▌           | 7947/10000 [1:59:18<05:54,  5.78it/s]

Step 7946/10000, Loss: 0.0420
Step 7947/10000, Loss: 0.0302


Training Progress:  79%|████████████████████████████████████████████▌           | 7949/10000 [1:59:19<05:57,  5.74it/s]

Step 7948/10000, Loss: 0.0310
Step 7949/10000, Loss: 0.0332


Training Progress:  80%|████████████████████████████████████████████▌           | 7951/10000 [1:59:19<05:56,  5.74it/s]

Step 7950/10000, Loss: 0.0315
Step 7951/10000, Loss: 0.0317


Training Progress:  80%|████████████████████████████████████████████▌           | 7953/10000 [1:59:19<05:49,  5.85it/s]

Step 7952/10000, Loss: 0.0267
Step 7953/10000, Loss: 0.0335


Training Progress:  80%|████████████████████████████████████████████▌           | 7955/10000 [1:59:20<05:55,  5.76it/s]

Step 7954/10000, Loss: 0.0370
Step 7955/10000, Loss: 0.0305


Training Progress:  80%|████████████████████████████████████████████▌           | 7957/10000 [1:59:20<05:56,  5.73it/s]

Step 7956/10000, Loss: 0.0309
Step 7957/10000, Loss: 0.0362


Training Progress:  80%|████████████████████████████████████████████▌           | 7959/10000 [1:59:20<05:57,  5.72it/s]

Step 7958/10000, Loss: 0.0274
Step 7959/10000, Loss: 0.0346


Training Progress:  80%|████████████████████████████████████████████▌           | 7961/10000 [1:59:21<05:52,  5.78it/s]

Step 7960/10000, Loss: 0.0340
Step 7961/10000, Loss: 0.0321


Training Progress:  80%|████████████████████████████████████████████▌           | 7963/10000 [1:59:21<05:50,  5.81it/s]

Step 7962/10000, Loss: 0.0348
Step 7963/10000, Loss: 0.0305


Training Progress:  80%|████████████████████████████████████████████▌           | 7965/10000 [1:59:22<05:54,  5.74it/s]

Step 7964/10000, Loss: 0.0290
Step 7965/10000, Loss: 0.0381


Training Progress:  80%|████████████████████████████████████████████▌           | 7967/10000 [1:59:22<05:55,  5.72it/s]

Step 7966/10000, Loss: 0.0330
Step 7967/10000, Loss: 0.0371


Training Progress:  80%|████████████████████████████████████████████▋           | 7969/10000 [1:59:22<05:46,  5.86it/s]

Step 7968/10000, Loss: 0.0354
Step 7969/10000, Loss: 0.0311


Training Progress:  80%|████████████████████████████████████████████▋           | 7971/10000 [1:59:23<05:49,  5.81it/s]

Step 7970/10000, Loss: 0.0375
Step 7971/10000, Loss: 0.0325


Training Progress:  80%|████████████████████████████████████████████▋           | 7973/10000 [1:59:23<05:53,  5.74it/s]

Step 7972/10000, Loss: 0.0318
Step 7973/10000, Loss: 0.0348


Training Progress:  80%|████████████████████████████████████████████▋           | 7975/10000 [1:59:23<05:56,  5.68it/s]

Step 7974/10000, Loss: 0.0333
Step 7975/10000, Loss: 0.0334


Training Progress:  80%|████████████████████████████████████████████▋           | 7977/10000 [1:59:24<05:53,  5.72it/s]

Step 7976/10000, Loss: 0.0267
Step 7977/10000, Loss: 0.0294


Training Progress:  80%|████████████████████████████████████████████▋           | 7979/10000 [1:59:24<05:46,  5.84it/s]

Step 7978/10000, Loss: 0.0326
Step 7979/10000, Loss: 0.0332


Training Progress:  80%|████████████████████████████████████████████▋           | 7981/10000 [1:59:24<05:51,  5.74it/s]

Step 7980/10000, Loss: 0.0449
Step 7981/10000, Loss: 0.0374


Training Progress:  80%|████████████████████████████████████████████▋           | 7983/10000 [1:59:25<05:52,  5.72it/s]

Step 7982/10000, Loss: 0.0485
Step 7983/10000, Loss: 0.0374


Training Progress:  80%|████████████████████████████████████████████▋           | 7985/10000 [1:59:25<05:53,  5.70it/s]

Step 7984/10000, Loss: 0.0377
Step 7985/10000, Loss: 0.0324


Training Progress:  80%|████████████████████████████████████████████▋           | 7987/10000 [1:59:25<05:43,  5.86it/s]

Step 7986/10000, Loss: 0.0500
Step 7987/10000, Loss: 0.0310


Training Progress:  80%|████████████████████████████████████████████▋           | 7989/10000 [1:59:26<05:53,  5.70it/s]

Step 7988/10000, Loss: 0.0334
Step 7989/10000, Loss: 0.0358


Training Progress:  80%|████████████████████████████████████████████▋           | 7991/10000 [1:59:26<05:49,  5.74it/s]

Step 7990/10000, Loss: 0.0330
Step 7991/10000, Loss: 0.0335


Training Progress:  80%|████████████████████████████████████████████▊           | 7993/10000 [1:59:26<05:49,  5.75it/s]

Step 7992/10000, Loss: 0.0327
Step 7993/10000, Loss: 0.0334


Training Progress:  80%|████████████████████████████████████████████▊           | 7995/10000 [1:59:27<05:48,  5.75it/s]

Step 7994/10000, Loss: 0.0362
Step 7995/10000, Loss: 0.0379


Training Progress:  80%|████████████████████████████████████████████▊           | 7997/10000 [1:59:27<05:48,  5.75it/s]

Step 7996/10000, Loss: 0.0306
Step 7997/10000, Loss: 0.0349


Training Progress:  80%|████████████████████████████████████████████▊           | 7999/10000 [1:59:27<05:47,  5.77it/s]

Step 7998/10000, Loss: 0.0308
Step 7999/10000, Loss: 0.0361
Step 8000/10000, Loss: 0.0278


Training Progress:  80%|███████████████████████████████████████████▏          | 8000/10000 [1:59:42<2:30:44,  4.52s/it]


Checkpoint saved: checkpoints\checkpoint_step8000_loss0.0278_20250117_145131.pt


Training Progress:  80%|███████████████████████████████████████████▏          | 8002/10000 [1:59:43<1:18:48,  2.37s/it]

Step 8001/10000, Loss: 0.0323
Step 8002/10000, Loss: 0.0330


Training Progress:  80%|████████████████████████████████████████████▊           | 8004/10000 [1:59:43<41:30,  1.25s/it]

Step 8003/10000, Loss: 0.0313
Step 8004/10000, Loss: 0.0307


Training Progress:  80%|████████████████████████████████████████████▊           | 8006/10000 [1:59:43<23:18,  1.43it/s]

Step 8005/10000, Loss: 0.0295
Step 8006/10000, Loss: 0.0250


Training Progress:  80%|████████████████████████████████████████████▊           | 8008/10000 [1:59:44<14:23,  2.31it/s]

Step 8007/10000, Loss: 0.0430
Step 8008/10000, Loss: 0.0436


Training Progress:  80%|████████████████████████████████████████████▊           | 8010/10000 [1:59:44<09:56,  3.34it/s]

Step 8009/10000, Loss: 0.0277
Step 8010/10000, Loss: 0.0393


Training Progress:  80%|████████████████████████████████████████████▊           | 8012/10000 [1:59:44<07:44,  4.28it/s]

Step 8011/10000, Loss: 0.0291
Step 8012/10000, Loss: 0.0308


Training Progress:  80%|████████████████████████████████████████████▉           | 8014/10000 [1:59:45<06:45,  4.89it/s]

Step 8013/10000, Loss: 0.0314
Step 8014/10000, Loss: 0.0377


Training Progress:  80%|████████████████████████████████████████████▉           | 8016/10000 [1:59:45<06:16,  5.27it/s]

Step 8015/10000, Loss: 0.0334
Step 8016/10000, Loss: 0.0376


Training Progress:  80%|████████████████████████████████████████████▉           | 8018/10000 [1:59:45<05:54,  5.59it/s]

Step 8017/10000, Loss: 0.0429
Step 8018/10000, Loss: 0.0344


Training Progress:  80%|████████████████████████████████████████████▉           | 8020/10000 [1:59:46<05:50,  5.65it/s]

Step 8019/10000, Loss: 0.0450
Step 8020/10000, Loss: 0.0370


Training Progress:  80%|████████████████████████████████████████████▉           | 8022/10000 [1:59:46<05:46,  5.71it/s]

Step 8021/10000, Loss: 0.0458
Step 8022/10000, Loss: 0.0357


Training Progress:  80%|████████████████████████████████████████████▉           | 8024/10000 [1:59:47<05:47,  5.69it/s]

Step 8023/10000, Loss: 0.0381
Step 8024/10000, Loss: 0.0394


Training Progress:  80%|████████████████████████████████████████████▉           | 8026/10000 [1:59:47<05:37,  5.84it/s]

Step 8025/10000, Loss: 0.0367
Step 8026/10000, Loss: 0.0338


Training Progress:  80%|████████████████████████████████████████████▉           | 8028/10000 [1:59:47<05:42,  5.77it/s]

Step 8027/10000, Loss: 0.0304
Step 8028/10000, Loss: 0.0360


Training Progress:  80%|████████████████████████████████████████████▉           | 8030/10000 [1:59:48<05:42,  5.75it/s]

Step 8029/10000, Loss: 0.0318
Step 8030/10000, Loss: 0.0295


Training Progress:  80%|████████████████████████████████████████████▉           | 8032/10000 [1:59:48<05:39,  5.80it/s]

Step 8031/10000, Loss: 0.0326
Step 8032/10000, Loss: 0.0312


Training Progress:  80%|████████████████████████████████████████████▉           | 8034/10000 [1:59:48<05:38,  5.81it/s]

Step 8033/10000, Loss: 0.0314
Step 8034/10000, Loss: 0.0242


Training Progress:  80%|█████████████████████████████████████████████           | 8036/10000 [1:59:49<05:38,  5.80it/s]

Step 8035/10000, Loss: 0.0319
Step 8036/10000, Loss: 0.0384


Training Progress:  80%|█████████████████████████████████████████████           | 8038/10000 [1:59:49<05:41,  5.74it/s]

Step 8037/10000, Loss: 0.0308
Step 8038/10000, Loss: 0.0319


Training Progress:  80%|█████████████████████████████████████████████           | 8040/10000 [1:59:49<05:34,  5.85it/s]

Step 8039/10000, Loss: 0.0388
Step 8040/10000, Loss: 0.0334


Training Progress:  80%|█████████████████████████████████████████████           | 8042/10000 [1:59:50<05:36,  5.82it/s]

Step 8041/10000, Loss: 0.0353
Step 8042/10000, Loss: 0.0351


Training Progress:  80%|█████████████████████████████████████████████           | 8044/10000 [1:59:50<05:39,  5.76it/s]

Step 8043/10000, Loss: 0.0316
Step 8044/10000, Loss: 0.0344


Training Progress:  80%|█████████████████████████████████████████████           | 8046/10000 [1:59:50<05:40,  5.74it/s]

Step 8045/10000, Loss: 0.0335
Step 8046/10000, Loss: 0.0333


Training Progress:  80%|█████████████████████████████████████████████           | 8048/10000 [1:59:51<05:32,  5.87it/s]

Step 8047/10000, Loss: 0.0382
Step 8048/10000, Loss: 0.0331


Training Progress:  80%|█████████████████████████████████████████████           | 8050/10000 [1:59:51<05:35,  5.82it/s]

Step 8049/10000, Loss: 0.0328
Step 8050/10000, Loss: 0.0298


Training Progress:  81%|█████████████████████████████████████████████           | 8052/10000 [1:59:51<05:38,  5.75it/s]

Step 8051/10000, Loss: 0.0322
Step 8052/10000, Loss: 0.0383


Training Progress:  81%|█████████████████████████████████████████████           | 8054/10000 [1:59:52<05:39,  5.73it/s]

Step 8053/10000, Loss: 0.0372
Step 8054/10000, Loss: 0.0357


Training Progress:  81%|█████████████████████████████████████████████           | 8056/10000 [1:59:52<05:31,  5.86it/s]

Step 8055/10000, Loss: 0.0471
Step 8056/10000, Loss: 0.0499


Training Progress:  81%|█████████████████████████████████████████████           | 8058/10000 [1:59:52<05:34,  5.81it/s]

Step 8057/10000, Loss: 0.0336
Step 8058/10000, Loss: 0.0288


Training Progress:  81%|█████████████████████████████████████████████▏          | 8060/10000 [1:59:53<05:37,  5.74it/s]

Step 8059/10000, Loss: 0.0355
Step 8060/10000, Loss: 0.0388


Training Progress:  81%|█████████████████████████████████████████████▏          | 8062/10000 [1:59:53<05:29,  5.88it/s]

Step 8061/10000, Loss: 0.0330
Step 8062/10000, Loss: 0.0389


Training Progress:  81%|█████████████████████████████████████████████▏          | 8064/10000 [1:59:53<05:32,  5.82it/s]

Step 8063/10000, Loss: 0.0382
Step 8064/10000, Loss: 0.0458


Training Progress:  81%|█████████████████████████████████████████████▏          | 8066/10000 [1:59:54<05:35,  5.77it/s]

Step 8065/10000, Loss: 0.0408
Step 8066/10000, Loss: 0.0350


Training Progress:  81%|█████████████████████████████████████████████▏          | 8068/10000 [1:59:54<05:38,  5.71it/s]

Step 8067/10000, Loss: 0.0317
Step 8068/10000, Loss: 0.0403


Training Progress:  81%|█████████████████████████████████████████████▏          | 8070/10000 [1:59:54<05:29,  5.86it/s]

Step 8069/10000, Loss: 0.0395
Step 8070/10000, Loss: 0.0329


Training Progress:  81%|█████████████████████████████████████████████▏          | 8072/10000 [1:59:55<05:32,  5.80it/s]

Step 8071/10000, Loss: 0.0323
Step 8072/10000, Loss: 0.0412


Training Progress:  81%|█████████████████████████████████████████████▏          | 8074/10000 [1:59:55<05:35,  5.74it/s]

Step 8073/10000, Loss: 0.0410
Step 8074/10000, Loss: 0.0335


Training Progress:  81%|█████████████████████████████████████████████▏          | 8076/10000 [1:59:56<05:37,  5.70it/s]

Step 8075/10000, Loss: 0.0331
Step 8076/10000, Loss: 0.0343


Training Progress:  81%|█████████████████████████████████████████████▏          | 8078/10000 [1:59:56<05:28,  5.86it/s]

Step 8077/10000, Loss: 0.0338
Step 8078/10000, Loss: 0.0361


Training Progress:  81%|█████████████████████████████████████████████▏          | 8080/10000 [1:59:56<05:34,  5.73it/s]

Step 8079/10000, Loss: 0.0384
Step 8080/10000, Loss: 0.0369


Training Progress:  81%|█████████████████████████████████████████████▎          | 8082/10000 [1:59:57<05:32,  5.77it/s]

Step 8081/10000, Loss: 0.0330
Step 8082/10000, Loss: 0.0280


Training Progress:  81%|█████████████████████████████████████████████▎          | 8084/10000 [1:59:57<05:31,  5.78it/s]

Step 8083/10000, Loss: 0.0415
Step 8084/10000, Loss: 0.1355


Training Progress:  81%|█████████████████████████████████████████████▎          | 8086/10000 [1:59:57<05:27,  5.85it/s]

Step 8085/10000, Loss: 0.0704
Step 8086/10000, Loss: 0.0600


Training Progress:  81%|█████████████████████████████████████████████▎          | 8088/10000 [1:59:58<05:31,  5.78it/s]

Step 8087/10000, Loss: 0.0436
Step 8088/10000, Loss: 0.0432


Training Progress:  81%|█████████████████████████████████████████████▎          | 8090/10000 [1:59:58<05:30,  5.77it/s]

Step 8089/10000, Loss: 0.0490
Step 8090/10000, Loss: 0.0455


Training Progress:  81%|█████████████████████████████████████████████▎          | 8092/10000 [1:59:58<05:23,  5.89it/s]

Step 8091/10000, Loss: 0.0367
Step 8092/10000, Loss: 0.0473


Training Progress:  81%|█████████████████████████████████████████████▎          | 8094/10000 [1:59:59<05:27,  5.83it/s]

Step 8093/10000, Loss: 0.0336
Step 8094/10000, Loss: 0.0361


Training Progress:  81%|█████████████████████████████████████████████▎          | 8096/10000 [1:59:59<05:27,  5.81it/s]

Step 8095/10000, Loss: 0.0394
Step 8096/10000, Loss: 0.0439


Training Progress:  81%|█████████████████████████████████████████████▎          | 8098/10000 [1:59:59<05:31,  5.75it/s]

Step 8097/10000, Loss: 0.0352
Step 8098/10000, Loss: 0.0315


Training Progress:  81%|█████████████████████████████████████████████▎          | 8100/10000 [2:00:00<05:23,  5.87it/s]

Step 8099/10000, Loss: 0.0410
Step 8100/10000, Loss: 0.0404


Training Progress:  81%|█████████████████████████████████████████████▎          | 8102/10000 [2:00:00<05:29,  5.77it/s]

Step 8101/10000, Loss: 0.0495
Step 8102/10000, Loss: 0.0362


Training Progress:  81%|█████████████████████████████████████████████▍          | 8104/10000 [2:00:00<05:29,  5.76it/s]

Step 8103/10000, Loss: 0.0449
Step 8104/10000, Loss: 0.0433


Training Progress:  81%|█████████████████████████████████████████████▍          | 8106/10000 [2:00:01<05:30,  5.72it/s]

Step 8105/10000, Loss: 0.0432
Step 8106/10000, Loss: 0.0578


Training Progress:  81%|█████████████████████████████████████████████▍          | 8108/10000 [2:00:01<05:22,  5.86it/s]

Step 8107/10000, Loss: 0.0427
Step 8108/10000, Loss: 0.0358


Training Progress:  81%|█████████████████████████████████████████████▍          | 8110/10000 [2:00:01<05:25,  5.81it/s]

Step 8109/10000, Loss: 0.0355
Step 8110/10000, Loss: 0.0438


Training Progress:  81%|█████████████████████████████████████████████▍          | 8112/10000 [2:00:02<05:28,  5.74it/s]

Step 8111/10000, Loss: 0.0385
Step 8112/10000, Loss: 0.0369


Training Progress:  81%|█████████████████████████████████████████████▍          | 8114/10000 [2:00:02<05:28,  5.74it/s]

Step 8113/10000, Loss: 0.0354
Step 8114/10000, Loss: 0.0381


Training Progress:  81%|█████████████████████████████████████████████▍          | 8116/10000 [2:00:02<05:25,  5.79it/s]

Step 8115/10000, Loss: 0.0405
Step 8116/10000, Loss: 0.0351


Training Progress:  81%|█████████████████████████████████████████████▍          | 8118/10000 [2:00:03<05:27,  5.74it/s]

Step 8117/10000, Loss: 0.0386
Step 8118/10000, Loss: 0.0436


Training Progress:  81%|█████████████████████████████████████████████▍          | 8120/10000 [2:00:03<05:20,  5.87it/s]

Step 8119/10000, Loss: 0.0376
Step 8120/10000, Loss: 0.0381


Training Progress:  81%|█████████████████████████████████████████████▍          | 8122/10000 [2:00:03<05:22,  5.82it/s]

Step 8121/10000, Loss: 0.0436
Step 8122/10000, Loss: 0.0328


Training Progress:  81%|█████████████████████████████████████████████▍          | 8124/10000 [2:00:04<05:26,  5.75it/s]

Step 8123/10000, Loss: 0.0498
Step 8124/10000, Loss: 0.0426


Training Progress:  81%|█████████████████████████████████████████████▌          | 8126/10000 [2:00:04<05:28,  5.71it/s]

Step 8125/10000, Loss: 0.0393
Step 8126/10000, Loss: 0.0389


Training Progress:  81%|█████████████████████████████████████████████▌          | 8128/10000 [2:00:04<05:19,  5.85it/s]

Step 8127/10000, Loss: 0.0380
Step 8128/10000, Loss: 0.0384


Training Progress:  81%|█████████████████████████████████████████████▌          | 8130/10000 [2:00:05<05:22,  5.81it/s]

Step 8129/10000, Loss: 0.0437
Step 8130/10000, Loss: 0.0351


Training Progress:  81%|█████████████████████████████████████████████▌          | 8132/10000 [2:00:05<05:25,  5.74it/s]

Step 8131/10000, Loss: 0.0423
Step 8132/10000, Loss: 0.0372


Training Progress:  81%|█████████████████████████████████████████████▌          | 8134/10000 [2:00:06<05:26,  5.72it/s]

Step 8133/10000, Loss: 0.0326
Step 8134/10000, Loss: 0.0420


Training Progress:  81%|█████████████████████████████████████████████▌          | 8136/10000 [2:00:06<05:18,  5.85it/s]

Step 8135/10000, Loss: 0.0447
Step 8136/10000, Loss: 0.0351


Training Progress:  81%|█████████████████████████████████████████████▌          | 8138/10000 [2:00:06<05:24,  5.74it/s]

Step 8137/10000, Loss: 0.0443
Step 8138/10000, Loss: 0.1238


Training Progress:  81%|█████████████████████████████████████████████▌          | 8140/10000 [2:00:07<05:24,  5.73it/s]

Step 8139/10000, Loss: 0.0852
Step 8140/10000, Loss: 0.0578


Training Progress:  81%|█████████████████████████████████████████████▌          | 8142/10000 [2:00:07<05:16,  5.87it/s]

Step 8141/10000, Loss: 0.0470
Step 8142/10000, Loss: 0.0478


Training Progress:  81%|█████████████████████████████████████████████▌          | 8144/10000 [2:00:07<05:21,  5.77it/s]

Step 8143/10000, Loss: 0.0470
Step 8144/10000, Loss: 0.0507


Training Progress:  81%|█████████████████████████████████████████████▌          | 8146/10000 [2:00:08<05:22,  5.76it/s]

Step 8145/10000, Loss: 0.0453
Step 8146/10000, Loss: 0.0573


Training Progress:  81%|█████████████████████████████████████████████▋          | 8148/10000 [2:00:08<05:23,  5.73it/s]

Step 8147/10000, Loss: 0.0468
Step 8148/10000, Loss: 0.0473


Training Progress:  82%|█████████████████████████████████████████████▋          | 8150/10000 [2:00:08<05:16,  5.84it/s]

Step 8149/10000, Loss: 0.0479
Step 8150/10000, Loss: 0.0460


Training Progress:  82%|█████████████████████████████████████████████▋          | 8152/10000 [2:00:09<05:18,  5.81it/s]

Step 8151/10000, Loss: 0.0559
Step 8152/10000, Loss: 0.0323


Training Progress:  82%|█████████████████████████████████████████████▋          | 8154/10000 [2:00:09<05:21,  5.74it/s]

Step 8153/10000, Loss: 0.0409
Step 8154/10000, Loss: 0.0458


Training Progress:  82%|█████████████████████████████████████████████▋          | 8156/10000 [2:00:09<05:22,  5.73it/s]

Step 8155/10000, Loss: 0.0498
Step 8156/10000, Loss: 0.0393


Training Progress:  82%|█████████████████████████████████████████████▋          | 8158/10000 [2:00:10<05:14,  5.85it/s]

Step 8157/10000, Loss: 0.0457
Step 8158/10000, Loss: 0.0477


Training Progress:  82%|█████████████████████████████████████████████▋          | 8160/10000 [2:00:10<05:17,  5.80it/s]

Step 8159/10000, Loss: 0.0418
Step 8160/10000, Loss: 0.0439


Training Progress:  82%|█████████████████████████████████████████████▋          | 8162/10000 [2:00:10<05:17,  5.78it/s]

Step 8161/10000, Loss: 0.0960
Step 8162/10000, Loss: 0.0772


Training Progress:  82%|█████████████████████████████████████████████▋          | 8164/10000 [2:00:11<05:19,  5.74it/s]

Step 8163/10000, Loss: 0.0472
Step 8164/10000, Loss: 0.0446


Training Progress:  82%|█████████████████████████████████████████████▋          | 8166/10000 [2:00:11<05:12,  5.87it/s]

Step 8165/10000, Loss: 0.0556
Step 8166/10000, Loss: 0.1241


Training Progress:  82%|█████████████████████████████████████████████▋          | 8168/10000 [2:00:11<05:16,  5.78it/s]

Step 8167/10000, Loss: 0.0766
Step 8168/10000, Loss: 0.0596


Training Progress:  82%|█████████████████████████████████████████████▊          | 8170/10000 [2:00:12<05:19,  5.73it/s]

Step 8169/10000, Loss: 0.0562
Step 8170/10000, Loss: 0.0538


Training Progress:  82%|█████████████████████████████████████████████▊          | 8172/10000 [2:00:12<05:19,  5.71it/s]

Step 8171/10000, Loss: 0.0579
Step 8172/10000, Loss: 0.0555


Training Progress:  82%|█████████████████████████████████████████████▊          | 8174/10000 [2:00:12<05:11,  5.86it/s]

Step 8173/10000, Loss: 0.0531
Step 8174/10000, Loss: 0.0714


Training Progress:  82%|█████████████████████████████████████████████▊          | 8176/10000 [2:00:13<05:16,  5.76it/s]

Step 8175/10000, Loss: 0.0479
Step 8176/10000, Loss: 0.0602


Training Progress:  82%|█████████████████████████████████████████████▊          | 8178/10000 [2:00:13<05:18,  5.71it/s]

Step 8177/10000, Loss: 0.0588
Step 8178/10000, Loss: 0.0564


Training Progress:  82%|█████████████████████████████████████████████▊          | 8180/10000 [2:00:13<05:10,  5.85it/s]

Step 8179/10000, Loss: 0.0628
Step 8180/10000, Loss: 0.0573


Training Progress:  82%|█████████████████████████████████████████████▊          | 8182/10000 [2:00:14<05:14,  5.78it/s]

Step 8181/10000, Loss: 0.0644
Step 8182/10000, Loss: 0.0537


Training Progress:  82%|█████████████████████████████████████████████▊          | 8184/10000 [2:00:14<05:15,  5.75it/s]

Step 8183/10000, Loss: 0.0750
Step 8184/10000, Loss: 0.0559


Training Progress:  82%|█████████████████████████████████████████████▊          | 8186/10000 [2:00:15<05:17,  5.71it/s]

Step 8185/10000, Loss: 0.0617
Step 8186/10000, Loss: 0.0625


Training Progress:  82%|█████████████████████████████████████████████▊          | 8188/10000 [2:00:15<05:16,  5.73it/s]

Step 8187/10000, Loss: 0.0634
Step 8188/10000, Loss: 0.0781


Training Progress:  82%|█████████████████████████████████████████████▊          | 8190/10000 [2:00:15<05:11,  5.81it/s]

Step 8189/10000, Loss: 0.0673
Step 8190/10000, Loss: 0.0647


Training Progress:  82%|█████████████████████████████████████████████▉          | 8192/10000 [2:00:16<05:10,  5.82it/s]

Step 8191/10000, Loss: 0.0629
Step 8192/10000, Loss: 0.0618


Training Progress:  82%|█████████████████████████████████████████████▉          | 8194/10000 [2:00:16<05:13,  5.75it/s]

Step 8193/10000, Loss: 0.0588
Step 8194/10000, Loss: 0.0575


Training Progress:  82%|█████████████████████████████████████████████▉          | 8196/10000 [2:00:16<05:09,  5.84it/s]

Step 8195/10000, Loss: 0.0588
Step 8196/10000, Loss: 0.0649


Training Progress:  82%|█████████████████████████████████████████████▉          | 8198/10000 [2:00:17<05:08,  5.83it/s]

Step 8197/10000, Loss: 0.0618
Step 8198/10000, Loss: 0.0575


Training Progress:  82%|█████████████████████████████████████████████▉          | 8200/10000 [2:00:17<05:13,  5.75it/s]

Step 8199/10000, Loss: 0.0660
Step 8200/10000, Loss: 0.0742


Training Progress:  82%|█████████████████████████████████████████████▉          | 8202/10000 [2:00:17<05:15,  5.71it/s]

Step 8201/10000, Loss: 0.0697
Step 8202/10000, Loss: 0.0595


Training Progress:  82%|█████████████████████████████████████████████▉          | 8204/10000 [2:00:18<05:09,  5.81it/s]

Step 8203/10000, Loss: 0.0768
Step 8204/10000, Loss: 0.0622


Training Progress:  82%|█████████████████████████████████████████████▉          | 8206/10000 [2:00:18<05:07,  5.83it/s]

Step 8205/10000, Loss: 0.0805
Step 8206/10000, Loss: 0.0701


Training Progress:  82%|█████████████████████████████████████████████▉          | 8208/10000 [2:00:18<05:14,  5.70it/s]

Step 8207/10000, Loss: 0.0664
Step 8208/10000, Loss: 0.0825


Training Progress:  82%|█████████████████████████████████████████████▉          | 8210/10000 [2:00:19<05:12,  5.73it/s]

Step 8209/10000, Loss: 0.0700
Step 8210/10000, Loss: 0.0710


Training Progress:  82%|█████████████████████████████████████████████▉          | 8212/10000 [2:00:19<05:04,  5.87it/s]

Step 8211/10000, Loss: 0.0765
Step 8212/10000, Loss: 0.0688


Training Progress:  82%|█████████████████████████████████████████████▉          | 8214/10000 [2:00:19<05:06,  5.82it/s]

Step 8213/10000, Loss: 0.0777
Step 8214/10000, Loss: 0.0756


Training Progress:  82%|██████████████████████████████████████████████          | 8216/10000 [2:00:20<05:09,  5.76it/s]

Step 8215/10000, Loss: 0.0698
Step 8216/10000, Loss: 0.0958


Training Progress:  82%|██████████████████████████████████████████████          | 8218/10000 [2:00:20<05:11,  5.72it/s]

Step 8217/10000, Loss: 0.0942
Step 8218/10000, Loss: 0.0870


Training Progress:  82%|██████████████████████████████████████████████          | 8220/10000 [2:00:20<05:02,  5.88it/s]

Step 8219/10000, Loss: 0.0805
Step 8220/10000, Loss: 0.1251


Training Progress:  82%|██████████████████████████████████████████████          | 8222/10000 [2:00:21<05:05,  5.82it/s]

Step 8221/10000, Loss: 0.0983
Step 8222/10000, Loss: 0.0898


Training Progress:  82%|██████████████████████████████████████████████          | 8224/10000 [2:00:21<05:09,  5.75it/s]

Step 8223/10000, Loss: 0.0899
Step 8224/10000, Loss: 0.1116


Training Progress:  82%|██████████████████████████████████████████████          | 8226/10000 [2:00:21<05:10,  5.71it/s]

Step 8225/10000, Loss: 0.1029
Step 8226/10000, Loss: 0.0978


Training Progress:  82%|██████████████████████████████████████████████          | 8228/10000 [2:00:22<05:02,  5.85it/s]

Step 8227/10000, Loss: 0.1107
Step 8228/10000, Loss: 0.1173


Training Progress:  82%|██████████████████████████████████████████████          | 8230/10000 [2:00:22<05:08,  5.73it/s]

Step 8229/10000, Loss: 0.1143
Step 8230/10000, Loss: 0.0913


Training Progress:  82%|██████████████████████████████████████████████          | 8232/10000 [2:00:22<05:04,  5.80it/s]

Step 8231/10000, Loss: 0.1210
Step 8232/10000, Loss: 0.1303


Training Progress:  82%|██████████████████████████████████████████████          | 8234/10000 [2:00:23<05:08,  5.73it/s]

Step 8233/10000, Loss: 0.1067
Step 8234/10000, Loss: 0.1188


Training Progress:  82%|██████████████████████████████████████████████          | 8236/10000 [2:00:23<05:00,  5.88it/s]

Step 8235/10000, Loss: 0.1194
Step 8236/10000, Loss: 0.1228


Training Progress:  82%|██████████████████████████████████████████████▏         | 8238/10000 [2:00:24<05:02,  5.82it/s]

Step 8237/10000, Loss: 0.1262
Step 8238/10000, Loss: 0.1189


Training Progress:  82%|██████████████████████████████████████████████▏         | 8240/10000 [2:00:24<05:06,  5.74it/s]

Step 8239/10000, Loss: 0.1227
Step 8240/10000, Loss: 0.1387


Training Progress:  82%|██████████████████████████████████████████████▏         | 8242/10000 [2:00:24<05:07,  5.72it/s]

Step 8241/10000, Loss: 0.1658
Step 8242/10000, Loss: 0.1561


Training Progress:  82%|██████████████████████████████████████████████▏         | 8244/10000 [2:00:25<04:59,  5.87it/s]

Step 8243/10000, Loss: 0.1578
Step 8244/10000, Loss: 0.1779


Training Progress:  82%|██████████████████████████████████████████████▏         | 8246/10000 [2:00:25<05:01,  5.82it/s]

Step 8245/10000, Loss: 0.1584
Step 8246/10000, Loss: 0.1542


Training Progress:  82%|██████████████████████████████████████████████▏         | 8248/10000 [2:00:25<05:06,  5.71it/s]

Step 8247/10000, Loss: 0.2012
Step 8248/10000, Loss: 0.2192


Training Progress:  82%|██████████████████████████████████████████████▏         | 8250/10000 [2:00:26<05:06,  5.71it/s]

Step 8249/10000, Loss: 0.2409
Step 8250/10000, Loss: 0.2084


Training Progress:  83%|██████████████████████████████████████████████▏         | 8252/10000 [2:00:26<05:04,  5.75it/s]

Step 8251/10000, Loss: 0.2383
Step 8252/10000, Loss: 0.1918


Training Progress:  83%|██████████████████████████████████████████████▏         | 8254/10000 [2:00:26<05:00,  5.81it/s]

Step 8253/10000, Loss: 0.2102
Step 8254/10000, Loss: 0.1927


Training Progress:  83%|██████████████████████████████████████████████▏         | 8256/10000 [2:00:27<05:02,  5.76it/s]

Step 8255/10000, Loss: 0.1958
Step 8256/10000, Loss: 0.2262


Training Progress:  83%|██████████████████████████████████████████████▏         | 8258/10000 [2:00:27<05:05,  5.71it/s]

Step 8257/10000, Loss: 0.2069
Step 8258/10000, Loss: 0.2194


Training Progress:  83%|██████████████████████████████████████████████▎         | 8260/10000 [2:00:27<05:10,  5.60it/s]

Step 8259/10000, Loss: 0.2369
Step 8260/10000, Loss: 0.2270


Training Progress:  83%|██████████████████████████████████████████████▎         | 8262/10000 [2:00:28<05:08,  5.64it/s]

Step 8261/10000, Loss: 0.2667
Step 8262/10000, Loss: 0.2492


Training Progress:  83%|██████████████████████████████████████████████▎         | 8264/10000 [2:00:28<05:07,  5.64it/s]

Step 8263/10000, Loss: 0.2378
Step 8264/10000, Loss: 0.2493


Training Progress:  83%|██████████████████████████████████████████████▎         | 8266/10000 [2:00:28<04:56,  5.85it/s]

Step 8265/10000, Loss: 0.2921
Step 8266/10000, Loss: 0.2789


Training Progress:  83%|██████████████████████████████████████████████▎         | 8268/10000 [2:00:29<04:58,  5.81it/s]

Step 8267/10000, Loss: 0.2490
Step 8268/10000, Loss: 0.2764


Training Progress:  83%|██████████████████████████████████████████████▎         | 8270/10000 [2:00:29<05:01,  5.74it/s]

Step 8269/10000, Loss: 0.2906
Step 8270/10000, Loss: 0.3205


Training Progress:  83%|██████████████████████████████████████████████▎         | 8272/10000 [2:00:29<05:02,  5.72it/s]

Step 8271/10000, Loss: 0.3322
Step 8272/10000, Loss: 0.3293


Training Progress:  83%|██████████████████████████████████████████████▎         | 8274/10000 [2:00:30<04:54,  5.86it/s]

Step 8273/10000, Loss: 0.3361
Step 8274/10000, Loss: 0.3511


Training Progress:  83%|██████████████████████████████████████████████▎         | 8276/10000 [2:00:30<04:56,  5.81it/s]

Step 8275/10000, Loss: 0.3490
Step 8276/10000, Loss: 0.3437


Training Progress:  83%|██████████████████████████████████████████████▎         | 8278/10000 [2:00:30<04:59,  5.75it/s]

Step 8277/10000, Loss: 0.3284
Step 8278/10000, Loss: 0.4267


Training Progress:  83%|██████████████████████████████████████████████▎         | 8280/10000 [2:00:31<05:01,  5.70it/s]

Step 8279/10000, Loss: 0.4118
Step 8280/10000, Loss: 0.4231


Training Progress:  83%|██████████████████████████████████████████████▍         | 8282/10000 [2:00:31<04:53,  5.85it/s]

Step 8281/10000, Loss: 0.3959
Step 8282/10000, Loss: 0.4269


Training Progress:  83%|██████████████████████████████████████████████▍         | 8284/10000 [2:00:32<04:55,  5.81it/s]

Step 8283/10000, Loss: 0.4297
Step 8284/10000, Loss: 0.4123


Training Progress:  83%|██████████████████████████████████████████████▍         | 8286/10000 [2:00:32<04:58,  5.74it/s]

Step 8285/10000, Loss: 0.4320
Step 8286/10000, Loss: 0.4384


Training Progress:  83%|██████████████████████████████████████████████▍         | 8288/10000 [2:00:32<05:01,  5.68it/s]

Step 8287/10000, Loss: 0.5053
Step 8288/10000, Loss: 0.4560


Training Progress:  83%|██████████████████████████████████████████████▍         | 8290/10000 [2:00:33<04:58,  5.72it/s]

Step 8289/10000, Loss: 0.5104
Step 8290/10000, Loss: 0.5051


Training Progress:  83%|██████████████████████████████████████████████▍         | 8292/10000 [2:00:33<04:52,  5.85it/s]

Step 8291/10000, Loss: 0.5010
Step 8292/10000, Loss: 0.5822


Training Progress:  83%|██████████████████████████████████████████████▍         | 8294/10000 [2:00:33<04:58,  5.71it/s]

Step 8293/10000, Loss: 0.5712
Step 8294/10000, Loss: 0.5306


Training Progress:  83%|██████████████████████████████████████████████▍         | 8296/10000 [2:00:34<04:57,  5.73it/s]

Step 8295/10000, Loss: 0.6164
Step 8296/10000, Loss: 0.5911


Training Progress:  83%|██████████████████████████████████████████████▍         | 8298/10000 [2:00:34<04:50,  5.86it/s]

Step 8297/10000, Loss: 0.5985
Step 8298/10000, Loss: 0.6733


Training Progress:  83%|██████████████████████████████████████████████▍         | 8300/10000 [2:00:34<04:57,  5.71it/s]

Step 8299/10000, Loss: 0.6616
Step 8300/10000, Loss: 0.6864


Training Progress:  83%|██████████████████████████████████████████████▍         | 8302/10000 [2:00:35<04:52,  5.81it/s]

Step 8301/10000, Loss: 0.6592
Step 8302/10000, Loss: 0.7240


Training Progress:  83%|██████████████████████████████████████████████▌         | 8304/10000 [2:00:35<04:55,  5.74it/s]

Step 8303/10000, Loss: 0.6985
Step 8304/10000, Loss: 0.6886


Training Progress:  83%|██████████████████████████████████████████████▌         | 8306/10000 [2:00:35<04:48,  5.87it/s]

Step 8305/10000, Loss: 0.7278
Step 8306/10000, Loss: 0.7087


Training Progress:  83%|██████████████████████████████████████████████▌         | 8308/10000 [2:00:36<04:50,  5.82it/s]

Step 8307/10000, Loss: 0.6967
Step 8308/10000, Loss: 0.7471


Training Progress:  83%|██████████████████████████████████████████████▌         | 8310/10000 [2:00:36<04:54,  5.74it/s]

Step 8309/10000, Loss: 0.8031
Step 8310/10000, Loss: 0.7560


Training Progress:  83%|██████████████████████████████████████████████▌         | 8312/10000 [2:00:36<04:56,  5.69it/s]

Step 8311/10000, Loss: 0.8277
Step 8312/10000, Loss: 0.8035


Training Progress:  83%|██████████████████████████████████████████████▌         | 8314/10000 [2:00:37<04:55,  5.71it/s]

Step 8313/10000, Loss: 0.8984
Step 8314/10000, Loss: 0.8325


Training Progress:  83%|██████████████████████████████████████████████▌         | 8316/10000 [2:00:37<04:48,  5.84it/s]

Step 8315/10000, Loss: 0.8076
Step 8316/10000, Loss: 0.9213


Training Progress:  83%|██████████████████████████████████████████████▌         | 8318/10000 [2:00:37<04:49,  5.81it/s]

Step 8317/10000, Loss: 0.8398
Step 8318/10000, Loss: 0.8599


Training Progress:  83%|██████████████████████████████████████████████▌         | 8320/10000 [2:00:38<04:52,  5.74it/s]

Step 8319/10000, Loss: 0.8416
Step 8320/10000, Loss: 0.9096


Training Progress:  83%|██████████████████████████████████████████████▌         | 8322/10000 [2:00:38<04:53,  5.71it/s]

Step 8321/10000, Loss: 0.9643
Step 8322/10000, Loss: 0.9366


Training Progress:  83%|██████████████████████████████████████████████▌         | 8324/10000 [2:00:38<04:45,  5.86it/s]

Step 8323/10000, Loss: 0.9311
Step 8324/10000, Loss: 0.9495


Training Progress:  83%|██████████████████████████████████████████████▋         | 8326/10000 [2:00:39<04:47,  5.81it/s]

Step 8325/10000, Loss: 0.9932
Step 8326/10000, Loss: 1.0076


Training Progress:  83%|██████████████████████████████████████████████▋         | 8328/10000 [2:00:39<04:51,  5.74it/s]

Step 8327/10000, Loss: 1.0297
Step 8328/10000, Loss: 1.0221


Training Progress:  83%|██████████████████████████████████████████████▋         | 8330/10000 [2:00:39<04:52,  5.70it/s]

Step 8329/10000, Loss: 1.1636
Step 8330/10000, Loss: 1.0551


Training Progress:  83%|██████████████████████████████████████████████▋         | 8332/10000 [2:00:40<04:44,  5.85it/s]

Step 8331/10000, Loss: 1.1058
Step 8332/10000, Loss: 1.1046


Training Progress:  83%|██████████████████████████████████████████████▋         | 8334/10000 [2:00:40<04:47,  5.79it/s]

Step 8333/10000, Loss: 1.0783
Step 8334/10000, Loss: 1.0861


Training Progress:  83%|██████████████████████████████████████████████▋         | 8336/10000 [2:00:41<04:48,  5.76it/s]

Step 8335/10000, Loss: 1.1118
Step 8336/10000, Loss: 1.1254


Training Progress:  83%|██████████████████████████████████████████████▋         | 8338/10000 [2:00:41<04:52,  5.69it/s]

Step 8337/10000, Loss: 1.1889
Step 8338/10000, Loss: 1.2625


Training Progress:  83%|██████████████████████████████████████████████▋         | 8340/10000 [2:00:41<04:51,  5.70it/s]

Step 8339/10000, Loss: 1.2272
Step 8340/10000, Loss: 1.1401


Training Progress:  83%|██████████████████████████████████████████████▋         | 8342/10000 [2:00:42<04:44,  5.84it/s]

Step 8341/10000, Loss: 1.2198
Step 8342/10000, Loss: 1.1145


Training Progress:  83%|██████████████████████████████████████████████▋         | 8344/10000 [2:00:42<04:47,  5.75it/s]

Step 8343/10000, Loss: 1.2046
Step 8344/10000, Loss: 1.2111


Training Progress:  83%|██████████████████████████████████████████████▋         | 8346/10000 [2:00:42<04:50,  5.69it/s]

Step 8345/10000, Loss: 1.0713
Step 8346/10000, Loss: 1.1431


Training Progress:  83%|██████████████████████████████████████████████▋         | 8348/10000 [2:00:43<04:41,  5.86it/s]

Step 8347/10000, Loss: 1.1947
Step 8348/10000, Loss: 1.1820


Training Progress:  84%|██████████████████████████████████████████████▊         | 8350/10000 [2:00:43<04:43,  5.81it/s]

Step 8349/10000, Loss: 1.0605
Step 8350/10000, Loss: 1.1257


Training Progress:  84%|██████████████████████████████████████████████▊         | 8352/10000 [2:00:43<04:47,  5.73it/s]

Step 8351/10000, Loss: 1.1166
Step 8352/10000, Loss: 1.1363


Training Progress:  84%|██████████████████████████████████████████████▊         | 8354/10000 [2:00:44<04:45,  5.76it/s]

Step 8353/10000, Loss: 1.1569
Step 8354/10000, Loss: 1.3081


Training Progress:  84%|██████████████████████████████████████████████▊         | 8356/10000 [2:00:44<04:38,  5.90it/s]

Step 8355/10000, Loss: 1.2459
Step 8356/10000, Loss: 1.2437


Training Progress:  84%|██████████████████████████████████████████████▊         | 8358/10000 [2:00:44<04:45,  5.75it/s]

Step 8357/10000, Loss: 1.2066
Step 8358/10000, Loss: 1.2093


Training Progress:  84%|██████████████████████████████████████████████▊         | 8360/10000 [2:00:45<04:44,  5.76it/s]

Step 8359/10000, Loss: 1.1186
Step 8360/10000, Loss: 1.2132


Training Progress:  84%|██████████████████████████████████████████████▊         | 8362/10000 [2:00:45<04:42,  5.79it/s]

Step 8361/10000, Loss: 1.1744
Step 8362/10000, Loss: 1.2116


Training Progress:  84%|██████████████████████████████████████████████▊         | 8364/10000 [2:00:45<04:43,  5.78it/s]

Step 8363/10000, Loss: 1.3462
Step 8364/10000, Loss: 1.3334


Training Progress:  84%|██████████████████████████████████████████████▊         | 8366/10000 [2:00:46<04:42,  5.78it/s]

Step 8365/10000, Loss: 1.3856
Step 8366/10000, Loss: 1.3578


Training Progress:  84%|██████████████████████████████████████████████▊         | 8368/10000 [2:00:46<04:41,  5.79it/s]

Step 8367/10000, Loss: 1.3261
Step 8368/10000, Loss: 1.2678


Training Progress:  84%|██████████████████████████████████████████████▊         | 8370/10000 [2:00:46<04:35,  5.92it/s]

Step 8369/10000, Loss: 1.3141
Step 8370/10000, Loss: 1.2051


Training Progress:  84%|██████████████████████████████████████████████▉         | 8372/10000 [2:00:47<04:42,  5.75it/s]

Step 8371/10000, Loss: 1.2995
Step 8372/10000, Loss: 1.2575


Training Progress:  84%|██████████████████████████████████████████████▉         | 8374/10000 [2:00:47<04:41,  5.77it/s]

Step 8373/10000, Loss: 1.2959
Step 8374/10000, Loss: 1.3512


Training Progress:  84%|██████████████████████████████████████████████▉         | 8376/10000 [2:00:47<04:44,  5.72it/s]

Step 8375/10000, Loss: 1.4077
Step 8376/10000, Loss: 1.3858


Training Progress:  84%|██████████████████████████████████████████████▉         | 8378/10000 [2:00:48<04:41,  5.75it/s]

Step 8377/10000, Loss: 1.4643
Step 8378/10000, Loss: 1.4302


Training Progress:  84%|██████████████████████████████████████████████▉         | 8380/10000 [2:00:48<04:43,  5.71it/s]

Step 8379/10000, Loss: 1.3112
Step 8380/10000, Loss: 1.3875


Training Progress:  84%|██████████████████████████████████████████████▉         | 8382/10000 [2:00:48<04:38,  5.81it/s]

Step 8381/10000, Loss: 1.3299
Step 8382/10000, Loss: 1.3073


Training Progress:  84%|██████████████████████████████████████████████▉         | 8384/10000 [2:00:49<04:37,  5.82it/s]

Step 8383/10000, Loss: 1.3407
Step 8384/10000, Loss: 1.2260


Training Progress:  84%|██████████████████████████████████████████████▉         | 8386/10000 [2:00:49<04:43,  5.70it/s]

Step 8385/10000, Loss: 1.2779
Step 8386/10000, Loss: 1.2872


Training Progress:  84%|██████████████████████████████████████████████▉         | 8388/10000 [2:00:50<04:42,  5.71it/s]

Step 8387/10000, Loss: 1.3431
Step 8388/10000, Loss: 1.3664


Training Progress:  84%|██████████████████████████████████████████████▉         | 8390/10000 [2:00:50<04:34,  5.86it/s]

Step 8389/10000, Loss: 1.3112
Step 8390/10000, Loss: 1.3112


Training Progress:  84%|██████████████████████████████████████████████▉         | 8392/10000 [2:00:50<04:36,  5.82it/s]

Step 8391/10000, Loss: 1.3876
Step 8392/10000, Loss: 1.3053


Training Progress:  84%|███████████████████████████████████████████████         | 8394/10000 [2:00:51<04:39,  5.75it/s]

Step 8393/10000, Loss: 1.3677
Step 8394/10000, Loss: 1.3022


Training Progress:  84%|███████████████████████████████████████████████         | 8396/10000 [2:00:51<04:40,  5.71it/s]

Step 8395/10000, Loss: 1.3066
Step 8396/10000, Loss: 1.2734


Training Progress:  84%|███████████████████████████████████████████████         | 8398/10000 [2:00:51<04:35,  5.82it/s]

Step 8397/10000, Loss: 1.2685
Step 8398/10000, Loss: 1.3298


Training Progress:  84%|███████████████████████████████████████████████         | 8400/10000 [2:00:52<04:34,  5.82it/s]

Step 8399/10000, Loss: 1.2980
Step 8400/10000, Loss: 1.3578


Training Progress:  84%|███████████████████████████████████████████████         | 8402/10000 [2:00:52<04:37,  5.75it/s]

Step 8401/10000, Loss: 1.2689
Step 8402/10000, Loss: 1.2703


Training Progress:  84%|███████████████████████████████████████████████         | 8404/10000 [2:00:52<04:33,  5.83it/s]

Step 8403/10000, Loss: 1.3088
Step 8404/10000, Loss: 1.3372


Training Progress:  84%|███████████████████████████████████████████████         | 8406/10000 [2:00:53<04:32,  5.84it/s]

Step 8405/10000, Loss: 1.2516
Step 8406/10000, Loss: 1.2979


Training Progress:  84%|███████████████████████████████████████████████         | 8408/10000 [2:00:53<04:37,  5.75it/s]

Step 8407/10000, Loss: 1.3159
Step 8408/10000, Loss: 1.3149


Training Progress:  84%|███████████████████████████████████████████████         | 8410/10000 [2:00:53<04:36,  5.76it/s]

Step 8409/10000, Loss: 1.3957
Step 8410/10000, Loss: 1.2805


Training Progress:  84%|███████████████████████████████████████████████         | 8412/10000 [2:00:54<04:38,  5.71it/s]

Step 8411/10000, Loss: 1.3495
Step 8412/10000, Loss: 1.2323


Training Progress:  84%|███████████████████████████████████████████████         | 8414/10000 [2:00:54<04:30,  5.86it/s]

Step 8413/10000, Loss: 1.3397
Step 8414/10000, Loss: 1.2588


Training Progress:  84%|███████████████████████████████████████████████▏        | 8416/10000 [2:00:54<04:31,  5.83it/s]

Step 8415/10000, Loss: 1.2576
Step 8416/10000, Loss: 1.1810


Training Progress:  84%|███████████████████████████████████████████████▏        | 8418/10000 [2:00:55<04:33,  5.79it/s]

Step 8417/10000, Loss: 1.3890
Step 8418/10000, Loss: 1.3515


Training Progress:  84%|███████████████████████████████████████████████▏        | 8420/10000 [2:00:55<04:35,  5.74it/s]

Step 8419/10000, Loss: 1.3243
Step 8420/10000, Loss: 1.4344


Training Progress:  84%|███████████████████████████████████████████████▏        | 8422/10000 [2:00:55<04:34,  5.74it/s]

Step 8421/10000, Loss: 1.4251
Step 8422/10000, Loss: 1.2754


Training Progress:  84%|███████████████████████████████████████████████▏        | 8424/10000 [2:00:56<04:28,  5.86it/s]

Step 8423/10000, Loss: 1.3407
Step 8424/10000, Loss: 1.1362


Training Progress:  84%|███████████████████████████████████████████████▏        | 8426/10000 [2:00:56<04:30,  5.81it/s]

Step 8425/10000, Loss: 1.2868
Step 8426/10000, Loss: 1.2604


Training Progress:  84%|███████████████████████████████████████████████▏        | 8428/10000 [2:00:56<04:35,  5.71it/s]

Step 8427/10000, Loss: 1.1453
Step 8428/10000, Loss: 1.0842


Training Progress:  84%|███████████████████████████████████████████████▏        | 8430/10000 [2:00:57<04:34,  5.72it/s]

Step 8429/10000, Loss: 1.2067
Step 8430/10000, Loss: 1.1787


Training Progress:  84%|███████████████████████████████████████████████▏        | 8432/10000 [2:00:57<04:27,  5.86it/s]

Step 8431/10000, Loss: 1.0947
Step 8432/10000, Loss: 1.1517


Training Progress:  84%|███████████████████████████████████████████████▏        | 8434/10000 [2:00:57<04:32,  5.74it/s]

Step 8433/10000, Loss: 1.1557
Step 8434/10000, Loss: 1.1077


Training Progress:  84%|███████████████████████████████████████████████▏        | 8436/10000 [2:00:58<04:31,  5.76it/s]

Step 8435/10000, Loss: 1.1076
Step 8436/10000, Loss: 1.2840


Training Progress:  84%|███████████████████████████████████████████████▎        | 8438/10000 [2:00:58<04:33,  5.71it/s]

Step 8437/10000, Loss: 1.1697
Step 8438/10000, Loss: 1.1881


Training Progress:  84%|███████████████████████████████████████████████▎        | 8440/10000 [2:00:59<04:26,  5.84it/s]

Step 8439/10000, Loss: 1.1291
Step 8440/10000, Loss: 1.0970


Training Progress:  84%|███████████████████████████████████████████████▎        | 8442/10000 [2:00:59<04:27,  5.82it/s]

Step 8441/10000, Loss: 1.0574
Step 8442/10000, Loss: 1.0273


Training Progress:  84%|███████████████████████████████████████████████▎        | 8444/10000 [2:00:59<04:32,  5.72it/s]

Step 8443/10000, Loss: 1.0281
Step 8444/10000, Loss: 1.0074


Training Progress:  84%|███████████████████████████████████████████████▎        | 8446/10000 [2:01:00<04:29,  5.77it/s]

Step 8445/10000, Loss: 1.2314
Step 8446/10000, Loss: 1.1497


Training Progress:  84%|███████████████████████████████████████████████▎        | 8448/10000 [2:01:00<04:31,  5.72it/s]

Step 8447/10000, Loss: 1.1327
Step 8448/10000, Loss: 1.1295


Training Progress:  84%|███████████████████████████████████████████████▎        | 8450/10000 [2:01:00<04:25,  5.83it/s]

Step 8449/10000, Loss: 1.1580
Step 8450/10000, Loss: 1.1277


Training Progress:  85%|███████████████████████████████████████████████▎        | 8452/10000 [2:01:01<04:26,  5.82it/s]

Step 8451/10000, Loss: 1.1424
Step 8452/10000, Loss: 0.9822


Training Progress:  85%|███████████████████████████████████████████████▎        | 8454/10000 [2:01:01<04:28,  5.75it/s]

Step 8453/10000, Loss: 1.0448
Step 8454/10000, Loss: 1.0614


Training Progress:  85%|███████████████████████████████████████████████▎        | 8456/10000 [2:01:01<04:27,  5.78it/s]

Step 8455/10000, Loss: 1.0149
Step 8456/10000, Loss: 1.0669


Training Progress:  85%|███████████████████████████████████████████████▎        | 8458/10000 [2:01:02<04:23,  5.84it/s]

Step 8457/10000, Loss: 1.0821
Step 8458/10000, Loss: 1.1054


Training Progress:  85%|███████████████████████████████████████████████▍        | 8460/10000 [2:01:02<04:29,  5.71it/s]

Step 8459/10000, Loss: 1.0902
Step 8460/10000, Loss: 1.1585


Training Progress:  85%|███████████████████████████████████████████████▍        | 8462/10000 [2:01:02<04:26,  5.77it/s]

Step 8461/10000, Loss: 1.0376
Step 8462/10000, Loss: 1.1285


Training Progress:  85%|███████████████████████████████████████████████▍        | 8464/10000 [2:01:03<04:28,  5.72it/s]

Step 8463/10000, Loss: 1.0249
Step 8464/10000, Loss: 1.0314


Training Progress:  85%|███████████████████████████████████████████████▍        | 8466/10000 [2:01:03<04:29,  5.70it/s]

Step 8465/10000, Loss: 1.0477
Step 8466/10000, Loss: 1.0185


Training Progress:  85%|███████████████████████████████████████████████▍        | 8468/10000 [2:01:03<04:21,  5.86it/s]

Step 8467/10000, Loss: 1.0119
Step 8468/10000, Loss: 1.0127


Training Progress:  85%|███████████████████████████████████████████████▍        | 8470/10000 [2:01:04<04:23,  5.81it/s]

Step 8469/10000, Loss: 1.0274
Step 8470/10000, Loss: 1.0675


Training Progress:  85%|███████████████████████████████████████████████▍        | 8472/10000 [2:01:04<04:24,  5.77it/s]

Step 8471/10000, Loss: 1.0839
Step 8472/10000, Loss: 1.0506


Training Progress:  85%|███████████████████████████████████████████████▍        | 8474/10000 [2:01:04<04:25,  5.75it/s]

Step 8473/10000, Loss: 1.0333
Step 8474/10000, Loss: 0.9961


Training Progress:  85%|███████████████████████████████████████████████▍        | 8476/10000 [2:01:05<04:19,  5.88it/s]

Step 8475/10000, Loss: 0.9744
Step 8476/10000, Loss: 1.0003


Training Progress:  85%|███████████████████████████████████████████████▍        | 8478/10000 [2:01:05<04:21,  5.81it/s]

Step 8477/10000, Loss: 0.9369
Step 8478/10000, Loss: 0.9526


Training Progress:  85%|███████████████████████████████████████████████▍        | 8480/10000 [2:01:05<04:24,  5.75it/s]

Step 8479/10000, Loss: 0.8879
Step 8480/10000, Loss: 1.0259


Training Progress:  85%|███████████████████████████████████████████████▍        | 8482/10000 [2:01:06<04:25,  5.71it/s]

Step 8481/10000, Loss: 0.9757
Step 8482/10000, Loss: 0.9204


Training Progress:  85%|███████████████████████████████████████████████▌        | 8484/10000 [2:01:06<04:25,  5.71it/s]

Step 8483/10000, Loss: 0.9249
Step 8484/10000, Loss: 0.9173


Training Progress:  85%|███████████████████████████████████████████████▌        | 8486/10000 [2:01:06<04:19,  5.84it/s]

Step 8485/10000, Loss: 0.9916
Step 8486/10000, Loss: 0.9070


Training Progress:  85%|███████████████████████████████████████████████▌        | 8488/10000 [2:01:07<04:22,  5.76it/s]

Step 8487/10000, Loss: 0.8469
Step 8488/10000, Loss: 0.8901


Training Progress:  85%|███████████████████████████████████████████████▌        | 8490/10000 [2:01:07<04:24,  5.70it/s]

Step 8489/10000, Loss: 0.9177
Step 8490/10000, Loss: 0.9300


Training Progress:  85%|███████████████████████████████████████████████▌        | 8492/10000 [2:01:08<04:17,  5.86it/s]

Step 8491/10000, Loss: 0.9674
Step 8492/10000, Loss: 0.9792


Training Progress:  85%|███████████████████████████████████████████████▌        | 8494/10000 [2:01:08<04:19,  5.80it/s]

Step 8493/10000, Loss: 0.9479
Step 8494/10000, Loss: 0.8929


Training Progress:  85%|███████████████████████████████████████████████▌        | 8496/10000 [2:01:08<04:22,  5.72it/s]

Step 8495/10000, Loss: 0.9506
Step 8496/10000, Loss: 0.8791


Training Progress:  85%|███████████████████████████████████████████████▌        | 8498/10000 [2:01:09<04:21,  5.74it/s]

Step 8497/10000, Loss: 0.8485
Step 8498/10000, Loss: 0.8523


Training Progress:  85%|███████████████████████████████████████████████▌        | 8500/10000 [2:01:09<04:21,  5.73it/s]

Step 8499/10000, Loss: 0.9495
Step 8500/10000, Loss: 0.9407


Training Progress:  85%|███████████████████████████████████████████████▌        | 8502/10000 [2:01:09<04:16,  5.83it/s]

Step 8501/10000, Loss: 0.8825
Step 8502/10000, Loss: 0.9853


Training Progress:  85%|███████████████████████████████████████████████▌        | 8504/10000 [2:01:10<04:16,  5.82it/s]

Step 8503/10000, Loss: 0.9760
Step 8504/10000, Loss: 0.8415


Training Progress:  85%|███████████████████████████████████████████████▋        | 8506/10000 [2:01:10<04:19,  5.75it/s]

Step 8505/10000, Loss: 0.8715
Step 8506/10000, Loss: 0.8505


Training Progress:  85%|███████████████████████████████████████████████▋        | 8508/10000 [2:01:10<04:21,  5.70it/s]

Step 8507/10000, Loss: 0.8807
Step 8508/10000, Loss: 0.8136


Training Progress:  85%|███████████████████████████████████████████████▋        | 8510/10000 [2:01:11<04:15,  5.83it/s]

Step 8509/10000, Loss: 0.7555
Step 8510/10000, Loss: 0.7190


Training Progress:  85%|███████████████████████████████████████████████▋        | 8512/10000 [2:01:11<04:15,  5.82it/s]

Step 8511/10000, Loss: 0.8366
Step 8512/10000, Loss: 0.8209


Training Progress:  85%|███████████████████████████████████████████████▋        | 8514/10000 [2:01:11<04:16,  5.80it/s]

Step 8513/10000, Loss: 0.7305
Step 8514/10000, Loss: 0.8240


Training Progress:  85%|███████████████████████████████████████████████▋        | 8516/10000 [2:01:12<04:18,  5.73it/s]

Step 8515/10000, Loss: 0.7834
Step 8516/10000, Loss: 0.7706


Training Progress:  85%|███████████████████████████████████████████████▋        | 8518/10000 [2:01:12<04:12,  5.86it/s]

Step 8517/10000, Loss: 0.7418
Step 8518/10000, Loss: 0.8869


Training Progress:  85%|███████████████████████████████████████████████▋        | 8520/10000 [2:01:12<04:14,  5.82it/s]

Step 8519/10000, Loss: 0.8007
Step 8520/10000, Loss: 0.8059


Training Progress:  85%|███████████████████████████████████████████████▋        | 8522/10000 [2:01:13<04:15,  5.79it/s]

Step 8521/10000, Loss: 0.7934
Step 8522/10000, Loss: 0.7467


Training Progress:  85%|███████████████████████████████████████████████▋        | 8524/10000 [2:01:13<04:17,  5.72it/s]

Step 8523/10000, Loss: 0.7264
Step 8524/10000, Loss: 0.7044


Training Progress:  85%|███████████████████████████████████████████████▋        | 8526/10000 [2:01:13<04:16,  5.74it/s]

Step 8525/10000, Loss: 0.7029
Step 8526/10000, Loss: 0.6436


Training Progress:  85%|███████████████████████████████████████████████▊        | 8528/10000 [2:01:14<04:10,  5.87it/s]

Step 8527/10000, Loss: 0.7904
Step 8528/10000, Loss: 0.7333


Training Progress:  85%|███████████████████████████████████████████████▊        | 8530/10000 [2:01:14<04:13,  5.81it/s]

Step 8529/10000, Loss: 0.7887
Step 8530/10000, Loss: 0.7137


Training Progress:  85%|███████████████████████████████████████████████▊        | 8532/10000 [2:01:14<04:15,  5.75it/s]

Step 8531/10000, Loss: 0.7636
Step 8532/10000, Loss: 0.7664


Training Progress:  85%|███████████████████████████████████████████████▊        | 8534/10000 [2:01:15<04:16,  5.72it/s]

Step 8533/10000, Loss: 0.7880
Step 8534/10000, Loss: 0.6572


Training Progress:  85%|███████████████████████████████████████████████▊        | 8536/10000 [2:01:15<04:09,  5.86it/s]

Step 8535/10000, Loss: 0.7512
Step 8536/10000, Loss: 0.7089


Training Progress:  85%|███████████████████████████████████████████████▊        | 8538/10000 [2:01:15<04:11,  5.82it/s]

Step 8537/10000, Loss: 0.6682
Step 8538/10000, Loss: 0.6915


Training Progress:  85%|███████████████████████████████████████████████▊        | 8540/10000 [2:01:16<04:14,  5.74it/s]

Step 8539/10000, Loss: 0.7089
Step 8540/10000, Loss: 0.6646


Training Progress:  85%|███████████████████████████████████████████████▊        | 8542/10000 [2:01:16<04:15,  5.71it/s]

Step 8541/10000, Loss: 0.6810
Step 8542/10000, Loss: 0.7282


Training Progress:  85%|███████████████████████████████████████████████▊        | 8544/10000 [2:01:17<04:15,  5.70it/s]

Step 8543/10000, Loss: 0.6556
Step 8544/10000, Loss: 0.6943


Training Progress:  85%|███████████████████████████████████████████████▊        | 8546/10000 [2:01:17<04:08,  5.85it/s]

Step 8545/10000, Loss: 0.6641
Step 8546/10000, Loss: 0.6418


Training Progress:  85%|███████████████████████████████████████████████▊        | 8548/10000 [2:01:17<04:15,  5.69it/s]

Step 8547/10000, Loss: 0.6953
Step 8548/10000, Loss: 0.6718


Training Progress:  86%|███████████████████████████████████████████████▉        | 8550/10000 [2:01:18<04:13,  5.73it/s]

Step 8549/10000, Loss: 0.6961
Step 8550/10000, Loss: 0.6591


Training Progress:  86%|███████████████████████████████████████████████▉        | 8552/10000 [2:01:18<04:13,  5.71it/s]

Step 8551/10000, Loss: 0.6465
Step 8552/10000, Loss: 0.6545


Training Progress:  86%|███████████████████████████████████████████████▉        | 8554/10000 [2:01:18<04:08,  5.82it/s]

Step 8553/10000, Loss: 0.6753
Step 8554/10000, Loss: 0.6630


Training Progress:  86%|███████████████████████████████████████████████▉        | 8556/10000 [2:01:19<04:08,  5.80it/s]

Step 8555/10000, Loss: 0.6466
Step 8556/10000, Loss: 0.6325


Training Progress:  86%|███████████████████████████████████████████████▉        | 8558/10000 [2:01:19<04:10,  5.75it/s]

Step 8557/10000, Loss: 0.6280
Step 8558/10000, Loss: 0.5864


Training Progress:  86%|███████████████████████████████████████████████▉        | 8560/10000 [2:01:19<04:13,  5.69it/s]

Step 8559/10000, Loss: 0.5933
Step 8560/10000, Loss: 0.6017


Training Progress:  86%|███████████████████████████████████████████████▉        | 8562/10000 [2:01:20<04:12,  5.70it/s]

Step 8561/10000, Loss: 0.5781
Step 8562/10000, Loss: 0.6629


Training Progress:  86%|███████████████████████████████████████████████▉        | 8564/10000 [2:01:20<04:05,  5.85it/s]

Step 8563/10000, Loss: 0.5982
Step 8564/10000, Loss: 0.5917


Training Progress:  86%|███████████████████████████████████████████████▉        | 8566/10000 [2:01:20<04:06,  5.81it/s]

Step 8565/10000, Loss: 0.5607
Step 8566/10000, Loss: 0.5776


Training Progress:  86%|███████████████████████████████████████████████▉        | 8568/10000 [2:01:21<04:10,  5.71it/s]

Step 8567/10000, Loss: 0.6315
Step 8568/10000, Loss: 0.5659


Training Progress:  86%|███████████████████████████████████████████████▉        | 8570/10000 [2:01:21<04:07,  5.78it/s]

Step 8569/10000, Loss: 0.5303
Step 8570/10000, Loss: 0.5721


Training Progress:  86%|████████████████████████████████████████████████        | 8572/10000 [2:01:21<04:05,  5.82it/s]

Step 8571/10000, Loss: 0.5857
Step 8572/10000, Loss: 0.5416


Training Progress:  86%|████████████████████████████████████████████████        | 8574/10000 [2:01:22<04:05,  5.81it/s]

Step 8573/10000, Loss: 0.6121
Step 8574/10000, Loss: 0.5956


Training Progress:  86%|████████████████████████████████████████████████        | 8576/10000 [2:01:22<04:05,  5.79it/s]

Step 8575/10000, Loss: 0.5615
Step 8576/10000, Loss: 0.5612


Training Progress:  86%|████████████████████████████████████████████████        | 8578/10000 [2:01:22<04:06,  5.78it/s]

Step 8577/10000, Loss: 0.5813
Step 8578/10000, Loss: 0.5281


Training Progress:  86%|████████████████████████████████████████████████        | 8580/10000 [2:01:23<04:08,  5.71it/s]

Step 8579/10000, Loss: 0.5181
Step 8580/10000, Loss: 0.4950


Training Progress:  86%|████████████████████████████████████████████████        | 8582/10000 [2:01:23<04:03,  5.82it/s]

Step 8581/10000, Loss: 0.5457
Step 8582/10000, Loss: 0.5672


Training Progress:  86%|████████████████████████████████████████████████        | 8584/10000 [2:01:23<04:02,  5.83it/s]

Step 8583/10000, Loss: 0.5604
Step 8584/10000, Loss: 0.5799


Training Progress:  86%|████████████████████████████████████████████████        | 8586/10000 [2:01:24<04:08,  5.70it/s]

Step 8585/10000, Loss: 0.5631
Step 8586/10000, Loss: 0.5111


Training Progress:  86%|████████████████████████████████████████████████        | 8588/10000 [2:01:24<04:06,  5.72it/s]

Step 8587/10000, Loss: 0.5403
Step 8588/10000, Loss: 0.4976


Training Progress:  86%|████████████████████████████████████████████████        | 8590/10000 [2:01:24<04:01,  5.85it/s]

Step 8589/10000, Loss: 0.5404
Step 8590/10000, Loss: 0.5205


Training Progress:  86%|████████████████████████████████████████████████        | 8592/10000 [2:01:25<04:01,  5.83it/s]

Step 8591/10000, Loss: 0.4520
Step 8592/10000, Loss: 0.4313


Training Progress:  86%|████████████████████████████████████████████████▏       | 8594/10000 [2:01:25<04:02,  5.81it/s]

Step 8593/10000, Loss: 0.5127
Step 8594/10000, Loss: 0.4837


Training Progress:  86%|████████████████████████████████████████████████▏       | 8596/10000 [2:01:26<04:04,  5.74it/s]

Step 8595/10000, Loss: 0.4527
Step 8596/10000, Loss: 0.5032


Training Progress:  86%|████████████████████████████████████████████████▏       | 8598/10000 [2:01:26<04:05,  5.72it/s]

Step 8597/10000, Loss: 0.4539
Step 8598/10000, Loss: 0.4624


Training Progress:  86%|████████████████████████████████████████████████▏       | 8600/10000 [2:01:26<04:04,  5.72it/s]

Step 8599/10000, Loss: 0.4832
Step 8600/10000, Loss: 0.5572


Training Progress:  86%|████████████████████████████████████████████████▏       | 8602/10000 [2:01:27<03:57,  5.88it/s]

Step 8601/10000, Loss: 0.5015
Step 8602/10000, Loss: 0.4975


Training Progress:  86%|████████████████████████████████████████████████▏       | 8604/10000 [2:01:27<04:06,  5.66it/s]

Step 8603/10000, Loss: 0.4669
Step 8604/10000, Loss: 0.4764


Training Progress:  86%|████████████████████████████████████████████████▏       | 8606/10000 [2:01:27<04:06,  5.65it/s]

Step 8605/10000, Loss: 0.3923
Step 8606/10000, Loss: 0.4224


Training Progress:  86%|████████████████████████████████████████████████▏       | 8608/10000 [2:01:28<04:05,  5.68it/s]

Step 8607/10000, Loss: 0.4122
Step 8608/10000, Loss: 0.4103


Training Progress:  86%|████████████████████████████████████████████████▏       | 8610/10000 [2:01:28<04:02,  5.73it/s]

Step 8609/10000, Loss: 0.4324
Step 8610/10000, Loss: 0.4398


Training Progress:  86%|████████████████████████████████████████████████▏       | 8612/10000 [2:01:28<04:03,  5.71it/s]

Step 8611/10000, Loss: 0.4700
Step 8612/10000, Loss: 0.4320


Training Progress:  86%|████████████████████████████████████████████████▏       | 8614/10000 [2:01:29<04:04,  5.68it/s]

Step 8613/10000, Loss: 0.4518
Step 8614/10000, Loss: 0.4354


Training Progress:  86%|████████████████████████████████████████████████▏       | 8616/10000 [2:01:29<03:56,  5.85it/s]

Step 8615/10000, Loss: 0.4543
Step 8616/10000, Loss: 0.4105


Training Progress:  86%|████████████████████████████████████████████████▎       | 8618/10000 [2:01:29<03:57,  5.82it/s]

Step 8617/10000, Loss: 0.4330
Step 8618/10000, Loss: 0.4242


Training Progress:  86%|████████████████████████████████████████████████▎       | 8620/10000 [2:01:30<03:58,  5.79it/s]

Step 8619/10000, Loss: 0.3885
Step 8620/10000, Loss: 0.4008


Training Progress:  86%|████████████████████████████████████████████████▎       | 8622/10000 [2:01:30<04:00,  5.74it/s]

Step 8621/10000, Loss: 0.4368
Step 8622/10000, Loss: 0.4373


Training Progress:  86%|████████████████████████████████████████████████▎       | 8624/10000 [2:01:30<03:57,  5.80it/s]

Step 8623/10000, Loss: 0.4016
Step 8624/10000, Loss: 0.4200


Training Progress:  86%|████████████████████████████████████████████████▎       | 8626/10000 [2:01:31<03:56,  5.80it/s]

Step 8625/10000, Loss: 0.4038
Step 8626/10000, Loss: 0.4353


Training Progress:  86%|████████████████████████████████████████████████▎       | 8628/10000 [2:01:31<03:59,  5.73it/s]

Step 8627/10000, Loss: 0.4025
Step 8628/10000, Loss: 0.3758


Training Progress:  86%|████████████████████████████████████████████████▎       | 8630/10000 [2:01:31<03:57,  5.76it/s]

Step 8629/10000, Loss: 0.3977
Step 8630/10000, Loss: 0.4075


Training Progress:  86%|████████████████████████████████████████████████▎       | 8632/10000 [2:01:32<03:59,  5.71it/s]

Step 8631/10000, Loss: 0.4033
Step 8632/10000, Loss: 0.4417


Training Progress:  86%|████████████████████████████████████████████████▎       | 8634/10000 [2:01:32<03:52,  5.87it/s]

Step 8633/10000, Loss: 0.3942
Step 8634/10000, Loss: 0.4014


Training Progress:  86%|████████████████████████████████████████████████▎       | 8636/10000 [2:01:32<03:54,  5.81it/s]

Step 8635/10000, Loss: 0.3921
Step 8636/10000, Loss: 0.3683


Training Progress:  86%|████████████████████████████████████████████████▎       | 8638/10000 [2:01:33<03:56,  5.75it/s]

Step 8637/10000, Loss: 0.3755
Step 8638/10000, Loss: 0.3779


Training Progress:  86%|████████████████████████████████████████████████▍       | 8640/10000 [2:01:33<03:57,  5.73it/s]

Step 8639/10000, Loss: 0.3749
Step 8640/10000, Loss: 0.3473


Training Progress:  86%|████████████████████████████████████████████████▍       | 8642/10000 [2:01:34<03:58,  5.70it/s]

Step 8641/10000, Loss: 0.3753
Step 8642/10000, Loss: 0.3602


Training Progress:  86%|████████████████████████████████████████████████▍       | 8644/10000 [2:01:34<03:52,  5.84it/s]

Step 8643/10000, Loss: 0.3417
Step 8644/10000, Loss: 0.3577


Training Progress:  86%|████████████████████████████████████████████████▍       | 8646/10000 [2:01:34<03:55,  5.76it/s]

Step 8645/10000, Loss: 0.3471
Step 8646/10000, Loss: 0.3475


Training Progress:  86%|████████████████████████████████████████████████▍       | 8648/10000 [2:01:35<03:53,  5.80it/s]

Step 8647/10000, Loss: 0.3190
Step 8648/10000, Loss: 0.3358


Training Progress:  86%|████████████████████████████████████████████████▍       | 8650/10000 [2:01:35<03:55,  5.74it/s]

Step 8649/10000, Loss: 0.3723
Step 8650/10000, Loss: 0.3280


Training Progress:  87%|████████████████████████████████████████████████▍       | 8652/10000 [2:01:35<03:50,  5.84it/s]

Step 8651/10000, Loss: 0.3212
Step 8652/10000, Loss: 0.3393


Training Progress:  87%|████████████████████████████████████████████████▍       | 8654/10000 [2:01:36<03:54,  5.73it/s]

Step 8653/10000, Loss: 0.3241
Step 8654/10000, Loss: 0.3451


Training Progress:  87%|████████████████████████████████████████████████▍       | 8656/10000 [2:01:36<03:50,  5.82it/s]

Step 8655/10000, Loss: 0.3605
Step 8656/10000, Loss: 0.3641


Training Progress:  87%|████████████████████████████████████████████████▍       | 8658/10000 [2:01:36<03:53,  5.74it/s]

Step 8657/10000, Loss: 0.3451
Step 8658/10000, Loss: 0.3026


Training Progress:  87%|████████████████████████████████████████████████▍       | 8660/10000 [2:01:37<03:54,  5.71it/s]

Step 8659/10000, Loss: 0.3158
Step 8660/10000, Loss: 0.3038


Training Progress:  87%|████████████████████████████████████████████████▌       | 8662/10000 [2:01:37<03:48,  5.87it/s]

Step 8661/10000, Loss: 0.3094
Step 8662/10000, Loss: 0.2836


Training Progress:  87%|████████████████████████████████████████████████▌       | 8664/10000 [2:01:37<03:49,  5.81it/s]

Step 8663/10000, Loss: 0.3100
Step 8664/10000, Loss: 0.3296


Training Progress:  87%|████████████████████████████████████████████████▌       | 8666/10000 [2:01:38<03:50,  5.79it/s]

Step 8665/10000, Loss: 0.3335
Step 8666/10000, Loss: 0.3577


Training Progress:  87%|████████████████████████████████████████████████▌       | 8668/10000 [2:01:38<03:52,  5.73it/s]

Step 8667/10000, Loss: 0.3346
Step 8668/10000, Loss: 0.2804


Training Progress:  87%|████████████████████████████████████████████████▌       | 8670/10000 [2:01:38<03:52,  5.73it/s]

Step 8669/10000, Loss: 0.3137
Step 8670/10000, Loss: 0.3233


Training Progress:  87%|████████████████████████████████████████████████▌       | 8672/10000 [2:01:39<03:46,  5.87it/s]

Step 8671/10000, Loss: 0.2844
Step 8672/10000, Loss: 0.3087


Training Progress:  87%|████████████████████████████████████████████████▌       | 8674/10000 [2:01:39<03:47,  5.82it/s]

Step 8673/10000, Loss: 0.2676
Step 8674/10000, Loss: 0.2853


Training Progress:  87%|████████████████████████████████████████████████▌       | 8676/10000 [2:01:39<03:50,  5.75it/s]

Step 8675/10000, Loss: 0.3025
Step 8676/10000, Loss: 0.2666


Training Progress:  87%|████████████████████████████████████████████████▌       | 8678/10000 [2:01:40<03:51,  5.72it/s]

Step 8677/10000, Loss: 0.2598
Step 8678/10000, Loss: 0.2954


Training Progress:  87%|████████████████████████████████████████████████▌       | 8680/10000 [2:01:40<03:49,  5.75it/s]

Step 8679/10000, Loss: 0.2588
Step 8680/10000, Loss: 0.2708


Training Progress:  87%|████████████████████████████████████████████████▌       | 8682/10000 [2:01:40<03:45,  5.84it/s]

Step 8681/10000, Loss: 0.2484
Step 8682/10000, Loss: 0.2903


Training Progress:  87%|████████████████████████████████████████████████▋       | 8684/10000 [2:01:41<03:47,  5.77it/s]

Step 8683/10000, Loss: 0.2614
Step 8684/10000, Loss: 0.2864


Training Progress:  87%|████████████████████████████████████████████████▋       | 8686/10000 [2:01:41<03:48,  5.75it/s]

Step 8685/10000, Loss: 0.2741
Step 8686/10000, Loss: 0.2465


Training Progress:  87%|████████████████████████████████████████████████▋       | 8688/10000 [2:01:42<03:49,  5.71it/s]

Step 8687/10000, Loss: 0.2385
Step 8688/10000, Loss: 0.2295


Training Progress:  87%|████████████████████████████████████████████████▋       | 8690/10000 [2:01:42<03:43,  5.85it/s]

Step 8689/10000, Loss: 0.2150
Step 8690/10000, Loss: 0.2320


Training Progress:  87%|████████████████████████████████████████████████▋       | 8692/10000 [2:01:42<03:44,  5.81it/s]

Step 8691/10000, Loss: 0.2675
Step 8692/10000, Loss: 0.2378


Training Progress:  87%|████████████████████████████████████████████████▋       | 8694/10000 [2:01:43<03:47,  5.74it/s]

Step 8693/10000, Loss: 0.2661
Step 8694/10000, Loss: 0.2432


Training Progress:  87%|████████████████████████████████████████████████▋       | 8696/10000 [2:01:43<03:47,  5.73it/s]

Step 8695/10000, Loss: 0.2474
Step 8696/10000, Loss: 0.2383


Training Progress:  87%|████████████████████████████████████████████████▋       | 8698/10000 [2:01:43<03:43,  5.83it/s]

Step 8697/10000, Loss: 0.2707
Step 8698/10000, Loss: 0.2333


Training Progress:  87%|████████████████████████████████████████████████▋       | 8700/10000 [2:01:44<03:43,  5.82it/s]

Step 8699/10000, Loss: 0.2570
Step 8700/10000, Loss: 0.2287


Training Progress:  87%|████████████████████████████████████████████████▋       | 8702/10000 [2:01:44<03:43,  5.80it/s]

Step 8701/10000, Loss: 0.2246
Step 8702/10000, Loss: 0.2269


Training Progress:  87%|████████████████████████████████████████████████▋       | 8704/10000 [2:01:44<03:47,  5.70it/s]

Step 8703/10000, Loss: 0.2485
Step 8704/10000, Loss: 0.2570


Training Progress:  87%|████████████████████████████████████████████████▊       | 8706/10000 [2:01:45<03:46,  5.71it/s]

Step 8705/10000, Loss: 0.2265
Step 8706/10000, Loss: 0.2426


Training Progress:  87%|████████████████████████████████████████████████▊       | 8708/10000 [2:01:45<03:46,  5.70it/s]

Step 8707/10000, Loss: 0.2413
Step 8708/10000, Loss: 0.2283


Training Progress:  87%|████████████████████████████████████████████████▊       | 8710/10000 [2:01:45<03:41,  5.83it/s]

Step 8709/10000, Loss: 0.2266
Step 8710/10000, Loss: 0.2066


Training Progress:  87%|████████████████████████████████████████████████▊       | 8712/10000 [2:01:46<03:41,  5.82it/s]

Step 8711/10000, Loss: 0.2139
Step 8712/10000, Loss: 0.2255


Training Progress:  87%|████████████████████████████████████████████████▊       | 8714/10000 [2:01:46<03:43,  5.74it/s]

Step 8713/10000, Loss: 0.2160
Step 8714/10000, Loss: 0.2363


Training Progress:  87%|████████████████████████████████████████████████▊       | 8716/10000 [2:01:46<03:44,  5.71it/s]

Step 8715/10000, Loss: 0.2213
Step 8716/10000, Loss: 0.2184


Training Progress:  87%|████████████████████████████████████████████████▊       | 8718/10000 [2:01:47<03:39,  5.85it/s]

Step 8717/10000, Loss: 0.2055
Step 8718/10000, Loss: 0.2082


Training Progress:  87%|████████████████████████████████████████████████▊       | 8720/10000 [2:01:47<03:39,  5.83it/s]

Step 8719/10000, Loss: 0.2041
Step 8720/10000, Loss: 0.2129


Training Progress:  87%|████████████████████████████████████████████████▊       | 8722/10000 [2:01:47<03:40,  5.79it/s]

Step 8721/10000, Loss: 0.2200
Step 8722/10000, Loss: 0.2038


Training Progress:  87%|████████████████████████████████████████████████▊       | 8724/10000 [2:01:48<03:42,  5.74it/s]

Step 8723/10000, Loss: 0.2220
Step 8724/10000, Loss: 0.1936


Training Progress:  87%|████████████████████████████████████████████████▊       | 8726/10000 [2:01:48<03:42,  5.71it/s]

Step 8725/10000, Loss: 0.2079
Step 8726/10000, Loss: 0.2113


Training Progress:  87%|████████████████████████████████████████████████▉       | 8728/10000 [2:01:48<03:36,  5.87it/s]

Step 8727/10000, Loss: 0.2056
Step 8728/10000, Loss: 0.2069


Training Progress:  87%|████████████████████████████████████████████████▉       | 8730/10000 [2:01:49<03:38,  5.82it/s]

Step 8729/10000, Loss: 0.1885
Step 8730/10000, Loss: 0.1987


Training Progress:  87%|████████████████████████████████████████████████▉       | 8732/10000 [2:01:49<03:40,  5.75it/s]

Step 8731/10000, Loss: 0.2149
Step 8732/10000, Loss: 0.2053


Training Progress:  87%|████████████████████████████████████████████████▉       | 8734/10000 [2:01:49<03:41,  5.71it/s]

Step 8733/10000, Loss: 0.1819
Step 8734/10000, Loss: 0.1942


Training Progress:  87%|████████████████████████████████████████████████▉       | 8736/10000 [2:01:50<03:36,  5.85it/s]

Step 8735/10000, Loss: 0.1804
Step 8736/10000, Loss: 0.1735


Training Progress:  87%|████████████████████████████████████████████████▉       | 8738/10000 [2:01:50<03:36,  5.82it/s]

Step 8737/10000, Loss: 0.2118
Step 8738/10000, Loss: 0.1905


Training Progress:  87%|████████████████████████████████████████████████▉       | 8740/10000 [2:01:51<03:37,  5.79it/s]

Step 8739/10000, Loss: 0.1804
Step 8740/10000, Loss: 0.1783


Training Progress:  87%|████████████████████████████████████████████████▉       | 8742/10000 [2:01:51<03:39,  5.73it/s]

Step 8741/10000, Loss: 0.1885
Step 8742/10000, Loss: 0.1752


Training Progress:  87%|████████████████████████████████████████████████▉       | 8744/10000 [2:01:51<03:40,  5.69it/s]

Step 8743/10000, Loss: 0.1622
Step 8744/10000, Loss: 0.1594


Training Progress:  87%|████████████████████████████████████████████████▉       | 8746/10000 [2:01:52<03:34,  5.85it/s]

Step 8745/10000, Loss: 0.1855
Step 8746/10000, Loss: 0.1818


Training Progress:  87%|████████████████████████████████████████████████▉       | 8748/10000 [2:01:52<03:35,  5.82it/s]

Step 8747/10000, Loss: 0.1728
Step 8748/10000, Loss: 0.1938


Training Progress:  88%|█████████████████████████████████████████████████       | 8750/10000 [2:01:52<03:39,  5.71it/s]

Step 8749/10000, Loss: 0.1869
Step 8750/10000, Loss: 0.1411


Training Progress:  88%|█████████████████████████████████████████████████       | 8752/10000 [2:01:53<03:38,  5.72it/s]

Step 8751/10000, Loss: 0.2000
Step 8752/10000, Loss: 0.1776


Training Progress:  88%|█████████████████████████████████████████████████       | 8754/10000 [2:01:53<03:37,  5.72it/s]

Step 8753/10000, Loss: 0.1658
Step 8754/10000, Loss: 0.1683


Training Progress:  88%|█████████████████████████████████████████████████       | 8756/10000 [2:01:53<03:34,  5.79it/s]

Step 8755/10000, Loss: 0.1554
Step 8756/10000, Loss: 0.1547


Training Progress:  88%|█████████████████████████████████████████████████       | 8758/10000 [2:01:54<03:34,  5.78it/s]

Step 8757/10000, Loss: 0.1678
Step 8758/10000, Loss: 0.1599


Training Progress:  88%|█████████████████████████████████████████████████       | 8760/10000 [2:01:54<03:35,  5.75it/s]

Step 8759/10000, Loss: 0.1580
Step 8760/10000, Loss: 0.1667


Training Progress:  88%|█████████████████████████████████████████████████       | 8762/10000 [2:01:54<03:36,  5.72it/s]

Step 8761/10000, Loss: 0.1497
Step 8762/10000, Loss: 0.1582


Training Progress:  88%|█████████████████████████████████████████████████       | 8764/10000 [2:01:55<03:31,  5.86it/s]

Step 8763/10000, Loss: 0.1627
Step 8764/10000, Loss: 0.1619


Training Progress:  88%|█████████████████████████████████████████████████       | 8766/10000 [2:01:55<03:33,  5.78it/s]

Step 8765/10000, Loss: 0.1374
Step 8766/10000, Loss: 0.1775


Training Progress:  88%|█████████████████████████████████████████████████       | 8768/10000 [2:01:55<03:34,  5.75it/s]

Step 8767/10000, Loss: 0.1323
Step 8768/10000, Loss: 0.1449


Training Progress:  88%|█████████████████████████████████████████████████       | 8770/10000 [2:01:56<03:35,  5.72it/s]

Step 8769/10000, Loss: 0.1392
Step 8770/10000, Loss: 0.1356


Training Progress:  88%|█████████████████████████████████████████████████       | 8772/10000 [2:01:56<03:29,  5.86it/s]

Step 8771/10000, Loss: 0.1343
Step 8772/10000, Loss: 0.1261


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8774/10000 [2:01:56<03:30,  5.83it/s]

Step 8773/10000, Loss: 0.1449
Step 8774/10000, Loss: 0.1458


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8776/10000 [2:01:57<03:33,  5.75it/s]

Step 8775/10000, Loss: 0.1510
Step 8776/10000, Loss: 0.1382


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8778/10000 [2:01:57<03:33,  5.72it/s]

Step 8777/10000, Loss: 0.1316
Step 8778/10000, Loss: 0.1262


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8780/10000 [2:01:57<03:34,  5.70it/s]

Step 8779/10000, Loss: 0.1625
Step 8780/10000, Loss: 0.1333


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8782/10000 [2:01:58<03:28,  5.84it/s]

Step 8781/10000, Loss: 0.1337
Step 8782/10000, Loss: 0.1311


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8784/10000 [2:01:58<03:31,  5.75it/s]

Step 8783/10000, Loss: 0.1386
Step 8784/10000, Loss: 0.1308


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8786/10000 [2:01:58<03:31,  5.75it/s]

Step 8785/10000, Loss: 0.1462
Step 8786/10000, Loss: 0.1301


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8788/10000 [2:01:59<03:32,  5.71it/s]

Step 8787/10000, Loss: 0.1393
Step 8788/10000, Loss: 0.1395


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8790/10000 [2:01:59<03:26,  5.87it/s]

Step 8789/10000, Loss: 0.1241
Step 8790/10000, Loss: 0.1416


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8792/10000 [2:02:00<03:27,  5.82it/s]

Step 8791/10000, Loss: 0.1319
Step 8792/10000, Loss: 0.1111


Training Progress:  88%|█████████████████████████████████████████████████▏      | 8794/10000 [2:02:00<03:30,  5.73it/s]

Step 8793/10000, Loss: 0.1301
Step 8794/10000, Loss: 0.1363


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8796/10000 [2:02:00<03:31,  5.70it/s]

Step 8795/10000, Loss: 0.1230
Step 8796/10000, Loss: 0.1389


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8798/10000 [2:02:01<03:24,  5.87it/s]

Step 8797/10000, Loss: 0.1346
Step 8798/10000, Loss: 0.1265


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8800/10000 [2:02:01<03:26,  5.82it/s]

Step 8799/10000, Loss: 0.1059
Step 8800/10000, Loss: 0.1179


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8802/10000 [2:02:01<03:28,  5.74it/s]

Step 8801/10000, Loss: 0.1225
Step 8802/10000, Loss: 0.1373


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8804/10000 [2:02:02<03:28,  5.74it/s]

Step 8803/10000, Loss: 0.1327
Step 8804/10000, Loss: 0.1248


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8806/10000 [2:02:02<03:25,  5.80it/s]

Step 8805/10000, Loss: 0.1440
Step 8806/10000, Loss: 0.1130


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8808/10000 [2:02:02<03:25,  5.80it/s]

Step 8807/10000, Loss: 0.1235
Step 8808/10000, Loss: 0.1181


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8810/10000 [2:02:03<03:27,  5.73it/s]

Step 8809/10000, Loss: 0.1270
Step 8810/10000, Loss: 0.1143


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8812/10000 [2:02:03<03:28,  5.70it/s]

Step 8811/10000, Loss: 0.1087
Step 8812/10000, Loss: 0.1255


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8814/10000 [2:02:03<03:26,  5.73it/s]

Step 8813/10000, Loss: 0.1296
Step 8814/10000, Loss: 0.1248


Training Progress:  88%|█████████████████████████████████████████████████▎      | 8816/10000 [2:02:04<03:21,  5.88it/s]

Step 8815/10000, Loss: 0.1183
Step 8816/10000, Loss: 0.1176


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8818/10000 [2:02:04<03:23,  5.81it/s]

Step 8817/10000, Loss: 0.1166
Step 8818/10000, Loss: 0.1172


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8820/10000 [2:02:04<03:25,  5.75it/s]

Step 8819/10000, Loss: 0.1286
Step 8820/10000, Loss: 0.0995


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8822/10000 [2:02:05<03:25,  5.73it/s]

Step 8821/10000, Loss: 0.1046
Step 8822/10000, Loss: 0.1126


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8824/10000 [2:02:05<03:22,  5.81it/s]

Step 8823/10000, Loss: 0.1217
Step 8824/10000, Loss: 0.1136


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8826/10000 [2:02:05<03:21,  5.84it/s]

Step 8825/10000, Loss: 0.1003
Step 8826/10000, Loss: 0.1110


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8828/10000 [2:02:06<03:22,  5.79it/s]

Step 8827/10000, Loss: 0.1133
Step 8828/10000, Loss: 0.1127


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8830/10000 [2:02:06<03:23,  5.75it/s]

Step 8829/10000, Loss: 0.0917
Step 8830/10000, Loss: 0.1189


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8832/10000 [2:02:06<03:24,  5.71it/s]

Step 8831/10000, Loss: 0.1099
Step 8832/10000, Loss: 0.0948


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8834/10000 [2:02:07<03:19,  5.85it/s]

Step 8833/10000, Loss: 0.1051
Step 8834/10000, Loss: 0.1097


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8836/10000 [2:02:07<03:23,  5.71it/s]

Step 8835/10000, Loss: 0.1064
Step 8836/10000, Loss: 0.1013


Training Progress:  88%|█████████████████████████████████████████████████▍      | 8838/10000 [2:02:07<03:19,  5.81it/s]

Step 8837/10000, Loss: 0.0981
Step 8838/10000, Loss: 0.0975


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8840/10000 [2:02:08<03:22,  5.74it/s]

Step 8839/10000, Loss: 0.1201
Step 8840/10000, Loss: 0.0944


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8842/10000 [2:02:08<03:22,  5.71it/s]

Step 8841/10000, Loss: 0.0984
Step 8842/10000, Loss: 0.1004


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8844/10000 [2:02:09<03:23,  5.69it/s]

Step 8843/10000, Loss: 0.0891
Step 8844/10000, Loss: 0.0853


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8846/10000 [2:02:09<03:17,  5.85it/s]

Step 8845/10000, Loss: 0.0904
Step 8846/10000, Loss: 0.0946


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8848/10000 [2:02:09<03:21,  5.71it/s]

Step 8847/10000, Loss: 0.0874
Step 8848/10000, Loss: 0.1095


Training Progress:  88%|█████████████████████████████████████████████████▌      | 8850/10000 [2:02:10<03:19,  5.77it/s]

Step 8849/10000, Loss: 0.0881
Step 8850/10000, Loss: 0.0839


Training Progress:  89%|█████████████████████████████████████████████████▌      | 8852/10000 [2:02:10<03:19,  5.75it/s]

Step 8851/10000, Loss: 0.0828
Step 8852/10000, Loss: 0.0771


Training Progress:  89%|█████████████████████████████████████████████████▌      | 8854/10000 [2:02:10<03:16,  5.84it/s]

Step 8853/10000, Loss: 0.0734
Step 8854/10000, Loss: 0.0757


Training Progress:  89%|█████████████████████████████████████████████████▌      | 8856/10000 [2:02:11<03:16,  5.83it/s]

Step 8855/10000, Loss: 0.0816
Step 8856/10000, Loss: 0.0934


Training Progress:  89%|█████████████████████████████████████████████████▌      | 8858/10000 [2:02:11<03:17,  5.79it/s]

Step 8857/10000, Loss: 0.0927
Step 8858/10000, Loss: 0.0802


Training Progress:  89%|█████████████████████████████████████████████████▌      | 8860/10000 [2:02:11<03:19,  5.73it/s]

Step 8859/10000, Loss: 0.0882
Step 8860/10000, Loss: 0.0793


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8862/10000 [2:02:12<03:19,  5.70it/s]

Step 8861/10000, Loss: 0.0914
Step 8862/10000, Loss: 0.0876


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8864/10000 [2:02:12<03:14,  5.85it/s]

Step 8863/10000, Loss: 0.0800
Step 8864/10000, Loss: 0.0854


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8866/10000 [2:02:12<03:14,  5.83it/s]

Step 8865/10000, Loss: 0.0859
Step 8866/10000, Loss: 0.0764


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8868/10000 [2:02:13<03:15,  5.79it/s]

Step 8867/10000, Loss: 0.0866
Step 8868/10000, Loss: 0.0808


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8870/10000 [2:02:13<03:17,  5.73it/s]

Step 8869/10000, Loss: 0.0954
Step 8870/10000, Loss: 0.0909


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8872/10000 [2:02:13<03:16,  5.73it/s]

Step 8871/10000, Loss: 0.0740
Step 8872/10000, Loss: 0.0948


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8874/10000 [2:02:14<03:12,  5.86it/s]

Step 8873/10000, Loss: 0.0797
Step 8874/10000, Loss: 0.0753


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8876/10000 [2:02:14<03:13,  5.82it/s]

Step 8875/10000, Loss: 0.0881
Step 8876/10000, Loss: 0.0798


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8878/10000 [2:02:14<03:14,  5.78it/s]

Step 8877/10000, Loss: 0.0807
Step 8878/10000, Loss: 0.0884


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8880/10000 [2:02:15<03:15,  5.72it/s]

Step 8879/10000, Loss: 0.0944
Step 8880/10000, Loss: 0.0844


Training Progress:  89%|█████████████████████████████████████████████████▋      | 8882/10000 [2:02:15<03:15,  5.71it/s]

Step 8881/10000, Loss: 0.0682
Step 8882/10000, Loss: 0.0704


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8884/10000 [2:02:15<03:10,  5.85it/s]

Step 8883/10000, Loss: 0.0835
Step 8884/10000, Loss: 0.0748


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8886/10000 [2:02:16<03:11,  5.82it/s]

Step 8885/10000, Loss: 0.0658
Step 8886/10000, Loss: 0.0802


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8888/10000 [2:02:16<03:13,  5.74it/s]

Step 8887/10000, Loss: 0.0759
Step 8888/10000, Loss: 0.0723


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8890/10000 [2:02:16<03:14,  5.70it/s]

Step 8889/10000, Loss: 0.0761
Step 8890/10000, Loss: 0.0790


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8892/10000 [2:02:17<03:13,  5.71it/s]

Step 8891/10000, Loss: 0.0801
Step 8892/10000, Loss: 0.0739


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8894/10000 [2:02:17<03:10,  5.80it/s]

Step 8893/10000, Loss: 0.0628
Step 8894/10000, Loss: 0.0685


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8896/10000 [2:02:18<03:11,  5.77it/s]

Step 8895/10000, Loss: 0.0726
Step 8896/10000, Loss: 0.0749


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8898/10000 [2:02:18<03:12,  5.72it/s]

Step 8897/10000, Loss: 0.0687
Step 8898/10000, Loss: 0.0681


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8900/10000 [2:02:18<03:13,  5.69it/s]

Step 8899/10000, Loss: 0.0684
Step 8900/10000, Loss: 0.0729


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8902/10000 [2:02:19<03:07,  5.85it/s]

Step 8901/10000, Loss: 0.0731
Step 8902/10000, Loss: 0.0604


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8904/10000 [2:02:19<03:08,  5.81it/s]

Step 8903/10000, Loss: 0.0713
Step 8904/10000, Loss: 0.0741


Training Progress:  89%|█████████████████████████████████████████████████▊      | 8906/10000 [2:02:19<03:11,  5.72it/s]

Step 8905/10000, Loss: 0.0797
Step 8906/10000, Loss: 0.0676


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8908/10000 [2:02:20<03:11,  5.72it/s]

Step 8907/10000, Loss: 0.0670
Step 8908/10000, Loss: 0.0670


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8910/10000 [2:02:20<03:11,  5.69it/s]

Step 8909/10000, Loss: 0.0744
Step 8910/10000, Loss: 0.0750


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8912/10000 [2:02:20<03:06,  5.85it/s]

Step 8911/10000, Loss: 0.0637
Step 8912/10000, Loss: 0.0766


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8914/10000 [2:02:21<03:07,  5.80it/s]

Step 8913/10000, Loss: 0.0688
Step 8914/10000, Loss: 0.0658


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8916/10000 [2:02:21<03:09,  5.73it/s]

Step 8915/10000, Loss: 0.0702
Step 8916/10000, Loss: 0.0720


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8918/10000 [2:02:21<03:09,  5.71it/s]

Step 8917/10000, Loss: 0.0632
Step 8918/10000, Loss: 0.0617


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8920/10000 [2:02:22<03:09,  5.70it/s]

Step 8919/10000, Loss: 0.0824
Step 8920/10000, Loss: 0.0695


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8922/10000 [2:02:22<03:04,  5.84it/s]

Step 8921/10000, Loss: 0.0763
Step 8922/10000, Loss: 0.0551


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8924/10000 [2:02:22<03:05,  5.81it/s]

Step 8923/10000, Loss: 0.0679
Step 8924/10000, Loss: 0.0642


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8926/10000 [2:02:23<03:06,  5.75it/s]

Step 8925/10000, Loss: 0.0574
Step 8926/10000, Loss: 0.0592


Training Progress:  89%|█████████████████████████████████████████████████▉      | 8928/10000 [2:02:23<03:07,  5.71it/s]

Step 8927/10000, Loss: 0.0684
Step 8928/10000, Loss: 0.0622


Training Progress:  89%|██████████████████████████████████████████████████      | 8930/10000 [2:02:23<03:02,  5.86it/s]

Step 8929/10000, Loss: 0.0561
Step 8930/10000, Loss: 0.0668


Training Progress:  89%|██████████████████████████████████████████████████      | 8932/10000 [2:02:24<03:03,  5.82it/s]

Step 8931/10000, Loss: 0.0483
Step 8932/10000, Loss: 0.0534


Training Progress:  89%|██████████████████████████████████████████████████      | 8934/10000 [2:02:24<03:04,  5.79it/s]

Step 8933/10000, Loss: 0.0579
Step 8934/10000, Loss: 0.0545


Training Progress:  89%|██████████████████████████████████████████████████      | 8936/10000 [2:02:24<03:05,  5.73it/s]

Step 8935/10000, Loss: 0.0561
Step 8936/10000, Loss: 0.0523


Training Progress:  89%|██████████████████████████████████████████████████      | 8938/10000 [2:02:25<03:06,  5.70it/s]

Step 8937/10000, Loss: 0.0615
Step 8938/10000, Loss: 0.0654


Training Progress:  89%|██████████████████████████████████████████████████      | 8940/10000 [2:02:25<03:02,  5.81it/s]

Step 8939/10000, Loss: 0.0531
Step 8940/10000, Loss: 0.0575


Training Progress:  89%|██████████████████████████████████████████████████      | 8942/10000 [2:02:26<03:01,  5.82it/s]

Step 8941/10000, Loss: 0.0619
Step 8942/10000, Loss: 0.0502


Training Progress:  89%|██████████████████████████████████████████████████      | 8944/10000 [2:02:26<03:03,  5.75it/s]

Step 8943/10000, Loss: 0.0617
Step 8944/10000, Loss: 0.0572


Training Progress:  89%|██████████████████████████████████████████████████      | 8946/10000 [2:02:26<03:05,  5.70it/s]

Step 8945/10000, Loss: 0.0566
Step 8946/10000, Loss: 0.0598


Training Progress:  89%|██████████████████████████████████████████████████      | 8948/10000 [2:02:27<03:03,  5.72it/s]

Step 8947/10000, Loss: 0.0534
Step 8948/10000, Loss: 0.0549


Training Progress:  90%|██████████████████████████████████████████████████      | 8950/10000 [2:02:27<03:04,  5.69it/s]

Step 8949/10000, Loss: 0.0590
Step 8950/10000, Loss: 0.0571


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8952/10000 [2:02:27<03:00,  5.80it/s]

Step 8951/10000, Loss: 0.0888
Step 8952/10000, Loss: 0.0598


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8954/10000 [2:02:28<02:59,  5.82it/s]

Step 8953/10000, Loss: 0.0498
Step 8954/10000, Loss: 0.0651


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8956/10000 [2:02:28<03:01,  5.76it/s]

Step 8955/10000, Loss: 0.0529
Step 8956/10000, Loss: 0.0565


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8958/10000 [2:02:28<03:03,  5.69it/s]

Step 8957/10000, Loss: 0.0658
Step 8958/10000, Loss: 0.0581


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8960/10000 [2:02:29<03:02,  5.71it/s]

Step 8959/10000, Loss: 0.0528
Step 8960/10000, Loss: 0.0598


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8962/10000 [2:02:29<02:56,  5.87it/s]

Step 8961/10000, Loss: 0.0614
Step 8962/10000, Loss: 0.0569


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8964/10000 [2:02:29<02:58,  5.81it/s]

Step 8963/10000, Loss: 0.0546
Step 8964/10000, Loss: 0.0512


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8966/10000 [2:02:30<03:00,  5.74it/s]

Step 8965/10000, Loss: 0.0637
Step 8966/10000, Loss: 0.0586


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8968/10000 [2:02:30<03:01,  5.70it/s]

Step 8967/10000, Loss: 0.0532
Step 8968/10000, Loss: 0.0609


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8970/10000 [2:02:30<02:58,  5.77it/s]

Step 8969/10000, Loss: 0.0502
Step 8970/10000, Loss: 0.0544


Training Progress:  90%|██████████████████████████████████████████████████▏     | 8972/10000 [2:02:31<02:55,  5.84it/s]

Step 8971/10000, Loss: 0.0540
Step 8972/10000, Loss: 0.0547


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8974/10000 [2:02:31<02:57,  5.80it/s]

Step 8973/10000, Loss: 0.0550
Step 8974/10000, Loss: 0.0498


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8976/10000 [2:02:31<02:57,  5.78it/s]

Step 8975/10000, Loss: 0.0458
Step 8976/10000, Loss: 0.0516


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8978/10000 [2:02:32<02:58,  5.72it/s]

Step 8977/10000, Loss: 0.0539
Step 8978/10000, Loss: 0.0578


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8980/10000 [2:02:32<02:54,  5.86it/s]

Step 8979/10000, Loss: 0.0525
Step 8980/10000, Loss: 0.0492


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8982/10000 [2:02:32<02:54,  5.83it/s]

Step 8981/10000, Loss: 0.0438
Step 8982/10000, Loss: 0.0515


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8984/10000 [2:02:33<02:55,  5.79it/s]

Step 8983/10000, Loss: 0.0498
Step 8984/10000, Loss: 0.0411


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8986/10000 [2:02:33<02:56,  5.75it/s]

Step 8985/10000, Loss: 0.0505
Step 8986/10000, Loss: 0.0563


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8988/10000 [2:02:33<02:57,  5.71it/s]

Step 8987/10000, Loss: 0.0561
Step 8988/10000, Loss: 0.0501


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8990/10000 [2:02:34<02:52,  5.86it/s]

Step 8989/10000, Loss: 0.0473
Step 8990/10000, Loss: 0.0458


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8992/10000 [2:02:34<02:53,  5.81it/s]

Step 8991/10000, Loss: 0.0515
Step 8992/10000, Loss: 0.0546


Training Progress:  90%|██████████████████████████████████████████████████▎     | 8994/10000 [2:02:35<02:55,  5.74it/s]

Step 8993/10000, Loss: 0.0423
Step 8994/10000, Loss: 0.0543


Training Progress:  90%|██████████████████████████████████████████████████▍     | 8996/10000 [2:02:35<02:56,  5.70it/s]

Step 8995/10000, Loss: 0.0447
Step 8996/10000, Loss: 0.0458


Training Progress:  90%|██████████████████████████████████████████████████▍     | 8998/10000 [2:02:35<02:55,  5.71it/s]

Step 8997/10000, Loss: 0.0488
Step 8998/10000, Loss: 0.0539


Training Progress:  90%|██████████████████████████████████████████████████▍     | 8999/10000 [2:02:35<02:53,  5.76it/s]

Step 8999/10000, Loss: 0.0500
Step 9000/10000, Loss: 0.0431


Training Progress:  90%|████████████████████████████████████████████████▌     | 9000/10000 [2:02:49<1:11:38,  4.30s/it]


Checkpoint saved: checkpoints\checkpoint_step9000_loss0.0431_20250117_145439.pt


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9002/10000 [2:02:50<37:15,  2.24s/it]

Step 9001/10000, Loss: 0.0535
Step 9002/10000, Loss: 0.0576


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9004/10000 [2:02:50<19:44,  1.19s/it]

Step 9003/10000, Loss: 0.0680
Step 9004/10000, Loss: 0.0420


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9006/10000 [2:02:51<11:07,  1.49it/s]

Step 9005/10000, Loss: 0.0623
Step 9006/10000, Loss: 0.0474


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9008/10000 [2:02:51<06:55,  2.39it/s]

Step 9007/10000, Loss: 0.0431
Step 9008/10000, Loss: 0.0475


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9010/10000 [2:02:51<04:48,  3.43it/s]

Step 9009/10000, Loss: 0.0495
Step 9010/10000, Loss: 0.0460


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9012/10000 [2:02:52<03:48,  4.32it/s]

Step 9011/10000, Loss: 0.0411
Step 9012/10000, Loss: 0.0490


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9014/10000 [2:02:52<03:20,  4.92it/s]

Step 9013/10000, Loss: 0.0408
Step 9014/10000, Loss: 0.0400


Training Progress:  90%|██████████████████████████████████████████████████▍     | 9016/10000 [2:02:52<03:01,  5.41it/s]

Step 9015/10000, Loss: 0.0440
Step 9016/10000, Loss: 0.0391


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9018/10000 [2:02:53<02:55,  5.59it/s]

Step 9017/10000, Loss: 0.0401
Step 9018/10000, Loss: 0.0383


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9020/10000 [2:02:53<02:53,  5.64it/s]

Step 9019/10000, Loss: 0.0435
Step 9020/10000, Loss: 0.0510


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9022/10000 [2:02:53<02:52,  5.67it/s]

Step 9021/10000, Loss: 0.0442
Step 9022/10000, Loss: 0.0457


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9024/10000 [2:02:54<02:47,  5.82it/s]

Step 9023/10000, Loss: 0.0510
Step 9024/10000, Loss: 0.0433


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9026/10000 [2:02:54<02:47,  5.80it/s]

Step 9025/10000, Loss: 0.0464
Step 9026/10000, Loss: 0.0448


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9028/10000 [2:02:54<02:48,  5.76it/s]

Step 9027/10000, Loss: 0.0427
Step 9028/10000, Loss: 0.0483


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9030/10000 [2:02:55<02:45,  5.88it/s]

Step 9029/10000, Loss: 0.0450
Step 9030/10000, Loss: 0.0385


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9032/10000 [2:02:55<02:46,  5.83it/s]

Step 9031/10000, Loss: 0.0456
Step 9032/10000, Loss: 0.0411


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9034/10000 [2:02:55<02:47,  5.77it/s]

Step 9033/10000, Loss: 0.0603
Step 9034/10000, Loss: 0.0465


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9036/10000 [2:02:56<02:48,  5.72it/s]

Step 9035/10000, Loss: 0.0380
Step 9036/10000, Loss: 0.0478


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9038/10000 [2:02:56<02:44,  5.86it/s]

Step 9037/10000, Loss: 0.0396
Step 9038/10000, Loss: 0.0418


Training Progress:  90%|██████████████████████████████████████████████████▌     | 9040/10000 [2:02:56<02:44,  5.82it/s]

Step 9039/10000, Loss: 0.0494
Step 9040/10000, Loss: 0.0586


Training Progress:  90%|██████████████████████████████████████████████████▋     | 9042/10000 [2:02:57<02:46,  5.75it/s]

Step 9041/10000, Loss: 0.0452
Step 9042/10000, Loss: 0.0505


Training Progress:  90%|██████████████████████████████████████████████████▋     | 9044/10000 [2:02:57<02:47,  5.72it/s]

Step 9043/10000, Loss: 0.0431
Step 9044/10000, Loss: 0.0427


Training Progress:  90%|██████████████████████████████████████████████████▋     | 9046/10000 [2:02:57<02:42,  5.86it/s]

Step 9045/10000, Loss: 0.0429
Step 9046/10000, Loss: 0.0387


Training Progress:  90%|██████████████████████████████████████████████████▋     | 9048/10000 [2:02:58<02:43,  5.81it/s]

Step 9047/10000, Loss: 0.0509
Step 9048/10000, Loss: 0.0473


Training Progress:  90%|██████████████████████████████████████████████████▋     | 9050/10000 [2:02:58<02:45,  5.74it/s]

Step 9049/10000, Loss: 0.0436
Step 9050/10000, Loss: 0.0487


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9052/10000 [2:02:59<02:41,  5.87it/s]

Step 9051/10000, Loss: 0.0429
Step 9052/10000, Loss: 0.0494


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9054/10000 [2:02:59<02:42,  5.83it/s]

Step 9053/10000, Loss: 0.0453
Step 9054/10000, Loss: 0.0427


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9056/10000 [2:02:59<02:45,  5.70it/s]

Step 9055/10000, Loss: 0.0434
Step 9056/10000, Loss: 0.0406


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9058/10000 [2:03:00<02:40,  5.87it/s]

Step 9057/10000, Loss: 0.0411
Step 9058/10000, Loss: 0.0434


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9060/10000 [2:03:00<02:41,  5.83it/s]

Step 9059/10000, Loss: 0.0407
Step 9060/10000, Loss: 0.0461


Training Progress:  91%|██████████████████████████████████████████████████▋     | 9062/10000 [2:03:00<02:44,  5.71it/s]

Step 9061/10000, Loss: 0.0435
Step 9062/10000, Loss: 0.0412


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9064/10000 [2:03:01<02:43,  5.71it/s]

Step 9063/10000, Loss: 0.0374
Step 9064/10000, Loss: 0.0421


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9066/10000 [2:03:01<02:39,  5.86it/s]

Step 9065/10000, Loss: 0.0415
Step 9066/10000, Loss: 0.0327


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9068/10000 [2:03:01<02:40,  5.80it/s]

Step 9067/10000, Loss: 0.0399
Step 9068/10000, Loss: 0.0439


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9070/10000 [2:03:02<02:41,  5.75it/s]

Step 9069/10000, Loss: 0.0534
Step 9070/10000, Loss: 0.0440


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9072/10000 [2:03:02<02:38,  5.87it/s]

Step 9071/10000, Loss: 0.0355
Step 9072/10000, Loss: 0.0377


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9074/10000 [2:03:02<02:38,  5.83it/s]

Step 9073/10000, Loss: 0.0408
Step 9074/10000, Loss: 0.0431


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9076/10000 [2:03:03<02:40,  5.75it/s]

Step 9075/10000, Loss: 0.0355
Step 9076/10000, Loss: 0.0471


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9078/10000 [2:03:03<02:41,  5.72it/s]

Step 9077/10000, Loss: 0.0353
Step 9078/10000, Loss: 0.0375


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9080/10000 [2:03:03<02:41,  5.71it/s]

Step 9079/10000, Loss: 0.0408
Step 9080/10000, Loss: 0.0428


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9082/10000 [2:03:04<02:36,  5.85it/s]

Step 9081/10000, Loss: 0.0393
Step 9082/10000, Loss: 0.0392


Training Progress:  91%|██████████████████████████████████████████████████▊     | 9084/10000 [2:03:04<02:39,  5.75it/s]

Step 9083/10000, Loss: 0.0487
Step 9084/10000, Loss: 0.0428


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9086/10000 [2:03:04<02:38,  5.75it/s]

Step 9085/10000, Loss: 0.0581
Step 9086/10000, Loss: 0.0429


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9088/10000 [2:03:05<02:35,  5.87it/s]

Step 9087/10000, Loss: 0.0586
Step 9088/10000, Loss: 0.0408


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9090/10000 [2:03:05<02:37,  5.77it/s]

Step 9089/10000, Loss: 0.0389
Step 9090/10000, Loss: 0.0419


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9092/10000 [2:03:05<02:37,  5.76it/s]

Step 9091/10000, Loss: 0.0449
Step 9092/10000, Loss: 0.0404


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9094/10000 [2:03:06<02:38,  5.71it/s]

Step 9093/10000, Loss: 0.0359
Step 9094/10000, Loss: 0.0458


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9096/10000 [2:03:06<02:34,  5.86it/s]

Step 9095/10000, Loss: 0.0311
Step 9096/10000, Loss: 0.0368


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9098/10000 [2:03:06<02:34,  5.82it/s]

Step 9097/10000, Loss: 0.0371
Step 9098/10000, Loss: 0.0352


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9100/10000 [2:03:07<02:36,  5.75it/s]

Step 9099/10000, Loss: 0.0370
Step 9100/10000, Loss: 0.0313


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9102/10000 [2:03:07<02:32,  5.88it/s]

Step 9101/10000, Loss: 0.0373
Step 9102/10000, Loss: 0.0426


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9104/10000 [2:03:08<02:33,  5.83it/s]

Step 9103/10000, Loss: 0.0373
Step 9104/10000, Loss: 0.0372


Training Progress:  91%|██████████████████████████████████████████████████▉     | 9106/10000 [2:03:08<02:36,  5.71it/s]

Step 9105/10000, Loss: 0.0429
Step 9106/10000, Loss: 0.0315


Training Progress:  91%|███████████████████████████████████████████████████     | 9108/10000 [2:03:08<02:36,  5.71it/s]

Step 9107/10000, Loss: 0.0401
Step 9108/10000, Loss: 0.0385


Training Progress:  91%|███████████████████████████████████████████████████     | 9110/10000 [2:03:09<02:31,  5.86it/s]

Step 9109/10000, Loss: 0.0372
Step 9110/10000, Loss: 0.0422


Training Progress:  91%|███████████████████████████████████████████████████     | 9112/10000 [2:03:09<02:34,  5.76it/s]

Step 9111/10000, Loss: 0.0373
Step 9112/10000, Loss: 0.0364


Training Progress:  91%|███████████████████████████████████████████████████     | 9114/10000 [2:03:09<02:34,  5.72it/s]

Step 9113/10000, Loss: 0.0389
Step 9114/10000, Loss: 0.0394


Training Progress:  91%|███████████████████████████████████████████████████     | 9116/10000 [2:03:10<02:34,  5.72it/s]

Step 9115/10000, Loss: 0.0415
Step 9116/10000, Loss: 0.0380


Training Progress:  91%|███████████████████████████████████████████████████     | 9118/10000 [2:03:10<02:30,  5.87it/s]

Step 9117/10000, Loss: 0.0301
Step 9118/10000, Loss: 0.0474


Training Progress:  91%|███████████████████████████████████████████████████     | 9120/10000 [2:03:10<02:33,  5.74it/s]

Step 9119/10000, Loss: 0.0393
Step 9120/10000, Loss: 0.0372


Training Progress:  91%|███████████████████████████████████████████████████     | 9122/10000 [2:03:11<02:33,  5.73it/s]

Step 9121/10000, Loss: 0.0414
Step 9122/10000, Loss: 0.0444


Training Progress:  91%|███████████████████████████████████████████████████     | 9124/10000 [2:03:11<02:28,  5.88it/s]

Step 9123/10000, Loss: 0.0387
Step 9124/10000, Loss: 0.0415


Training Progress:  91%|███████████████████████████████████████████████████     | 9126/10000 [2:03:11<02:30,  5.82it/s]

Step 9125/10000, Loss: 0.0379
Step 9126/10000, Loss: 0.0357


Training Progress:  91%|███████████████████████████████████████████████████     | 9128/10000 [2:03:12<02:31,  5.75it/s]

Step 9127/10000, Loss: 0.0384
Step 9128/10000, Loss: 0.0413


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9130/10000 [2:03:12<02:32,  5.71it/s]

Step 9129/10000, Loss: 0.0420
Step 9130/10000, Loss: 0.0454


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9132/10000 [2:03:12<02:28,  5.86it/s]

Step 9131/10000, Loss: 0.0429
Step 9132/10000, Loss: 0.0444


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9134/10000 [2:03:13<02:30,  5.74it/s]

Step 9133/10000, Loss: 0.0381
Step 9134/10000, Loss: 0.0464


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9136/10000 [2:03:13<02:30,  5.76it/s]

Step 9135/10000, Loss: 0.0413
Step 9136/10000, Loss: 0.0370


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9138/10000 [2:03:13<02:26,  5.90it/s]

Step 9137/10000, Loss: 0.0389
Step 9138/10000, Loss: 0.0349


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9140/10000 [2:03:14<02:27,  5.83it/s]

Step 9139/10000, Loss: 0.0373
Step 9140/10000, Loss: 0.0402


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9142/10000 [2:03:14<02:28,  5.77it/s]

Step 9141/10000, Loss: 0.0370
Step 9142/10000, Loss: 0.0403


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9144/10000 [2:03:14<02:29,  5.73it/s]

Step 9143/10000, Loss: 0.0360
Step 9144/10000, Loss: 0.0363


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9146/10000 [2:03:15<02:25,  5.88it/s]

Step 9145/10000, Loss: 0.0343
Step 9146/10000, Loss: 0.0401


Training Progress:  91%|███████████████████████████████████████████████████▏    | 9148/10000 [2:03:15<02:26,  5.82it/s]

Step 9147/10000, Loss: 0.0393
Step 9148/10000, Loss: 0.0319


Training Progress:  92%|███████████████████████████████████████████████████▏    | 9150/10000 [2:03:15<02:26,  5.80it/s]

Step 9149/10000, Loss: 0.0372
Step 9150/10000, Loss: 0.0367


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9152/10000 [2:03:16<02:27,  5.73it/s]

Step 9151/10000, Loss: 0.0405
Step 9152/10000, Loss: 0.0403


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9154/10000 [2:03:16<02:24,  5.87it/s]

Step 9153/10000, Loss: 0.0307
Step 9154/10000, Loss: 0.0329


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9156/10000 [2:03:16<02:25,  5.81it/s]

Step 9155/10000, Loss: 0.0360
Step 9156/10000, Loss: 0.0347


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9158/10000 [2:03:17<02:26,  5.75it/s]

Step 9157/10000, Loss: 0.0301
Step 9158/10000, Loss: 0.0410


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9160/10000 [2:03:17<02:23,  5.87it/s]

Step 9159/10000, Loss: 0.0333
Step 9160/10000, Loss: 0.0332


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9162/10000 [2:03:18<02:23,  5.83it/s]

Step 9161/10000, Loss: 0.0385
Step 9162/10000, Loss: 0.0395


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9164/10000 [2:03:18<02:26,  5.70it/s]

Step 9163/10000, Loss: 0.0343
Step 9164/10000, Loss: 0.0305


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9166/10000 [2:03:18<02:25,  5.71it/s]

Step 9165/10000, Loss: 0.0418
Step 9166/10000, Loss: 0.0396


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9168/10000 [2:03:19<02:21,  5.87it/s]

Step 9167/10000, Loss: 0.0530
Step 9168/10000, Loss: 0.0412


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9170/10000 [2:03:19<02:22,  5.82it/s]

Step 9169/10000, Loss: 0.0644
Step 9170/10000, Loss: 0.0421


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9172/10000 [2:03:19<02:24,  5.72it/s]

Step 9171/10000, Loss: 0.0331
Step 9172/10000, Loss: 0.0359


Training Progress:  92%|███████████████████████████████████████████████████▎    | 9174/10000 [2:03:20<02:24,  5.72it/s]

Step 9173/10000, Loss: 0.0351
Step 9174/10000, Loss: 0.0359


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9176/10000 [2:03:20<02:21,  5.82it/s]

Step 9175/10000, Loss: 0.0333
Step 9176/10000, Loss: 0.0401


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9178/10000 [2:03:20<02:23,  5.73it/s]

Step 9177/10000, Loss: 0.0330
Step 9178/10000, Loss: 0.0344


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9180/10000 [2:03:21<02:21,  5.78it/s]

Step 9179/10000, Loss: 0.0361
Step 9180/10000, Loss: 0.0321


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9182/10000 [2:03:21<02:23,  5.72it/s]

Step 9181/10000, Loss: 0.0355
Step 9182/10000, Loss: 0.0312


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9184/10000 [2:03:21<02:19,  5.86it/s]

Step 9183/10000, Loss: 0.0351
Step 9184/10000, Loss: 0.0395


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9186/10000 [2:03:22<02:20,  5.79it/s]

Step 9185/10000, Loss: 0.0343
Step 9186/10000, Loss: 0.0329


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9188/10000 [2:03:22<02:20,  5.77it/s]

Step 9187/10000, Loss: 0.0378
Step 9188/10000, Loss: 0.0300


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9190/10000 [2:03:22<02:20,  5.76it/s]

Step 9189/10000, Loss: 0.0349
Step 9190/10000, Loss: 0.0357


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9192/10000 [2:03:23<02:17,  5.87it/s]

Step 9191/10000, Loss: 0.0333
Step 9192/10000, Loss: 0.0378


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9194/10000 [2:03:23<02:18,  5.82it/s]

Step 9193/10000, Loss: 0.0347
Step 9194/10000, Loss: 0.0333


Training Progress:  92%|███████████████████████████████████████████████████▍    | 9196/10000 [2:03:23<02:18,  5.79it/s]

Step 9195/10000, Loss: 0.0387
Step 9196/10000, Loss: 0.0318


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9198/10000 [2:03:24<02:19,  5.74it/s]

Step 9197/10000, Loss: 0.0335
Step 9198/10000, Loss: 0.0355


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9200/10000 [2:03:24<02:18,  5.77it/s]

Step 9199/10000, Loss: 0.0285
Step 9200/10000, Loss: 0.0416


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9202/10000 [2:03:24<02:16,  5.86it/s]

Step 9201/10000, Loss: 0.0317
Step 9202/10000, Loss: 0.0322


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9204/10000 [2:03:25<02:18,  5.74it/s]

Step 9203/10000, Loss: 0.0352
Step 9204/10000, Loss: 0.0340


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9206/10000 [2:03:25<02:19,  5.71it/s]

Step 9205/10000, Loss: 0.0366
Step 9206/10000, Loss: 0.0327


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9208/10000 [2:03:25<02:15,  5.86it/s]

Step 9207/10000, Loss: 0.0317
Step 9208/10000, Loss: 0.0356


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9210/10000 [2:03:26<02:15,  5.82it/s]

Step 9209/10000, Loss: 0.0380
Step 9210/10000, Loss: 0.0489


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9212/10000 [2:03:26<02:16,  5.78it/s]

Step 9211/10000, Loss: 0.0464
Step 9212/10000, Loss: 0.0507


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9214/10000 [2:03:27<02:17,  5.73it/s]

Step 9213/10000, Loss: 0.0459
Step 9214/10000, Loss: 0.0432


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9216/10000 [2:03:27<02:13,  5.86it/s]

Step 9215/10000, Loss: 0.0398
Step 9216/10000, Loss: 0.0450


Training Progress:  92%|███████████████████████████████████████████████████▌    | 9218/10000 [2:03:27<02:16,  5.71it/s]

Step 9217/10000, Loss: 0.0379
Step 9218/10000, Loss: 0.0354


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9220/10000 [2:03:28<02:15,  5.77it/s]

Step 9219/10000, Loss: 0.0375
Step 9220/10000, Loss: 0.0359


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9222/10000 [2:03:28<02:16,  5.71it/s]

Step 9221/10000, Loss: 0.0358
Step 9222/10000, Loss: 0.0390


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9224/10000 [2:03:28<02:13,  5.82it/s]

Step 9223/10000, Loss: 0.0392
Step 9224/10000, Loss: 0.0435


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9226/10000 [2:03:29<02:12,  5.84it/s]

Step 9225/10000, Loss: 0.0376
Step 9226/10000, Loss: 0.0380


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9228/10000 [2:03:29<02:14,  5.75it/s]

Step 9227/10000, Loss: 0.0375
Step 9228/10000, Loss: 0.0352


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9230/10000 [2:03:29<02:15,  5.70it/s]

Step 9229/10000, Loss: 0.0344
Step 9230/10000, Loss: 0.0300


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9232/10000 [2:03:30<02:14,  5.70it/s]

Step 9231/10000, Loss: 0.0352
Step 9232/10000, Loss: 0.0288


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9234/10000 [2:03:30<02:11,  5.84it/s]

Step 9233/10000, Loss: 0.0337
Step 9234/10000, Loss: 0.0350


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9236/10000 [2:03:30<02:11,  5.83it/s]

Step 9235/10000, Loss: 0.0286
Step 9236/10000, Loss: 0.0349


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9238/10000 [2:03:31<02:12,  5.77it/s]

Step 9237/10000, Loss: 0.0384
Step 9238/10000, Loss: 0.0316


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9240/10000 [2:03:31<02:12,  5.72it/s]

Step 9239/10000, Loss: 0.0260
Step 9240/10000, Loss: 0.0372


Training Progress:  92%|███████████████████████████████████████████████████▋    | 9241/10000 [2:03:31<02:11,  5.78it/s]

Step 9241/10000, Loss: 0.0278


Training Progress:  92%|███████████████████████████████████████████████████▊    | 9243/10000 [2:03:32<02:11,  5.77it/s]

Step 9242/10000, Loss: 0.0295
Step 9243/10000, Loss: 0.0341


Training Progress:  92%|███████████████████████████████████████████████████▊    | 9245/10000 [2:03:32<02:11,  5.73it/s]

Step 9244/10000, Loss: 0.0337
Step 9245/10000, Loss: 0.0300


Training Progress:  92%|███████████████████████████████████████████████████▊    | 9247/10000 [2:03:32<02:11,  5.71it/s]

Step 9246/10000, Loss: 0.0315
Step 9247/10000, Loss: 0.0411


Training Progress:  92%|███████████████████████████████████████████████████▊    | 9249/10000 [2:03:33<02:08,  5.85it/s]

Step 9248/10000, Loss: 0.0366
Step 9249/10000, Loss: 0.0510


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9251/10000 [2:03:33<02:08,  5.82it/s]

Step 9250/10000, Loss: 0.0358
Step 9251/10000, Loss: 0.0617


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9253/10000 [2:03:33<02:10,  5.72it/s]

Step 9252/10000, Loss: 0.0473
Step 9253/10000, Loss: 0.0366


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9255/10000 [2:03:34<02:10,  5.71it/s]

Step 9254/10000, Loss: 0.0325
Step 9255/10000, Loss: 0.0381


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9257/10000 [2:03:34<02:06,  5.85it/s]

Step 9256/10000, Loss: 0.0343
Step 9257/10000, Loss: 0.0352


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9259/10000 [2:03:34<02:07,  5.83it/s]

Step 9258/10000, Loss: 0.0407
Step 9259/10000, Loss: 0.0293


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9261/10000 [2:03:35<02:07,  5.79it/s]

Step 9260/10000, Loss: 0.0353
Step 9261/10000, Loss: 0.0339


Training Progress:  93%|███████████████████████████████████████████████████▊    | 9263/10000 [2:03:35<02:08,  5.74it/s]

Step 9262/10000, Loss: 0.0320
Step 9263/10000, Loss: 0.0328


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9265/10000 [2:03:35<02:08,  5.71it/s]

Step 9264/10000, Loss: 0.0288
Step 9265/10000, Loss: 0.0341


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9267/10000 [2:03:36<02:05,  5.84it/s]

Step 9266/10000, Loss: 0.0411
Step 9267/10000, Loss: 0.0306


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9269/10000 [2:03:36<02:05,  5.81it/s]

Step 9268/10000, Loss: 0.0313
Step 9269/10000, Loss: 0.0379


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9271/10000 [2:03:36<02:06,  5.78it/s]

Step 9270/10000, Loss: 0.0296
Step 9271/10000, Loss: 0.0378


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9273/10000 [2:03:37<02:03,  5.88it/s]

Step 9272/10000, Loss: 0.0323
Step 9273/10000, Loss: 0.0324


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9275/10000 [2:03:37<02:06,  5.73it/s]

Step 9274/10000, Loss: 0.0366
Step 9275/10000, Loss: 0.0317


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9277/10000 [2:03:37<02:05,  5.76it/s]

Step 9276/10000, Loss: 0.0348
Step 9277/10000, Loss: 0.0376


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9279/10000 [2:03:38<02:04,  5.77it/s]

Step 9278/10000, Loss: 0.0285
Step 9279/10000, Loss: 0.0333


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9281/10000 [2:03:38<02:04,  5.77it/s]

Step 9280/10000, Loss: 0.0336
Step 9281/10000, Loss: 0.0262


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9283/10000 [2:03:38<02:04,  5.77it/s]

Step 9282/10000, Loss: 0.0361
Step 9283/10000, Loss: 0.0274


Training Progress:  93%|███████████████████████████████████████████████████▉    | 9285/10000 [2:03:39<02:02,  5.83it/s]

Step 9284/10000, Loss: 0.0288
Step 9285/10000, Loss: 0.0361


Training Progress:  93%|████████████████████████████████████████████████████    | 9287/10000 [2:03:39<02:04,  5.72it/s]

Step 9286/10000, Loss: 0.0360
Step 9287/10000, Loss: 0.0325


Training Progress:  93%|████████████████████████████████████████████████████    | 9289/10000 [2:03:40<02:03,  5.75it/s]

Step 9288/10000, Loss: 0.0318
Step 9289/10000, Loss: 0.0319


Training Progress:  93%|████████████████████████████████████████████████████    | 9291/10000 [2:03:40<02:03,  5.76it/s]

Step 9290/10000, Loss: 0.0300
Step 9291/10000, Loss: 0.0322


Training Progress:  93%|████████████████████████████████████████████████████    | 9293/10000 [2:03:40<02:02,  5.76it/s]

Step 9292/10000, Loss: 0.0367
Step 9293/10000, Loss: 0.0378


Training Progress:  93%|████████████████████████████████████████████████████    | 9295/10000 [2:03:41<02:02,  5.76it/s]

Step 9294/10000, Loss: 0.0442
Step 9295/10000, Loss: 0.0350


Training Progress:  93%|████████████████████████████████████████████████████    | 9297/10000 [2:03:41<02:01,  5.78it/s]

Step 9296/10000, Loss: 0.0378
Step 9297/10000, Loss: 0.0292


Training Progress:  93%|████████████████████████████████████████████████████    | 9299/10000 [2:03:41<01:59,  5.89it/s]

Step 9298/10000, Loss: 0.0467
Step 9299/10000, Loss: 0.0351


Training Progress:  93%|████████████████████████████████████████████████████    | 9301/10000 [2:03:42<01:59,  5.86it/s]

Step 9300/10000, Loss: 0.0310
Step 9301/10000, Loss: 0.0310


Training Progress:  93%|████████████████████████████████████████████████████    | 9303/10000 [2:03:42<01:59,  5.82it/s]

Step 9302/10000, Loss: 0.0285
Step 9303/10000, Loss: 0.0335


Training Progress:  93%|████████████████████████████████████████████████████    | 9305/10000 [2:03:42<02:01,  5.72it/s]

Step 9304/10000, Loss: 0.0323
Step 9305/10000, Loss: 0.0326


Training Progress:  93%|████████████████████████████████████████████████████    | 9307/10000 [2:03:43<02:01,  5.71it/s]

Step 9306/10000, Loss: 0.0347
Step 9307/10000, Loss: 0.0365


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9309/10000 [2:03:43<01:58,  5.85it/s]

Step 9308/10000, Loss: 0.0362
Step 9309/10000, Loss: 0.0281


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9311/10000 [2:03:43<01:59,  5.77it/s]

Step 9310/10000, Loss: 0.0292
Step 9311/10000, Loss: 0.0339


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9313/10000 [2:03:44<01:59,  5.75it/s]

Step 9312/10000, Loss: 0.0275
Step 9313/10000, Loss: 0.0366


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9315/10000 [2:03:44<01:59,  5.71it/s]

Step 9314/10000, Loss: 0.0302
Step 9315/10000, Loss: 0.0263


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9317/10000 [2:03:44<01:56,  5.85it/s]

Step 9316/10000, Loss: 0.0352
Step 9317/10000, Loss: 0.0269


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9319/10000 [2:03:45<01:57,  5.81it/s]

Step 9318/10000, Loss: 0.0305
Step 9319/10000, Loss: 0.0368


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9321/10000 [2:03:45<01:58,  5.75it/s]

Step 9320/10000, Loss: 0.0309
Step 9321/10000, Loss: 0.0258


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9323/10000 [2:03:45<01:58,  5.74it/s]

Step 9322/10000, Loss: 0.0326
Step 9323/10000, Loss: 0.0283


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9325/10000 [2:03:46<01:55,  5.87it/s]

Step 9324/10000, Loss: 0.0286
Step 9325/10000, Loss: 0.0341


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9327/10000 [2:03:46<01:55,  5.82it/s]

Step 9326/10000, Loss: 0.0336
Step 9327/10000, Loss: 0.0301


Training Progress:  93%|████████████████████████████████████████████████████▏   | 9329/10000 [2:03:46<01:55,  5.79it/s]

Step 9328/10000, Loss: 0.0295
Step 9329/10000, Loss: 0.0359


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9331/10000 [2:03:47<01:56,  5.73it/s]

Step 9330/10000, Loss: 0.0337
Step 9331/10000, Loss: 0.0435


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9333/10000 [2:03:47<01:54,  5.81it/s]

Step 9332/10000, Loss: 0.0343
Step 9333/10000, Loss: 0.0453


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9335/10000 [2:03:47<01:54,  5.82it/s]

Step 9334/10000, Loss: 0.0366
Step 9335/10000, Loss: 0.0360


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9337/10000 [2:03:48<01:55,  5.74it/s]

Step 9336/10000, Loss: 0.0408
Step 9337/10000, Loss: 0.0348


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9339/10000 [2:03:48<01:54,  5.75it/s]

Step 9338/10000, Loss: 0.0296
Step 9339/10000, Loss: 0.0290


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9341/10000 [2:03:49<01:55,  5.71it/s]

Step 9340/10000, Loss: 0.0350
Step 9341/10000, Loss: 0.0283


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9343/10000 [2:03:49<01:52,  5.85it/s]

Step 9342/10000, Loss: 0.0295
Step 9343/10000, Loss: 0.0342


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9345/10000 [2:03:49<01:54,  5.70it/s]

Step 9344/10000, Loss: 0.0316
Step 9345/10000, Loss: 0.0306


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9347/10000 [2:03:50<01:52,  5.82it/s]

Step 9346/10000, Loss: 0.0269
Step 9347/10000, Loss: 0.0365


Training Progress:  93%|████████████████████████████████████████████████████▎   | 9349/10000 [2:03:50<01:53,  5.76it/s]

Step 9348/10000, Loss: 0.0373
Step 9349/10000, Loss: 0.0322


Training Progress:  94%|████████████████████████████████████████████████████▎   | 9351/10000 [2:03:50<01:51,  5.83it/s]

Step 9350/10000, Loss: 0.0326
Step 9351/10000, Loss: 0.0352


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9353/10000 [2:03:51<01:50,  5.84it/s]

Step 9352/10000, Loss: 0.0283
Step 9353/10000, Loss: 0.0346


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9355/10000 [2:03:51<01:51,  5.81it/s]

Step 9354/10000, Loss: 0.0339
Step 9355/10000, Loss: 0.0320


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9357/10000 [2:03:51<01:52,  5.72it/s]

Step 9356/10000, Loss: 0.0367
Step 9357/10000, Loss: 0.0329


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9359/10000 [2:03:52<01:52,  5.71it/s]

Step 9358/10000, Loss: 0.0314
Step 9359/10000, Loss: 0.0358


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9361/10000 [2:03:52<01:52,  5.70it/s]

Step 9360/10000, Loss: 0.0296
Step 9361/10000, Loss: 0.0355


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9363/10000 [2:03:52<01:51,  5.70it/s]

Step 9362/10000, Loss: 0.0357
Step 9363/10000, Loss: 0.0240


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9365/10000 [2:03:53<01:48,  5.84it/s]

Step 9364/10000, Loss: 0.0376
Step 9365/10000, Loss: 0.0285


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9367/10000 [2:03:53<01:51,  5.69it/s]

Step 9366/10000, Loss: 0.0284
Step 9367/10000, Loss: 0.0350


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9369/10000 [2:03:53<01:50,  5.72it/s]

Step 9368/10000, Loss: 0.0329
Step 9369/10000, Loss: 0.0328


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9371/10000 [2:03:54<01:50,  5.71it/s]

Step 9370/10000, Loss: 0.0288
Step 9371/10000, Loss: 0.0310


Training Progress:  94%|████████████████████████████████████████████████████▍   | 9373/10000 [2:03:54<01:47,  5.85it/s]

Step 9372/10000, Loss: 0.0311
Step 9373/10000, Loss: 0.0296


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9375/10000 [2:03:54<01:47,  5.80it/s]

Step 9374/10000, Loss: 0.0307
Step 9375/10000, Loss: 0.0302


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9377/10000 [2:03:55<01:48,  5.75it/s]

Step 9376/10000, Loss: 0.0366
Step 9377/10000, Loss: 0.0263


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9379/10000 [2:03:55<01:48,  5.73it/s]

Step 9378/10000, Loss: 0.0375
Step 9379/10000, Loss: 0.0344


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9381/10000 [2:03:55<01:45,  5.86it/s]

Step 9380/10000, Loss: 0.0555
Step 9381/10000, Loss: 0.0314


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9383/10000 [2:03:56<01:46,  5.81it/s]

Step 9382/10000, Loss: 0.0260
Step 9383/10000, Loss: 0.0267


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9385/10000 [2:03:56<01:46,  5.79it/s]

Step 9384/10000, Loss: 0.0254
Step 9385/10000, Loss: 0.0269


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9387/10000 [2:03:56<01:46,  5.78it/s]

Step 9386/10000, Loss: 0.0279
Step 9387/10000, Loss: 0.0326


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9389/10000 [2:03:57<01:46,  5.72it/s]

Step 9388/10000, Loss: 0.0351
Step 9389/10000, Loss: 0.0311


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9391/10000 [2:03:57<01:46,  5.70it/s]

Step 9390/10000, Loss: 0.0316
Step 9391/10000, Loss: 0.0285


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9393/10000 [2:03:58<01:45,  5.76it/s]

Step 9392/10000, Loss: 0.0313
Step 9393/10000, Loss: 0.0341


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9395/10000 [2:03:58<01:43,  5.82it/s]

Step 9394/10000, Loss: 0.0274
Step 9395/10000, Loss: 0.0348


Training Progress:  94%|████████████████████████████████████████████████████▌   | 9397/10000 [2:03:58<01:45,  5.71it/s]

Step 9396/10000, Loss: 0.0287
Step 9397/10000, Loss: 0.0281


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9399/10000 [2:03:59<01:43,  5.80it/s]

Step 9398/10000, Loss: 0.0348
Step 9399/10000, Loss: 0.0299


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9401/10000 [2:03:59<01:44,  5.73it/s]

Step 9400/10000, Loss: 0.0302
Step 9401/10000, Loss: 0.0356


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9402/10000 [2:03:59<01:43,  5.79it/s]

Step 9402/10000, Loss: 0.0335
Step 9403/10000, Loss: 0.0223


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9403/10000 [2:04:16<50:20,  5.06s/it]


Checkpoint saved: checkpoints\best\checkpoint_step9403_loss0.0223_20250117_145603.pt

New best loss: 0.0223


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9405/10000 [2:04:16<25:50,  2.61s/it]

Step 9404/10000, Loss: 0.0322
Step 9405/10000, Loss: 0.0239


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9407/10000 [2:04:16<13:27,  1.36s/it]

Step 9406/10000, Loss: 0.0278
Step 9407/10000, Loss: 0.0318


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9409/10000 [2:04:17<07:27,  1.32it/s]

Step 9408/10000, Loss: 0.0327
Step 9409/10000, Loss: 0.0269


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9411/10000 [2:04:17<04:31,  2.17it/s]

Step 9410/10000, Loss: 0.0263
Step 9411/10000, Loss: 0.0309


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9413/10000 [2:04:17<03:02,  3.22it/s]

Step 9412/10000, Loss: 0.0330
Step 9413/10000, Loss: 0.0326


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9415/10000 [2:04:18<02:20,  4.16it/s]

Step 9414/10000, Loss: 0.0250
Step 9415/10000, Loss: 0.0357


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9417/10000 [2:04:18<02:01,  4.81it/s]

Step 9416/10000, Loss: 0.0320
Step 9417/10000, Loss: 0.0292


Training Progress:  94%|████████████████████████████████████████████████████▋   | 9419/10000 [2:04:18<01:48,  5.35it/s]

Step 9418/10000, Loss: 0.0303
Step 9419/10000, Loss: 0.0401


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9421/10000 [2:04:19<01:44,  5.55it/s]

Step 9420/10000, Loss: 0.0311
Step 9421/10000, Loss: 0.0275


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9423/10000 [2:04:19<01:42,  5.63it/s]

Step 9422/10000, Loss: 0.0363
Step 9423/10000, Loss: 0.0249


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9425/10000 [2:04:20<01:41,  5.64it/s]

Step 9424/10000, Loss: 0.0291
Step 9425/10000, Loss: 0.0288


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9427/10000 [2:04:20<01:38,  5.82it/s]

Step 9426/10000, Loss: 0.0298
Step 9427/10000, Loss: 0.0271


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9429/10000 [2:04:20<01:39,  5.71it/s]

Step 9428/10000, Loss: 0.0249
Step 9429/10000, Loss: 0.0289


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9431/10000 [2:04:21<01:39,  5.71it/s]

Step 9430/10000, Loss: 0.0326
Step 9431/10000, Loss: 0.0273


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9433/10000 [2:04:21<01:39,  5.73it/s]

Step 9432/10000, Loss: 0.0300
Step 9433/10000, Loss: 0.0327


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9435/10000 [2:04:21<01:37,  5.81it/s]

Step 9434/10000, Loss: 0.0247
Step 9435/10000, Loss: 0.0357


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9437/10000 [2:04:22<01:37,  5.77it/s]

Step 9436/10000, Loss: 0.0325
Step 9437/10000, Loss: 0.0311


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9439/10000 [2:04:22<01:37,  5.73it/s]

Step 9438/10000, Loss: 0.0349
Step 9439/10000, Loss: 0.0321


Training Progress:  94%|████████████████████████████████████████████████████▊   | 9441/10000 [2:04:22<01:35,  5.85it/s]

Step 9440/10000, Loss: 0.0331
Step 9441/10000, Loss: 0.0361


Training Progress:  94%|████████████████████████████████████████████████████▉   | 9443/10000 [2:04:23<01:36,  5.75it/s]

Step 9442/10000, Loss: 0.0271
Step 9443/10000, Loss: 0.0350


Training Progress:  94%|████████████████████████████████████████████████████▉   | 9445/10000 [2:04:23<01:36,  5.73it/s]

Step 9444/10000, Loss: 0.0321
Step 9445/10000, Loss: 0.0258


Training Progress:  94%|████████████████████████████████████████████████████▉   | 9447/10000 [2:04:23<01:34,  5.86it/s]

Step 9446/10000, Loss: 0.0310
Step 9447/10000, Loss: 0.0266


Training Progress:  94%|████████████████████████████████████████████████████▉   | 9449/10000 [2:04:24<01:35,  5.74it/s]

Step 9448/10000, Loss: 0.0257
Step 9449/10000, Loss: 0.0329


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9451/10000 [2:04:24<01:35,  5.76it/s]

Step 9450/10000, Loss: 0.0331
Step 9451/10000, Loss: 0.0279


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9453/10000 [2:04:24<01:34,  5.78it/s]

Step 9452/10000, Loss: 0.0284
Step 9453/10000, Loss: 0.0276


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9455/10000 [2:04:25<01:34,  5.78it/s]

Step 9454/10000, Loss: 0.0287
Step 9455/10000, Loss: 0.0293


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9457/10000 [2:04:25<01:33,  5.79it/s]

Step 9456/10000, Loss: 0.0297
Step 9457/10000, Loss: 0.0298


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9459/10000 [2:04:25<01:33,  5.79it/s]

Step 9458/10000, Loss: 0.0332
Step 9459/10000, Loss: 0.0279


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9461/10000 [2:04:26<01:32,  5.80it/s]

Step 9460/10000, Loss: 0.0327
Step 9461/10000, Loss: 0.0291


Training Progress:  95%|████████████████████████████████████████████████████▉   | 9463/10000 [2:04:26<01:31,  5.85it/s]

Step 9462/10000, Loss: 0.0371
Step 9463/10000, Loss: 0.0315


Training Progress:  95%|█████████████████████████████████████████████████████   | 9465/10000 [2:04:26<01:32,  5.78it/s]

Step 9464/10000, Loss: 0.0277
Step 9465/10000, Loss: 0.0265


Training Progress:  95%|█████████████████████████████████████████████████████   | 9467/10000 [2:04:27<01:32,  5.77it/s]

Step 9466/10000, Loss: 0.0250
Step 9467/10000, Loss: 0.0253


Training Progress:  95%|█████████████████████████████████████████████████████   | 9469/10000 [2:04:27<01:31,  5.80it/s]

Step 9468/10000, Loss: 0.0287
Step 9469/10000, Loss: 0.0292


Training Progress:  95%|█████████████████████████████████████████████████████   | 9471/10000 [2:04:27<01:31,  5.80it/s]

Step 9470/10000, Loss: 0.0283
Step 9471/10000, Loss: 0.0281


Training Progress:  95%|█████████████████████████████████████████████████████   | 9473/10000 [2:04:28<01:30,  5.80it/s]

Step 9472/10000, Loss: 0.0272
Step 9473/10000, Loss: 0.0246


Training Progress:  95%|█████████████████████████████████████████████████████   | 9475/10000 [2:04:28<01:29,  5.85it/s]

Step 9474/10000, Loss: 0.0265
Step 9475/10000, Loss: 0.0296


Training Progress:  95%|█████████████████████████████████████████████████████   | 9477/10000 [2:04:29<01:30,  5.78it/s]

Step 9476/10000, Loss: 0.0227
Step 9477/10000, Loss: 0.0288


Training Progress:  95%|█████████████████████████████████████████████████████   | 9479/10000 [2:04:29<01:29,  5.79it/s]

Step 9478/10000, Loss: 0.0251
Step 9479/10000, Loss: 0.0251


Training Progress:  95%|█████████████████████████████████████████████████████   | 9481/10000 [2:04:29<01:29,  5.81it/s]

Step 9480/10000, Loss: 0.0297
Step 9481/10000, Loss: 0.0260


Training Progress:  95%|█████████████████████████████████████████████████████   | 9483/10000 [2:04:30<01:29,  5.81it/s]

Step 9482/10000, Loss: 0.0273
Step 9483/10000, Loss: 0.0312


Training Progress:  95%|█████████████████████████████████████████████████████   | 9485/10000 [2:04:30<01:28,  5.81it/s]

Step 9484/10000, Loss: 0.0283
Step 9485/10000, Loss: 0.0251


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9487/10000 [2:04:30<01:28,  5.79it/s]

Step 9486/10000, Loss: 0.0290
Step 9487/10000, Loss: 0.0274


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9489/10000 [2:04:31<01:28,  5.79it/s]

Step 9488/10000, Loss: 0.0285
Step 9489/10000, Loss: 0.0315


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9491/10000 [2:04:31<01:27,  5.80it/s]

Step 9490/10000, Loss: 0.0346
Step 9491/10000, Loss: 0.0256


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9493/10000 [2:04:31<01:26,  5.86it/s]

Step 9492/10000, Loss: 0.0249
Step 9493/10000, Loss: 0.0380


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9495/10000 [2:04:32<01:27,  5.79it/s]

Step 9494/10000, Loss: 0.0307
Step 9495/10000, Loss: 0.0330


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9497/10000 [2:04:32<01:27,  5.78it/s]

Step 9496/10000, Loss: 0.0268
Step 9497/10000, Loss: 0.0373


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9499/10000 [2:04:32<01:26,  5.79it/s]

Step 9498/10000, Loss: 0.0299
Step 9499/10000, Loss: 0.0239


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9501/10000 [2:04:33<01:26,  5.79it/s]

Step 9500/10000, Loss: 0.0259
Step 9501/10000, Loss: 0.0354


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9503/10000 [2:04:33<01:26,  5.75it/s]

Step 9502/10000, Loss: 0.0256
Step 9503/10000, Loss: 0.0249


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9505/10000 [2:04:33<01:25,  5.77it/s]

Step 9504/10000, Loss: 0.0315
Step 9505/10000, Loss: 0.0239


Training Progress:  95%|█████████████████████████████████████████████████████▏  | 9507/10000 [2:04:34<01:25,  5.79it/s]

Step 9506/10000, Loss: 0.0264
Step 9507/10000, Loss: 0.0295


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9509/10000 [2:04:34<01:24,  5.79it/s]

Step 9508/10000, Loss: 0.0265
Step 9509/10000, Loss: 0.0246
Step 9510/10000, Loss: 0.0220


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9510/10000 [2:04:50<40:03,  4.90s/it]


Checkpoint saved: checkpoints\best\checkpoint_step9510_loss0.0220_20250117_145638.pt

New best loss: 0.0220


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9512/10000 [2:04:51<20:49,  2.56s/it]

Step 9511/10000, Loss: 0.0265
Step 9512/10000, Loss: 0.0314


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9514/10000 [2:04:51<10:53,  1.34s/it]

Step 9513/10000, Loss: 0.0279
Step 9514/10000, Loss: 0.0262


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9516/10000 [2:04:51<05:59,  1.35it/s]

Step 9515/10000, Loss: 0.0292
Step 9516/10000, Loss: 0.0247


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9518/10000 [2:04:52<03:38,  2.21it/s]

Step 9517/10000, Loss: 0.0317
Step 9518/10000, Loss: 0.0295


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9520/10000 [2:04:52<02:29,  3.21it/s]

Step 9519/10000, Loss: 0.0289
Step 9520/10000, Loss: 0.0323


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9522/10000 [2:04:52<01:55,  4.13it/s]

Step 9521/10000, Loss: 0.0297
Step 9522/10000, Loss: 0.0288


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9524/10000 [2:04:53<01:36,  4.91it/s]

Step 9523/10000, Loss: 0.0355
Step 9524/10000, Loss: 0.0272


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9526/10000 [2:04:53<01:29,  5.30it/s]

Step 9525/10000, Loss: 0.0306
Step 9526/10000, Loss: 0.0281


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9528/10000 [2:04:53<01:23,  5.65it/s]

Step 9527/10000, Loss: 0.0260
Step 9528/10000, Loss: 0.0472


Training Progress:  95%|█████████████████████████████████████████████████████▎  | 9530/10000 [2:04:54<01:22,  5.71it/s]

Step 9529/10000, Loss: 0.0351
Step 9530/10000, Loss: 0.0240


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9532/10000 [2:04:54<01:22,  5.70it/s]

Step 9531/10000, Loss: 0.0321
Step 9532/10000, Loss: 0.0319


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9534/10000 [2:04:54<01:19,  5.86it/s]

Step 9533/10000, Loss: 0.0288
Step 9534/10000, Loss: 0.0274


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9536/10000 [2:04:55<01:19,  5.82it/s]

Step 9535/10000, Loss: 0.0288
Step 9536/10000, Loss: 0.0298


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9538/10000 [2:04:55<01:20,  5.75it/s]

Step 9537/10000, Loss: 0.0274
Step 9538/10000, Loss: 0.0286


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9540/10000 [2:04:55<01:18,  5.87it/s]

Step 9539/10000, Loss: 0.0292
Step 9540/10000, Loss: 0.0352


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9542/10000 [2:04:56<01:18,  5.82it/s]

Step 9541/10000, Loss: 0.0348
Step 9542/10000, Loss: 0.0387


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9544/10000 [2:04:56<01:19,  5.76it/s]

Step 9543/10000, Loss: 0.0278
Step 9544/10000, Loss: 0.0328


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9546/10000 [2:04:57<01:19,  5.72it/s]

Step 9545/10000, Loss: 0.0303
Step 9546/10000, Loss: 0.0235


Training Progress:  95%|█████████████████████████████████████████████████████▍  | 9548/10000 [2:04:57<01:17,  5.85it/s]

Step 9547/10000, Loss: 0.0287
Step 9548/10000, Loss: 0.0240


Training Progress:  96%|█████████████████████████████████████████████████████▍  | 9550/10000 [2:04:57<01:17,  5.77it/s]

Step 9549/10000, Loss: 0.0279
Step 9550/10000, Loss: 0.0271


Training Progress:  96%|█████████████████████████████████████████████████████▍  | 9552/10000 [2:04:58<01:16,  5.88it/s]

Step 9551/10000, Loss: 0.0308
Step 9552/10000, Loss: 0.0322


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9554/10000 [2:04:58<01:16,  5.82it/s]

Step 9553/10000, Loss: 0.0277
Step 9554/10000, Loss: 0.0314


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9556/10000 [2:04:58<01:17,  5.72it/s]

Step 9555/10000, Loss: 0.0268
Step 9556/10000, Loss: 0.0249


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9557/10000 [2:04:58<01:16,  5.82it/s]

Step 9557/10000, Loss: 0.0297
Step 9558/10000, Loss: 0.0200


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9558/10000 [2:05:16<40:31,  5.50s/it]


Checkpoint saved: checkpoints\best\checkpoint_step9558_loss0.0200_20250117_145702.pt

New best loss: 0.0200


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9560/10000 [2:05:17<20:54,  2.85s/it]

Step 9559/10000, Loss: 0.0289
Step 9560/10000, Loss: 0.0235


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9562/10000 [2:05:17<10:50,  1.49s/it]

Step 9561/10000, Loss: 0.0251
Step 9562/10000, Loss: 0.0307


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9564/10000 [2:05:18<05:55,  1.23it/s]

Step 9563/10000, Loss: 0.0226
Step 9564/10000, Loss: 0.0219


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9566/10000 [2:05:18<03:31,  2.06it/s]

Step 9565/10000, Loss: 0.0272
Step 9566/10000, Loss: 0.0232


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9568/10000 [2:05:18<02:21,  3.06it/s]

Step 9567/10000, Loss: 0.0224
Step 9568/10000, Loss: 0.0287


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9570/10000 [2:05:19<01:45,  4.07it/s]

Step 9569/10000, Loss: 0.0252
Step 9570/10000, Loss: 0.0249


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9572/10000 [2:05:19<01:29,  4.77it/s]

Step 9571/10000, Loss: 0.0282
Step 9572/10000, Loss: 0.0320


Training Progress:  96%|█████████████████████████████████████████████████████▌  | 9574/10000 [2:05:19<01:19,  5.33it/s]

Step 9573/10000, Loss: 0.0268
Step 9574/10000, Loss: 0.0240


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9576/10000 [2:05:20<01:16,  5.55it/s]

Step 9575/10000, Loss: 0.0292
Step 9576/10000, Loss: 0.0326


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9578/10000 [2:05:20<01:15,  5.61it/s]

Step 9577/10000, Loss: 0.0338
Step 9578/10000, Loss: 0.0246


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9580/10000 [2:05:20<01:12,  5.81it/s]

Step 9579/10000, Loss: 0.0353
Step 9580/10000, Loss: 0.0316


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9582/10000 [2:05:21<01:12,  5.78it/s]

Step 9581/10000, Loss: 0.0236
Step 9582/10000, Loss: 0.0214


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9584/10000 [2:05:21<01:10,  5.90it/s]

Step 9583/10000, Loss: 0.0303
Step 9584/10000, Loss: 0.0245


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9586/10000 [2:05:21<01:11,  5.83it/s]

Step 9585/10000, Loss: 0.0281
Step 9586/10000, Loss: 0.0296


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9588/10000 [2:05:22<01:11,  5.72it/s]

Step 9587/10000, Loss: 0.0200
Step 9588/10000, Loss: 0.0237


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9590/10000 [2:05:22<01:10,  5.83it/s]

Step 9589/10000, Loss: 0.0272
Step 9590/10000, Loss: 0.0265


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9592/10000 [2:05:23<01:10,  5.76it/s]

Step 9591/10000, Loss: 0.0230
Step 9592/10000, Loss: 0.0244


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9594/10000 [2:05:23<01:10,  5.77it/s]

Step 9593/10000, Loss: 0.0253
Step 9594/10000, Loss: 0.0307


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9596/10000 [2:05:23<01:09,  5.82it/s]

Step 9595/10000, Loss: 0.0275
Step 9596/10000, Loss: 0.0248


Training Progress:  96%|█████████████████████████████████████████████████████▋  | 9598/10000 [2:05:24<01:09,  5.80it/s]

Step 9597/10000, Loss: 0.0338
Step 9598/10000, Loss: 0.0223


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9600/10000 [2:05:24<01:07,  5.90it/s]

Step 9599/10000, Loss: 0.0305
Step 9600/10000, Loss: 0.0278


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9602/10000 [2:05:24<01:09,  5.74it/s]

Step 9601/10000, Loss: 0.0266
Step 9602/10000, Loss: 0.0331


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9604/10000 [2:05:25<01:08,  5.79it/s]

Step 9603/10000, Loss: 0.0279
Step 9604/10000, Loss: 0.0245


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9606/10000 [2:05:25<01:06,  5.90it/s]

Step 9605/10000, Loss: 0.0321
Step 9606/10000, Loss: 0.0239


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9608/10000 [2:05:25<01:07,  5.80it/s]

Step 9607/10000, Loss: 0.0262
Step 9608/10000, Loss: 0.0295


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9610/10000 [2:05:26<01:07,  5.76it/s]

Step 9609/10000, Loss: 0.0241
Step 9610/10000, Loss: 0.0347


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9612/10000 [2:05:26<01:05,  5.88it/s]

Step 9611/10000, Loss: 0.0241
Step 9612/10000, Loss: 0.0266


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9614/10000 [2:05:26<01:07,  5.75it/s]

Step 9613/10000, Loss: 0.0325
Step 9614/10000, Loss: 0.0306


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9616/10000 [2:05:27<01:07,  5.73it/s]

Step 9615/10000, Loss: 0.0300
Step 9616/10000, Loss: 0.0261


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9618/10000 [2:05:27<01:05,  5.87it/s]

Step 9617/10000, Loss: 0.0265
Step 9618/10000, Loss: 0.0256


Training Progress:  96%|█████████████████████████████████████████████████████▊  | 9620/10000 [2:05:27<01:06,  5.74it/s]

Step 9619/10000, Loss: 0.0266
Step 9620/10000, Loss: 0.0401


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9622/10000 [2:05:28<01:03,  5.91it/s]

Step 9621/10000, Loss: 0.0394
Step 9622/10000, Loss: 0.0366


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9624/10000 [2:05:28<01:04,  5.80it/s]

Step 9623/10000, Loss: 0.0300
Step 9624/10000, Loss: 0.0302


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9626/10000 [2:05:28<01:05,  5.74it/s]

Step 9625/10000, Loss: 0.0240
Step 9626/10000, Loss: 0.0398


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9628/10000 [2:05:29<01:04,  5.79it/s]

Step 9627/10000, Loss: 0.0313
Step 9628/10000, Loss: 0.0256


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9630/10000 [2:05:29<01:03,  5.80it/s]

Step 9629/10000, Loss: 0.0275
Step 9630/10000, Loss: 0.0265


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9632/10000 [2:05:29<01:02,  5.87it/s]

Step 9631/10000, Loss: 0.0272
Step 9632/10000, Loss: 0.0281


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9634/10000 [2:05:30<01:02,  5.86it/s]

Step 9633/10000, Loss: 0.0247
Step 9634/10000, Loss: 0.0305


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9636/10000 [2:05:30<01:02,  5.78it/s]

Step 9635/10000, Loss: 0.0274
Step 9636/10000, Loss: 0.0299


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9638/10000 [2:05:30<01:01,  5.90it/s]

Step 9637/10000, Loss: 0.0281
Step 9638/10000, Loss: 0.0247


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9640/10000 [2:05:31<01:02,  5.79it/s]

Step 9639/10000, Loss: 0.0271
Step 9640/10000, Loss: 0.0241


Training Progress:  96%|█████████████████████████████████████████████████████▉  | 9642/10000 [2:05:31<01:01,  5.80it/s]

Step 9641/10000, Loss: 0.0312
Step 9642/10000, Loss: 0.0261


Training Progress:  96%|██████████████████████████████████████████████████████  | 9644/10000 [2:05:31<01:00,  5.84it/s]

Step 9643/10000, Loss: 0.0254
Step 9644/10000, Loss: 0.0298


Training Progress:  96%|██████████████████████████████████████████████████████  | 9646/10000 [2:05:32<01:01,  5.77it/s]

Step 9645/10000, Loss: 0.0240
Step 9646/10000, Loss: 0.0238


Training Progress:  96%|██████████████████████████████████████████████████████  | 9648/10000 [2:05:32<00:59,  5.88it/s]

Step 9647/10000, Loss: 0.0252
Step 9648/10000, Loss: 0.0238


Training Progress:  96%|██████████████████████████████████████████████████████  | 9650/10000 [2:05:32<01:00,  5.83it/s]

Step 9649/10000, Loss: 0.0225
Step 9650/10000, Loss: 0.0267


Training Progress:  97%|██████████████████████████████████████████████████████  | 9652/10000 [2:05:33<01:00,  5.76it/s]

Step 9651/10000, Loss: 0.0233
Step 9652/10000, Loss: 0.0261


Training Progress:  97%|██████████████████████████████████████████████████████  | 9654/10000 [2:05:33<00:58,  5.88it/s]

Step 9653/10000, Loss: 0.0278
Step 9654/10000, Loss: 0.0309


Training Progress:  97%|██████████████████████████████████████████████████████  | 9656/10000 [2:05:34<00:59,  5.77it/s]

Step 9655/10000, Loss: 0.0240
Step 9656/10000, Loss: 0.0225


Training Progress:  97%|██████████████████████████████████████████████████████  | 9658/10000 [2:05:34<00:59,  5.73it/s]

Step 9657/10000, Loss: 0.0290
Step 9658/10000, Loss: 0.0274


Training Progress:  97%|██████████████████████████████████████████████████████  | 9660/10000 [2:05:34<00:59,  5.76it/s]

Step 9659/10000, Loss: 0.0342
Step 9660/10000, Loss: 0.0235


Training Progress:  97%|██████████████████████████████████████████████████████  | 9662/10000 [2:05:35<00:58,  5.79it/s]

Step 9661/10000, Loss: 0.0356
Step 9662/10000, Loss: 0.0303


Training Progress:  97%|██████████████████████████████████████████████████████  | 9664/10000 [2:05:35<00:57,  5.89it/s]

Step 9663/10000, Loss: 0.0229
Step 9664/10000, Loss: 0.0271


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9666/10000 [2:05:35<00:57,  5.76it/s]

Step 9665/10000, Loss: 0.0336
Step 9666/10000, Loss: 0.0261


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9668/10000 [2:05:36<00:57,  5.81it/s]

Step 9667/10000, Loss: 0.0240
Step 9668/10000, Loss: 0.0318


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9670/10000 [2:05:36<00:55,  5.90it/s]

Step 9669/10000, Loss: 0.0224
Step 9670/10000, Loss: 0.0232


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9672/10000 [2:05:36<00:56,  5.76it/s]

Step 9671/10000, Loss: 0.0293
Step 9672/10000, Loss: 0.0261


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9674/10000 [2:05:37<00:56,  5.74it/s]

Step 9673/10000, Loss: 0.0294
Step 9674/10000, Loss: 0.0246


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9676/10000 [2:05:37<00:55,  5.87it/s]

Step 9675/10000, Loss: 0.0291
Step 9676/10000, Loss: 0.0326


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9678/10000 [2:05:37<00:55,  5.76it/s]

Step 9677/10000, Loss: 0.0297
Step 9678/10000, Loss: 0.0251


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9680/10000 [2:05:38<00:54,  5.89it/s]

Step 9679/10000, Loss: 0.0334
Step 9680/10000, Loss: 0.0267


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9682/10000 [2:05:38<00:54,  5.82it/s]

Step 9681/10000, Loss: 0.0288
Step 9682/10000, Loss: 0.0276


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9684/10000 [2:05:38<00:54,  5.78it/s]

Step 9683/10000, Loss: 0.0269
Step 9684/10000, Loss: 0.0314


Training Progress:  97%|██████████████████████████████████████████████████████▏ | 9686/10000 [2:05:39<00:53,  5.87it/s]

Step 9685/10000, Loss: 0.0267
Step 9686/10000, Loss: 0.0242


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9688/10000 [2:05:39<00:53,  5.82it/s]

Step 9687/10000, Loss: 0.0333
Step 9688/10000, Loss: 0.0248


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9690/10000 [2:05:39<00:53,  5.75it/s]

Step 9689/10000, Loss: 0.0234
Step 9690/10000, Loss: 0.0307


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9692/10000 [2:05:40<00:53,  5.76it/s]

Step 9691/10000, Loss: 0.0213
Step 9692/10000, Loss: 0.0299


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9694/10000 [2:05:40<00:52,  5.82it/s]

Step 9693/10000, Loss: 0.0231
Step 9694/10000, Loss: 0.0233


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9696/10000 [2:05:40<00:52,  5.77it/s]

Step 9695/10000, Loss: 0.0331
Step 9696/10000, Loss: 0.0327


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9698/10000 [2:05:41<00:51,  5.88it/s]

Step 9697/10000, Loss: 0.0286
Step 9698/10000, Loss: 0.0258


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9700/10000 [2:05:41<00:51,  5.82it/s]

Step 9699/10000, Loss: 0.0271
Step 9700/10000, Loss: 0.0298


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9702/10000 [2:05:41<00:51,  5.75it/s]

Step 9701/10000, Loss: 0.0251
Step 9702/10000, Loss: 0.0312


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9704/10000 [2:05:42<00:50,  5.87it/s]

Step 9703/10000, Loss: 0.0376
Step 9704/10000, Loss: 0.0293


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9706/10000 [2:05:42<00:50,  5.77it/s]

Step 9705/10000, Loss: 0.0254
Step 9706/10000, Loss: 0.0280


Training Progress:  97%|██████████████████████████████████████████████████████▎ | 9708/10000 [2:05:42<00:49,  5.89it/s]

Step 9707/10000, Loss: 0.0251
Step 9708/10000, Loss: 0.0344


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9710/10000 [2:05:43<00:49,  5.82it/s]

Step 9709/10000, Loss: 0.0290
Step 9710/10000, Loss: 0.0225


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9712/10000 [2:05:43<00:50,  5.75it/s]

Step 9711/10000, Loss: 0.0280
Step 9712/10000, Loss: 0.0239


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9714/10000 [2:05:43<00:48,  5.87it/s]

Step 9713/10000, Loss: 0.0277
Step 9714/10000, Loss: 0.0297


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9716/10000 [2:05:44<00:48,  5.82it/s]

Step 9715/10000, Loss: 0.0310
Step 9716/10000, Loss: 0.0333


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9718/10000 [2:05:44<00:49,  5.75it/s]

Step 9717/10000, Loss: 0.0287
Step 9718/10000, Loss: 0.0242


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9720/10000 [2:05:45<00:47,  5.87it/s]

Step 9719/10000, Loss: 0.0263
Step 9720/10000, Loss: 0.0264


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9722/10000 [2:05:45<00:47,  5.82it/s]

Step 9721/10000, Loss: 0.0272
Step 9722/10000, Loss: 0.0204


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9724/10000 [2:05:45<00:48,  5.72it/s]

Step 9723/10000, Loss: 0.0290
Step 9724/10000, Loss: 0.0254


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9726/10000 [2:05:46<00:46,  5.84it/s]

Step 9725/10000, Loss: 0.0247
Step 9726/10000, Loss: 0.0292


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9728/10000 [2:05:46<00:46,  5.83it/s]

Step 9727/10000, Loss: 0.0231
Step 9728/10000, Loss: 0.0243


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9730/10000 [2:05:46<00:45,  5.88it/s]

Step 9729/10000, Loss: 0.0287
Step 9730/10000, Loss: 0.0260


Training Progress:  97%|██████████████████████████████████████████████████████▍ | 9732/10000 [2:05:47<00:45,  5.85it/s]

Step 9731/10000, Loss: 0.0209
Step 9732/10000, Loss: 0.0298


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9734/10000 [2:05:47<00:45,  5.83it/s]

Step 9733/10000, Loss: 0.0214
Step 9734/10000, Loss: 0.0247


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9736/10000 [2:05:47<00:44,  5.88it/s]

Step 9735/10000, Loss: 0.0294
Step 9736/10000, Loss: 0.0278


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9738/10000 [2:05:48<00:45,  5.81it/s]

Step 9737/10000, Loss: 0.0270
Step 9738/10000, Loss: 0.0212


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9740/10000 [2:05:48<00:44,  5.81it/s]

Step 9739/10000, Loss: 0.0293
Step 9740/10000, Loss: 0.0326


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9742/10000 [2:05:48<00:44,  5.75it/s]

Step 9741/10000, Loss: 0.0321
Step 9742/10000, Loss: 0.0263


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9744/10000 [2:05:49<00:43,  5.88it/s]

Step 9743/10000, Loss: 0.0319
Step 9744/10000, Loss: 0.0313


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9746/10000 [2:05:49<00:43,  5.81it/s]

Step 9745/10000, Loss: 0.0243
Step 9746/10000, Loss: 0.0248


Training Progress:  97%|██████████████████████████████████████████████████████▌ | 9748/10000 [2:05:49<00:43,  5.75it/s]

Step 9747/10000, Loss: 0.0282
Step 9748/10000, Loss: 0.0277


Training Progress:  98%|██████████████████████████████████████████████████████▌ | 9750/10000 [2:05:50<00:42,  5.89it/s]

Step 9749/10000, Loss: 0.0282
Step 9750/10000, Loss: 0.0330


Training Progress:  98%|██████████████████████████████████████████████████████▌ | 9752/10000 [2:05:50<00:42,  5.82it/s]

Step 9751/10000, Loss: 0.0235
Step 9752/10000, Loss: 0.0240


Training Progress:  98%|██████████████████████████████████████████████████████▌ | 9754/10000 [2:05:50<00:42,  5.79it/s]

Step 9753/10000, Loss: 0.0279
Step 9754/10000, Loss: 0.0268


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9756/10000 [2:05:51<00:41,  5.83it/s]

Step 9755/10000, Loss: 0.0259
Step 9756/10000, Loss: 0.0251


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9758/10000 [2:05:51<00:41,  5.84it/s]

Step 9757/10000, Loss: 0.0279
Step 9758/10000, Loss: 0.0319


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9760/10000 [2:05:51<00:41,  5.77it/s]

Step 9759/10000, Loss: 0.0270
Step 9760/10000, Loss: 0.0270


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9762/10000 [2:05:52<00:41,  5.77it/s]

Step 9761/10000, Loss: 0.0306
Step 9762/10000, Loss: 0.0240


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9764/10000 [2:05:52<00:40,  5.78it/s]

Step 9763/10000, Loss: 0.0318
Step 9764/10000, Loss: 0.0307


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9766/10000 [2:05:52<00:40,  5.79it/s]

Step 9765/10000, Loss: 0.0288
Step 9766/10000, Loss: 0.0308


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9768/10000 [2:05:53<00:40,  5.79it/s]

Step 9767/10000, Loss: 0.0307
Step 9768/10000, Loss: 0.0254


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9770/10000 [2:05:53<00:39,  5.89it/s]

Step 9769/10000, Loss: 0.0362
Step 9770/10000, Loss: 0.0241


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9772/10000 [2:05:53<00:39,  5.79it/s]

Step 9771/10000, Loss: 0.0289
Step 9772/10000, Loss: 0.0276


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9774/10000 [2:05:54<00:38,  5.80it/s]

Step 9773/10000, Loss: 0.0219
Step 9774/10000, Loss: 0.0247


Training Progress:  98%|██████████████████████████████████████████████████████▋ | 9776/10000 [2:05:54<00:38,  5.85it/s]

Step 9775/10000, Loss: 0.0200
Step 9776/10000, Loss: 0.0262


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9778/10000 [2:05:55<00:38,  5.76it/s]

Step 9777/10000, Loss: 0.0310
Step 9778/10000, Loss: 0.0260


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9780/10000 [2:05:55<00:37,  5.88it/s]

Step 9779/10000, Loss: 0.0253
Step 9780/10000, Loss: 0.0273


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9782/10000 [2:05:55<00:38,  5.73it/s]

Step 9781/10000, Loss: 0.0243
Step 9782/10000, Loss: 0.0313


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9784/10000 [2:05:56<00:37,  5.78it/s]

Step 9783/10000, Loss: 0.0257
Step 9784/10000, Loss: 0.0280


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9786/10000 [2:05:56<00:37,  5.75it/s]

Step 9785/10000, Loss: 0.0290
Step 9786/10000, Loss: 0.0285


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9788/10000 [2:05:56<00:36,  5.83it/s]

Step 9787/10000, Loss: 0.0280
Step 9788/10000, Loss: 0.0252


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9790/10000 [2:05:57<00:36,  5.79it/s]

Step 9789/10000, Loss: 0.0210
Step 9790/10000, Loss: 0.0330


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9792/10000 [2:05:57<00:36,  5.73it/s]

Step 9791/10000, Loss: 0.0324
Step 9792/10000, Loss: 0.0231


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9794/10000 [2:05:57<00:35,  5.84it/s]

Step 9793/10000, Loss: 0.0268
Step 9794/10000, Loss: 0.0269


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9796/10000 [2:05:58<00:35,  5.78it/s]

Step 9795/10000, Loss: 0.0266
Step 9796/10000, Loss: 0.0276


Training Progress:  98%|██████████████████████████████████████████████████████▊ | 9798/10000 [2:05:58<00:35,  5.73it/s]

Step 9797/10000, Loss: 0.0278
Step 9798/10000, Loss: 0.0294


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9800/10000 [2:05:58<00:34,  5.87it/s]

Step 9799/10000, Loss: 0.0278
Step 9800/10000, Loss: 0.0279


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9802/10000 [2:05:59<00:34,  5.77it/s]

Step 9801/10000, Loss: 0.0267
Step 9802/10000, Loss: 0.0274


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9804/10000 [2:05:59<00:34,  5.72it/s]

Step 9803/10000, Loss: 0.0288
Step 9804/10000, Loss: 0.0236


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9806/10000 [2:05:59<00:33,  5.87it/s]

Step 9805/10000, Loss: 0.0280
Step 9806/10000, Loss: 0.0246


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9808/10000 [2:06:00<00:32,  5.82it/s]

Step 9807/10000, Loss: 0.0265
Step 9808/10000, Loss: 0.0301


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9810/10000 [2:06:00<00:33,  5.74it/s]

Step 9809/10000, Loss: 0.0260
Step 9810/10000, Loss: 0.0262


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9812/10000 [2:06:00<00:31,  5.88it/s]

Step 9811/10000, Loss: 0.0279
Step 9812/10000, Loss: 0.0275


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9814/10000 [2:06:01<00:31,  5.84it/s]

Step 9813/10000, Loss: 0.0239
Step 9814/10000, Loss: 0.0290


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9816/10000 [2:06:01<00:31,  5.75it/s]

Step 9815/10000, Loss: 0.0247
Step 9816/10000, Loss: 0.0273


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9818/10000 [2:06:01<00:30,  5.88it/s]

Step 9817/10000, Loss: 0.0296
Step 9818/10000, Loss: 0.0325


Training Progress:  98%|██████████████████████████████████████████████████████▉ | 9820/10000 [2:06:02<00:30,  5.83it/s]

Step 9819/10000, Loss: 0.0249
Step 9820/10000, Loss: 0.0236


Training Progress:  98%|███████████████████████████████████████████████████████ | 9822/10000 [2:06:02<00:30,  5.79it/s]

Step 9821/10000, Loss: 0.0285
Step 9822/10000, Loss: 0.0333


Training Progress:  98%|███████████████████████████████████████████████████████ | 9824/10000 [2:06:02<00:30,  5.74it/s]

Step 9823/10000, Loss: 0.0337
Step 9824/10000, Loss: 0.0241


Training Progress:  98%|███████████████████████████████████████████████████████ | 9826/10000 [2:06:03<00:29,  5.87it/s]

Step 9825/10000, Loss: 0.0351
Step 9826/10000, Loss: 0.0321


Training Progress:  98%|███████████████████████████████████████████████████████ | 9828/10000 [2:06:03<00:29,  5.77it/s]

Step 9827/10000, Loss: 0.0244
Step 9828/10000, Loss: 0.0266


Training Progress:  98%|███████████████████████████████████████████████████████ | 9830/10000 [2:06:03<00:29,  5.72it/s]

Step 9829/10000, Loss: 0.0320
Step 9830/10000, Loss: 0.0256


Training Progress:  98%|███████████████████████████████████████████████████████ | 9832/10000 [2:06:04<00:28,  5.85it/s]

Step 9831/10000, Loss: 0.0270
Step 9832/10000, Loss: 0.0340


Training Progress:  98%|███████████████████████████████████████████████████████ | 9834/10000 [2:06:04<00:28,  5.77it/s]

Step 9833/10000, Loss: 0.0256
Step 9834/10000, Loss: 0.0242


Training Progress:  98%|███████████████████████████████████████████████████████ | 9836/10000 [2:06:05<00:28,  5.73it/s]

Step 9835/10000, Loss: 0.0295
Step 9836/10000, Loss: 0.0298


Training Progress:  98%|███████████████████████████████████████████████████████ | 9838/10000 [2:06:05<00:27,  5.88it/s]

Step 9837/10000, Loss: 0.0300
Step 9838/10000, Loss: 0.0263


Training Progress:  98%|███████████████████████████████████████████████████████ | 9840/10000 [2:06:05<00:27,  5.73it/s]

Step 9839/10000, Loss: 0.0271
Step 9840/10000, Loss: 0.0311


Training Progress:  98%|███████████████████████████████████████████████████████ | 9842/10000 [2:06:06<00:27,  5.77it/s]

Step 9841/10000, Loss: 0.0265
Step 9842/10000, Loss: 0.0246


Training Progress:  98%|███████████████████████████████████████████████████████▏| 9844/10000 [2:06:06<00:27,  5.72it/s]

Step 9843/10000, Loss: 0.0319
Step 9844/10000, Loss: 0.0220


Training Progress:  98%|███████████████████████████████████████████████████████▏| 9846/10000 [2:06:06<00:26,  5.82it/s]

Step 9845/10000, Loss: 0.0288
Step 9846/10000, Loss: 0.0295


Training Progress:  98%|███████████████████████████████████████████████████████▏| 9848/10000 [2:06:07<00:26,  5.78it/s]

Step 9847/10000, Loss: 0.0266
Step 9848/10000, Loss: 0.0275


Training Progress:  98%|███████████████████████████████████████████████████████▏| 9850/10000 [2:06:07<00:26,  5.73it/s]

Step 9849/10000, Loss: 0.0271
Step 9850/10000, Loss: 0.0250


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9852/10000 [2:06:07<00:25,  5.81it/s]

Step 9851/10000, Loss: 0.0381
Step 9852/10000, Loss: 0.0259


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9854/10000 [2:06:08<00:25,  5.82it/s]

Step 9853/10000, Loss: 0.0276
Step 9854/10000, Loss: 0.0275


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9856/10000 [2:06:08<00:25,  5.76it/s]

Step 9855/10000, Loss: 0.0258
Step 9856/10000, Loss: 0.0288


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9858/10000 [2:06:08<00:24,  5.89it/s]

Step 9857/10000, Loss: 0.0217
Step 9858/10000, Loss: 0.0264


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9860/10000 [2:06:09<00:24,  5.82it/s]

Step 9859/10000, Loss: 0.0283
Step 9860/10000, Loss: 0.0317


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9862/10000 [2:06:09<00:23,  5.75it/s]

Step 9861/10000, Loss: 0.0274
Step 9862/10000, Loss: 0.0266


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9864/10000 [2:06:09<00:23,  5.72it/s]

Step 9863/10000, Loss: 0.0285
Step 9864/10000, Loss: 0.0294


Training Progress:  99%|███████████████████████████████████████████████████████▏| 9866/10000 [2:06:10<00:22,  5.86it/s]

Step 9865/10000, Loss: 0.0232
Step 9866/10000, Loss: 0.0259


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9868/10000 [2:06:10<00:23,  5.72it/s]

Step 9867/10000, Loss: 0.0317
Step 9868/10000, Loss: 0.0287


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9870/10000 [2:06:10<00:22,  5.77it/s]

Step 9869/10000, Loss: 0.0267
Step 9870/10000, Loss: 0.0294


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9872/10000 [2:06:11<00:21,  5.89it/s]

Step 9871/10000, Loss: 0.0283
Step 9872/10000, Loss: 0.0322


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9874/10000 [2:06:11<00:21,  5.82it/s]

Step 9873/10000, Loss: 0.0293
Step 9874/10000, Loss: 0.0222


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9876/10000 [2:06:11<00:21,  5.76it/s]

Step 9875/10000, Loss: 0.0303
Step 9876/10000, Loss: 0.0233


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9878/10000 [2:06:12<00:20,  5.88it/s]

Step 9877/10000, Loss: 0.0271
Step 9878/10000, Loss: 0.0283


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9880/10000 [2:06:12<00:20,  5.81it/s]

Step 9879/10000, Loss: 0.0289
Step 9880/10000, Loss: 0.0304


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9882/10000 [2:06:12<00:20,  5.75it/s]

Step 9881/10000, Loss: 0.0292
Step 9882/10000, Loss: 0.0256


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9884/10000 [2:06:13<00:20,  5.72it/s]

Step 9883/10000, Loss: 0.0277
Step 9884/10000, Loss: 0.0279


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9885/10000 [2:06:13<00:19,  5.78it/s]

Step 9885/10000, Loss: 0.0298
Step 9886/10000, Loss: 0.0172


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9886/10000 [2:06:28<08:37,  4.54s/it]


Checkpoint saved: checkpoints\best\checkpoint_step9886_loss0.0172_20250117_145817.pt

New best loss: 0.0172


Training Progress:  99%|███████████████████████████████████████████████████████▎| 9888/10000 [2:06:28<04:23,  2.35s/it]

Step 9887/10000, Loss: 0.0243
Step 9888/10000, Loss: 0.0237


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9890/10000 [2:06:29<02:16,  1.24s/it]

Step 9889/10000, Loss: 0.0245
Step 9890/10000, Loss: 0.0329


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9892/10000 [2:06:29<01:15,  1.44it/s]

Step 9891/10000, Loss: 0.0215
Step 9892/10000, Loss: 0.0239


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9894/10000 [2:06:29<00:45,  2.34it/s]

Step 9893/10000, Loss: 0.0276
Step 9894/10000, Loss: 0.0268


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9896/10000 [2:06:30<00:30,  3.37it/s]

Step 9895/10000, Loss: 0.0215
Step 9896/10000, Loss: 0.0316


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9898/10000 [2:06:30<00:23,  4.25it/s]

Step 9897/10000, Loss: 0.0229
Step 9898/10000, Loss: 0.0235


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9900/10000 [2:06:30<00:20,  4.88it/s]

Step 9899/10000, Loss: 0.0281
Step 9900/10000, Loss: 0.0296


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9902/10000 [2:06:31<00:18,  5.39it/s]

Step 9901/10000, Loss: 0.0282
Step 9902/10000, Loss: 0.0244


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9904/10000 [2:06:31<00:17,  5.58it/s]

Step 9903/10000, Loss: 0.0307
Step 9904/10000, Loss: 0.0342


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9906/10000 [2:06:31<00:16,  5.65it/s]

Step 9905/10000, Loss: 0.0310
Step 9906/10000, Loss: 0.0250


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9908/10000 [2:06:32<00:15,  5.82it/s]

Step 9907/10000, Loss: 0.0373
Step 9908/10000, Loss: 0.0315


Training Progress:  99%|███████████████████████████████████████████████████████▍| 9910/10000 [2:06:32<00:15,  5.78it/s]

Step 9909/10000, Loss: 0.0272
Step 9910/10000, Loss: 0.0243


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9912/10000 [2:06:32<00:15,  5.75it/s]

Step 9911/10000, Loss: 0.0289
Step 9912/10000, Loss: 0.0263


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9914/10000 [2:06:33<00:15,  5.70it/s]

Step 9913/10000, Loss: 0.0258
Step 9914/10000, Loss: 0.0308


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9916/10000 [2:06:33<00:14,  5.75it/s]

Step 9915/10000, Loss: 0.0227
Step 9916/10000, Loss: 0.0264


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9918/10000 [2:06:33<00:14,  5.83it/s]

Step 9917/10000, Loss: 0.0277
Step 9918/10000, Loss: 0.0304


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9920/10000 [2:06:34<00:13,  5.75it/s]

Step 9919/10000, Loss: 0.0280
Step 9920/10000, Loss: 0.0256


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9922/10000 [2:06:34<00:13,  5.88it/s]

Step 9921/10000, Loss: 0.0257
Step 9922/10000, Loss: 0.0314


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9924/10000 [2:06:34<00:13,  5.82it/s]

Step 9923/10000, Loss: 0.0263
Step 9924/10000, Loss: 0.0265


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9926/10000 [2:06:35<00:12,  5.76it/s]

Step 9925/10000, Loss: 0.0337
Step 9926/10000, Loss: 0.0230


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9928/10000 [2:06:35<00:12,  5.72it/s]

Step 9927/10000, Loss: 0.0298
Step 9928/10000, Loss: 0.0295


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9930/10000 [2:06:35<00:11,  5.86it/s]

Step 9929/10000, Loss: 0.0284
Step 9930/10000, Loss: 0.0281


Training Progress:  99%|███████████████████████████████████████████████████████▌| 9932/10000 [2:06:36<00:11,  5.77it/s]

Step 9931/10000, Loss: 0.0311
Step 9932/10000, Loss: 0.0272


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9934/10000 [2:06:36<00:11,  5.76it/s]

Step 9933/10000, Loss: 0.0315
Step 9934/10000, Loss: 0.0220


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9936/10000 [2:06:37<00:11,  5.81it/s]

Step 9935/10000, Loss: 0.0238
Step 9936/10000, Loss: 0.0271


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9938/10000 [2:06:37<00:10,  5.76it/s]

Step 9937/10000, Loss: 0.0218
Step 9938/10000, Loss: 0.0284


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9940/10000 [2:06:37<00:10,  5.72it/s]

Step 9939/10000, Loss: 0.0234
Step 9940/10000, Loss: 0.0232


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9942/10000 [2:06:38<00:09,  5.86it/s]

Step 9941/10000, Loss: 0.0342
Step 9942/10000, Loss: 0.0287


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9944/10000 [2:06:38<00:09,  5.81it/s]

Step 9943/10000, Loss: 0.0260
Step 9944/10000, Loss: 0.0252


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9946/10000 [2:06:38<00:09,  5.73it/s]

Step 9945/10000, Loss: 0.0267
Step 9946/10000, Loss: 0.0284


Training Progress:  99%|███████████████████████████████████████████████████████▋| 9948/10000 [2:06:39<00:08,  5.86it/s]

Step 9947/10000, Loss: 0.0254
Step 9948/10000, Loss: 0.0258


Training Progress: 100%|███████████████████████████████████████████████████████▋| 9950/10000 [2:06:39<00:08,  5.82it/s]

Step 9949/10000, Loss: 0.0272
Step 9950/10000, Loss: 0.0272


Training Progress: 100%|███████████████████████████████████████████████████████▋| 9952/10000 [2:06:39<00:08,  5.78it/s]

Step 9951/10000, Loss: 0.0250
Step 9952/10000, Loss: 0.0243


Training Progress: 100%|███████████████████████████████████████████████████████▋| 9954/10000 [2:06:40<00:07,  5.87it/s]

Step 9953/10000, Loss: 0.0213
Step 9954/10000, Loss: 0.0332


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9956/10000 [2:06:40<00:07,  5.82it/s]

Step 9955/10000, Loss: 0.0269
Step 9956/10000, Loss: 0.0221


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9958/10000 [2:06:40<00:07,  5.75it/s]

Step 9957/10000, Loss: 0.0261
Step 9958/10000, Loss: 0.0235


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9960/10000 [2:06:41<00:06,  5.87it/s]

Step 9959/10000, Loss: 0.0238
Step 9960/10000, Loss: 0.0257


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9962/10000 [2:06:41<00:06,  5.82it/s]

Step 9961/10000, Loss: 0.0264
Step 9962/10000, Loss: 0.0331


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9964/10000 [2:06:41<00:06,  5.78it/s]

Step 9963/10000, Loss: 0.0288
Step 9964/10000, Loss: 0.0269


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9966/10000 [2:06:42<00:05,  5.73it/s]

Step 9965/10000, Loss: 0.0256
Step 9966/10000, Loss: 0.0241


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9968/10000 [2:06:42<00:05,  5.87it/s]

Step 9967/10000, Loss: 0.0293
Step 9968/10000, Loss: 0.0234


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9970/10000 [2:06:42<00:05,  5.82it/s]

Step 9969/10000, Loss: 0.0358
Step 9970/10000, Loss: 0.0235


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9972/10000 [2:06:43<00:04,  5.78it/s]

Step 9971/10000, Loss: 0.0215
Step 9972/10000, Loss: 0.0297


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9974/10000 [2:06:43<00:04,  5.88it/s]

Step 9973/10000, Loss: 0.0262
Step 9974/10000, Loss: 0.0256


Training Progress: 100%|███████████████████████████████████████████████████████▊| 9976/10000 [2:06:43<00:04,  5.82it/s]

Step 9975/10000, Loss: 0.0304
Step 9976/10000, Loss: 0.0275


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9978/10000 [2:06:44<00:03,  5.76it/s]

Step 9977/10000, Loss: 0.0212
Step 9978/10000, Loss: 0.0294


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9980/10000 [2:06:44<00:03,  5.87it/s]

Step 9979/10000, Loss: 0.0231
Step 9980/10000, Loss: 0.0291


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9982/10000 [2:06:44<00:03,  5.82it/s]

Step 9981/10000, Loss: 0.0287
Step 9982/10000, Loss: 0.0326


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9984/10000 [2:06:45<00:02,  5.76it/s]

Step 9983/10000, Loss: 0.0236
Step 9984/10000, Loss: 0.0267


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9986/10000 [2:06:45<00:02,  5.71it/s]

Step 9985/10000, Loss: 0.0326
Step 9986/10000, Loss: 0.0312


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9988/10000 [2:06:45<00:02,  5.85it/s]

Step 9987/10000, Loss: 0.0357
Step 9988/10000, Loss: 0.0228


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9990/10000 [2:06:46<00:01,  5.80it/s]

Step 9989/10000, Loss: 0.0388
Step 9990/10000, Loss: 0.0328


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9992/10000 [2:06:46<00:01,  5.74it/s]

Step 9991/10000, Loss: 0.0218
Step 9992/10000, Loss: 0.0267


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9994/10000 [2:06:47<00:01,  5.87it/s]

Step 9993/10000, Loss: 0.0254
Step 9994/10000, Loss: 0.0254


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9996/10000 [2:06:47<00:00,  5.82it/s]

Step 9995/10000, Loss: 0.0261
Step 9996/10000, Loss: 0.0318


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9998/10000 [2:06:47<00:00,  5.71it/s]

Step 9997/10000, Loss: 0.0234
Step 9998/10000, Loss: 0.0244


Training Progress: 100%|███████████████████████████████████████████████████████▉| 9999/10000 [2:06:47<00:00,  5.82it/s]

Step 9999/10000, Loss: 0.0277
Step 10000/10000, Loss: 0.0238


Training Progress: 100%|███████████████████████████████████████████████████████| 10000/10000 [2:07:05<00:00,  1.31it/s]


Checkpoint saved: checkpoints\checkpoint_step10000_loss0.0238_20250117_145851.pt






Checkpoint saved: checkpoints\checkpoint_step10000_loss0.0238_20250117_145909.pt

Training completed!
Best loss achieved: 0.0172
Final loss: 0.0238
Checkpoints saved in: checkpoints
