In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import torch.optim as optim
from tqdm import tqdm
import sys, os, math
import wandb
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
import json
from transformers import EsmModel, AutoTokenizer

sys.path.insert(0, '../dlp')
from batch import Batch

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

epochs = 100_000
val_epoch = 100
num_val = 10
batch_size = 8
dataset_name = "new_corpus"
lr = 0.001
model_name = "Finetune_ESM"
max_seq_len = 500

from data_access import PQDataAccess
da = PQDataAccess(f"/home/aac/Alireza/datasets/export_pqt_4_taxseq_new/{dataset_name}", batch_size)

checkpoint_dir = f"../checkpoints/{model_name}_checkpoints"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
print(checkpoint_dir)

wandb.init(
    # set the wandb project where this run will be logged
    project=model_name,

    # track hyperparameters and run metadata
    config={
        "learning_rate": lr,
        "architecture": model_name,
        "epochs": epochs,
        "batch_szie": batch_size,
        "max_seq_len": max_seq_len
    }
)

  from .autonotebook import tqdm as notebook_tqdm


cuda:0
 WORLD_SIZE=1 , LOCAL_WORLD_SIZE=1,RANK =0,LOCAL_RANK = 0 
../checkpoints/Finetune_ESM_checkpoints


git root error: Cmd('git') failed due to: exit code(128)
  cmdline: git rev-parse --show-toplevel
  stderr: 'fatal: detected dubious ownership in repository at '/home/aac/Alireza'
To add an exception for this directory, call:

	git config --global --add safe.directory /home/aac/Alireza'
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malirezanor[0m ([33malireza_noroozi[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
tax_ids_file = "../data/tax_ids.csv"

tax_ids = pd.read_csv(tax_ids_file)
# print(tax_ids)
num_classes = len(tax_ids) + 1
print(num_classes)
id_encoder = {name: idx + 1 for idx, name in enumerate(tax_ids['Taxonomic_lineage_IDs'].values)}

id_decoder = {idx + 1: name for idx, name in enumerate(tax_ids['Taxonomic_lineage_IDs'].values)}
id_decoder[0] = "NOT DEFINED"

tokenizer_ = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")

def data_to_tensor_batch(b):
    inputs = tokenizer_(
        [e['sequence'] for e in b],
        return_tensors="pt", 
        padding='max_length', 
        truncation=True, 
        max_length=max_seq_len
    )

    tax_ids = torch.LongTensor([id_encoder.get(e['Taxonomic_lineage_IDs'], 0) for e in b])

    return Batch(inputs, tax_ids)

67486


In [3]:
class ESM2(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.esm = EsmModel.from_pretrained("facebook/esm2_t33_650M_UR50D")
        
        self.layer1 = nn.Linear(1280, 512)
        self.layer_norm = nn.LayerNorm(512)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)
        self.layer2 = nn.Linear(512, num_classes)

    def forward(self, x, attention_mask=None):
        outputs = self.esm(x, attention_mask=attention_mask).pooler_output

        outputs = self.layer1(outputs)
        outputs = self.layer_norm(outputs)
        outputs = self.relu(outputs)
        outputs = self.dropout(outputs)
        outputs = self.layer2(outputs)
        return outputs

In [4]:
model = ESM2(num_classes).to(device)

total = sum(p.numel() for p in model.parameters())
print(f'Total parameters: {total/ 1e6} M')
# print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

criterion = nn.CrossEntropyLoss()
# Cosine annealing with warm restarts
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,  # Initial restart interval
    T_mult=2,  # Multiply interval by 2 after each restart
    eta_min=1e-6  # Minimum learning rate
)

Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t33_650M_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total parameters: 687.631155 M


In [5]:
val_dir = f"val_results/{model_name}"
if not os.path.exists(val_dir):
    os.makedirs(val_dir)
    
val_batches = [da.get_batch() for _ in range(num_val)]
# print(val_batches[0])

def evaluate(model):
    model.eval()  # Set model to evaluation mode
    
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    for epoch in range(num_val):
        with torch.no_grad():  # Disable gradient computation during evaluation
            tensor_batch = data_to_tensor_batch(val_batches[epoch])
            tensor_batch.gpu(device)
        
            labels = tensor_batch.taxes
            outputs = model(tensor_batch.seq_ids['input_ids'], tensor_batch.seq_ids['attention_mask'])

            # Calculate the loss
            loss = criterion(outputs, labels)
    
            running_loss += loss.item()
                
            preds = torch.argmax(outputs, dim=1)
    
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())
    
    # Concatenate all batches into single tensors
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    # Compute evaluation metrics (example: accuracy, F1 score)
    accuracy = accuracy_score(all_labels.numpy(), all_preds.numpy())
    f1_macro = f1_score(all_labels.numpy(), all_preds.numpy(), average='macro')  # F1-score for multi-label classification
    f1_micro = f1_score(all_labels.numpy(), all_preds.numpy(), average='micro')  # F1-score for multi-label classification
    # conf_matrix = confusion_matrix(all_labels.numpy(), all_preds.numpy())
    avg_loss = running_loss / num_val
    
    return avg_loss, accuracy, f1_micro, f1_macro

evaluate(model)

(11.243791866302491, 0.0, 0.0, 0.0)

In [6]:
import glob
def load_checkpoint(model, optimizer=None, scheduler=None):
    checkpoints = glob.glob(os.path.join(checkpoint_dir, 'checkpoint_epoch_*.pt'))        
    # Extract epoch numbers and find latest
    latest_checkpoint = max(checkpoints, key=lambda x: int(x.split('_')[-1].split('.')[0]))
    checkpoint = torch.load(latest_checkpoint)
    
    # Load model state
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    
    # Load optimizer state if provided (for training)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Move optimizer state to GPU if necessary
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
    if scheduler is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    
    # Get training metadata
    epoch = checkpoint['epoch']
    metrics = checkpoint['metrics']
    
    print(f"Successfully loaded checkpoint from epoch {epoch}")
    # print("Metrics at checkpoint:", metrics)
    
    return model, optimizer, scheduler, epoch, metrics
        

# model, optimizer, scheduler, latest_epoch, metrics = load_checkpoint(model, optimizer, scheduler)
latest_epoch = 0

In [7]:
running_loss = 0
current_lr = lr

for epoch in tqdm(range(latest_epoch + 1, latest_epoch + epochs + 1)):
    model.train()
    
    tensor_batch = data_to_tensor_batch(da.get_batch())
    tensor_batch.gpu(device)
    
    labels = tensor_batch.taxes
    outputs = model(tensor_batch.seq_ids['input_ids'], tensor_batch.seq_ids['attention_mask'])
    
    loss = criterion(outputs, labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    running_loss += loss.item()
    
    if (epoch + 1) % val_epoch == 0:
        train_loss = running_loss / val_epoch
        # Evaluate on validation set
        val_loss, val_accuracy, val_f1_micro, val_f1_macro = evaluate(model)
        
        print(f"Epoch [{epoch + 1}/{epochs}]")
        print(f"Train Loss: {train_loss:.4f}")
        print(f"test Loss: {val_loss:.4f}, test Accuracy: {val_accuracy:.4f}")
        print(f"test F1 (micro): {val_f1_micro:.4f}, test F1 (macro): {val_f1_macro:.4f}")
        
        # Create metrics dictionary for saving
        metrics = {
            "train_loss": train_loss,
            "test_loss": val_loss,
            "test_accuracy": val_accuracy,
            "test_f1_micro": val_f1_micro,
            "test_f1_macro": val_f1_macro,
            "epoch": epoch + 1,
            "lr": current_lr
        }

        # Save periodic checkpoint
        checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pt')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'metrics': metrics
        }, checkpoint_path)
        
        # Log to wandb
        wandb.log(metrics)

        # Step the scheduler
        scheduler.step(epoch + loss.item())
        current_lr = scheduler.get_last_lr()[0]
        
        # Reset training metrics
        running_loss = 0

wandb.finish()

  0%|          | 98/100000 [01:23<23:50:12,  1.16it/s]

Epoch [100/100000]
Train Loss: 10.9839
test Loss: 11.0489, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  0%|          | 198/100000 [03:19<23:45:52,  1.17it/s] 

Epoch [200/100000]
Train Loss: 10.7905
test Loss: 10.5840, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  0%|          | 298/100000 [05:11<23:42:55,  1.17it/s] 

Epoch [300/100000]
Train Loss: 10.6935
test Loss: 10.3726, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  0%|          | 398/100000 [06:56<23:38:15,  1.17it/s] 

Epoch [400/100000]
Train Loss: 10.6458
test Loss: 10.3708, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  0%|          | 498/100000 [08:43<23:42:00,  1.17it/s] 

Epoch [500/100000]
Train Loss: 10.5916
test Loss: 10.2280, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 698/100000 [12:21<23:36:50,  1.17it/s] 

Epoch [700/100000]
Train Loss: 10.5770
test Loss: 10.2566, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 798/100000 [14:08<23:33:19,  1.17it/s] 

Epoch [800/100000]
Train Loss: 10.5264
test Loss: 10.2511, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 898/100000 [15:59<23:31:48,  1.17it/s] 

Epoch [900/100000]
Train Loss: 10.5826
test Loss: 10.1532, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 998/100000 [17:43<23:38:26,  1.16it/s] 

Epoch [1000/100000]
Train Loss: 10.4992
test Loss: 10.1603, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 1098/100000 [19:32<23:21:34,  1.18it/s] 

Epoch [1100/100000]
Train Loss: 10.4366
test Loss: 10.1345, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|          | 1198/100000 [21:18<23:31:32,  1.17it/s] 

Epoch [1200/100000]
Train Loss: 10.4988
test Loss: 10.1922, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|▏         | 1298/100000 [23:05<23:31:24,  1.17it/s] 

Epoch [1300/100000]
Train Loss: 10.4969
test Loss: 10.1520, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  1%|▏         | 1398/100000 [24:53<23:14:23,  1.18it/s] 

Epoch [1400/100000]
Train Loss: 10.3082
test Loss: 10.1369, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  2%|▏         | 1598/100000 [28:36<23:20:35,  1.17it/s] 

Epoch [1600/100000]
Train Loss: 10.2984
test Loss: 10.0398, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  2%|▏         | 2498/100000 [44:58<23:08:20,  1.17it/s]

Epoch [2500/100000]
Train Loss: 10.2772
test Loss: 9.9119, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 2598/100000 [46:41<23:08:41,  1.17it/s] 

Epoch [2600/100000]
Train Loss: 10.3002
test Loss: 9.9121, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 2698/100000 [48:23<23:11:16,  1.17it/s] 

Epoch [2700/100000]
Train Loss: 10.2658
test Loss: 9.9387, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 2798/100000 [50:07<23:10:33,  1.17it/s] 

Epoch [2800/100000]
Train Loss: 10.2264
test Loss: 9.9310, test Accuracy: 0.0125
test F1 (micro): 0.0125, test F1 (macro): 0.0003


  3%|▎         | 2898/100000 [51:52<23:05:58,  1.17it/s] 

Epoch [2900/100000]
Train Loss: 10.3502
test Loss: 9.9088, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 2998/100000 [53:43<22:59:48,  1.17it/s] 

Epoch [3000/100000]
Train Loss: 10.3410
test Loss: 9.8685, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 3098/100000 [55:31<22:52:33,  1.18it/s] 

Epoch [3100/100000]
Train Loss: 10.2105
test Loss: 9.9348, test Accuracy: 0.0125
test F1 (micro): 0.0125, test F1 (macro): 0.0003


  3%|▎         | 3198/100000 [57:22<22:56:18,  1.17it/s] 

Epoch [3200/100000]
Train Loss: 10.2321
test Loss: 9.8927, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 3298/100000 [59:10<22:59:41,  1.17it/s] 

Epoch [3300/100000]
Train Loss: 10.3060
test Loss: 9.9776, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 3398/100000 [1:00:57<22:55:42,  1.17it/s]

Epoch [3400/100000]
Train Loss: 10.1084
test Loss: 9.8358, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  3%|▎         | 3498/100000 [1:02:46<22:59:27,  1.17it/s] 

Epoch [3500/100000]
Train Loss: 10.2265
test Loss: 9.9052, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▎         | 3598/100000 [1:04:45<22:59:22,  1.16it/s] 

Epoch [3600/100000]
Train Loss: 10.3402
test Loss: 9.9393, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▎         | 3698/100000 [1:06:34<22:53:06,  1.17it/s] 

Epoch [3700/100000]
Train Loss: 10.1987
test Loss: 9.8702, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 3798/100000 [1:08:21<22:52:52,  1.17it/s] 

Epoch [3800/100000]
Train Loss: 10.2467
test Loss: 9.9193, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 3898/100000 [1:10:06<22:54:20,  1.17it/s] 

Epoch [3900/100000]
Train Loss: 10.3373
test Loss: 9.9886, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 3998/100000 [1:11:54<22:40:42,  1.18it/s] 

Epoch [4000/100000]
Train Loss: 10.2399
test Loss: 9.8428, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 4098/100000 [1:13:41<22:46:31,  1.17it/s] 

Epoch [4100/100000]
Train Loss: 10.2230
test Loss: 9.8572, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 4198/100000 [1:15:25<22:36:26,  1.18it/s] 

Epoch [4200/100000]
Train Loss: 10.2443
test Loss: 9.8650, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 4298/100000 [1:17:11<23:02:30,  1.15it/s] 

Epoch [4300/100000]
Train Loss: 10.2075
test Loss: 9.8768, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 4398/100000 [1:19:01<22:45:16,  1.17it/s] 

Epoch [4400/100000]
Train Loss: 10.2353
test Loss: 9.8379, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  4%|▍         | 4498/100000 [1:20:46<22:39:32,  1.17it/s] 

Epoch [4500/100000]
Train Loss: 10.2091
test Loss: 9.8507, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▍         | 4598/100000 [1:22:37<22:46:02,  1.16it/s] 

Epoch [4600/100000]
Train Loss: 10.1832
test Loss: 9.8413, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▍         | 4698/100000 [1:24:30<22:40:11,  1.17it/s] 

Epoch [4700/100000]
Train Loss: 10.2155
test Loss: 9.8500, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▍         | 4798/100000 [1:26:24<22:40:32,  1.17it/s] 

Epoch [4800/100000]
Train Loss: 10.1529
test Loss: 9.8459, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▍         | 4898/100000 [1:28:09<22:31:48,  1.17it/s] 

Epoch [4900/100000]
Train Loss: 10.2520
test Loss: 9.8512, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▍         | 4998/100000 [1:30:03<22:25:56,  1.18it/s] 

Epoch [5000/100000]
Train Loss: 10.2844
test Loss: 9.8563, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▌         | 5098/100000 [1:31:51<22:37:43,  1.16it/s] 

Epoch [5100/100000]
Train Loss: 10.1657
test Loss: 9.8549, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▌         | 5198/100000 [1:33:38<22:31:31,  1.17it/s] 

Epoch [5200/100000]
Train Loss: 10.2654
test Loss: 9.8551, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▌         | 5298/100000 [1:35:25<22:29:26,  1.17it/s] 

Epoch [5300/100000]
Train Loss: 10.2141
test Loss: 9.8808, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▌         | 5398/100000 [1:37:12<22:23:32,  1.17it/s] 

Epoch [5400/100000]
Train Loss: 10.2586
test Loss: 9.9591, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  5%|▌         | 5498/100000 [1:38:58<22:24:38,  1.17it/s] 

Epoch [5500/100000]
Train Loss: 10.2305
test Loss: 9.8920, test Accuracy: 0.0250
test F1 (micro): 0.0250, test F1 (macro): 0.0006


  6%|▌         | 5598/100000 [1:40:47<22:24:21,  1.17it/s] 

Epoch [5600/100000]
Train Loss: 10.2416
test Loss: 9.8798, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▌         | 5698/100000 [1:42:35<22:26:40,  1.17it/s] 

Epoch [5700/100000]
Train Loss: 10.0899
test Loss: 9.8406, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▌         | 5798/100000 [1:44:23<22:36:22,  1.16it/s] 

Epoch [5800/100000]
Train Loss: 10.2330
test Loss: 9.9097, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▌         | 5898/100000 [1:46:12<22:24:46,  1.17it/s] 

Epoch [5900/100000]
Train Loss: 10.1574
test Loss: 9.9145, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▌         | 5998/100000 [1:48:06<22:15:16,  1.17it/s] 

Epoch [6000/100000]
Train Loss: 10.2457
test Loss: 9.8572, test Accuracy: 0.0250
test F1 (micro): 0.0250, test F1 (macro): 0.0006


  6%|▌         | 6098/100000 [1:49:55<22:18:36,  1.17it/s] 

Epoch [6100/100000]
Train Loss: 10.0967
test Loss: 9.8741, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▌         | 6198/100000 [1:51:42<22:16:54,  1.17it/s] 

Epoch [6200/100000]
Train Loss: 10.2032
test Loss: 9.9327, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▋         | 6298/100000 [1:53:31<22:17:31,  1.17it/s] 

Epoch [6300/100000]
Train Loss: 10.2815
test Loss: 9.9015, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▋         | 6398/100000 [1:55:18<22:11:22,  1.17it/s] 

Epoch [6400/100000]
Train Loss: 10.2280
test Loss: 9.9432, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  6%|▋         | 6498/100000 [1:57:05<22:17:43,  1.16it/s] 

Epoch [6500/100000]
Train Loss: 10.1315
test Loss: 9.8989, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 6598/100000 [1:58:56<22:15:26,  1.17it/s] 

Epoch [6600/100000]
Train Loss: 10.2961
test Loss: 9.9240, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 6698/100000 [2:00:42<22:10:21,  1.17it/s] 

Epoch [6700/100000]
Train Loss: 10.2215
test Loss: 9.9514, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 6798/100000 [2:02:32<22:04:41,  1.17it/s] 

Epoch [6800/100000]
Train Loss: 10.2397
test Loss: 9.9354, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 6898/100000 [2:04:25<22:05:18,  1.17it/s] 

Epoch [6900/100000]
Train Loss: 10.1755
test Loss: 9.9303, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 6998/100000 [2:06:09<22:04:58,  1.17it/s] 

Epoch [7000/100000]
Train Loss: 10.1580
test Loss: 9.9841, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


  7%|▋         | 7098/100000 [2:07:57<22:04:53,  1.17it/s] 

Epoch [7100/100000]
Train Loss: 10.1968
test Loss: 9.9468, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 11%|█▏        | 11398/100000 [3:25:49<21:03:39,  1.17it/s]

Epoch [11400/100000]
Train Loss: 10.1088
test Loss: 10.0872, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 11%|█▏        | 11498/100000 [3:27:36<21:05:29,  1.17it/s] 

Epoch [11500/100000]
Train Loss: 10.1869
test Loss: 10.1001, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 11598/100000 [3:29:22<20:58:33,  1.17it/s] 

Epoch [11600/100000]
Train Loss: 10.2427
test Loss: 10.1338, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 11698/100000 [3:31:09<21:04:51,  1.16it/s] 

Epoch [11700/100000]
Train Loss: 10.0887
test Loss: 10.0980, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 11798/100000 [3:32:54<21:10:32,  1.16it/s] 

Epoch [11800/100000]
Train Loss: 10.2298
test Loss: 10.0627, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 11898/100000 [3:34:43<20:47:10,  1.18it/s] 

Epoch [11900/100000]
Train Loss: 10.0509
test Loss: 10.0716, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 11998/100000 [3:36:27<20:50:13,  1.17it/s] 

Epoch [12000/100000]
Train Loss: 10.1462
test Loss: 10.0701, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 12098/100000 [3:38:14<20:48:52,  1.17it/s] 

Epoch [12100/100000]
Train Loss: 10.1219
test Loss: 10.1235, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 12198/100000 [3:40:00<20:52:41,  1.17it/s] 

Epoch [12200/100000]
Train Loss: 10.2327
test Loss: 10.1035, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 12298/100000 [3:41:48<20:46:56,  1.17it/s] 

Epoch [12300/100000]
Train Loss: 10.1796
test Loss: 10.1201, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 12398/100000 [3:43:38<20:50:41,  1.17it/s] 

Epoch [12400/100000]
Train Loss: 10.2172
test Loss: 10.1147, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 12%|█▏        | 12498/100000 [3:45:24<20:53:19,  1.16it/s] 

Epoch [12500/100000]
Train Loss: 10.2045
test Loss: 10.1099, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 12598/100000 [3:47:11<20:42:51,  1.17it/s] 

Epoch [12600/100000]
Train Loss: 10.1225
test Loss: 10.1606, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 12698/100000 [3:48:58<20:49:32,  1.16it/s] 

Epoch [12700/100000]
Train Loss: 10.3396
test Loss: 10.0879, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 12798/100000 [3:50:50<20:42:08,  1.17it/s] 

Epoch [12800/100000]
Train Loss: 10.3361
test Loss: 10.0822, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 12898/100000 [3:52:37<20:38:53,  1.17it/s] 

Epoch [12900/100000]
Train Loss: 10.2739
test Loss: 10.0925, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 12998/100000 [3:54:32<20:45:33,  1.16it/s] 

Epoch [13000/100000]
Train Loss: 10.3044
test Loss: 10.1169, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 13%|█▎        | 13035/100000 [3:55:25<20:56:13,  1.15it/s] 

Epoch [13400/100000]
Train Loss: 10.2396
test Loss: 10.0828, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▍        | 14698/100000 [4:25:10<20:08:24,  1.18it/s]

Epoch [14700/100000]
Train Loss: 10.2065
test Loss: 10.0989, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▍        | 14798/100000 [4:27:01<20:15:13,  1.17it/s] 

Epoch [14800/100000]
Train Loss: 10.2286
test Loss: 10.0816, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▍        | 14898/100000 [4:28:50<20:16:13,  1.17it/s] 

Epoch [14900/100000]
Train Loss: 9.9974
test Loss: 10.0587, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▍        | 14998/100000 [4:30:39<20:03:27,  1.18it/s] 

Epoch [15000/100000]
Train Loss: 10.0144
test Loss: 10.0706, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▌        | 15098/100000 [4:32:35<20:34:35,  1.15it/s] 

Epoch [15100/100000]
Train Loss: 10.2259
test Loss: 10.1082, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▌        | 15198/100000 [4:34:18<20:07:23,  1.17it/s] 

Epoch [15200/100000]
Train Loss: 10.2681
test Loss: 10.1124, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▌        | 15298/100000 [4:36:06<20:08:36,  1.17it/s] 

Epoch [15300/100000]
Train Loss: 10.2144
test Loss: 10.1167, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▌        | 15398/100000 [4:37:55<20:14:43,  1.16it/s] 

Epoch [15400/100000]
Train Loss: 10.0875
test Loss: 10.0870, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 15%|█▌        | 15498/100000 [4:39:41<20:03:39,  1.17it/s] 

Epoch [15500/100000]
Train Loss: 10.5019
test Loss: 10.1593, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 15598/100000 [4:41:31<20:03:32,  1.17it/s] 

Epoch [15600/100000]
Train Loss: 10.2275
test Loss: 10.0901, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 15698/100000 [4:43:18<20:05:31,  1.17it/s] 

Epoch [15700/100000]
Train Loss: 10.1456
test Loss: 10.1090, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 15798/100000 [4:45:08<20:03:04,  1.17it/s] 

Epoch [15800/100000]
Train Loss: 10.2332
test Loss: 10.1021, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 15898/100000 [4:46:56<20:02:55,  1.17it/s] 

Epoch [15900/100000]
Train Loss: 10.3044
test Loss: 10.1308, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 15998/100000 [4:48:41<19:47:22,  1.18it/s] 

Epoch [16000/100000]
Train Loss: 10.3263
test Loss: 10.1287, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 16098/100000 [4:50:28<19:58:29,  1.17it/s] 

Epoch [16100/100000]
Train Loss: 10.1703
test Loss: 10.1197, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▌        | 16198/100000 [4:52:17<19:55:24,  1.17it/s] 

Epoch [16200/100000]
Train Loss: 10.3130
test Loss: 10.1161, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▋        | 16298/100000 [4:54:07<19:44:41,  1.18it/s] 

Epoch [16300/100000]
Train Loss: 10.3831
test Loss: 10.1375, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▋        | 16398/100000 [4:55:54<19:56:05,  1.16it/s] 

Epoch [16400/100000]
Train Loss: 10.4414
test Loss: 10.0923, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 16%|█▋        | 16498/100000 [4:57:38<19:50:44,  1.17it/s] 

Epoch [16500/100000]
Train Loss: 10.3328
test Loss: 10.0981, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 17%|█▋        | 16598/100000 [4:59:33<19:49:06,  1.17it/s] 

Epoch [16600/100000]
Train Loss: 10.3343
test Loss: 10.0891, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 17%|█▋        | 16698/100000 [5:01:20<19:46:37,  1.17it/s] 

Epoch [16700/100000]
Train Loss: 10.3932
test Loss: 10.0900, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 17%|█▋        | 16798/100000 [5:03:06<19:46:44,  1.17it/s] 

Epoch [16800/100000]
Train Loss: 10.2173
test Loss: 10.0836, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 17%|█▋        | 16898/100000 [5:04:55<19:41:46,  1.17it/s] 

Epoch [16900/100000]
Train Loss: 10.2809
test Loss: 10.0939, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▍    | 54698/100000 [16:28:18<10:43:00,  1.17it/s]

Epoch [54700/100000]
Train Loss: 10.6800
test Loss: 10.2332, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▍    | 54798/100000 [16:30:04<10:41:18,  1.17it/s]

Epoch [54800/100000]
Train Loss: 10.8184
test Loss: 10.2363, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▍    | 54898/100000 [16:31:53<10:42:53,  1.17it/s]

Epoch [54900/100000]
Train Loss: 10.7230
test Loss: 10.2428, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▍    | 54998/100000 [16:33:39<10:41:50,  1.17it/s]

Epoch [55000/100000]
Train Loss: 10.8448
test Loss: 10.2459, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▌    | 55098/100000 [16:35:25<10:40:37,  1.17it/s]

Epoch [55100/100000]
Train Loss: 10.8187
test Loss: 10.2491, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▌    | 55198/100000 [16:37:18<10:35:28,  1.18it/s] 

Epoch [55200/100000]
Train Loss: 10.6867
test Loss: 10.2582, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▌    | 55298/100000 [16:39:04<10:32:49,  1.18it/s]

Epoch [55300/100000]
Train Loss: 10.6648
test Loss: 10.2632, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▌    | 55398/100000 [16:40:52<10:32:51,  1.17it/s]

Epoch [55400/100000]
Train Loss: 10.8662
test Loss: 10.2655, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 55%|█████▌    | 55498/100000 [16:42:40<10:30:08,  1.18it/s]

Epoch [55500/100000]
Train Loss: 10.8319
test Loss: 10.2728, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 55598/100000 [16:44:27<10:30:39,  1.17it/s]

Epoch [55600/100000]
Train Loss: 10.5894
test Loss: 10.2699, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 55698/100000 [16:46:18<10:29:02,  1.17it/s] 

Epoch [55700/100000]
Train Loss: 10.5876
test Loss: 10.2730, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 55798/100000 [16:48:13<10:27:35,  1.17it/s] 

Epoch [55800/100000]
Train Loss: 10.7381
test Loss: 10.2768, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 55898/100000 [16:50:06<10:24:03,  1.18it/s] 

Epoch [55900/100000]
Train Loss: 10.5894
test Loss: 10.2749, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 55998/100000 [16:51:55<10:25:49,  1.17it/s]

Epoch [56000/100000]
Train Loss: 10.8644
test Loss: 10.2741, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 56098/100000 [16:53:49<10:23:52,  1.17it/s] 

Epoch [56100/100000]
Train Loss: 10.6020
test Loss: 10.2675, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▌    | 56198/100000 [16:55:38<10:19:05,  1.18it/s]

Epoch [56200/100000]
Train Loss: 10.7712
test Loss: 10.2682, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▋    | 56298/100000 [16:57:24<10:20:56,  1.17it/s]

Epoch [56300/100000]
Train Loss: 10.6139
test Loss: 10.2655, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▋    | 56398/100000 [16:59:19<10:19:05,  1.17it/s] 

Epoch [56400/100000]
Train Loss: 11.1227
test Loss: 10.2644, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 56%|█████▋    | 56498/100000 [17:01:04<10:18:08,  1.17it/s]

Epoch [56500/100000]
Train Loss: 10.7543
test Loss: 10.2628, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 56598/100000 [17:02:51<10:18:10,  1.17it/s]

Epoch [56600/100000]
Train Loss: 10.8055
test Loss: 10.2730, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 56698/100000 [17:04:37<10:14:56,  1.17it/s]

Epoch [56700/100000]
Train Loss: 10.8270
test Loss: 10.2560, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 56798/100000 [17:06:24<10:13:53,  1.17it/s]

Epoch [56800/100000]
Train Loss: 10.9367
test Loss: 10.2463, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 56898/100000 [17:08:11<10:12:36,  1.17it/s]

Epoch [56900/100000]
Train Loss: 10.7645
test Loss: 10.2521, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 56998/100000 [17:09:57<10:11:49,  1.17it/s]

Epoch [57000/100000]
Train Loss: 10.6957
test Loss: 10.2581, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 57098/100000 [17:11:43<10:09:22,  1.17it/s]

Epoch [57100/100000]
Train Loss: 10.5851
test Loss: 10.2669, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 57198/100000 [17:13:33<10:04:22,  1.18it/s]

Epoch [57200/100000]
Train Loss: 10.6629
test Loss: 10.2736, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 57298/100000 [17:15:22<10:08:11,  1.17it/s]

Epoch [57300/100000]
Train Loss: 10.9352
test Loss: 10.2824, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 57398/100000 [17:17:07<13:26:46,  1.14s/it]

Epoch [57400/100000]
Train Loss: 10.7548
test Loss: 10.2723, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 57%|█████▋    | 57498/100000 [17:18:58<10:03:59,  1.17it/s] 

Epoch [57500/100000]
Train Loss: 10.9737
test Loss: 10.2689, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 57598/100000 [17:20:59<10:02:07,  1.17it/s] 

Epoch [57600/100000]
Train Loss: 10.8730
test Loss: 10.2694, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 57631/100000 [17:21:51<10:03:36,  1.17it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 58%|█████▊    | 57698/100000 [17:22:48<10:03:39,  1.17it/s]

Epoch [57700/100000]
Train Loss: 10.7168
test Loss: 10.2712, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 57798/100000 [17:24:36<9:59:53,  1.17it/s] 

Epoch [57800/100000]
Train Loss: 11.0049
test Loss: 10.2744, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 57898/100000 [17:26:24<9:59:20,  1.17it/s] 

Epoch [57900/100000]
Train Loss: 10.6853
test Loss: 10.2806, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 57998/100000 [17:28:14<9:57:27,  1.17it/s] 

Epoch [58000/100000]
Train Loss: 10.9313
test Loss: 10.2825, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 58098/100000 [17:30:01<9:58:14,  1.17it/s] 

Epoch [58100/100000]
Train Loss: 10.6521
test Loss: 10.2907, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 58198/100000 [17:31:47<9:54:42,  1.17it/s] 

Epoch [58200/100000]
Train Loss: 10.9455
test Loss: 10.2747, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 58298/100000 [17:33:40<9:51:45,  1.17it/s]  

Epoch [58300/100000]
Train Loss: 10.8063
test Loss: 10.2731, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 58398/100000 [17:35:26<9:51:45,  1.17it/s] 

Epoch [58400/100000]
Train Loss: 11.0033
test Loss: 10.2660, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 58%|█████▊    | 58498/100000 [17:37:21<9:50:19,  1.17it/s]  

Epoch [58500/100000]
Train Loss: 10.5239
test Loss: 10.2649, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▊    | 58698/100000 [17:40:57<9:47:53,  1.17it/s] 

Epoch [58700/100000]
Train Loss: 10.7430
test Loss: 10.2624, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 58798/100000 [17:42:44<9:48:04,  1.17it/s] 

Epoch [58800/100000]
Train Loss: 10.7497
test Loss: 10.2758, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 58898/100000 [17:44:33<9:45:19,  1.17it/s] 

Epoch [58900/100000]
Train Loss: 10.8886
test Loss: 10.2813, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 58998/100000 [17:46:19<9:44:50,  1.17it/s] 

Epoch [59000/100000]
Train Loss: 10.9599
test Loss: 10.2867, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 59098/100000 [17:48:05<9:45:35,  1.16it/s] 

Epoch [59100/100000]
Train Loss: 10.7295
test Loss: 10.3030, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 59198/100000 [17:49:51<9:39:47,  1.17it/s] 

Epoch [59200/100000]
Train Loss: 10.8504
test Loss: 10.2982, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 59298/100000 [17:51:42<9:36:32,  1.18it/s] 

Epoch [59300/100000]
Train Loss: 10.9713
test Loss: 10.3098, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 59398/100000 [17:53:27<9:33:35,  1.18it/s] 

Epoch [59400/100000]
Train Loss: 10.7640
test Loss: 10.3013, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 59%|█████▉    | 59498/100000 [17:55:12<9:32:10,  1.18it/s] 

Epoch [59500/100000]
Train Loss: 10.7323
test Loss: 10.3043, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|█████▉    | 59598/100000 [17:56:59<9:34:22,  1.17it/s] 

Epoch [59600/100000]
Train Loss: 10.5696
test Loss: 10.3117, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|█████▉    | 59698/100000 [17:58:45<9:33:05,  1.17it/s] 

Epoch [59700/100000]
Train Loss: 10.6906
test Loss: 10.3176, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|█████▉    | 59798/100000 [18:00:31<9:29:46,  1.18it/s] 

Epoch [59800/100000]
Train Loss: 11.1062
test Loss: 10.3144, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|█████▉    | 59898/100000 [18:02:22<9:30:21,  1.17it/s] 

Epoch [59900/100000]
Train Loss: 10.8275
test Loss: 10.2988, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|█████▉    | 59998/100000 [18:04:10<9:28:54,  1.17it/s] 

Epoch [60000/100000]
Train Loss: 10.7924
test Loss: 10.2853, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|██████    | 60098/100000 [18:05:56<9:25:52,  1.18it/s] 

Epoch [60100/100000]
Train Loss: 10.8168
test Loss: 10.2900, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|██████    | 60198/100000 [18:07:48<9:28:29,  1.17it/s] 

Epoch [60200/100000]
Train Loss: 10.8674
test Loss: 10.2874, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|██████    | 60298/100000 [18:09:38<9:23:47,  1.17it/s] 

Epoch [60300/100000]
Train Loss: 10.8455
test Loss: 10.2865, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|██████    | 60398/100000 [18:11:24<9:22:32,  1.17it/s] 

Epoch [60400/100000]
Train Loss: 10.5248
test Loss: 10.2820, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 60%|██████    | 60498/100000 [18:13:11<9:20:29,  1.17it/s] 

Epoch [60500/100000]
Train Loss: 11.0790
test Loss: 10.2866, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 60598/100000 [18:15:03<9:19:54,  1.17it/s]  

Epoch [60600/100000]
Train Loss: 10.8817
test Loss: 10.2927, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 60798/100000 [18:18:40<9:17:07,  1.17it/s] 

Epoch [60800/100000]
Train Loss: 10.8234
test Loss: 10.2978, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 60898/100000 [18:20:31<9:13:24,  1.18it/s] 

Epoch [60900/100000]
Train Loss: 10.9520
test Loss: 10.3011, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 60998/100000 [18:22:22<9:15:42,  1.17it/s] 

Epoch [61000/100000]
Train Loss: 11.0879
test Loss: 10.3001, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 61098/100000 [18:24:08<9:14:52,  1.17it/s] 

Epoch [61100/100000]
Train Loss: 10.8336
test Loss: 10.2980, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████    | 61198/100000 [18:26:01<9:10:02,  1.18it/s] 

Epoch [61200/100000]
Train Loss: 10.9054
test Loss: 10.2875, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████▏   | 61298/100000 [18:27:46<9:07:50,  1.18it/s] 

Epoch [61300/100000]
Train Loss: 10.7067
test Loss: 10.2882, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████▏   | 61398/100000 [18:29:34<9:12:17,  1.16it/s] 

Epoch [61400/100000]
Train Loss: 11.0296
test Loss: 10.2847, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 61%|██████▏   | 61498/100000 [18:31:27<9:08:34,  1.17it/s] 

Epoch [61500/100000]
Train Loss: 11.0405
test Loss: 10.2832, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 61598/100000 [18:33:16<9:03:53,  1.18it/s] 

Epoch [61600/100000]
Train Loss: 10.8631
test Loss: 10.2629, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 61698/100000 [18:35:07<9:04:06,  1.17it/s] 

Epoch [61700/100000]
Train Loss: 10.7250
test Loss: 10.2540, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 61798/100000 [18:36:52<9:04:04,  1.17it/s] 

Epoch [61800/100000]
Train Loss: 10.9204
test Loss: 10.2653, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 61898/100000 [18:38:46<8:59:46,  1.18it/s]  

Epoch [61900/100000]
Train Loss: 10.8448
test Loss: 10.2657, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 61998/100000 [18:40:38<8:56:05,  1.18it/s] 

Epoch [62000/100000]
Train Loss: 10.8463
test Loss: 10.2793, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 62098/100000 [18:42:26<9:04:22,  1.16it/s] 

Epoch [62100/100000]
Train Loss: 10.6437
test Loss: 10.2882, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 62198/100000 [18:44:11<8:56:37,  1.17it/s] 

Epoch [62200/100000]
Train Loss: 10.9923
test Loss: 10.2914, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 62298/100000 [18:45:55<8:53:27,  1.18it/s] 

Epoch [62300/100000]
Train Loss: 10.9693
test Loss: 10.2914, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 62398/100000 [18:47:43<8:55:46,  1.17it/s] 

Epoch [62400/100000]
Train Loss: 11.0782
test Loss: 10.3004, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 62%|██████▏   | 62498/100000 [18:49:32<8:53:16,  1.17it/s] 

Epoch [62500/100000]
Train Loss: 10.8707
test Loss: 10.3085, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 62598/100000 [18:51:18<8:52:05,  1.17it/s] 

Epoch [62600/100000]
Train Loss: 11.0078
test Loss: 10.3128, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 62698/100000 [18:53:07<8:48:37,  1.18it/s] 

Epoch [62700/100000]
Train Loss: 10.7935
test Loss: 10.3194, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 62798/100000 [18:54:55<8:52:56,  1.16it/s] 

Epoch [62800/100000]
Train Loss: 10.6752
test Loss: 10.3137, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 62898/100000 [18:56:44<8:48:43,  1.17it/s] 

Epoch [62900/100000]
Train Loss: 11.0366
test Loss: 10.3108, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 62998/100000 [18:58:33<8:41:09,  1.18it/s] 

Epoch [63000/100000]
Train Loss: 10.8691
test Loss: 10.3153, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 63098/100000 [19:00:18<8:44:03,  1.17it/s] 

Epoch [63100/100000]
Train Loss: 10.8628
test Loss: 10.3131, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 63198/100000 [19:02:04<8:42:42,  1.17it/s] 

Epoch [63200/100000]
Train Loss: 10.9903
test Loss: 10.3124, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 63298/100000 [19:03:50<8:42:53,  1.17it/s] 

Epoch [63300/100000]
Train Loss: 10.8535
test Loss: 10.3198, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 63398/100000 [19:05:36<8:40:22,  1.17it/s] 

Epoch [63400/100000]
Train Loss: 10.9760
test Loss: 10.3261, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 63%|██████▎   | 63498/100000 [19:07:24<8:40:02,  1.17it/s] 

Epoch [63500/100000]
Train Loss: 10.8154
test Loss: 10.3307, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▎   | 63598/100000 [19:09:14<8:36:50,  1.17it/s] 

Epoch [63600/100000]
Train Loss: 10.8001
test Loss: 10.3340, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▎   | 63698/100000 [19:11:05<8:35:03,  1.17it/s] 

Epoch [63700/100000]
Train Loss: 10.9333
test Loss: 10.3392, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 63798/100000 [19:12:51<8:34:25,  1.17it/s] 

Epoch [63800/100000]
Train Loss: 10.8556
test Loss: 10.3430, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 63898/100000 [19:14:45<8:31:53,  1.18it/s] 

Epoch [63900/100000]
Train Loss: 10.9405
test Loss: 10.3468, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 63998/100000 [19:16:37<8:33:15,  1.17it/s] 

Epoch [64000/100000]
Train Loss: 11.0868
test Loss: 10.3503, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 64098/100000 [19:18:22<8:31:58,  1.17it/s] 

Epoch [64100/100000]
Train Loss: 10.8514
test Loss: 10.3523, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 64198/100000 [19:20:11<8:30:04,  1.17it/s] 

Epoch [64200/100000]
Train Loss: 10.7574
test Loss: 10.3580, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 64298/100000 [19:21:58<8:25:17,  1.18it/s] 

Epoch [64300/100000]
Train Loss: 10.9051
test Loss: 10.3594, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 64%|██████▍   | 64498/100000 [19:25:32<8:24:26,  1.17it/s] 

Epoch [64500/100000]
Train Loss: 10.8949
test Loss: 10.3639, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▍   | 64598/100000 [19:27:19<8:23:16,  1.17it/s] 

Epoch [64600/100000]
Train Loss: 10.9919
test Loss: 10.3663, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▍   | 64698/100000 [19:29:09<8:21:17,  1.17it/s] 

Epoch [64700/100000]
Train Loss: 10.8488
test Loss: 10.3687, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▍   | 64798/100000 [19:30:57<8:20:24,  1.17it/s] 

Epoch [64800/100000]
Train Loss: 10.8307
test Loss: 10.3706, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▍   | 64898/100000 [19:32:45<8:16:43,  1.18it/s] 

Epoch [64900/100000]
Train Loss: 11.0139
test Loss: 10.3719, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▍   | 64998/100000 [19:34:34<8:19:10,  1.17it/s] 

Epoch [65000/100000]
Train Loss: 10.8012
test Loss: 10.3739, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▌   | 65098/100000 [19:36:22<8:15:15,  1.17it/s] 

Epoch [65100/100000]
Train Loss: 10.8355
test Loss: 10.3762, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▌   | 65198/100000 [19:38:13<8:15:28,  1.17it/s] 

Epoch [65200/100000]
Train Loss: 11.0790
test Loss: 10.3784, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▌   | 65298/100000 [19:39:59<8:12:06,  1.18it/s] 

Epoch [65300/100000]
Train Loss: 10.8366
test Loss: 10.3806, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▌   | 65398/100000 [19:41:50<8:09:28,  1.18it/s] 

Epoch [65400/100000]
Train Loss: 11.0202
test Loss: 10.3816, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 65%|██████▌   | 65498/100000 [19:43:37<8:10:45,  1.17it/s] 

Epoch [65500/100000]
Train Loss: 10.8622
test Loss: 10.3827, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 65598/100000 [19:45:29<8:08:30,  1.17it/s] 

Epoch [65600/100000]
Train Loss: 10.9407
test Loss: 10.3833, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 65698/100000 [19:47:16<8:07:34,  1.17it/s] 

Epoch [65700/100000]
Train Loss: 11.0641
test Loss: 10.3848, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 65798/100000 [19:49:07<8:05:24,  1.17it/s] 

Epoch [65800/100000]
Train Loss: 10.9410
test Loss: 10.3867, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 65898/100000 [19:50:54<8:05:21,  1.17it/s] 

Epoch [65900/100000]
Train Loss: 10.9099
test Loss: 10.3878, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 65998/100000 [19:52:46<8:04:05,  1.17it/s] 

Epoch [66000/100000]
Train Loss: 10.7981
test Loss: 10.3882, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 66098/100000 [19:54:34<8:00:21,  1.18it/s] 

Epoch [66100/100000]
Train Loss: 10.9973
test Loss: 10.3911, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▌   | 66198/100000 [19:56:30<8:01:37,  1.17it/s] 

Epoch [66200/100000]
Train Loss: 11.0171
test Loss: 10.3907, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▋   | 66298/100000 [19:58:19<7:59:14,  1.17it/s] 

Epoch [66300/100000]
Train Loss: 10.9533
test Loss: 10.3907, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 66%|██████▋   | 66398/100000 [20:00:14<7:59:06,  1.17it/s] 

Epoch [66400/100000]
Train Loss: 11.0184
test Loss: 10.3916, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 71%|███████   | 71198/100000 [21:26:47<6:43:19,  1.19it/s] 

Epoch [71200/100000]
Train Loss: 10.7082
test Loss: 10.4285, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 71%|███████▏  | 71298/100000 [21:28:33<6:42:26,  1.19it/s] 

Epoch [71300/100000]
Train Loss: 10.9747
test Loss: 10.4288, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 71%|███████▏  | 71398/100000 [21:30:18<6:39:59,  1.19it/s] 

Epoch [71400/100000]
Train Loss: 11.0484
test Loss: 10.4292, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 71%|███████▏  | 71498/100000 [21:32:03<6:39:00,  1.19it/s] 

Epoch [71500/100000]
Train Loss: 10.8250
test Loss: 10.4295, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 71598/100000 [21:33:48<6:37:50,  1.19it/s] 

Epoch [71600/100000]
Train Loss: 10.9892
test Loss: 10.4298, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 71698/100000 [21:35:36<6:36:45,  1.19it/s] 

Epoch [71700/100000]
Train Loss: 11.2356
test Loss: 10.4302, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 71798/100000 [21:37:26<6:34:18,  1.19it/s] 

Epoch [71800/100000]
Train Loss: 10.8283
test Loss: 10.4305, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 71898/100000 [21:39:13<6:35:41,  1.18it/s] 

Epoch [71900/100000]
Train Loss: 10.8872
test Loss: 10.4310, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 71998/100000 [21:40:57<6:31:55,  1.19it/s] 

Epoch [72000/100000]
Train Loss: 10.7607
test Loss: 10.4315, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 72098/100000 [21:42:46<6:30:14,  1.19it/s] 

Epoch [72100/100000]
Train Loss: 11.0414
test Loss: 10.4319, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 72198/100000 [21:44:33<6:29:22,  1.19it/s] 

Epoch [72200/100000]
Train Loss: 10.9540
test Loss: 10.4323, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 72298/100000 [21:46:19<6:27:33,  1.19it/s] 

Epoch [72300/100000]
Train Loss: 11.0232
test Loss: 10.4327, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 72398/100000 [21:48:03<6:26:12,  1.19it/s] 

Epoch [72400/100000]
Train Loss: 11.1795
test Loss: 10.4331, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 72%|███████▏  | 72498/100000 [21:49:54<6:24:36,  1.19it/s] 

Epoch [72500/100000]
Train Loss: 10.9181
test Loss: 10.4335, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 72598/100000 [21:51:45<6:22:43,  1.19it/s] 

Epoch [72600/100000]
Train Loss: 10.9036
test Loss: 10.4339, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 72698/100000 [21:53:32<6:20:28,  1.20it/s] 

Epoch [72700/100000]
Train Loss: 11.0813
test Loss: 10.4343, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 72798/100000 [21:55:17<6:20:08,  1.19it/s] 

Epoch [72800/100000]
Train Loss: 11.0893
test Loss: 10.4349, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 72898/100000 [21:57:05<6:20:48,  1.19it/s] 

Epoch [72900/100000]
Train Loss: 10.9561
test Loss: 10.4353, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 72998/100000 [21:58:50<6:17:46,  1.19it/s] 

Epoch [73000/100000]
Train Loss: 11.0459
test Loss: 10.4357, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 73098/100000 [22:00:39<6:14:44,  1.20it/s] 

Epoch [73100/100000]
Train Loss: 11.0246
test Loss: 10.4360, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 73198/100000 [22:02:28<6:16:07,  1.19it/s] 

Epoch [73200/100000]
Train Loss: 10.5786
test Loss: 10.4363, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 73298/100000 [22:04:21<6:11:45,  1.20it/s] 

Epoch [73300/100000]
Train Loss: 10.8871
test Loss: 10.4366, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 73398/100000 [22:06:06<6:11:57,  1.19it/s] 

Epoch [73400/100000]
Train Loss: 10.9103
test Loss: 10.4368, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 73%|███████▎  | 73498/100000 [22:07:51<6:11:00,  1.19it/s] 

Epoch [73500/100000]
Train Loss: 11.0391
test Loss: 10.4370, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▎  | 73598/100000 [22:09:38<6:09:14,  1.19it/s] 

Epoch [73600/100000]
Train Loss: 10.9995
test Loss: 10.4372, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▎  | 73698/100000 [22:11:29<6:07:41,  1.19it/s] 

Epoch [73700/100000]
Train Loss: 10.7613
test Loss: 10.4374, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▍  | 73798/100000 [22:13:16<6:06:55,  1.19it/s] 

Epoch [73800/100000]
Train Loss: 10.8608
test Loss: 10.4375, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▍  | 73898/100000 [22:15:02<6:06:05,  1.19it/s] 

Epoch [73900/100000]
Train Loss: 10.7740
test Loss: 10.4376, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▍  | 73998/100000 [22:16:48<6:02:40,  1.19it/s] 

Epoch [74000/100000]
Train Loss: 10.8439
test Loss: 10.4377, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 74%|███████▍  | 74069/100000 [22:18:08<6:00:57,  1.20it/s] IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 77%|███████▋  | 76598/100000 [23:02:50<5:26:14,  1.20it/s]

Epoch [76600/100000]
Train Loss: 11.0695
test Loss: 10.4410, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 76698/100000 [23:04:39<5:25:51,  1.19it/s] 

Epoch [76700/100000]
Train Loss: 11.0008
test Loss: 10.4412, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 76798/100000 [23:06:24<5:24:05,  1.19it/s] 

Epoch [76800/100000]
Train Loss: 10.9674
test Loss: 10.4413, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 76898/100000 [23:08:08<5:22:48,  1.19it/s] 

Epoch [76900/100000]
Train Loss: 10.9105
test Loss: 10.4414, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 76998/100000 [23:09:53<5:21:24,  1.19it/s] 

Epoch [77000/100000]
Train Loss: 10.7943
test Loss: 10.4415, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 77098/100000 [23:11:38<5:20:40,  1.19it/s] 

Epoch [77100/100000]
Train Loss: 10.9989
test Loss: 10.4417, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 77198/100000 [23:13:23<5:21:02,  1.18it/s] 

Epoch [77200/100000]
Train Loss: 10.8869
test Loss: 10.4418, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 77298/100000 [23:15:09<5:17:18,  1.19it/s] 

Epoch [77300/100000]
Train Loss: 10.8008
test Loss: 10.4419, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 77%|███████▋  | 77398/100000 [23:16:58<5:15:00,  1.20it/s] 

Epoch [77400/100000]
Train Loss: 10.9384
test Loss: 10.4420, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 77598/100000 [23:20:27<5:15:03,  1.19it/s] 

Epoch [77600/100000]
Train Loss: 10.9719
test Loss: 10.4423, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 77698/100000 [23:22:16<5:10:33,  1.20it/s] 

Epoch [77700/100000]
Train Loss: 11.1764
test Loss: 10.4424, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 77798/100000 [23:24:03<5:12:22,  1.18it/s] 

Epoch [77800/100000]
Train Loss: 11.0845
test Loss: 10.4425, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 77898/100000 [23:25:47<5:10:57,  1.18it/s] 

Epoch [77900/100000]
Train Loss: 10.8241
test Loss: 10.4426, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 77998/100000 [23:27:41<5:06:48,  1.20it/s] 

Epoch [78000/100000]
Train Loss: 10.8046
test Loss: 10.4427, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 78098/100000 [23:29:28<5:03:54,  1.20it/s] 

Epoch [78100/100000]
Train Loss: 10.8164
test Loss: 10.4428, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 78198/100000 [23:31:13<5:05:18,  1.19it/s] 

Epoch [78200/100000]
Train Loss: 10.7768
test Loss: 10.4429, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 78298/100000 [23:32:57<5:01:16,  1.20it/s] 

Epoch [78300/100000]
Train Loss: 11.1067
test Loss: 10.4430, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 78398/100000 [23:34:42<5:02:40,  1.19it/s] 

Epoch [78400/100000]
Train Loss: 10.9743
test Loss: 10.4431, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 78%|███████▊  | 78498/100000 [23:36:30<5:00:52,  1.19it/s] 

Epoch [78500/100000]
Train Loss: 11.1398
test Loss: 10.4432, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▊  | 78598/100000 [23:38:15<4:59:17,  1.19it/s] 

Epoch [78600/100000]
Train Loss: 10.7948
test Loss: 10.4433, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▊  | 78698/100000 [23:40:01<4:57:43,  1.19it/s] 

Epoch [78700/100000]
Train Loss: 10.8073
test Loss: 10.4434, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 78798/100000 [23:41:47<4:57:12,  1.19it/s] 

Epoch [78800/100000]
Train Loss: 11.2095
test Loss: 10.4435, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 78898/100000 [23:43:31<4:53:41,  1.20it/s] 

Epoch [78900/100000]
Train Loss: 10.9462
test Loss: 10.4435, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 78998/100000 [23:45:16<4:53:41,  1.19it/s] 

Epoch [79000/100000]
Train Loss: 11.0422
test Loss: 10.4436, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 79098/100000 [23:47:02<4:52:23,  1.19it/s] 

Epoch [79100/100000]
Train Loss: 10.9515
test Loss: 10.4437, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 79198/100000 [23:48:51<4:50:30,  1.19it/s] 

Epoch [79200/100000]
Train Loss: 11.1030
test Loss: 10.4438, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 79298/100000 [23:50:36<4:50:49,  1.19it/s] 

Epoch [79300/100000]
Train Loss: 10.8269
test Loss: 10.4438, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 79398/100000 [23:52:20<4:46:03,  1.20it/s] 

Epoch [79400/100000]
Train Loss: 11.0503
test Loss: 10.4439, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 79%|███████▉  | 79498/100000 [23:54:07<4:44:58,  1.20it/s] 

Epoch [79500/100000]
Train Loss: 10.9035
test Loss: 10.4439, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|███████▉  | 79598/100000 [23:55:56<4:45:32,  1.19it/s] 

Epoch [79600/100000]
Train Loss: 10.9335
test Loss: 10.4440, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|███████▉  | 79698/100000 [23:57:44<4:43:02,  1.20it/s] 

Epoch [79700/100000]
Train Loss: 10.8148
test Loss: 10.4441, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|███████▉  | 79798/100000 [23:59:33<4:43:19,  1.19it/s] 

Epoch [79800/100000]
Train Loss: 10.9089
test Loss: 10.4441, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|███████▉  | 79898/100000 [24:01:20<4:44:18,  1.18it/s] 

Epoch [79900/100000]
Train Loss: 10.8234
test Loss: 10.4441, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|███████▉  | 79998/100000 [24:03:04<4:40:15,  1.19it/s] 

Epoch [80000/100000]
Train Loss: 11.0830
test Loss: 10.4442, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|████████  | 80098/100000 [24:04:52<4:37:14,  1.20it/s] 

Epoch [80100/100000]
Train Loss: 10.8292
test Loss: 10.4442, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|████████  | 80198/100000 [24:06:41<4:37:34,  1.19it/s] 

Epoch [80200/100000]
Train Loss: 10.9028
test Loss: 10.4442, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|████████  | 80298/100000 [24:08:29<4:35:03,  1.19it/s] 

Epoch [80300/100000]
Train Loss: 11.0031
test Loss: 10.4442, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|████████  | 80398/100000 [24:10:15<4:35:17,  1.19it/s] 

Epoch [80400/100000]
Train Loss: 11.0854
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 80%|████████  | 80498/100000 [24:12:00<4:32:27,  1.19it/s] 

Epoch [80500/100000]
Train Loss: 11.0213
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 80598/100000 [24:13:49<4:28:44,  1.20it/s] 

Epoch [80600/100000]
Train Loss: 10.7986
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 80698/100000 [24:15:32<4:30:37,  1.19it/s] 

Epoch [80700/100000]
Train Loss: 11.1455
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 80898/100000 [24:19:00<4:27:24,  1.19it/s] 

Epoch [80900/100000]
Train Loss: 11.1555
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 80998/100000 [24:20:45<4:26:36,  1.19it/s] 

Epoch [81000/100000]
Train Loss: 10.7550
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 81098/100000 [24:22:31<4:24:09,  1.19it/s] 

Epoch [81100/100000]
Train Loss: 10.9020
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████  | 81198/100000 [24:24:17<4:21:46,  1.20it/s] 

Epoch [81200/100000]
Train Loss: 10.8549
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████▏ | 81298/100000 [24:26:06<4:21:43,  1.19it/s] 

Epoch [81300/100000]
Train Loss: 10.9987
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 81%|████████▏ | 81398/100000 [24:27:56<4:21:15,  1.19it/s] 

Epoch [81400/100000]
Train Loss: 11.0393
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 81598/100000 [24:31:40<4:17:29,  1.19it/s] 

Epoch [81600/100000]
Train Loss: 10.9489
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 81698/100000 [24:33:25<4:17:20,  1.19it/s] 

Epoch [81700/100000]
Train Loss: 11.0192
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 81798/100000 [24:35:11<4:14:35,  1.19it/s] 

Epoch [81800/100000]
Train Loss: 10.7249
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 81898/100000 [24:36:56<4:13:33,  1.19it/s] 

Epoch [81900/100000]
Train Loss: 10.8555
test Loss: 10.4443, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 81998/100000 [24:38:41<4:12:09,  1.19it/s] 

Epoch [82000/100000]
Train Loss: 11.0550
test Loss: 10.4454, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 82098/100000 [24:40:26<4:09:35,  1.20it/s] 

Epoch [82100/100000]
Train Loss: 10.8519
test Loss: 10.4469, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 82198/100000 [24:42:12<4:08:44,  1.19it/s] 

Epoch [82200/100000]
Train Loss: 10.9856
test Loss: 10.4487, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 82298/100000 [24:43:56<4:08:13,  1.19it/s] 

Epoch [82300/100000]
Train Loss: 11.1587
test Loss: 10.4498, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 82398/100000 [24:45:41<4:05:47,  1.19it/s] 

Epoch [82400/100000]
Train Loss: 10.8650
test Loss: 10.4510, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 82%|████████▏ | 82498/100000 [24:47:26<4:05:11,  1.19it/s] 

Epoch [82500/100000]
Train Loss: 11.0508
test Loss: 10.4514, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 82598/100000 [24:49:13<4:04:04,  1.19it/s] 

Epoch [82600/100000]
Train Loss: 10.7157
test Loss: 10.4519, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 82698/100000 [24:50:58<4:02:05,  1.19it/s] 

Epoch [82700/100000]
Train Loss: 10.9591
test Loss: 10.4535, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 82798/100000 [24:52:44<4:00:10,  1.19it/s] 

Epoch [82800/100000]
Train Loss: 10.9429
test Loss: 10.4543, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 82998/100000 [24:56:16<3:57:36,  1.19it/s] 

Epoch [83000/100000]
Train Loss: 10.9089
test Loss: 10.4578, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 83098/100000 [24:58:03<3:56:41,  1.19it/s] 

Epoch [83100/100000]
Train Loss: 10.7143
test Loss: 10.4587, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 83198/100000 [24:59:56<3:53:39,  1.20it/s] 

Epoch [83200/100000]
Train Loss: 11.1179
test Loss: 10.4592, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 83298/100000 [25:01:50<3:52:58,  1.19it/s] 

Epoch [83300/100000]
Train Loss: 10.9860
test Loss: 10.4598, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 83398/100000 [25:03:36<3:51:06,  1.20it/s] 

Epoch [83400/100000]
Train Loss: 10.9179
test Loss: 10.4610, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 83%|████████▎ | 83498/100000 [25:05:20<3:49:38,  1.20it/s] 

Epoch [83500/100000]
Train Loss: 10.8518
test Loss: 10.4618, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▎ | 83598/100000 [25:07:11<3:49:22,  1.19it/s] 

Epoch [83600/100000]
Train Loss: 11.1102
test Loss: 10.4621, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▎ | 83698/100000 [25:08:57<3:48:27,  1.19it/s] 

Epoch [83700/100000]
Train Loss: 10.7893
test Loss: 10.4631, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 83798/100000 [25:10:42<3:45:25,  1.20it/s] 

Epoch [83800/100000]
Train Loss: 11.2292
test Loss: 10.4634, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 83898/100000 [25:12:28<3:44:00,  1.20it/s] 

Epoch [83900/100000]
Train Loss: 11.0729
test Loss: 10.4644, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 83998/100000 [25:14:20<3:43:56,  1.19it/s] 

Epoch [84000/100000]
Train Loss: 10.8024
test Loss: 10.4643, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 84098/100000 [25:16:06<3:42:08,  1.19it/s] 

Epoch [84100/100000]
Train Loss: 10.7443
test Loss: 10.4650, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 84198/100000 [25:17:55<3:40:57,  1.19it/s] 

Epoch [84200/100000]
Train Loss: 10.9505
test Loss: 10.4661, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 84298/100000 [25:19:43<3:39:15,  1.19it/s] 

Epoch [84300/100000]
Train Loss: 10.8306
test Loss: 10.4654, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 84398/100000 [25:21:29<3:38:16,  1.19it/s] 

Epoch [84400/100000]
Train Loss: 11.0671
test Loss: 10.4663, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 84%|████████▍ | 84498/100000 [25:23:17<3:37:45,  1.19it/s] 

Epoch [84500/100000]
Train Loss: 11.1620
test Loss: 10.4668, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▍ | 84598/100000 [25:25:01<3:35:15,  1.19it/s] 

Epoch [84600/100000]
Train Loss: 11.0088
test Loss: 10.4671, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▍ | 84698/100000 [25:26:50<3:33:55,  1.19it/s] 

Epoch [84700/100000]
Train Loss: 10.9986
test Loss: 10.4676, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▍ | 84798/100000 [25:28:35<3:33:21,  1.19it/s] 

Epoch [84800/100000]
Train Loss: 10.8953
test Loss: 10.4674, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▍ | 84898/100000 [25:30:20<3:31:28,  1.19it/s] 

Epoch [84900/100000]
Train Loss: 11.0771
test Loss: 10.4673, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▌ | 85098/100000 [25:33:52<3:28:23,  1.19it/s] 

Epoch [85100/100000]
Train Loss: 10.7978
test Loss: 10.4676, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▌ | 85198/100000 [25:35:39<3:25:55,  1.20it/s] 

Epoch [85200/100000]
Train Loss: 11.1095
test Loss: 10.4684, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▌ | 85298/100000 [25:37:28<3:25:54,  1.19it/s] 

Epoch [85300/100000]
Train Loss: 10.8948
test Loss: 10.4686, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▌ | 85398/100000 [25:39:13<3:23:25,  1.20it/s] 

Epoch [85400/100000]
Train Loss: 10.8032
test Loss: 10.4696, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 85%|████████▌ | 85498/100000 [25:40:59<3:22:42,  1.19it/s] 

Epoch [85500/100000]
Train Loss: 10.8110
test Loss: 10.4706, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 85598/100000 [25:42:45<3:20:10,  1.20it/s] 

Epoch [85600/100000]
Train Loss: 10.9572
test Loss: 10.4701, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 85698/100000 [25:44:37<3:19:56,  1.19it/s] 

Epoch [85700/100000]
Train Loss: 10.8747
test Loss: 10.4719, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 85798/100000 [25:46:29<3:17:32,  1.20it/s] 

Epoch [85800/100000]
Train Loss: 11.0501
test Loss: 10.4708, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 85898/100000 [25:48:17<3:16:35,  1.20it/s] 

Epoch [85900/100000]
Train Loss: 10.8295
test Loss: 10.4707, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 85998/100000 [25:50:04<3:16:01,  1.19it/s] 

Epoch [86000/100000]
Train Loss: 10.9990
test Loss: 10.4717, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 86098/100000 [25:51:49<3:14:18,  1.19it/s] 

Epoch [86100/100000]
Train Loss: 11.1087
test Loss: 10.4730, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▌ | 86198/100000 [25:53:34<3:13:09,  1.19it/s] 

Epoch [86200/100000]
Train Loss: 11.1809
test Loss: 10.4748, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▋ | 86298/100000 [25:55:20<3:11:43,  1.19it/s] 

Epoch [86300/100000]
Train Loss: 11.0614
test Loss: 10.4759, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▋ | 86398/100000 [25:57:05<3:09:37,  1.20it/s] 

Epoch [86400/100000]
Train Loss: 11.0023
test Loss: 10.4775, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 86%|████████▋ | 86498/100000 [25:58:50<3:07:37,  1.20it/s] 

Epoch [86500/100000]
Train Loss: 10.9523
test Loss: 10.4790, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 86598/100000 [26:00:36<3:07:34,  1.19it/s] 

Epoch [86600/100000]
Train Loss: 10.8320
test Loss: 10.4803, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 86698/100000 [26:02:20<3:05:33,  1.19it/s] 

Epoch [86700/100000]
Train Loss: 11.1177
test Loss: 10.4816, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 86798/100000 [26:04:12<3:04:07,  1.20it/s] 

Epoch [86800/100000]
Train Loss: 10.9898
test Loss: 10.4817, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 86898/100000 [26:06:01<3:03:19,  1.19it/s] 

Epoch [86900/100000]
Train Loss: 11.0165
test Loss: 10.4839, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 86998/100000 [26:07:47<3:01:20,  1.20it/s] 

Epoch [87000/100000]
Train Loss: 11.1467
test Loss: 10.4847, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 87098/100000 [26:09:31<2:59:42,  1.20it/s] 

Epoch [87100/100000]
Train Loss: 10.9241
test Loss: 10.4867, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 87198/100000 [26:11:20<2:58:31,  1.20it/s] 

Epoch [87200/100000]
Train Loss: 10.8692
test Loss: 10.4893, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 87298/100000 [26:13:07<2:57:19,  1.19it/s] 

Epoch [87300/100000]
Train Loss: 11.0007
test Loss: 10.4901, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 87398/100000 [26:14:52<2:56:46,  1.19it/s] 

Epoch [87400/100000]
Train Loss: 11.0650
test Loss: 10.4917, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 87%|████████▋ | 87498/100000 [26:16:45<2:55:56,  1.18it/s] 

Epoch [87500/100000]
Train Loss: 10.8414
test Loss: 10.4925, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 87598/100000 [26:18:31<2:53:12,  1.19it/s] 

Epoch [87600/100000]
Train Loss: 10.9992
test Loss: 10.4949, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 87698/100000 [26:20:16<2:51:16,  1.20it/s] 

Epoch [87700/100000]
Train Loss: 11.0202
test Loss: 10.4951, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 87798/100000 [26:22:00<2:50:38,  1.19it/s] 

Epoch [87800/100000]
Train Loss: 10.8696
test Loss: 10.4948, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 87898/100000 [26:23:47<2:49:43,  1.19it/s] 

Epoch [87900/100000]
Train Loss: 10.8249
test Loss: 10.4952, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 87998/100000 [26:25:30<2:47:45,  1.19it/s] 

Epoch [88000/100000]
Train Loss: 11.0344
test Loss: 10.4948, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 88098/100000 [26:27:15<2:46:31,  1.19it/s] 

Epoch [88100/100000]
Train Loss: 10.9119
test Loss: 10.4964, test Accuracy: 0.0000
test F1 (micro): 0.0000, test F1 (macro): 0.0000


 88%|████████▊ | 88098/100000 [26:27:37<3:34:29,  1.08s/it]


KeyboardInterrupt: 

In [None]:
model, _, _, latest_epoch, metrics = load_checkpoint(model)

val_batches_ = [virus_da.get_batch() for _ in range(num_val // 2)] + [cellular_da.get_batch() for _ in range(num_val // 2)]

# input_sequences_ = [e['Sequence'] for b in val_batches_ for e in b]
# labels_ = [encode_lineage(e['Taxonomic_lineage__ALL_'])  for b in val_batches_ for e in b]

input_sequences_ = ["ACACAD"]
labels_ = [{0: 1}]

def evaluate_df(model):
    model.eval()  # Set model to evaluation mode
    
    df = {
        "sequence": [],
        "label": [],
        "pred": [],
        "loss": []
    }

    metrics = {
        "loss": 0,
        "accuracy": 0,
        "f1 macro": 0,
        "f1 micro": 0
    }
    
    # Process each sequence
    for sequence, label in zip(input_sequences_, labels_):
        inputs = tokenizer_(
            [sequence],
            return_tensors="pt",
            padding='max_length',
            truncation=True,
            max_length=max_seq_len
        ).to(device)
    
        # Get model output
        with torch.no_grad():
            output = model(inputs['input_ids'], inputs['attention_mask'])

        pred = output.argmax(dim=-1).cpu().item()
        loss = criterion(output, torch.tensor([label[0]]).to(device))
        df["sequence"].append(sequence)
        df["label"].append(level_decoder[0][label[0]])
        df["pred"].append(level_decoder[0][pred])
        df["loss"].append(round(loss.cpu().item(), 4))

    # Convert to DataFrame
    new_df = pd.DataFrame(df)
    new_df['is_incorrect'] = new_df['label'] != new_df['pred']
    new_df = new_df.sort_values(['is_incorrect', 'loss'], ascending=[False, False])
    new_df.to_csv(f'classification_results__new_att.csv', index=False)

    metrics["loss"] = np.array(df["loss"]).mean()
    metrics["accuracy"] = accuracy_score(np.array(df["label"]), np.array(df["pred"]))
    metrics["f1 macro"] = f1_score(np.array(df["label"]), np.array(df["pred"]), average='macro')  # F1-score for multi-label classification
    metrics["f1 micro"] = f1_score(np.array(df["label"]), np.array(df["pred"]), average='micro') 
    print(metrics)

evaluate_df(model)

[1;34mwandb[0m: 🚀 View run [33mlight-terrain-11[0m at: [34mhttps://wandb.ai/alireza_noroozi/Finetune_ESM/runs/nvokgtnu[0m
[1;34mwandb[0m: Find logs at: [1;35mwandb/run-20241205_144714-nvokgtnu/logs[0m
