In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import torch.optim as optim
from tqdm import tqdm
import sys, os, math
import wandb

sys.path.insert(0, '../dlp')
from batch import Batch

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

epochs = 100_000
val_epoch = 1000
num_val = 500
batch_size = 64
dataset_name = "corpus_200_500_random"
lr = 0.01
model_name = "CNN last level"
max_seq_len = 500

from data_access import PQDataAccess
da = PQDataAccess(f"/home/aac/Alireza/datasets/export_pqt_4_taxseq_new/{dataset_name}", batch_size)

checkpoint_dir = f"../checkpoints/{model_name}_checkpoints"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
print(checkpoint_dir)

wandb.init(
    # set the wandb project where this run will be logged
    project=model_name,

    # track hyperparameters and run metadata
    config={
        "learning_rate": lr,
        "architecture": "Cnn last level",
        "dataset": dataset_name,
        "epochs": epochs,
        "batch_szie": batch_size,
        "max_seq_len": max_seq_len
    }
)

cuda:0
 WORLD_SIZE=1 , LOCAL_WORLD_SIZE=1,RANK =0,LOCAL_RANK = 0 
../checkpoints/CNN last level_checkpoints


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malirezanor[0m ([33malireza_noroozi[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
from data_process import normal_CNN_batch
df = pd.read_csv('../data/new_output_id.csv')

first_unique_labels = sorted(set(df['Organism_ID'].values))
first_label_to_idx = {label: idx for idx, label in enumerate(first_unique_labels)}
first_idx_to_label = {idx: label for label, idx in first_label_to_idx.items()}
first_map_label = {r['Organism_ID']: first_label_to_idx[r['Organism_ID']] for _, r in df.iterrows()}

print("first level # of Classes", len(first_unique_labels))
first_num_classes = len(first_unique_labels)


# end_unique_labels = sorted(set(df['new label'].values))
# end_label_to_idx = {label: idx for idx, label in enumerate(end_unique_labels)}
# end_idx_to_label = {idx: label for label, idx in end_label_to_idx.items()}
# end_map_label = {r['Organism_ID']: end_label_to_idx[r['new label']] for _, r in df.iterrows()}

# print("end level # of Classes", len(end_unique_labels))
# end_num_classes = len(end_unique_labels)


def batch_convertor(b):
    inputs = normal_CNN_batch(b)
    
    first_tax_ids = [first_map_label[e['Taxonomic_lineage_IDs']] for e in b]
    # end_tax_ids = [end_map_label[e['Taxonomic_lineage_IDs']] for e in b]

    tax_ids = {
        "first": torch.LongTensor(first_tax_ids),
        # "end": torch.LongTensor(end_tax_ids),
    }
    
    return Batch(inputs, tax_ids)

  from .autonotebook import tqdm as notebook_tqdm


Loaded dictionary.
first level # of Classes 1276825


In [3]:
class ResidualBlock(nn.Module):
    def __init__(self, channels, bottleneck_factor=0.5):
        super().__init__()
        bottleneck_channels = int(channels * bottleneck_factor)
        
        self.layers = nn.Sequential(
            # Add operation (residual connection is added later)
            nn.Conv1d(channels, bottleneck_channels, kernel_size=1),  # bottleneck convolution
            nn.ReLU(),
            nn.BatchNorm1d(bottleneck_channels),
            nn.Conv1d(bottleneck_channels, bottleneck_channels, kernel_size=21, padding='same'),  # dilated convolution
            nn.ReLU(),
            nn.BatchNorm1d(bottleneck_channels),
            nn.Conv1d(bottleneck_channels, channels, kernel_size=1),  # restore channels
        )
        
    def forward(self, x):
        return x + self.layers(x)  # residual connection

class EnhancedProteinCNN(nn.Module):
    def __init__(self, 
                 num_classes_first,
                 num_classes_end,
                 vocab_size=25,
                 embedding_dim=128,
                 max_seq_length=max_seq_len,
                 num_filters=2000,  # as per parameter table
                 dropout_rate=0.5):
        super().__init__()
        
        # Original embedding for amino acid indices
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        
        # Additional feature processing
        self.feature_dense = nn.Linear(3, embedding_dim)
        
        # Process global sequence features
        self.global_feature_dense = nn.Linear(28, embedding_dim)
        
        # Initial convolution
        self.initial_conv = nn.Sequential(
            nn.Conv1d(embedding_dim, num_filters, kernel_size=21, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(num_filters)
        )
        
        # Residual blocks (multiple blocks as shown in the architecture)
        self.residual_blocks = nn.ModuleList([
            ResidualBlock(num_filters) for _ in range(5)  # adjust number of blocks as needed
        ])
        
        # Max pooling
        self.pool = nn.AdaptiveMaxPool1d(1)
        
        # Fully connected layers
        self.fc1 = nn.Linear(num_filters + embedding_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes_first)
        # self.fc4 = nn.Linear(256, num_classes_end)
        
        self.dropout = nn.Dropout(dropout_rate)
        self.layer_norm = nn.LayerNorm(embedding_dim)
    
    def forward(self, x, global_features, attention_mask=None):
        # Process amino acid indices
        seq_embeddings = self.embedding(x[:, :, 0].long())
        
        # Process additional features
        feature_embeddings = self.feature_dense(x[:, :, 1:4])
        
        # Combine embeddings
        x = seq_embeddings + feature_embeddings
        x = self.layer_norm(x)
        
        # Process global sequence features
        global_embedding = self.global_feature_dense(global_features)
        
        if attention_mask is not None:
            x = x * attention_mask.unsqueeze(-1)
        
        # Transpose for convolution
        x = x.transpose(1, 2)  # Shape: (batch_size, embedding_dim, seq_length)
        
        # Initial convolution
        x = self.initial_conv(x)
        
        # Apply residual blocks
        for block in self.residual_blocks:
            x = block(x)
        
        # Global pooling
        x = self.pool(x).squeeze(-1)
        
        # Add global features
        x = torch.cat([x, global_embedding], dim=1)
        
        # Final fully connected layers
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # end_x = self.fc4(x)
        first_x = self.fc3(x)
        
        # return first_x, end_x
        return first_x

In [5]:
end_num_classes = 51275
model = EnhancedProteinCNN(first_num_classes, end_num_classes).to(device)
print("model:", sum(p.numel() for p in model.parameters()) / 1e6, 'M parameters')
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
# Cosine annealing with warm restarts
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,  # Initial restart interval
    T_mult=2,  # Multiply interval by 2 after each restart
    eta_min=1e-6  # Minimum learning rate
)

model: 459.795081 M parameters
EnhancedProteinCNN(
  (embedding): Embedding(25, 128, padding_idx=0)
  (feature_dense): Linear(in_features=3, out_features=128, bias=True)
  (global_feature_dense): Linear(in_features=28, out_features=128, bias=True)
  (initial_conv): Sequential(
    (0): Conv1d(128, 2000, kernel_size=(21,), stride=(1,), padding=same)
    (1): ReLU()
    (2): BatchNorm1d(2000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (residual_blocks): ModuleList(
    (0-4): 5 x ResidualBlock(
      (layers): Sequential(
        (0): Conv1d(2000, 1000, kernel_size=(1,), stride=(1,))
        (1): ReLU()
        (2): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Conv1d(1000, 1000, kernel_size=(21,), stride=(1,), padding=same)
        (4): ReLU()
        (5): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (6): Conv1d(1000, 2000, kernel_size=(1,), stride=(1,))
      )
    )

In [6]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score, recall_score
import warnings
warnings.simplefilter("ignore")

def compute_metrics(all_labels, all_preds, running_loss, num_val):
    # Convert tensors to numpy arrays
    labels_np = torch.cat(all_labels).numpy()
    preds_np = torch.cat(all_preds).numpy()

    # print("labels:", labels_np)
    # print("preds:", preds_np)
    
    # Compute evaluation metrics
    accuracy = accuracy_score(labels_np, preds_np)
    
    # Precision scores
    precision_macro = precision_score(labels_np, preds_np, average='macro')
    precision_micro = precision_score(labels_np, preds_np, average='micro')
    
    # Recall scores
    recall_macro = recall_score(labels_np, preds_np, average='macro')
    recall_micro = recall_score(labels_np, preds_np, average='micro')
    
    # F1 scores
    f1_macro = f1_score(labels_np, preds_np, average='macro')
    f1_micro = f1_score(labels_np, preds_np, average='micro')
    
    # Average loss
    avg_loss = running_loss / num_val
    
    return {
        'loss': avg_loss,
        'accuracy': accuracy,
        'precision_macro': precision_macro,
        'precision_micro': precision_micro,
        'recall_macro': recall_macro,
        'recall_micro': recall_micro,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
    }

In [7]:
val_batches = [da.get_batch() for _ in range(num_val)]

def evaluate(model):
    model.eval()  # Set model to evaluation mode

    running_loss = 0.0
    first_running_loss = 0.0
    # end_running_loss = 0.0
    first_all_preds = []
    first_all_labels = []

    # end_all_preds = []
    # end_all_labels = []
    
    for epoch in range(num_val):
        with torch.no_grad():  # Disable gradient computation during evaluation
            tensor_batch = batch_convertor(val_batches[epoch])
            tensor_batch.gpu(device)
        
            first_labels = tensor_batch.taxes["first"]
            # end_labels = tensor_batch.taxes["end"]
            
            outputs = model(
                tensor_batch.seq_ids["batch_encoding"],
                tensor_batch.seq_ids["batch_global_features"],
                tensor_batch.seq_ids["batch_maks"],
            )

            first_loss = criterion(outputs, first_labels)
            # end_loss = criterion(outputs[1], end_labels)
            # Calculate the loss
            # loss = first_loss + end_loss
            loss = first_loss

            running_loss += loss.item()
            first_running_loss += first_loss.item()
            # end_running_loss += end_loss.item()
                
            first_preds = torch.argmax(outputs, dim=1)
            # end_preds = torch.argmax(outputs[1], dim=1)
    
            first_all_preds.append(first_preds.cpu())
            first_all_labels.append(first_labels.cpu())

            # end_all_preds.append(end_preds.cpu())
            # end_all_labels.append(end_labels.cpu())

    return {
        "first": compute_metrics(first_all_labels, first_all_preds, first_running_loss, num_val),
        # "end": compute_metrics(end_all_labels, end_all_preds, end_running_loss, num_val),
        "loss": running_loss / num_val
    }

In [8]:
import glob
def load_checkpoint(model, optimizer=None, scheduler=None):
    checkpoints = glob.glob(os.path.join(checkpoint_dir, 'checkpoint_epoch_*.pt'))        
    # Extract epoch numbers and find latest
    latest_checkpoint = max(checkpoints, key=lambda x: int(x.split('_')[-1].split('.')[0]))
    checkpoint = torch.load(latest_checkpoint)
    
    # Load model state
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    
    # Load optimizer state if provided (for training)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Move optimizer state to GPU if necessary
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    
    # Get training metadata
    epoch = checkpoint['epoch']
    metrics = checkpoint['metrics']
    
    print(f"Successfully loaded checkpoint from epoch {epoch}")
    print("Metrics at checkpoint:", metrics)
    
    return model, optimizer, scheduler, epoch, metrics
        
# model, optimizer, scheduler, latest_epoch, metrics = load_checkpoint(model, optimizer, scheduler)

In [9]:
running_loss = 0
first_running_loss = 0.0
# end_running_loss = 0.0

first_all_preds = []
first_all_labels = []
# end_all_preds = []
# end_all_labels = []

current_lr = lr

for epoch in tqdm(range(epochs)):
    model.train()

    tensor_batch = batch_convertor(da.get_batch())
    tensor_batch.gpu(device)
        
    first_labels = tensor_batch.taxes["first"]
    # end_labels = tensor_batch.taxes["end"]
    
    outputs = model(
        tensor_batch.seq_ids["batch_encoding"],
        tensor_batch.seq_ids["batch_global_features"],
        tensor_batch.seq_ids["batch_maks"],
    )
    
    first_loss = criterion(outputs, first_labels)
    # end_loss = criterion(outputs[1], end_labels)
    # Calculate the loss
    # loss = first_loss + end_loss
    loss = first_loss

    running_loss += loss.item()
    first_running_loss += first_loss.item()
    # end_running_loss += end_loss.item()
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    first_preds = torch.argmax(outputs, dim=1)
    # end_preds = torch.argmax(outputs[1], dim=1)

    first_all_preds.append(first_preds.cpu())
    first_all_labels.append(first_labels.cpu())

    # end_all_preds.append(end_preds.cpu())
    # end_all_labels.append(end_labels.cpu())
    
    if (epoch + 1) % val_epoch == 0:
        train_metrics = {
            "first": compute_metrics(first_all_labels, first_all_preds, first_running_loss, num_val),
            # "end": compute_metrics(end_all_labels, end_all_preds, end_running_loss, num_val),
            "loss": running_loss / num_val
        }
        val_metrics = evaluate(model)
        
        print(f"Epoch [{epoch + 1}/{epochs}]")
        # Training metrics - First
        print("First Position Metrics:")
        print(f"Train Loss: {train_metrics['first']['loss']:.4f}, Train Accuracy: {train_metrics['first']['accuracy']:.4f}")
        print(f"Train Precision (micro): {train_metrics['first']['precision_micro']:.4f}, Train Precision (macro): {train_metrics['first']['precision_macro']:.4f}")
        print(f"Train Recall (micro): {train_metrics['first']['recall_micro']:.4f}, Train Recall (macro): {train_metrics['first']['recall_macro']:.4f}")
        print(f"Train F1 (micro): {train_metrics['first']['f1_micro']:.4f}, Train F1 (macro): {train_metrics['first']['f1_macro']:.4f}")
        print()
        
        # Training metrics - End
        # print("End Position Metrics:")
        # print(f"Train Loss: {train_metrics['end']['loss']:.4f}")
        # print(f"Train Accuracy: {train_metrics['end']['accuracy']:.4f}")
        # print(f"Train Precision (micro): {train_metrics['end']['precision_micro']:.4f}, Train Precision (macro): {train_metrics['end']['precision_macro']:.4f}")
        # print(f"Train Recall (micro): {train_metrics['end']['recall_micro']:.4f}, Train Recall (macro): {train_metrics['end']['recall_macro']:.4f}")
        # print(f"Train F1 (micro): {train_metrics['end']['f1_micro']:.4f}, Train F1 (macro): {train_metrics['end']['f1_macro']:.4f}")
        # print()
        
        # Validation metrics - First
        print("First Position Validation Metrics:")
        print(f"Val Loss: {val_metrics['first']['loss']:.4f}, Val Accuracy: {val_metrics['first']['accuracy']:.4f}")
        print(f"Val Precision (micro): {val_metrics['first']['precision_micro']:.4f}, Val Precision (macro): {val_metrics['first']['precision_macro']:.4f}")
        print(f"Val Recall (micro): {val_metrics['first']['recall_micro']:.4f}, Val Recall (macro): {val_metrics['first']['recall_macro']:.4f}")
        print(f"Val F1 (micro): {val_metrics['first']['f1_micro']:.4f}, Val F1 (macro): {val_metrics['first']['f1_macro']:.4f}")
        print()
        
        # Validation metrics - End
        # print("End Position Validation Metrics:")
        # print(f"Val Loss: {val_metrics['end']['loss']:.4f}, Val Accuracy: {val_metrics['end']['accuracy']:.4f}")
        # print(f"Val Precision (micro): {val_metrics['end']['precision_micro']:.4f}, Val Precision (macro): {val_metrics['end']['precision_macro']:.4f}")
        # print(f"Val Recall (micro): {val_metrics['end']['recall_micro']:.4f}, Val Recall (macro): {val_metrics['end']['recall_macro']:.4f}")
        # print(f"Val F1 (micro): {val_metrics['end']['f1_micro']:.4f}, Val F1 (macro): {val_metrics['end']['f1_macro']:.4f}")
        print("+" * 80)  # Separator
        print(f"Train Loss: {train_metrics['loss']:.4f}, Val Loss: {val_metrics['loss']:.4f}")
        print("-" * 80)  # Separator
        

        metrics = {
            # Training metrics - First
            "train_loss_first": train_metrics['first']['loss'],
            "train_accuracy_first": train_metrics['first']['accuracy'],
            "train_precision_micro_first": train_metrics['first']['precision_micro'],
            "train_precision_macro_first": train_metrics['first']['precision_macro'],
            "train_recall_micro_first": train_metrics['first']['recall_micro'],
            "train_recall_macro_first": train_metrics['first']['recall_macro'],
            "train_f1_micro_first": train_metrics['first']['f1_micro'],
            "train_f1_macro_first": train_metrics['first']['f1_macro'],
            
            # Training metrics - End
            # "train_loss_end": train_metrics['end']['loss'],
            # "train_accuracy_end": train_metrics['end']['accuracy'],
            # "train_precision_micro_end": train_metrics['end']['precision_micro'],
            # "train_precision_macro_end": train_metrics['end']['precision_macro'],
            # "train_recall_micro_end": train_metrics['end']['recall_micro'],
            # "train_recall_macro_end": train_metrics['end']['recall_macro'],
            # "train_f1_micro_end": train_metrics['end']['f1_micro'],
            # "train_f1_macro_end": train_metrics['end']['f1_macro'],
            
            # Validation metrics - First
            "val_loss_first": val_metrics['first']['loss'],
            "val_accuracy_first": val_metrics['first']['accuracy'],
            "val_precision_micro_first": val_metrics['first']['precision_micro'],
            "val_precision_macro_first": val_metrics['first']['precision_macro'],
            "val_recall_micro_first": val_metrics['first']['recall_micro'],
            "val_recall_macro_first": val_metrics['first']['recall_macro'],
            "val_f1_micro_first": val_metrics['first']['f1_micro'],
            "val_f1_macro_first": val_metrics['first']['f1_macro'],
            
            # Validation metrics - End
            # "val_loss_end": val_metrics['end']['loss'],
            # "val_accuracy_end": val_metrics['end']['accuracy'],
            # "val_precision_micro_end": val_metrics['end']['precision_micro'],
            # "val_precision_macro_end": val_metrics['end']['precision_macro'],
            # "val_recall_micro_end": val_metrics['end']['recall_micro'],
            # "val_recall_macro_end": val_metrics['end']['recall_macro'],
            # "val_f1_micro_end": val_metrics['end']['f1_micro'],
            # "val_f1_macro_end": val_metrics['end']['f1_macro'],
            
            # Additional info
            "train_loss": train_metrics["loss"],
            "val_loss": val_metrics["loss"],
            "epoch": epoch + 1,
            "lr": current_lr,
        }
        # Step the scheduler
        scheduler.step(epoch + loss.item())
        current_lr = scheduler.get_last_lr()[0]
        
        # Save periodic checkpoint
        checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pt')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'metrics': metrics
        }, checkpoint_path)
        
        # Log to wandb
        wandb.log(metrics)
        
        # Reset training metrics
        running_loss = 0
        first_running_loss = 0
        first_all_preds = []
        first_all_labels = []
        # end_all_preds = []
        # end_all_labels = []

wandb.finish()

  1%|          | 999/100000 [31:22<51:53:31,  1.89s/it]

Epoch [1000/100000]
First Position Metrics:
Train Loss: 32.0788, Train Accuracy: 0.0046
Train Precision (micro): 0.0046, Train Precision (macro): 0.0000
Train Recall (micro): 0.0046, Train Recall (macro): 0.0000
Train F1 (micro): 0.0046, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 11.2562, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  2%|▏         | 1999/100000 [1:09:26<51:45:37,  1.90s/it] 

Epoch [2000/100000]
First Position Metrics:
Train Loss: 54.5968, Train Accuracy: 0.0051
Train Precision (micro): 0.0051, Train Precision (macro): 0.0000
Train Recall (micro): 0.0051, Train Recall (macro): 0.0000
Train F1 (micro): 0.0051, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 11.1065, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  3%|▎         | 2999/100000 [1:47:32<50:35:34,  1.88s/it]   

Epoch [3000/100000]
First Position Metrics:
Train Loss: 76.6682, Train Accuracy: 0.0052
Train Precision (micro): 0.0052, Train Precision (macro): 0.0000
Train Recall (micro): 0.0052, Train Recall (macro): 0.0000
Train F1 (micro): 0.0052, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.9646, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  4%|▍         | 3999/100000 [2:25:35<51:09:45,  1.92s/it]   

Epoch [4000/100000]
First Position Metrics:
Train Loss: 98.5535, Train Accuracy: 0.0047
Train Precision (micro): 0.0047, Train Precision (macro): 0.0000
Train Recall (micro): 0.0047, Train Recall (macro): 0.0000
Train F1 (micro): 0.0047, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.7333, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  5%|▍         | 4999/100000 [3:03:42<49:21:09,  1.87s/it]   

Epoch [5000/100000]
First Position Metrics:
Train Loss: 120.1199, Train Accuracy: 0.0047
Train Precision (micro): 0.0047, Train Precision (macro): 0.0000
Train Recall (micro): 0.0047, Train Recall (macro): 0.0000
Train F1 (micro): 0.0047, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.6502, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  6%|▌         | 5999/100000 [3:41:52<49:02:34,  1.88s/it]   

Epoch [6000/100000]
First Position Metrics:
Train Loss: 141.4306, Train Accuracy: 0.0048
Train Precision (micro): 0.0048, Train Precision (macro): 0.0000
Train Recall (micro): 0.0048, Train Recall (macro): 0.0000
Train F1 (micro): 0.0048, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.6493, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  7%|▋         | 6999/100000 [4:19:59<48:53:19,  1.89s/it]   

Epoch [7000/100000]
First Position Metrics:
Train Loss: 164.5604, Train Accuracy: 0.0045
Train Precision (micro): 0.0045, Train Precision (macro): 0.0000
Train Recall (micro): 0.0045, Train Recall (macro): 0.0000
Train F1 (micro): 0.0045, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.5071, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  8%|▊         | 7999/100000 [4:58:03<47:55:21,  1.88s/it]   

Epoch [8000/100000]
First Position Metrics:
Train Loss: 187.6277, Train Accuracy: 0.0052
Train Precision (micro): 0.0052, Train Precision (macro): 0.0000
Train Recall (micro): 0.0052, Train Recall (macro): 0.0000
Train F1 (micro): 0.0052, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.4212, Val Accuracy: 0.0047
Val Precision (micro): 0.0047, Val Precision (macro): 0.0000
Val Recall (micro): 0.0047, Val Recall (macro): 0.0001
Val F1 (micro): 0.0047, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


  9%|▉         | 8999/100000 [5:36:05<47:22:41,  1.87s/it]   

Epoch [9000/100000]
First Position Metrics:
Train Loss: 210.1652, Train Accuracy: 0.0054
Train Precision (micro): 0.0054, Train Precision (macro): 0.0000
Train Recall (micro): 0.0054, Train Recall (macro): 0.0000
Train F1 (micro): 0.0054, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.3768, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 10%|▉         | 9999/100000 [6:14:12<47:18:42,  1.89s/it]   

Epoch [10000/100000]
First Position Metrics:
Train Loss: 231.0216, Train Accuracy: 0.0043
Train Precision (micro): 0.0043, Train Precision (macro): 0.0000
Train Recall (micro): 0.0043, Train Recall (macro): 0.0000
Train F1 (micro): 0.0043, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.3609, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 11%|█         | 10999/100000 [6:52:20<46:22:29,  1.88s/it]   

Epoch [11000/100000]
First Position Metrics:
Train Loss: 251.7518, Train Accuracy: 0.0042
Train Precision (micro): 0.0042, Train Precision (macro): 0.0000
Train Recall (micro): 0.0042, Train Recall (macro): 0.0000
Train F1 (micro): 0.0042, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.3603, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 12%|█▏        | 11999/100000 [7:30:25<45:35:47,  1.87s/it]   

Epoch [12000/100000]
First Position Metrics:
Train Loss: 272.4656, Train Accuracy: 0.0049
Train Precision (micro): 0.0049, Train Precision (macro): 0.0000
Train Recall (micro): 0.0049, Train Recall (macro): 0.0000
Train F1 (micro): 0.0049, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.2869, Val Accuracy: 0.0047
Val Precision (micro): 0.0047, Val Precision (macro): 0.0000
Val Recall (micro): 0.0047, Val Recall (macro): 0.0001
Val F1 (micro): 0.0047, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 13%|█▎        | 12999/100000 [8:08:30<45:35:51,  1.89s/it]   

Epoch [13000/100000]
First Position Metrics:
Train Loss: 293.0500, Train Accuracy: 0.0053
Train Precision (micro): 0.0053, Train Precision (macro): 0.0000
Train Recall (micro): 0.0053, Train Recall (macro): 0.0000
Train F1 (micro): 0.0053, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.2364, Val Accuracy: 0.0047
Val Precision (micro): 0.0047, Val Precision (macro): 0.0000
Val Recall (micro): 0.0047, Val Recall (macro): 0.0001
Val F1 (micro): 0.0047, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 14%|█▍        | 13999/100000 [8:46:32<44:55:33,  1.88s/it]   

Epoch [14000/100000]
First Position Metrics:
Train Loss: 313.4993, Train Accuracy: 0.0051
Train Precision (micro): 0.0051, Train Precision (macro): 0.0000
Train Recall (micro): 0.0051, Train Recall (macro): 0.0000
Train F1 (micro): 0.0051, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1991, Val Accuracy: 0.0046
Val Precision (micro): 0.0046, Val Precision (macro): 0.0000
Val Recall (micro): 0.0046, Val Recall (macro): 0.0001
Val F1 (micro): 0.0046, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 15%|█▍        | 14999/100000 [9:24:40<44:25:28,  1.88s/it]   

Epoch [15000/100000]
First Position Metrics:
Train Loss: 333.9409, Train Accuracy: 0.0047
Train Precision (micro): 0.0047, Train Precision (macro): 0.0000
Train Recall (micro): 0.0047, Train Recall (macro): 0.0000
Train F1 (micro): 0.0047, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1719, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 16%|█▌        | 15999/100000 [10:02:45<44:23:09,  1.90s/it]  

Epoch [16000/100000]
First Position Metrics:
Train Loss: 354.3143, Train Accuracy: 0.0049
Train Precision (micro): 0.0049, Train Precision (macro): 0.0000
Train Recall (micro): 0.0049, Train Recall (macro): 0.0000
Train F1 (micro): 0.0049, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1540, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 17%|█▋        | 16999/100000 [10:40:47<43:01:19,  1.87s/it]   

Epoch [17000/100000]
First Position Metrics:
Train Loss: 374.5937, Train Accuracy: 0.0050
Train Precision (micro): 0.0050, Train Precision (macro): 0.0000
Train Recall (micro): 0.0050, Train Recall (macro): 0.0000
Train F1 (micro): 0.0050, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1410, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 18%|█▊        | 17999/100000 [11:18:53<42:31:44,  1.87s/it]   

Epoch [18000/100000]
First Position Metrics:
Train Loss: 394.8905, Train Accuracy: 0.0053
Train Precision (micro): 0.0053, Train Precision (macro): 0.0000
Train Recall (micro): 0.0053, Train Recall (macro): 0.0000
Train F1 (micro): 0.0053, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1327, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 19%|█▉        | 18999/100000 [11:56:55<42:04:28,  1.87s/it]   

Epoch [19000/100000]
First Position Metrics:
Train Loss: 415.1665, Train Accuracy: 0.0050
Train Precision (micro): 0.0050, Train Precision (macro): 0.0000
Train Recall (micro): 0.0050, Train Recall (macro): 0.0000
Train F1 (micro): 0.0050, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1281, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 20%|█▉        | 19999/100000 [12:35:03<41:44:50,  1.88s/it]   

Epoch [20000/100000]
First Position Metrics:
Train Loss: 435.4311, Train Accuracy: 0.0050
Train Precision (micro): 0.0050, Train Precision (macro): 0.0000
Train Recall (micro): 0.0050, Train Recall (macro): 0.0000
Train F1 (micro): 0.0050, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1262, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 21%|██        | 20999/100000 [13:13:10<40:52:53,  1.86s/it]   

Epoch [21000/100000]
First Position Metrics:
Train Loss: 455.6756, Train Accuracy: 0.0049
Train Precision (micro): 0.0049, Train Precision (macro): 0.0000
Train Recall (micro): 0.0049, Train Recall (macro): 0.0000
Train F1 (micro): 0.0049, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1260, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 22%|██▏       | 21999/100000 [13:51:18<40:51:03,  1.89s/it]   

Epoch [22000/100000]
First Position Metrics:
Train Loss: 475.9676, Train Accuracy: 0.0048
Train Precision (micro): 0.0048, Train Precision (macro): 0.0000
Train Recall (micro): 0.0048, Train Recall (macro): 0.0000
Train F1 (micro): 0.0048, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1238, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 23%|██▎       | 22999/100000 [14:29:25<39:55:34,  1.87s/it]   

Epoch [23000/100000]
First Position Metrics:
Train Loss: 496.2106, Train Accuracy: 0.0045
Train Precision (micro): 0.0045, Train Precision (macro): 0.0000
Train Recall (micro): 0.0045, Train Recall (macro): 0.0000
Train F1 (micro): 0.0045, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1192, Val Accuracy: 0.0039
Val Precision (micro): 0.0039, Val Precision (macro): 0.0000
Val Recall (micro): 0.0039, Val Recall (macro): 0.0001
Val F1 (micro): 0.0039, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 24%|██▍       | 23999/100000 [15:07:32<40:08:38,  1.90s/it]   

Epoch [24000/100000]
First Position Metrics:
Train Loss: 516.4778, Train Accuracy: 0.0053
Train Precision (micro): 0.0053, Train Precision (macro): 0.0000
Train Recall (micro): 0.0053, Train Recall (macro): 0.0000
Train F1 (micro): 0.0053, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1127, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 25%|██▍       | 24999/100000 [15:45:38<39:06:51,  1.88s/it]   

Epoch [25000/100000]
First Position Metrics:
Train Loss: 536.7438, Train Accuracy: 0.0046
Train Precision (micro): 0.0046, Train Precision (macro): 0.0000
Train Recall (micro): 0.0046, Train Recall (macro): 0.0000
Train F1 (micro): 0.0046, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1077, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 26%|██▌       | 25999/100000 [16:23:47<38:17:14,  1.86s/it]   

Epoch [26000/100000]
First Position Metrics:
Train Loss: 558.4477, Train Accuracy: 0.0047
Train Precision (micro): 0.0047, Train Precision (macro): 0.0000
Train Recall (micro): 0.0047, Train Recall (macro): 0.0000
Train F1 (micro): 0.0047, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1023, Val Accuracy: 0.0040
Val Precision (micro): 0.0040, Val Precision (macro): 0.0000
Val Recall (micro): 0.0040, Val Recall (macro): 0.0001
Val F1 (micro): 0.0040, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 27%|██▋       | 26999/100000 [17:01:53<38:21:07,  1.89s/it]   

Epoch [27000/100000]
First Position Metrics:
Train Loss: 578.6732, Train Accuracy: 0.0053
Train Precision (micro): 0.0053, Train Precision (macro): 0.0000
Train Recall (micro): 0.0053, Train Recall (macro): 0.0000
Train F1 (micro): 0.0053, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.1001, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 28%|██▊       | 27999/100000 [17:39:57<37:24:43,  1.87s/it]   

Epoch [28000/100000]
First Position Metrics:
Train Loss: 598.8594, Train Accuracy: 0.0057
Train Precision (micro): 0.0057, Train Precision (macro): 0.0000
Train Recall (micro): 0.0057, Train Recall (macro): 0.0000
Train F1 (micro): 0.0057, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.0976, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 29%|██▉       | 28999/100000 [18:18:03<37:25:02,  1.90s/it]   

Epoch [29000/100000]
First Position Metrics:
Train Loss: 619.0754, Train Accuracy: 0.0052
Train Precision (micro): 0.0052, Train Precision (macro): 0.0000
Train Recall (micro): 0.0052, Train Recall (macro): 0.0000
Train F1 (micro): 0.0052, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.0953, Val Accuracy: 0.0046
Val Precision (micro): 0.0046, Val Precision (macro): 0.0000
Val Recall (micro): 0.0046, Val Recall (macro): 0.0001
Val F1 (micro): 0.0046, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 30%|██▉       | 29999/100000 [18:56:05<36:31:13,  1.88s/it]   

Epoch [30000/100000]
First Position Metrics:
Train Loss: 639.3178, Train Accuracy: 0.0047
Train Precision (micro): 0.0047, Train Precision (macro): 0.0000
Train Recall (micro): 0.0047, Train Recall (macro): 0.0000
Train F1 (micro): 0.0047, Train F1 (macro): 0.0000

First Position Validation Metrics:
Val Loss: 10.0904, Val Accuracy: 0.0053
Val Precision (micro): 0.0053, Val Precision (macro): 0.0000
Val Recall (micro): 0.0053, Val Recall (macro): 0.0001
Val F1 (micro): 0.0053, Val F1 (macro): 0.0000

--------------------------------------------------------------------------------


 31%|███       | 30999/100000 [19:38:50<43:44:00,  2.28s/it]   


KeyboardInterrupt: 