# Data Preprocessing

In [66]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import random
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



def load_and_shuffle_mnist():
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5, ))])
    mnist_train = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    mnist_test = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

    train_data = list(mnist_train)
    test_data = list(mnist_test)

    random.shuffle(train_data)
    random.shuffle(test_data)

    return train_data, test_data

In [67]:
def create_sequences(mnist_data, window_size):
    sequences = []
    labels = []

    for i in range(len(mnist_data) - window_size + 1):
        window = mnist_data[i:i + window_size]
        sequence_images = torch.stack([img for img, _ in window])

        # Default label is 'null' (using a specific number to represent 'null', e.g., 10)
        label = 10  # Assuming 10 represents 'null'

        # Check for the condition in the last window_size elements
        last_digit = window[-1][1]
        for _, prev_label in window[:-1]:
            if prev_label == last_digit:
                label = last_digit  # Label is the digit itself if condition is met
                break

        sequences.append(sequence_images)
        labels.append(label)

    return sequences, labels

In [68]:
class MNISTSequenceDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels  # Labels are already numerical, no need for a map

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        # Ensure label is a tensor of dtype torch.long
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        return sequence, label_tensor

# LSMT Definition

In [69]:

class SimpleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(0.2)


    def forward(self, x):
        x = x.view(-1, window_size, 28*28)  # Reshape x
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out


In [70]:
def balanced_sampling(mnist_data, size, num_classes=11):
    print('balanced_sampling')
    # Group data by labels
    grouped_data = {label: [] for label in range(num_classes)}
    for img, label in mnist_data:
        grouped_data[label].append((img, label))

    # Calculate the number of samples per class
    samples_per_class = size // num_classes

    # Initialize list for sampled data
    sampled_data = []

    for label, data in grouped_data.items():
        data_len = len(data)
        if data_len == 0:
            print(f"Warning: No data for class {label}. Skipping this class.")
            continue
        if data_len >= samples_per_class:
            sampled_data.extend(random.sample(data, samples_per_class))
        else:
            # Efficiently replicate data to meet the required number of samples
            repeats = samples_per_class // data_len
            remainder = samples_per_class % data_len
            sampled_data.extend(data * repeats + random.sample(data, remainder))

    # Shuffle the final dataset using Python's random.shuffle for compatibility
    random.shuffle(sampled_data)
    print('Done sampling.')
    return sampled_data


# Train and Testing

In [64]:
import time
import numpy as np
from sklearn.metrics import f1_score


def train_model(model, train_loader, criterion, optimizer, num_epochs,test_loader):
    model = model.to(device)  # Move model to GPU

    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        logger = {'time': {}, 'loss': {}, 'F1': {}, 'Accuracy': {}}
        print(f"Starting Epoch {epoch + 1}/{num_epochs}")  # Print at the start of each epoch


        for i, (sequences, labels) in enumerate(train_loader):
            sequences, labels = sequences.to(device), labels.to(device)  # Move data to GPU

            # Forward pass
            outputs = model(sequences)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            if i % 100 == 0:
                logger['time'][i] = time.time() - epoch_start_time
                logger['loss'][i] = loss.item()
                print(f"Epoch {epoch + 1}, Step {i}, Loss: {loss.item()}, Time Elapsed: {time.time() - epoch_start_time} seconds")


        epoch_end_time = time.time()
        print(f"Epoch {epoch + 1}\n")
        print(f"Writing snapshot to model_iter_{(epoch + 1) * len(train_loader):06d}.mdl")
        print(f"Epoch time: {epoch_end_time - epoch_start_time}")

        # Evaluate at the end of the epoch
        f1_score, accuracy = evaluate_model(model, test_loader)
        logger['F1'][len(test_loader) * (epoch + 1)] = f1_score
        logger['Accuracy'][len(test_loader) * (epoch + 1)] = accuracy

        # Print detailed logger information
        print("Logger", logger)

# Update the evaluate_model function to return F1 score and accuracy
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for sequences, labels in test_loader:
            outputs = model(sequences)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print(classification_report(all_labels, all_predictions))

    # Compute F1 score and accuracy
    f1_score_value = f1_score(all_labels, all_predictions, average='macro')  # Change average as needed
    accuracy = correct / total  # Corrected accuracy calculation
    return f1_score_value, accuracy

In [61]:
# Load and shuffle MNIST dataset
train_data, test_data = load_and_shuffle_mnist()

# Set LSTM parameters
input_size = 784  # 28x28
hidden_size = 128
num_classes = 11  # Digits 0-9 and 'null'
num_epochs = 10
batch_size = 100
learning_rate = 0.001
window_size = 10  # Example window size

# Create model
model = SimpleLSTM(input_size, hidden_size, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Iterate over different dataset sizes
for size in [1000, 5000, 10000, 20000, 30000, 40000, 50000]:
    print(f"Training with dataset size: {size}")

    # Balanced sampling
    sampled_train_data = balanced_sampling(train_data, size)
    sampled_test_data = balanced_sampling(test_data, size // 2)
    print('Done sample data.')

    # Create sequences
    train_sequences, train_labels = create_sequences(sampled_train_data, window_size)
    test_sequences, test_labels = create_sequences(sampled_test_data, window_size)

    # Create data loaders
    train_dataset = MNISTSequenceDataset(train_sequences, train_labels)
    test_dataset = MNISTSequenceDataset(test_sequences, test_labels)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    # Train and evaluate the model
    train_model(model, train_loader, criterion, optimizer, num_epochs, test_loader)


Training with dataset size: 1000
balanced_sampling
Done sampling.
balanced_sampling
Done sampling.
Done sample data.
Starting Epoch 1/10
Epoch 1, Step 0, Loss: 2.2875936031341553, Time Elapsed: 0.1277780532836914 seconds
Epoch 1

Writing snapshot to model_iter_000009.mdl
Epoch time: 0.9980049133300781
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        28
           1       0.00      0.00      0.00        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00        22
           6       0.00      0.00      0.00        26
           7       0.00      0.00      0.00        27
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.41      1.00      0.58       179

    accuracy                           0.41    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2

Writing snapshot to model_iter_000018.mdl
Epoch time: 0.6590173244476318
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        28
           1       0.00      0.00      0.00        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00        22
           6       0.00      0.00      0.00        26
           7       0.00      0.00      0.00        27
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.41      1.00      0.58       179

    accuracy                           0.41       441
   macro avg       0.04      0.09      0.05       441
weighted avg       0.16      0.41      0.23       441

Logger {'time': {0: 0.06718325614929199}, 'loss': {0: 1.961031436920166}, 'F1': {10: 0.05249266862170088}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3

Writing snapshot to model_iter_000027.mdl
Epoch time: 0.4717540740966797
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        28
           1       0.00      0.00      0.00        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00        22
           6       0.50      0.04      0.07        26
           7       0.00      0.00      0.00        27
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.41      0.99      0.58       179

    accuracy                           0.41       441
   macro avg       0.08      0.09      0.06       441
weighted avg       0.19      0.41      0.24       441

Logger {'time': {0: 0.05473208427429199}, 'loss': {0: 1.7852561473846436}, 'F1': {15: 0.05886185012398604

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4

Writing snapshot to model_iter_000036.mdl
Epoch time: 0.4778261184692383
              precision    recall  f1-score   support

           0       0.43      0.11      0.17        28
           1       0.00      0.00      0.00        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00        22
           6       0.60      0.12      0.19        26
           7       0.00      0.00      0.00        27
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.40      0.97      0.57       179

    accuracy                           0.41       441
   macro avg       0.13      0.11      0.08       441
weighted avg       0.23      0.41      0.25       441

Logger {'time': {0: 0.04736614227294922}, 'loss': {0: 1.6081671714782715}, 'F1': {20: 0.0849141732630697}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5

Writing snapshot to model_iter_000045.mdl
Epoch time: 0.471494197845459
              precision    recall  f1-score   support

           0       0.67      0.29      0.40        28
           1       0.75      0.21      0.33        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        25
           4       0.00      0.00      0.00        22
           5       0.00      0.00      0.00        22
           6       0.67      0.31      0.42        26
           7       0.40      0.07      0.12        27
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.41      0.93      0.57       179

    accuracy                           0.43       441
   macro avg       0.26      0.16      0.17       441
weighted avg       0.32      0.43      0.31       441

Logger {'time': {0: 0.05804610252380371}, 'loss': {0: 1.4979350566864014}, 'F1': {25: 0.16807765749943238}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6

Writing snapshot to model_iter_000054.mdl
Epoch time: 0.47171783447265625
              precision    recall  f1-score   support

           0       0.57      0.61      0.59        28
           1       0.60      0.75      0.67        28
           2       0.67      0.07      0.12        29
           3       0.00      0.00      0.00        25
           4       0.67      0.18      0.29        22
           5       0.50      0.05      0.08        22
           6       0.58      0.42      0.49        26
           7       0.47      0.30      0.36        27
           8       1.00      0.04      0.07        27
           9       0.50      0.04      0.07        28
          10       0.40      0.72      0.51       179

    accuracy                           0.44       441
   macro avg       0.54      0.29      0.30       441
weighted avg       0.49      0.44      0.37       441

Logger {'time': {0: 0.0497589111328125}, 'loss': {0: 1.401717185974121}, 'F1': {30: 0.2952274392953682},

# Train without batches

In [72]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import random
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report, f1_score


def train_model(model, train_data, criterion, optimizer, num_epochs, test_data):
    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        logger = {'time': {}, 'loss': {}, 'F1': {}, 'Accuracy': {}}

        # Iterate over each data point in the training data
        for i, (sequence, label) in enumerate(train_data):
            # Convert label to tensor if it's not already
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            # Forward pass
            outputs = model(sequence.unsqueeze(0))
            loss = criterion(outputs, label.unsqueeze(0))

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            if i % 100 == 0:
                logger['time'][i] = time.time() - epoch_start_time
                logger['loss'][i] = loss.item()

        epoch_end_time = time.time()
        print(f"Epoch {epoch + 1}\n")
        print(f"Epoch time: {epoch_end_time - epoch_start_time}")

        # Evaluate at the end of the epoch
        f1_score_value, accuracy = evaluate_model(model, test_data)
        logger['F1'][epoch + 1] = f1_score_value
        logger['Accuracy'][epoch + 1] = accuracy

        # Print detailed logger information
        print("Logger", logger)

def evaluate_model(model, test_data):
    model.eval()
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for sequence, label in test_data:
            # Ensure label is a tensor
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            outputs = model(sequence.unsqueeze(0))
            _, predicted = torch.max(outputs.data, 1)

            total += 1  # Increment total for each label
            correct += (predicted == label).sum().item()
            all_predictions.append(predicted.item())
            all_labels.append(label.item())

    print(classification_report(all_labels, all_predictions))
    f1_score_value = f1_score(all_labels, all_predictions, average='macro')
    accuracy = correct / total
    return f1_score_value, accuracy

# Load and shuffle MNIST dataset
train_data, test_data = load_and_shuffle_mnist()

# Set LSTM parameters
input_size = 784  # 28x28
hidden_size = 128
num_classes = 11  # Digits 0-9 and 'null'
num_epochs = 10
batch_size = 100
learning_rate = 0.001
window_size = 10  # Example window size

# Create model
model = SimpleLSTM(input_size, hidden_size, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Example of training with a single data point
for size in [1000, 5000, 10000, 20000, 30000, 40000, 50000]:
    print(f"Training with dataset size: {size}")
    sampled_train_data = balanced_sampling(train_data, size)
    sampled_test_data = balanced_sampling(test_data, size // 2)

    train_sequences, train_labels = create_sequences(sampled_train_data, window_size)
    test_sequences, test_labels = create_sequences(sampled_test_data, window_size)

    # Convert sequences and labels into a list of tuples for easier iteration
    train_data_tuples = list(zip(train_sequences, train_labels))
    test_data_tuples = list(zip(test_sequences, test_labels))

    train_model(model, train_data_tuples, criterion, optimizer, num_epochs, test_data_tuples)


Training with dataset size: 1000
balanced_sampling
Done sampling.
balanced_sampling
Done sampling.
Epoch 1

Epoch time: 4.595603942871094


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.59      0.59        27
           1       0.00      0.00      0.00        28
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00        27
           4       0.00      0.00      0.00        28
           5       0.00      0.00      0.00        30
           6       0.00      0.00      0.00        25
           7       0.00      0.00      0.00        28
           8       0.00      0.00      0.00        27
           9       0.00      0.00      0.00        28
          10       0.37      0.93      0.53       164

    accuracy                           0.38       441
   macro avg       0.09      0.14      0.10       441
weighted avg       0.17      0.38      0.23       441

Logger {'time': {0: 0.015468835830688477, 100: 0.5594398975372314, 200: 1.0551037788391113, 300: 1.6020641326904297, 400: 2.1452648639678955, 500: 2.70550274848938, 600: 3.195931673049926

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.56      0.57        27
           1       0.00      0.00      0.00        28
           2       0.60      0.10      0.18        29
           3       0.50      0.15      0.23        27
           4       0.59      0.36      0.44        28
           5       0.00      0.00      0.00        30
           6       0.65      0.60      0.63        25
           7       0.00      0.00      0.00        28
           8       0.37      0.26      0.30        27
           9       0.00      0.00      0.00        28
          10       0.36      0.75      0.49       164

    accuracy                           0.40       441
   macro avg       0.33      0.25      0.26       441
weighted avg       0.34      0.40      0.32       441

Logger {'time': {0: 0.006639242172241211, 100: 0.4898984432220459, 200: 0.9854342937469482, 300: 1.4615452289581299, 400: 2.018139362335205, 500: 2.5223193168640137, 600: 3.05710721015930

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.54      0.70      0.61        27
           1       0.80      0.43      0.56        28
           2       0.75      0.31      0.44        29
           3       0.50      0.41      0.45        27
           4       0.67      0.36      0.47        28
           5       1.00      0.03      0.06        30
           6       0.59      0.68      0.63        25
           7       0.62      0.36      0.45        28
           8       0.41      0.52      0.46        27
           9       0.00      0.00      0.00        28
          10       0.36      0.58      0.45       164

    accuracy                           0.45       441
   macro avg       0.57      0.40      0.42       441
weighted avg       0.51      0.45      0.42       441

Logger {'time': {0: 0.005797863006591797, 100: 0.510612964630127, 200: 0.967383861541748, 300: 1.4508631229400635, 400: 1.9122979640960693, 500: 2.401071071624756, 600: 2.860825777053833,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.81      0.70        27
           1       0.00      0.00      0.00        28
           2       0.74      0.59      0.65        29
           3       0.47      0.26      0.33        27
           4       0.56      0.36      0.43        28
           5       0.50      0.07      0.12        30
           6       0.64      0.64      0.64        25
           7       0.58      0.39      0.47        28
           8       0.58      0.56      0.57        27
           9       0.60      0.11      0.18        28
          10       0.37      0.60      0.46       164

    accuracy                           0.46       441
   macro avg       0.51      0.40      0.41       441
weighted avg       0.47      0.46      0.42       441

Logger {'time': {0: 0.006541013717651367, 100: 0.5629189014434814, 200: 1.082840919494629, 300: 1.577406883239746, 400: 2.0495429039001465, 500: 2.521696090698242, 600: 3.042526960372925,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.78      0.68        27
           1       0.00      0.00      0.00        28
           2       0.74      0.59      0.65        29
           3       0.62      0.56      0.59        27
           4       0.62      0.29      0.39        28
           5       0.57      0.13      0.22        30
           6       0.70      0.56      0.62        25
           7       0.60      0.43      0.50        28
           8       0.58      0.56      0.57        27
           9       0.56      0.18      0.27        28
          10       0.38      0.62      0.47       164

    accuracy                           0.48       441
   macro avg       0.54      0.43      0.45       441
weighted avg       0.49      0.48      0.45       441

Logger {'time': {0: 0.006830930709838867, 100: 0.5513668060302734, 200: 1.0657899379730225, 300: 1.5965559482574463, 400: 2.1696767807006836, 500: 2.69647479057312, 600: 3.225852966308593

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.70      0.63        27
           1       0.00      0.00      0.00        28
           2       0.71      0.69      0.70        29
           3       0.60      0.44      0.51        27
           4       0.58      0.25      0.35        28
           5       0.62      0.17      0.26        30
           6       0.60      0.60      0.60        25
           7       0.64      0.32      0.43        28
           8       0.52      0.48      0.50        27
           9       0.56      0.36      0.43        28
          10       0.37      0.58      0.45       164

    accuracy                           0.46       441
   macro avg       0.53      0.42      0.44       441
weighted avg       0.48      0.46      0.44       441

Logger {'time': {0: 0.0058460235595703125, 100: 0.5240237712860107, 200: 1.0793049335479736, 300: 1.631138801574707, 400: 2.1448137760162354, 500: 2.6149168014526367, 600: 3.0985689163208

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.78      0.67        27
           1       0.00      0.00      0.00        28
           2       0.73      0.66      0.69        29
           3       0.65      0.56      0.60        27
           4       0.69      0.32      0.44        28
           5       0.60      0.10      0.17        30
           6       0.56      0.40      0.47        25
           7       0.62      0.46      0.53        28
           8       0.56      0.52      0.54        27
           9       0.60      0.32      0.42        28
          10       0.38      0.60      0.46       164

    accuracy                           0.48       441
   macro avg       0.54      0.43      0.45       441
weighted avg       0.49      0.48      0.45       441

Logger {'time': {0: 0.00569605827331543, 100: 0.5752592086791992, 200: 1.130937099456787, 300: 1.626847267150879, 400: 2.136575222015381, 500: 2.6297051906585693, 600: 3.130347967147827, 

KeyboardInterrupt: 

In [76]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import random
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report, f1_score


def train_model(model, train_data, criterion, optimizer, num_epochs, test_data, eval_interval=100):
    logger = {'time': {}, 'loss': {}, 'F1': {}, 'Accuracy': {}}

    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        for i, (sequence, label) in enumerate(train_data):
            # Convert sequence and label to tensors if they are not already
            if not isinstance(sequence, torch.Tensor):
                sequence = torch.tensor(sequence, dtype=torch.float32)
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            # Forward pass
            outputs = model(sequence.unsqueeze(0))
            loss = criterion(outputs, label.unsqueeze(0))

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            if i % 100 == 0:
                logger['time'][i] = time.time() - epoch_start_time
                logger['loss'][i] = loss.item()

                # Evaluate model performance on test data
                f1_score_value, accuracy = evaluate_model(model, test_data)
                logger['F1'][i] = f1_score_value
                logger['Accuracy'][i] = accuracy

                print(f"Epoch {epoch + 1}, Step {i}, Loss: {loss.item()}, F1: {f1_score_value}, Accuracy: {accuracy}, Time Elapsed: {time.time() - epoch_start_time} seconds")

        epoch_end_time = time.time()
        print(f"Epoch {epoch + 1} completed. Time: {epoch_end_time - epoch_start_time}")

        # Print detailed logger information
        print("Logger", logger)
        
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import numpy as np

def evaluate_model(model, test_data):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for sequence, label in test_data:
            # Ensure label is a tensor
            if not isinstance(label, torch.Tensor):
                label = torch.tensor(label, dtype=torch.long)

            outputs = model(sequence.unsqueeze(0))
            _, predicted = torch.max(outputs.data, 1)
            
            all_predictions.append(predicted.item())
            all_labels.append(label.item())

    # Calculate confusion matrix and metrics
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    accuracy = np.trace(conf_matrix) / np.sum(conf_matrix)
    f1_score_value = f1_score(all_labels, all_predictions, average='macro')

    # Print classification report
    print(classification_report(all_labels, all_predictions))

    return f1_score_value, accuracy

# Load and shuffle MNIST dataset
train_data, test_data = load_and_shuffle_mnist()

# Set LSTM parameters
input_size = 784  # 28x28
hidden_size = 128
num_classes = 11  # Digits 0-9 and 'null'
num_epochs = 5
batch_size = 100
learning_rate = 0.001
window_size = 10  # Example window size

# Create model
model = SimpleLSTM(input_size, hidden_size, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Example of training with a single data point
for size in [32000]:
    print(f"Training with dataset size: {size}")
    sampled_train_data = balanced_sampling(train_data, size)
    sampled_test_data = balanced_sampling(test_data, size // 2)

    train_sequences, train_labels = create_sequences(sampled_train_data, window_size)
    test_sequences, test_labels = create_sequences(sampled_test_data, window_size)

    # Convert sequences and labels into a list of tuples for easier iteration
    train_data_tuples = list(zip(train_sequences, train_labels))
    test_data_tuples = list(zip(test_sequences, test_labels))

    train_model(model, train_data_tuples, criterion, optimizer, num_epochs, test_data_tuples)


Training with dataset size: 32000
balanced_sampling
Done sampling.
balanced_sampling
Done sampling.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.06      1.00      0.12       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.00      0.00      0.00      5646

    accuracy                           0.06     14531
   macro avg       0.01      0.09      0.01     14531
weighted avg       0.00      0.06      0.01     14531

Epoch 1, Step 0, Loss: 2.4203009605407715, F1: 0.01073743325896405, Accuracy: 0.06276237010529213, Time Elapsed: 14.67314100265503 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 100, Loss: 3.5031092166900635, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 30.499523162841797 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 200, Loss: 3.043261766433716, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 46.72759819030762 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 300, Loss: 0.8586784601211548, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 62.06820201873779 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 400, Loss: 2.905369758605957, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 77.43299317359924 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 500, Loss: 2.7015929222106934, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 93.60171604156494 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 600, Loss: 1.099332332611084, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 109.04908633232117 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 700, Loss: 1.6591286659240723, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 123.92569518089294 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.04      0.09      0.05     14531
weighted avg       0.15      0.39      0.22     14531

Epoch 1, Step 800, Loss: 1.9338128566741943, F1: 0.05087701117834438, Accuracy: 0.38854862019131514, Time Elapsed: 139.0218963623047 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.01      0.02       912
           1       0.00      0.00      0.00       885
           2       0.55      0.01      0.01       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      1.00      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.18      0.09      0.05     14531
weighted avg       0.25      0.39      0.22     14531

Epoch 1, Step 900, Loss: 0.7233871817588806, F1: 0.053689396127181106, Accuracy: 0.38916798568577526, Time Elapsed: 154.44687914848328 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.50      0.55       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.95      0.55      5646

    accuracy                           0.40     14531
   macro avg       0.09      0.13      0.10     14531
weighted avg       0.19      0.40      0.25     14531

Epoch 1, Step 1000, Loss: 2.9249792098999023, F1: 0.10046694851253948, Accuracy: 0.4007982933039708, Time Elapsed: 169.11343836784363 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.89      0.70       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.91      0.55      5646

    accuracy                           0.41     14531
   macro avg       0.09      0.16      0.11     14531
weighted avg       0.19      0.41      0.26     14531

Epoch 1, Step 1100, Loss: 0.7943834662437439, F1: 0.11374757978943505, Accuracy: 0.4091253182850458, Time Elapsed: 185.1915671825409 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.50      0.56       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.95      0.55      5646

    accuracy                           0.40     14531
   macro avg       0.09      0.13      0.10     14531
weighted avg       0.19      0.40      0.25     14531

Epoch 1, Step 1200, Loss: 2.0605263710021973, F1: 0.10098378469390475, Accuracy: 0.40141765879843094, Time Elapsed: 202.35684514045715 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.65      0.64       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.94      0.55      5646

    accuracy                           0.41     14531
   macro avg       0.09      0.14      0.11     14531
weighted avg       0.19      0.41      0.25     14531

Epoch 1, Step 1300, Loss: 1.1146883964538574, F1: 0.1081871567505937, Accuracy: 0.4057532172596518, Time Elapsed: 218.5067000389099 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.08      0.14       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.99      0.56      5646

    accuracy                           0.39     14531
   macro avg       0.09      0.10      0.06     14531
weighted avg       0.19      0.39      0.23     14531

Epoch 1, Step 1400, Loss: 0.7597019672393799, F1: 0.06341245656226632, Accuracy: 0.3901314431216021, Time Elapsed: 236.25294733047485 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.79      0.69       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.59      0.43      0.50       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.88      0.54      5646

    accuracy                           0.42     14531
   macro avg       0.14      0.19      0.16     14531
weighted avg       0.23      0.42      0.28     14531

Epoch 1, Step 1500, Loss: 0.7967299818992615, F1: 0.15716553239757722, Accuracy: 0.41724588810130064, Time Elapsed: 252.31317710876465 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.41      0.50       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.96      0.55      5646

    accuracy                           0.40     14531
   macro avg       0.09      0.12      0.10     14531
weighted avg       0.19      0.40      0.25     14531

Epoch 1, Step 1600, Loss: 0.9628119468688965, F1: 0.09548003256509206, Accuracy: 0.3987337416557704, Time Elapsed: 268.1903941631317 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.44      0.51       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.62      0.56      0.59       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.90      0.54      5646

    accuracy                           0.41     14531
   macro avg       0.15      0.17      0.15     14531
weighted avg       0.23      0.41      0.28     14531

Epoch 1, Step 1700, Loss: 0.9505224823951721, F1: 0.14956410485390034, Accuracy: 0.4125662376987131, Time Elapsed: 285.856192111969 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.88      0.70       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.91      0.55      5646

    accuracy                           0.41     14531
   macro avg       0.09      0.16      0.11     14531
weighted avg       0.19      0.41      0.26     14531

Epoch 1, Step 1800, Loss: 0.7866067290306091, F1: 0.11314308458451389, Accuracy: 0.4077489505195788, Time Elapsed: 302.0912981033325 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.85      0.71       912
           1       0.60      0.75      0.67       885
           2       0.60      0.39      0.47       877
           3       0.00      0.00      0.00       897
           4       0.66      0.11      0.18       892
           5       0.00      0.00      0.00       862
           6       0.62      0.56      0.58       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.74      0.51      5646

    accuracy                           0.45     14531
   macro avg       0.32      0.31      0.28     14531
weighted avg       0.34      0.45      0.36     14531

Epoch 1, Step 1900, Loss: 0.933186948299408, F1: 0.28456015981260135, Accuracy: 0.45069162480214714, Time Elapsed: 318.5169322490692 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.78      0.69       912
           1       0.59      0.47      0.52       885
           2       0.57      0.54      0.55       877
           3       0.00      0.00      0.00       897
           4       0.50      0.01      0.02       892
           5       0.00      0.00      0.00       862
           6       0.47      0.92      0.62       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.70      0.50      5646

    accuracy                           0.44     14531
   macro avg       0.29      0.31      0.26     14531
weighted avg       0.32      0.44      0.34     14531

Epoch 1, Step 2000, Loss: 1.0499539375305176, F1: 0.26496300848752996, Accuracy: 0.4384419516894914, Time Elapsed: 334.0909821987152 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.55      0.94      0.69       912
           1       0.00      0.00      0.00       885
           2       0.61      0.27      0.37       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.61      0.77      0.68       903
           7       0.75      0.00      0.01       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.79      0.52      5646

    accuracy                           0.43     14531
   macro avg       0.26      0.25      0.21     14531
weighted avg       0.31      0.43      0.31     14531

Epoch 1, Step 2100, Loss: 0.5745329260826111, F1: 0.20654332408139273, Accuracy: 0.4311472025325167, Time Elapsed: 348.5855281352997 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.50      0.56       912
           1       0.60      0.86      0.70       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.63      0.10      0.18       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.85      0.54      5646

    accuracy                           0.42     14531
   macro avg       0.20      0.21      0.18     14531
weighted avg       0.27      0.42      0.30     14531

Epoch 1, Step 2200, Loss: 2.961423635482788, F1: 0.17974719645629122, Accuracy: 0.42226963044525495, Time Elapsed: 363.8493010997772 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.83      0.71       912
           1       0.63      0.08      0.14       885
           2       0.62      0.02      0.04       877
           3       0.00      0.00      0.00       897
           4       0.33      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.64      0.08      0.15       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.90      0.55      5646

    accuracy                           0.41     14531
   macro avg       0.29      0.17      0.14     14531
weighted avg       0.33      0.41      0.28     14531

Epoch 1, Step 2300, Loss: 2.1002864837646484, F1: 0.1432578237710499, Accuracy: 0.4140114238524534, Time Elapsed: 380.4639811515808 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.66      0.65       912
           1       0.59      0.32      0.42       885
           2       0.92      0.01      0.02       877
           3       0.00      0.00      0.00       897
           4       0.33      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.91      0.55      5646

    accuracy                           0.41     14531
   macro avg       0.26      0.17      0.15     14531
weighted avg       0.30      0.41      0.28     14531

Epoch 1, Step 2400, Loss: 1.7487837076187134, F1: 0.14900490802874666, Accuracy: 0.41380496868763333, Time Elapsed: 395.4858841896057 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.52      0.96      0.67       912
           1       0.00      0.00      0.00       885
           2       0.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.59      0.10      0.16       862
           6       0.40      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.88      0.54      5646

    accuracy                           0.41     14531
   macro avg       0.17      0.18      0.13     14531
weighted avg       0.24      0.41      0.26     14531

Epoch 1, Step 2500, Loss: 1.9968363046646118, F1: 0.1256956909108766, Accuracy: 0.4071984034133921, Time Elapsed: 410.6447522640228 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.82      0.70       912
           1       0.50      0.96      0.66       885
           2       1.00      0.00      0.00       877
           3       0.58      0.05      0.10       897
           4       0.63      0.31      0.42       892
           5       0.22      0.93      0.36       862
           6       0.64      0.13      0.22       903
           7       0.60      0.20      0.30       889
           8       0.45      0.17      0.25       892
           9       0.00      0.00      0.00       876
          10       0.39      0.46      0.42      5646

    accuracy                           0.40     14531
   macro avg       0.51      0.37      0.31     14531
weighted avg       0.47      0.40      0.35     14531

Epoch 1, Step 2600, Loss: 1.1910966634750366, F1: 0.31100106154575985, Accuracy: 0.39790792099649025, Time Elapsed: 426.4049742221832 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.74      0.17      0.28       912
           1       0.61      0.67      0.64       885
           2       0.74      0.09      0.16       877
           3       0.00      0.00      0.00       897
           4       0.62      0.01      0.01       892
           5       0.00      0.00      0.00       862
           6       0.62      0.69      0.65       903
           7       0.49      0.87      0.63       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.40      0.75      0.52      5646

    accuracy                           0.44     14531
   macro avg       0.38      0.29      0.26     14531
weighted avg       0.39      0.44      0.35     14531

Epoch 1, Step 2700, Loss: 3.195530891418457, F1: 0.2623441590632922, Accuracy: 0.4436033308099924, Time Elapsed: 442.1592321395874 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.07      0.13       912
           1       0.00      0.00      0.00       885
           2       1.00      0.00      0.00       877
           3       0.00      0.00      0.00       897
           4       0.59      0.42      0.49       892
           5       0.00      0.00      0.00       862
           6       0.60      0.75      0.67       903
           7       0.60      0.21      0.31       889
           8       0.50      0.00      0.01       892
           9       0.00      0.00      0.00       876
          10       0.39      0.86      0.54      5646

    accuracy                           0.42     14531
   macro avg       0.39      0.21      0.19     14531
weighted avg       0.39      0.42      0.31     14531

Epoch 1, Step 2800, Loss: 1.3049222230911255, F1: 0.19433381506705255, Accuracy: 0.4230954511045351, Time Elapsed: 457.0471670627594 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.73      0.68       912
           1       0.61      0.79      0.69       885
           2       0.64      0.12      0.20       877
           3       0.00      0.00      0.00       897
           4       0.66      0.14      0.23       892
           5       0.00      0.00      0.00       862
           6       0.63      0.51      0.56       903
           7       0.64      0.03      0.06       889
           8       0.49      0.46      0.47       892
           9       0.45      0.10      0.17       876
          10       0.39      0.71      0.51      5646

    accuracy                           0.45     14531
   macro avg       0.47      0.33      0.32     14531
weighted avg       0.44      0.45      0.38     14531

Epoch 1, Step 2900, Loss: 1.1272387504577637, F1: 0.32392191430865097, Accuracy: 0.4524809028972541, Time Elapsed: 471.7495470046997 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.57      0.94      0.71       912
           1       0.00      0.00      0.00       885
           2       0.60      0.32      0.42       877
           3       0.58      0.39      0.47       897
           4       0.55      0.01      0.02       892
           5       0.54      0.03      0.05       862
           6       0.60      0.72      0.65       903
           7       0.62      0.46      0.53       889
           8       0.00      0.00      0.00       892
           9       0.47      0.13      0.21       876
          10       0.39      0.68      0.50      5646

    accuracy                           0.45     14531
   macro avg       0.45      0.34      0.32     14531
weighted avg       0.43      0.45      0.38     14531

Epoch 1, Step 3000, Loss: 2.794877290725708, F1: 0.3235736974056821, Accuracy: 0.4515174454614273, Time Elapsed: 487.08635115623474 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.09      0.16       912
           1       0.60      0.80      0.69       885
           2       0.73      0.05      0.09       877
           3       0.57      0.12      0.20       897
           4       0.00      0.00      0.00       892
           5       0.48      0.33      0.39       862
           6       0.56      0.84      0.68       903
           7       0.55      0.05      0.09       889
           8       0.00      0.00      0.00       892
           9       0.56      0.11      0.18       876
          10       0.39      0.75      0.51      5646

    accuracy                           0.44     14531
   macro avg       0.46      0.28      0.27     14531
weighted avg       0.44      0.44      0.35     14531

Epoch 1, Step 3100, Loss: 1.5894628763198853, F1: 0.27069736196302224, Accuracy: 0.4366526735943844, Time Elapsed: 501.93846225738525 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.52      0.96      0.67       912
           1       0.53      0.96      0.69       885
           2       0.65      0.21      0.31       877
           3       0.54      0.27      0.36       897
           4       0.63      0.08      0.14       892
           5       0.00      0.00      0.00       862
           6       0.62      0.73      0.67       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.65      0.49      5646

    accuracy                           0.45     14531
   macro avg       0.35      0.35      0.30     14531
weighted avg       0.37      0.45      0.37     14531

Epoch 1, Step 3200, Loss: 0.8997759222984314, F1: 0.30320991520498397, Accuracy: 0.4508980799669672, Time Elapsed: 516.7281432151794 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.73      0.68       912
           1       0.53      0.96      0.69       885
           2       0.00      0.00      0.00       877
           3       0.50      0.66      0.57       897
           4       0.00      0.00      0.00       892
           5       0.48      0.02      0.03       862
           6       0.46      0.94      0.62       903
           7       0.61      0.58      0.60       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.46     14531
   macro avg       0.33      0.40      0.33     14531
weighted avg       0.35      0.46      0.38     14531

Epoch 1, Step 3300, Loss: 1.4486511945724487, F1: 0.33113530989859113, Accuracy: 0.45612827747574153, Time Elapsed: 531.4090790748596 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.64      0.64       912
           1       0.62      0.25      0.36       885
           2       0.00      0.00      0.00       877
           3       0.67      0.01      0.01       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.62      0.33      0.43       903
           7       1.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.89      0.54      5646

    accuracy                           0.42     14531
   macro avg       0.36      0.19      0.18     14531
weighted avg       0.37      0.42      0.30     14531

Epoch 1, Step 3400, Loss: 1.973649263381958, F1: 0.18034434759346143, Accuracy: 0.4202050787970546, Time Elapsed: 547.0755960941315 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.65      0.61      0.63       912
           1       0.61      0.73      0.66       885
           2       1.00      0.00      0.00       877
           3       0.56      0.33      0.42       897
           4       0.00      0.00      0.00       892
           5       0.67      0.00      0.01       862
           6       0.61      0.12      0.20       903
           7       0.60      0.33      0.42       889
           8       0.51      0.42      0.46       892
           9       0.00      0.00      0.00       876
          10       0.39      0.74      0.51      5646

    accuracy                           0.44     14531
   macro avg       0.51      0.30      0.30     14531
weighted avg       0.47      0.44      0.37     14531

Epoch 1, Step 3500, Loss: 0.7031534314155579, F1: 0.300648609417523, Accuracy: 0.4436033308099924, Time Elapsed: 562.7941980361938 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.69      0.58      0.63       912
           1       0.61      0.42      0.50       885
           2       0.61      0.26      0.37       877
           3       0.56      0.18      0.27       897
           4       0.00      0.00      0.00       892
           5       0.48      0.41      0.44       862
           6       0.61      0.79      0.69       903
           7       0.62      0.33      0.43       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.71      0.51      5646

    accuracy                           0.46     14531
   macro avg       0.42      0.33      0.35     14531
weighted avg       0.41      0.46      0.40     14531

Epoch 1, Step 3600, Loss: 1.203021764755249, F1: 0.3487793783153457, Accuracy: 0.457022916523295, Time Elapsed: 578.1462302207947 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.78      0.70       912
           1       0.62      0.32      0.42       885
           2       0.61      0.41      0.49       877
           3       0.25      0.00      0.00       897
           4       0.61      0.10      0.18       892
           5       0.00      0.00      0.00       862
           6       0.59      0.83      0.69       903
           7       0.52      0.84      0.64       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.40      0.67      0.50      5646

    accuracy                           0.46     14531
   macro avg       0.38      0.36      0.33     14531
weighted avg       0.39      0.46      0.39     14531

Epoch 1, Step 3700, Loss: 0.6653522849082947, F1: 0.32889518912086635, Accuracy: 0.4620466588672493, Time Elapsed: 592.7558693885803 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.67      0.46      0.55       912
           1       0.57      0.93      0.70       885
           2       0.58      0.49      0.53       877
           3       0.54      0.64      0.59       897
           4       0.65      0.08      0.15       892
           5       0.57      0.10      0.17       862
           6       0.62      0.68      0.65       903
           7       0.62      0.61      0.61       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.48     14531
   macro avg       0.47      0.42      0.40     14531
weighted avg       0.45      0.48      0.43     14531

Epoch 1, Step 3800, Loss: 2.761232376098633, F1: 0.40186382145962796, Accuracy: 0.47567269974537196, Time Elapsed: 607.9283080101013 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.84      0.71       912
           1       0.60      0.86      0.71       885
           2       0.59      0.49      0.54       877
           3       0.60      0.25      0.35       897
           4       0.00      0.00      0.00       892
           5       0.54      0.19      0.28       862
           6       0.60      0.82      0.69       903
           7       0.59      0.64      0.62       889
           8       0.67      0.04      0.08       892
           9       0.00      0.00      0.00       876
          10       0.39      0.58      0.47      5646

    accuracy                           0.48     14531
   macro avg       0.47      0.43      0.40     14531
weighted avg       0.45      0.48      0.43     14531

Epoch 1, Step 4000, Loss: 1.4311714172363281, F1: 0.40408187283079333, Accuracy: 0.4809028972541463, Time Elapsed: 640.5679471492767 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.58      0.02      0.05       885
           2       0.56      0.56      0.56       877
           3       0.33      0.00      0.00       897
           4       0.00      0.00      0.00       892
           5       0.00      0.00      0.00       862
           6       0.63      0.77      0.69       903
           7       0.60      0.03      0.07       889
           8       0.00      0.00      0.00       892
           9       0.75      0.01      0.01       876
          10       0.39      0.86      0.53      5646

    accuracy                           0.42     14531
   macro avg       0.35      0.20      0.17     14531
weighted avg       0.36      0.42      0.29     14531

Epoch 1, Step 4100, Loss: 1.1060750484466553, F1: 0.17425701939009153, Accuracy: 0.4186222558667676, Time Elapsed: 655.4441223144531 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.65      0.57      0.61       912
           1       0.62      0.38      0.47       885
           2       0.55      0.70      0.62       877
           3       0.48      0.03      0.06       897
           4       0.38      0.86      0.53       892
           5       0.00      0.00      0.00       862
           6       0.63      0.64      0.63       903
           7       0.62      0.57      0.59       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.57      0.47      5646

    accuracy                           0.45     14531
   macro avg       0.39      0.39      0.36     14531
weighted avg       0.39      0.45      0.40     14531

Epoch 1, Step 4200, Loss: 1.081134557723999, F1: 0.36217924495923676, Accuracy: 0.4546142729337279, Time Elapsed: 670.2130591869354 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.31      0.42       912
           1       0.57      0.27      0.37       885
           2       0.57      0.61      0.59       877
           3       0.34      0.83      0.48       897
           4       0.61      0.11      0.19       892
           5       0.00      0.00      0.00       862
           6       0.64      0.64      0.64       903
           7       0.60      0.47      0.53       889
           8       0.00      0.00      0.00       892
           9       0.67      0.08      0.14       876
          10       0.39      0.60      0.47      5646

    accuracy                           0.44     14531
   macro avg       0.45      0.36      0.35     14531
weighted avg       0.43      0.44      0.39     14531

Epoch 1, Step 4300, Loss: 1.3471254110336304, F1: 0.34756285554125665, Accuracy: 0.4366526735943844, Time Elapsed: 685.2388021945953 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.51      0.57       912
           1       0.60      0.86      0.71       885
           2       0.63      0.36      0.46       877
           3       0.41      0.82      0.55       897
           4       0.00      0.00      0.00       892
           5       0.58      0.07      0.12       862
           6       0.00      0.00      0.00       903
           7       0.00      0.00      0.00       889
           8       0.65      0.02      0.05       892
           9       0.68      0.02      0.03       876
          10       0.39      0.70      0.50      5646

    accuracy                           0.44     14531
   macro avg       0.42      0.31      0.27     14531
weighted avg       0.41      0.44      0.35     14531

Epoch 1, Step 4400, Loss: 1.803849458694458, F1: 0.2709955577058917, Accuracy: 0.43534512421719085, Time Elapsed: 700.4208383560181 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.07      0.13       912
           1       0.00      0.00      0.00       885
           2       0.51      0.75      0.61       877
           3       0.58      0.34      0.43       897
           4       0.66      0.10      0.18       892
           5       0.41      0.56      0.48       862
           6       0.59      0.87      0.70       903
           7       0.60      0.70      0.65       889
           8       0.58      0.12      0.20       892
           9       0.00      0.00      0.00       876
          10       0.39      0.60      0.47      5646

    accuracy                           0.45     14531
   macro avg       0.45      0.37      0.35     14531
weighted avg       0.43      0.45      0.39     14531

Epoch 1, Step 4600, Loss: 1.1643867492675781, F1: 0.34883354733384847, Accuracy: 0.44835179960085336, Time Elapsed: 958.2118752002716 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.74      0.67       885
           2       0.59      0.54      0.56       877
           3       0.59      0.42      0.49       897
           4       1.00      0.00      0.01       892
           5       0.52      0.02      0.04       862
           6       0.64      0.06      0.11       903
           7       0.00      0.00      0.00       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.82      0.53      5646

    accuracy                           0.43     14531
   macro avg       0.39      0.24      0.22     14531
weighted avg       0.39      0.43      0.32     14531

Epoch 1, Step 4700, Loss: 1.295391321182251, F1: 0.21873348419053984, Accuracy: 0.4265363705182025, Time Elapsed: 974.1141831874847 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.56      0.02      0.03       912
           1       0.58      0.92      0.71       885
           2       0.62      0.35      0.45       877
           3       0.58      0.57      0.57       897
           4       0.36      0.81      0.50       892
           5       0.60      0.00      0.01       862
           6       0.63      0.62      0.62       903
           7       0.59      0.26      0.36       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.58      0.47      5646

    accuracy                           0.44     14531
   macro avg       0.45      0.38      0.34     14531
weighted avg       0.43      0.44      0.38     14531

Epoch 1, Step 4800, Loss: 0.8206798434257507, F1: 0.3386377344714648, Accuracy: 0.4433968756451724, Time Elapsed: 1016.0183222293854 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.07      0.13       912
           1       0.61      0.87      0.71       885
           2       0.57      0.65      0.61       877
           3       0.48      0.03      0.06       897
           4       0.57      0.53      0.55       892
           5       0.24      0.01      0.02       862
           6       0.63      0.60      0.61       903
           7       0.59      0.21      0.31       889
           8       0.54      0.04      0.08       892
           9       0.00      0.00      0.00       876
          10       0.39      0.68      0.49      5646

    accuracy                           0.45     14531
   macro avg       0.48      0.34      0.32     14531
weighted avg       0.45      0.45      0.38     14531

Epoch 1, Step 4900, Loss: 2.093484878540039, F1: 0.3248168831622469, Accuracy: 0.45041635124905377, Time Elapsed: 1031.5849132537842 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.80      0.70       912
           1       0.60      0.44      0.51       885
           2       0.60      0.38      0.47       877
           3       0.56      0.58      0.57       897
           4       0.62      0.43      0.50       892
           5       0.00      0.00      0.00       862
           6       0.62      0.44      0.51       903
           7       0.58      0.21      0.31       889
           8       0.60      0.11      0.19       892
           9       0.00      0.00      0.00       876
          10       0.39      0.65      0.49      5646

    accuracy                           0.46     14531
   macro avg       0.47      0.37      0.39     14531
weighted avg       0.45      0.46      0.42     14531

Epoch 1, Step 5000, Loss: 0.9772859811782837, F1: 0.3862945134671795, Accuracy: 0.46369830018580965, Time Elapsed: 1047.132131099701 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.41      0.50       912
           1       0.60      0.51      0.55       885
           2       0.60      0.53      0.56       877
           3       0.55      0.62      0.59       897
           4       0.45      0.73      0.56       892
           5       0.47      0.61      0.53       862
           6       0.60      0.17      0.26       903
           7       0.57      0.81      0.67       889
           8       0.61      0.09      0.16       892
           9       0.00      0.00      0.00       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.47     14531
   macro avg       0.50      0.45      0.44     14531
weighted avg       0.46      0.47      0.44     14531

Epoch 1, Step 5100, Loss: 0.5452333092689514, F1: 0.43793244430702916, Accuracy: 0.4667263092698369, Time Elapsed: 1129.969954252243 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.87      0.71       885
           2       0.63      0.27      0.38       877
           3       0.57      0.42      0.48       897
           4       0.44      0.83      0.57       892
           5       0.55      0.24      0.33       862
           6       0.61      0.74      0.67       903
           7       0.58      0.14      0.22       889
           8       0.54      0.10      0.17       892
           9       0.00      0.00      0.00       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.45     14531
   macro avg       0.45      0.38      0.36     14531
weighted avg       0.43      0.45      0.40     14531

Epoch 1, Step 5200, Loss: 2.282413959503174, F1: 0.364482662109112, Accuracy: 0.45165508223797396, Time Elapsed: 1146.57754611969 seconds
              precision    recall  f1-score   sup

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.42      0.50       912
           1       0.61      0.69      0.65       885
           2       0.59      0.55      0.57       877
           3       0.57      0.56      0.57       897
           4       0.40      0.82      0.53       892
           5       0.60      0.05      0.10       862
           6       0.57      0.90      0.70       903
           7       0.61      0.47      0.53       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.47     14531
   macro avg       0.45      0.45      0.42     14531
weighted avg       0.43      0.47      0.42     14531

Epoch 1, Step 5600, Loss: 2.685230255126953, F1: 0.41679194447261453, Accuracy: 0.4674144931525704, Time Elapsed: 1208.049348115921 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.53      0.02      0.04       885
           2       0.59      0.52      0.55       877
           3       0.53      0.67      0.59       897
           4       0.35      0.96      0.51       892
           5       0.54      0.28      0.37       862
           6       0.59      0.83      0.69       903
           7       0.57      0.83      0.67       889
           8       0.58      0.02      0.04       892
           9       0.55      0.12      0.20       876
          10       0.38      0.47      0.42      5646

    accuracy                           0.44     14531
   macro avg       0.47      0.43      0.37     14531
weighted avg       0.44      0.44      0.39     14531

Epoch 1, Step 5700, Loss: 0.2888947129249573, F1: 0.3723245753777906, Accuracy: 0.4428463285389856, Time Elapsed: 1223.90500831604 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.12      0.20       912
           1       0.61      0.60      0.61       885
           2       0.58      0.54      0.56       877
           3       0.53      0.58      0.56       897
           4       0.47      0.73      0.57       892
           5       0.57      0.18      0.28       862
           6       0.61      0.82      0.70       903
           7       0.60      0.74      0.66       889
           8       0.00      0.00      0.00       892
           9       0.60      0.15      0.24       876
          10       0.39      0.52      0.44      5646

    accuracy                           0.47     14531
   macro avg       0.51      0.45      0.44     14531
weighted avg       0.47      0.47      0.44     14531

Epoch 1, Step 5800, Loss: 1.1996161937713623, F1: 0.437068112897367, Accuracy: 0.47374578487371827, Time Elapsed: 1239.2630791664124 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.00      0.00       912
           1       0.61      0.83      0.70       885
           2       0.60      0.53      0.56       877
           3       0.49      0.74      0.59       897
           4       0.57      0.46      0.51       892
           5       0.60      0.01      0.03       862
           6       0.63      0.52      0.57       903
           7       0.61      0.54      0.57       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.62      0.48      5646

    accuracy                           0.46     14531
   macro avg       0.50      0.39      0.36     14531
weighted avg       0.47      0.46      0.40     14531

Epoch 1, Step 5900, Loss: 0.6877462267875671, F1: 0.364739203374186, Accuracy: 0.4625972059734361, Time Elapsed: 1255.1398222446442 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.82      0.70       912
           1       0.60      0.83      0.70       885
           2       0.67      0.07      0.13       877
           3       0.57      0.23      0.33       897
           4       1.00      0.00      0.00       892
           5       0.56      0.14      0.23       862
           6       0.62      0.44      0.52       903
           7       0.52      0.85      0.65       889
           8       0.00      0.00      0.00       892
           9       0.59      0.02      0.04       876
          10       0.39      0.65      0.49      5646

    accuracy                           0.46     14531
   macro avg       0.56      0.37      0.34     14531
weighted avg       0.50      0.46      0.39     14531

Epoch 1, Step 6100, Loss: 0.9552854299545288, F1: 0.34264107768959756, Accuracy: 0.46011974399559563, Time Elapsed: 1284.5611011981964 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.38      0.48       912
           1       0.59      0.91      0.71       885
           2       0.65      0.16      0.26       877
           3       0.52      0.80      0.63       897
           4       0.59      0.49      0.54       892
           5       0.56      0.15      0.24       862
           6       0.60      0.83      0.70       903
           7       0.61      0.54      0.57       889
           8       0.00      0.00      0.00       892
           9       0.78      0.01      0.02       876
          10       0.39      0.55      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.54      0.44      0.42     14531
weighted avg       0.49      0.48      0.43     14531

Epoch 1, Step 6400, Loss: 0.9176557064056396, F1: 0.4179226389167292, Accuracy: 0.4783566168880325, Time Elapsed: 1329.767415046692 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.71      0.67       912
           1       0.62      0.47      0.53       885
           2       0.70      0.11      0.18       877
           3       0.57      0.66      0.61       897
           4       0.56      0.25      0.34       892
           5       0.53      0.45      0.49       862
           6       0.62      0.60      0.61       903
           7       0.60      0.49      0.54       889
           8       0.00      0.00      0.00       892
           9       0.29      0.93      0.44       876
          10       0.39      0.42      0.41      5646

    accuracy                           0.45     14531
   macro avg       0.50      0.46      0.44     14531
weighted avg       0.47      0.45      0.43     14531

Epoch 1, Step 6500, Loss: 0.9666669964790344, F1: 0.43797787755675105, Accuracy: 0.44965934897804694, Time Elapsed: 1345.5906422138214 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.74      0.67       912
           1       0.61      0.58      0.59       885
           2       0.61      0.35      0.44       877
           3       0.57      0.21      0.31       897
           4       0.45      0.82      0.58       892
           5       0.58      0.24      0.34       862
           6       0.56      0.01      0.01       903
           7       0.61      0.28      0.38       889
           8       0.00      0.00      0.00       892
           9       0.63      0.07      0.13       876
          10       0.39      0.63      0.48      5646

    accuracy                           0.45     14531
   macro avg       0.51      0.36      0.36     14531
weighted avg       0.47      0.45      0.40     14531

Epoch 1, Step 6800, Loss: 1.0890846252441406, F1: 0.3584902181007991, Accuracy: 0.44869589154222006, Time Elapsed: 1390.2712321281433 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.86      0.72       912
           1       0.60      0.50      0.55       885
           2       0.67      0.16      0.26       877
           3       0.55      0.16      0.24       897
           4       0.56      0.61      0.58       892
           5       0.45      0.70      0.55       862
           6       0.61      0.77      0.68       903
           7       0.62      0.45      0.52       889
           8       0.53      0.57      0.55       892
           9       0.00      0.00      0.00       876
          10       0.39      0.49      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.48      0.46     14531
weighted avg       0.47      0.48      0.45     14531

Epoch 1, Step 6900, Loss: 0.6775614023208618, F1: 0.4618245747942165, Accuracy: 0.48214162824306656, Time Elapsed: 1405.8001441955566 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.76      0.69       912
           1       0.61      0.35      0.44       885
           2       0.70      0.04      0.07       877
           3       0.59      0.47      0.52       897
           4       0.58      0.51      0.54       892
           5       0.00      0.00      0.00       862
           6       0.62      0.77      0.68       903
           7       0.61      0.50      0.55       889
           8       0.58      0.40      0.47       892
           9       0.63      0.27      0.38       876
          10       0.39      0.60      0.47      5646

    accuracy                           0.48     14531
   macro avg       0.54      0.42      0.44     14531
weighted avg       0.49      0.48      0.45     14531

Epoch 1, Step 7600, Loss: 1.1246851682662964, F1: 0.43773412009124013, Accuracy: 0.4817287179134265, Time Elapsed: 1520.5058953762054 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.34      0.44       912
           1       0.60      0.92      0.73       885
           2       0.58      0.55      0.57       877
           3       0.54      0.03      0.06       897
           4       0.57      0.67      0.61       892
           5       0.60      0.04      0.07       862
           6       0.62      0.72      0.67       903
           7       0.63      0.38      0.48       889
           8       0.59      0.44      0.50       892
           9       0.00      0.00      0.00       876
          10       0.39      0.58      0.47      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.42      0.42     14531
weighted avg       0.48      0.48      0.43     14531

Epoch 1, Step 8400, Loss: 0.5730993747711182, F1: 0.41776534743355376, Accuracy: 0.4773931594522056, Time Elapsed: 1640.9937252998352 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.69      0.65       912
           1       0.59      0.93      0.72       885
           2       0.59      0.39      0.47       877
           3       0.55      0.26      0.35       897
           4       0.52      0.76      0.62       892
           5       0.39      0.82      0.53       862
           6       0.63      0.46      0.54       903
           7       0.61      0.38      0.47       889
           8       0.00      0.00      0.00       892
           9       0.00      0.00      0.00       876
          10       0.39      0.47      0.42      5646

    accuracy                           0.47     14531
   macro avg       0.45      0.47      0.43     14531
weighted avg       0.43      0.47      0.43     14531

Epoch 1, Step 8500, Loss: 1.2633931636810303, F1: 0.43405387967669107, Accuracy: 0.46824031381185055, Time Elapsed: 1656.1059641838074 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.92      0.73       912
           1       0.00      0.00      0.00       885
           2       0.74      0.12      0.20       877
           3       0.52      0.80      0.63       897
           4       0.49      0.82      0.61       892
           5       0.00      0.00      0.00       862
           6       0.63      0.51      0.56       903
           7       0.64      0.29      0.39       889
           8       0.60      0.15      0.24       892
           9       0.00      0.00      0.00       876
          10       0.39      0.61      0.48      5646

    accuracy                           0.46     14531
   macro avg       0.42      0.38      0.35     14531
weighted avg       0.41      0.46      0.39     14531

Epoch 1, Step 9000, Loss: 0.5216660499572754, F1: 0.3494462042857561, Accuracy: 0.46073910949005575, Time Elapsed: 1732.226879119873 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.88      0.72       912
           1       0.77      0.01      0.02       885
           2       0.59      0.52      0.55       877
           3       0.59      0.42      0.49       897
           4       0.63      0.28      0.38       892
           5       0.80      0.00      0.01       862
           6       0.60      0.17      0.26       903
           7       0.61      0.52      0.56       889
           8       0.37      0.87      0.51       892
           9       0.00      0.00      0.00       876
          10       0.39      0.57      0.46      5646

    accuracy                           0.45     14531
   macro avg       0.54      0.39      0.36     14531
weighted avg       0.49      0.45      0.40     14531

Epoch 1, Step 9400, Loss: 1.0729398727416992, F1: 0.3622037470499385, Accuracy: 0.4481453444360333, Time Elapsed: 2391.3193600177765 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.69      0.65       912
           1       0.00      0.00      0.00       885
           2       0.52      0.81      0.63       877
           3       0.60      0.54      0.56       897
           4       0.62      0.31      0.42       892
           5       0.57      0.18      0.28       862
           6       0.60      0.80      0.69       903
           7       0.59      0.78      0.67       889
           8       0.59      0.31      0.41       892
           9       0.50      0.74      0.59       876
          10       0.39      0.45      0.42      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.51      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 1, Step 10700, Loss: 1.8532965183258057, F1: 0.4829174233423078, Accuracy: 0.48922992223522127, Time Elapsed: 2587.400017261505 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.50      0.57       912
           1       0.61      0.70      0.65       885
           2       0.56      0.56      0.56       877
           3       0.50      0.02      0.03       897
           4       0.58      0.61      0.60       892
           5       0.60      0.04      0.07       862
           6       0.59      0.05      0.10       903
           7       0.00      0.00      0.00       889
           8       0.60      0.19      0.28       892
           9       0.55      0.70      0.62       876
          10       0.39      0.65      0.49      5646

    accuracy                           0.46     14531
   macro avg       0.51      0.37      0.36     14531
weighted avg       0.47      0.46      0.40     14531

Epoch 1, Step 12800, Loss: 2.620351552963257, F1: 0.3598005805493027, Accuracy: 0.45826164751221526, Time Elapsed: 2924.0040199756622 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.54      0.58       912
           1       0.00      0.00      0.00       885
           2       0.59      0.61      0.60       877
           3       0.57      0.45      0.50       897
           4       0.57      0.62      0.59       892
           5       0.50      0.73      0.59       862
           6       0.65      0.44      0.53       903
           7       0.58      0.73      0.65       889
           8       0.56      0.53      0.55       892
           9       0.00      0.00      0.00       876
          10       0.39      0.51      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.46      0.47      0.46     14531
weighted avg       0.44      0.48      0.45     14531

Epoch 1, Step 13000, Loss: 1.5705983638763428, F1: 0.45698876029619967, Accuracy: 0.480834078865873, Time Elapsed: 2961.5105521678925 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.86      0.72       912
           1       0.00      0.00      0.00       885
           2       0.49      0.82      0.61       877
           3       0.55      0.12      0.19       897
           4       0.60      0.42      0.49       892
           5       0.60      0.06      0.10       862
           6       0.62      0.05      0.09       903
           7       0.58      0.20      0.30       889
           8       0.57      0.01      0.03       892
           9       0.62      0.36      0.45       876
          10       0.39      0.69      0.50      5646

    accuracy                           0.44     14531
   macro avg       0.51      0.33      0.32     14531
weighted avg       0.47      0.44      0.38     14531

Epoch 1, Step 13600, Loss: 1.288863182067871, F1: 0.3166662165052841, Accuracy: 0.44436033308099926, Time Elapsed: 3060.8922250270844 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.53      0.57       912
           1       0.61      0.60      0.60       885
           2       0.54      0.76      0.63       877
           3       0.59      0.32      0.42       897
           4       0.60      0.58      0.59       892
           5       0.00      0.00      0.00       862
           6       0.53      0.94      0.68       903
           7       0.61      0.44      0.51       889
           8       0.55      0.03      0.06       892
           9       0.62      0.27      0.38       876
          10       0.39      0.53      0.45      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.45      0.44     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 1, Step 13800, Loss: 0.24679876863956451, F1: 0.4444813503712869, Accuracy: 0.4798018030417728, Time Elapsed: 3095.2505202293396 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.64      0.64       912
           1       0.59      0.91      0.72       885
           2       0.54      0.77      0.64       877
           3       0.00      0.00      0.00       897
           4       0.60      0.64      0.62       892
           5       0.57      0.00      0.01       862
           6       0.63      0.62      0.62       903
           7       0.00      0.00      0.00       889
           8       0.58      0.30      0.39       892
           9       0.42      0.84      0.56       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.45      0.47      0.42     14531
weighted avg       0.43      0.48      0.42     14531

Epoch 1, Step 14500, Loss: 0.47712668776512146, F1: 0.4209240533631755, Accuracy: 0.47594797329846533, Time Elapsed: 3198.3051941394806 seconds
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.65      0.08      0.15       912
           1       0.62      0.26      0.37       885
           2       0.64      0.27      0.38       877
           3       0.59      0.36      0.45       897
           4       0.61      0.47      0.53       892
           5       0.00      0.00      0.00       862
           6       0.62      0.17      0.26       903
           7       0.59      0.67      0.63       889
           8       0.55      0.64      0.59       892
           9       0.61      0.14      0.23       876
          10       0.39      0.69      0.50      5646

    accuracy                           0.45     14531
   macro avg       0.53      0.34      0.37     14531
weighted avg       0.49      0.45      0.41     14531

Epoch 1, Step 15000, Loss: 0.34555545449256897, F1: 0.3709730645488066, Accuracy: 0.45475190971027457, Time Elapsed: 3275.4866151809692 seconds
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.69      0.65       912
           1       0.62      0.01      0.02       885
           2       0.62      0.24      0.34       877
           3       0.00      0.00      0.00       897
           4       0.52      0.89      0.66       892
           5       0.55      0.52      0.54       862
           6       0.63      0.59      0.61       903
           7       0.61      0.47      0.53       889
           8       0.61      0.52      0.56       892
           9       0.62      0.33      0.43       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.53      0.44      0.44     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 1, Step 15400, Loss: 0.8271118402481079, F1: 0.4368777308577307, Accuracy: 0.47959534787695274, Time Elapsed: 3335.872922182083 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.73      0.67       912
           1       0.61      0.49      0.54       885
           2       0.59      0.52      0.55       877
           3       0.58      0.19      0.28       897
           4       0.55      0.80      0.65       892
           5       0.58      0.36      0.45       862
           6       0.57      0.91      0.70       903
           7       0.61      0.63      0.62       889
           8       0.00      0.00      0.00       892
           9       0.60      0.41      0.48       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.50      0.49     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 1, Step 15600, Loss: 0.8283507227897644, F1: 0.4887902104948957, Accuracy: 0.4939095726378088, Time Elapsed: 3366.997593164444 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.58      0.91      0.71       885
           2       0.59      0.42      0.49       877
           3       0.60      0.53      0.56       897
           4       0.61      0.39      0.47       892
           5       0.50      0.73      0.60       862
           6       0.58      0.88      0.70       903
           7       0.56      0.80      0.66       889
           8       0.56      0.66      0.61       892
           9       0.44      0.88      0.59       876
          10       0.39      0.31      0.35      5646

    accuracy                           0.50     14531
   macro avg       0.49      0.59      0.52     14531
weighted avg       0.46      0.50      0.46     14531

Epoch 1, Step 17100, Loss: 0.8524736762046814, F1: 0.5210099629364676, Accuracy: 0.4981763127107563, Time Elapsed: 3595.324218273163 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.41      0.50       912
           1       0.60      0.87      0.71       885
           2       0.60      0.49      0.54       877
           3       0.00      0.00      0.00       897
           4       0.57      0.71      0.63       892
           5       0.49      0.56      0.53       862
           6       0.63      0.68      0.66       903
           7       0.59      0.64      0.61       889
           8       0.57      0.57      0.57       892
           9       0.45      0.88      0.59       876
          10       0.38      0.36      0.37      5646

    accuracy                           0.49     14531
   macro avg       0.50      0.56      0.52     14531
weighted avg       0.46      0.49      0.47     14531

Epoch 1, Step 17800, Loss: 1.4420979022979736, F1: 0.5191517240498068, Accuracy: 0.4948042116853623, Time Elapsed: 3711.459631204605 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.83      0.70       885
           2       0.60      0.62      0.61       877
           3       0.58      0.47      0.52       897
           4       0.62      0.42      0.50       892
           5       0.57      0.41      0.47       862
           6       0.63      0.65      0.64       903
           7       0.00      0.00      0.00       889
           8       0.40      0.90      0.56       892
           9       0.66      0.07      0.13       876
          10       0.39      0.52      0.45      5646

    accuracy                           0.47     14531
   macro avg       0.46      0.44      0.42     14531
weighted avg       0.44      0.47      0.43     14531

Epoch 1, Step 20400, Loss: 0.8868150115013123, F1: 0.4163225339854868, Accuracy: 0.4679650402587571, Time Elapsed: 4114.190453052521 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.63      0.68      0.65       885
           2       0.58      0.29      0.39       877
           3       0.53      0.85      0.65       897
           4       0.57      0.76      0.65       892
           5       0.56      0.11      0.19       862
           6       0.61      0.83      0.70       903
           7       0.59      0.73      0.65       889
           8       0.61      0.30      0.41       892
           9       0.00      0.00      0.00       876
          10       0.39      0.52      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.46      0.46      0.43     14531
weighted avg       0.44      0.48      0.44     14531

Epoch 1, Step 20800, Loss: 1.1997294425964355, F1: 0.43065955619028445, Accuracy: 0.4813846259720597, Time Elapsed: 4176.820930242538 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.70      0.02      0.04       912
           1       0.61      0.82      0.70       885
           2       0.60      0.38      0.46       877
           3       0.55      0.13      0.21       897
           4       0.60      0.60      0.60       892
           5       0.58      0.13      0.21       862
           6       0.63      0.49      0.55       903
           7       0.62      0.35      0.45       889
           8       0.00      0.00      0.00       892
           9       0.62      0.42      0.50       876
          10       0.39      0.67      0.49      5646

    accuracy                           0.46     14531
   macro avg       0.54      0.36      0.38     14531
weighted avg       0.49      0.46      0.42     14531

Epoch 1, Step 23900, Loss: 1.061922311782837, F1: 0.3832569176021243, Accuracy: 0.46369830018580965, Time Elapsed: 5370.8088092803955 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.59      0.94      0.73       885
           2       0.60      0.41      0.49       877
           3       0.59      0.68      0.63       897
           4       0.46      0.92      0.61       892
           5       0.55      0.60      0.57       862
           6       0.61      0.72      0.66       903
           7       0.63      0.49      0.55       889
           8       0.00      0.00      0.00       892
           9       0.59      0.57      0.58       876
          10       0.39      0.42      0.40      5646

    accuracy                           0.49     14531
   macro avg       0.45      0.52      0.48     14531
weighted avg       0.43      0.49      0.45     14531

Epoch 1, Step 24000, Loss: 0.17836138606071472, F1: 0.47541288810697996, Accuracy: 0.48964283256486135, Time Elapsed: 5387.783933162689 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.58      0.95      0.72       885
           2       0.61      0.36      0.45       877
           3       0.59      0.68      0.63       897
           4       0.58      0.72      0.64       892
           5       0.55      0.18      0.28       862
           6       0.62      0.42      0.50       903
           7       0.62      0.07      0.13       889
           8       0.00      0.00      0.00       892
           9       0.56      0.74      0.63       876
          10       0.39      0.57      0.46      5646

    accuracy                           0.47     14531
   macro avg       0.46      0.43      0.41     14531
weighted avg       0.44      0.47      0.42     14531

Epoch 1, Step 24100, Loss: 1.920084834098816, F1: 0.40518505344442757, Accuracy: 0.4720253251668846, Time Elapsed: 5404.815759181976 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.76      0.68       912
           1       0.00      0.00      0.00       885
           2       0.60      0.52      0.56       877
           3       0.57      0.29      0.38       897
           4       0.58      0.34      0.43       892
           5       0.56      0.60      0.58       862
           6       0.61      0.80      0.69       903
           7       0.61      0.03      0.06       889
           8       0.58      0.58      0.58       892
           9       1.00      0.01      0.02       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.47     14531
   macro avg       0.56      0.41      0.40     14531
weighted avg       0.50      0.47      0.43     14531

Epoch 1, Step 24400, Loss: 1.8597675561904907, F1: 0.4045373064870839, Accuracy: 0.47119950450760445, Time Elapsed: 5454.580825090408 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.74      0.67       912
           1       0.60      0.87      0.71       885
           2       0.63      0.10      0.17       877
           3       0.61      0.42      0.50       897
           4       0.61      0.40      0.48       892
           5       0.58      0.47      0.52       862
           6       0.00      0.00      0.00       903
           7       0.65      0.08      0.14       889
           8       0.61      0.45      0.52       892
           9       0.00      0.00      0.00       876
          10       0.39      0.65      0.49      5646

    accuracy                           0.47     14531
   macro avg       0.48      0.38      0.38     14531
weighted avg       0.45      0.47      0.42     14531

Epoch 1, Step 26700, Loss: 0.9911185503005981, F1: 0.3812241504802154, Accuracy: 0.46817149542357717, Time Elapsed: 5818.300986289978 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.78      0.68       912
           1       0.62      0.34      0.44       885
           2       0.58      0.51      0.54       877
           3       0.60      0.49      0.54       897
           4       0.54      0.66      0.59       892
           5       0.52      0.50      0.51       862
           6       0.63      0.50      0.56       903
           7       0.62      0.51      0.56       889
           8       0.54      0.69      0.61       892
           9       0.00      0.00      0.00       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.49      0.50     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 1, Step 27000, Loss: 0.7305819988250732, F1: 0.4961788104773523, Accuracy: 0.48957401417658797, Time Elapsed: 5864.427634239197 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.87      0.71       912
           1       0.00      0.00      0.00       885
           2       0.60      0.54      0.57       877
           3       0.60      0.37      0.46       897
           4       0.61      0.37      0.46       892
           5       0.60      0.32      0.42       862
           6       0.64      0.64      0.64       903
           7       0.58      0.81      0.68       889
           8       0.57      0.70      0.63       892
           9       0.61      0.58      0.60       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.50     14531
   macro avg       0.53      0.52      0.51     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 1, Step 27300, Loss: 0.713289737701416, F1: 0.507739813325682, Accuracy: 0.5036129653843507, Time Elapsed: 5910.586131334305 seconds
              precision    recall  f1-score   su

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.29      0.40       912
           1       0.59      0.85      0.70       885
           2       0.59      0.55      0.57       877
           3       0.00      0.00      0.00       897
           4       0.53      0.63      0.58       892
           5       0.56      0.08      0.14       862
           6       0.63      0.62      0.63       903
           7       0.60      0.74      0.66       889
           8       0.58      0.47      0.52       892
           9       0.59      0.45      0.51       876
          10       0.39      0.52      0.45      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.47      0.47     14531
weighted avg       0.48      0.49      0.46     14531

Epoch 1, Step 27900, Loss: 0.47270968556404114, F1: 0.46769039072125335, Accuracy: 0.4873030073635675, Time Elapsed: 6003.44517827034 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.88      0.72       912
           1       0.00      0.00      0.00       885
           2       0.63      0.25      0.36       877
           3       0.60      0.51      0.55       897
           4       0.59      0.68      0.63       892
           5       0.52      0.73      0.60       862
           6       0.63      0.65      0.64       903
           7       0.61      0.74      0.67       889
           8       0.55      0.72      0.62       892
           9       0.50      0.84      0.63       876
          10       0.39      0.36      0.38      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.58      0.53     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 2, Step 100, Loss: 3.1736302375793457, F1: 0.5265751095482181, Accuracy: 0.5078797054572982, Time Elapsed: 332.2375512123108 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.92      0.73       912
           1       0.00      0.00      0.00       885
           2       0.59      0.53      0.56       877
           3       0.60      0.50      0.55       897
           4       0.61      0.49      0.54       892
           5       0.60      0.39      0.48       862
           6       0.58      0.88      0.70       903
           7       0.60      0.70      0.65       889
           8       0.56      0.66      0.61       892
           9       0.64      0.34      0.44       876
          10       0.39      0.44      0.42      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.53      0.51     14531
weighted avg       0.48      0.51      0.48     14531

Epoch 2, Step 200, Loss: 0.8100726008415222, F1: 0.5145899766642761, Accuracy: 0.5058839721973711, Time Elapsed: 349.82083916664124 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.56      0.95      0.70       912
           1       0.64      0.34      0.45       885
           2       0.54      0.77      0.64       877
           3       0.00      0.00      0.00       897
           4       0.59      0.66      0.62       892
           5       0.63      0.12      0.21       862
           6       0.61      0.79      0.69       903
           7       0.60      0.49      0.54       889
           8       0.60      0.49      0.54       892
           9       0.54      0.74      0.62       876
          10       0.39      0.44      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.53      0.49     14531
weighted avg       0.48      0.50      0.47     14531

Epoch 2, Step 1100, Loss: 0.4997534453868866, F1: 0.4927781121135286, Accuracy: 0.4978322207693896, Time Elapsed: 1644.4852311611176 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.82      0.71       912
           1       0.61      0.83      0.70       885
           2       0.64      0.36      0.46       877
           3       0.60      0.32      0.42       897
           4       0.56      0.80      0.66       892
           5       0.55      0.57      0.56       862
           6       0.63      0.65      0.64       903
           7       0.60      0.63      0.61       889
           8       0.59      0.57      0.58       892
           9       0.00      0.00      0.00       876
          10       0.40      0.44      0.42      5646

    accuracy                           0.51     14531
   macro avg       0.53      0.54      0.52     14531
weighted avg       0.48      0.51      0.49     14531

Epoch 2, Step 1800, Loss: 0.3250542879104614, F1: 0.5229603614573946, Accuracy: 0.5109077145413254, Time Elapsed: 3715.356700181961 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.84      0.69       912
           1       0.60      0.13      0.22       885
           2       0.66      0.21      0.32       877
           3       0.58      0.44      0.50       897
           4       0.00      0.00      0.00       892
           5       0.58      0.36      0.44       862
           6       0.62      0.60      0.61       903
           7       0.60      0.03      0.06       889
           8       0.57      0.62      0.59       892
           9       0.66      0.07      0.13       876
          10       0.39      0.66      0.49      5646

    accuracy                           0.46     14531
   macro avg       0.53      0.36      0.37     14531
weighted avg       0.49      0.46      0.41     14531

Epoch 2, Step 3400, Loss: 0.6067671775817871, F1: 0.36972320999536074, Accuracy: 0.46149611176106253, Time Elapsed: 9960.504036188126 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.66      0.24      0.35       912
           1       0.67      0.22      0.33       885
           2       0.58      0.71      0.64       877
           3       0.60      0.31      0.41       897
           4       0.00      0.00      0.00       892
           5       0.56      0.19      0.29       862
           6       0.61      0.72      0.66       903
           7       0.63      0.34      0.44       889
           8       0.57      0.71      0.63       892
           9       0.54      0.51      0.53       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.47     14531
   macro avg       0.53      0.41      0.43     14531
weighted avg       0.48      0.47      0.44     14531

Epoch 2, Step 4100, Loss: 1.7548611164093018, F1: 0.4311300396590735, Accuracy: 0.4718876883903379, Time Elapsed: 11971.686388015747 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.53      0.58       912
           1       0.60      0.91      0.72       885
           2       0.59      0.46      0.52       877
           3       0.00      0.00      0.00       897
           4       0.53      0.61      0.57       892
           5       0.51      0.68      0.59       862
           6       0.66      0.27      0.38       903
           7       0.56      0.88      0.68       889
           8       0.60      0.43      0.50       892
           9       0.59      0.49      0.53       876
          10       0.40      0.45      0.42      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.52      0.50     14531
weighted avg       0.48      0.50      0.47     14531

Epoch 2, Step 12000, Loss: 0.7874510288238525, F1: 0.4993223991808576, Accuracy: 0.4957676691211892, Time Elapsed: 17862.801710128784 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.61      0.62       912
           1       0.61      0.74      0.67       885
           2       0.59      0.60      0.59       877
           3       0.00      0.00      0.00       897
           4       0.53      0.75      0.62       892
           5       0.57      0.40      0.47       862
           6       0.63      0.50      0.56       903
           7       0.63      0.06      0.12       889
           8       0.58      0.45      0.51       892
           9       0.49      0.81      0.61       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.49      0.47     14531
weighted avg       0.47      0.48      0.46     14531

Epoch 2, Step 12800, Loss: 3.3466899394989014, F1: 0.47241562940327614, Accuracy: 0.4843438166678136, Time Elapsed: 19921.990955114365 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.61      0.61       912
           1       0.00      0.00      0.00       885
           2       0.56      0.73      0.64       877
           3       0.59      0.66      0.62       897
           4       0.53      0.69      0.60       892
           5       0.52      0.74      0.61       862
           6       0.63      0.48      0.54       903
           7       0.59      0.72      0.65       889
           8       0.56      0.71      0.63       892
           9       0.00      0.00      0.00       876
          10       0.39      0.43      0.41      5646

    accuracy                           0.49     14531
   macro avg       0.45      0.52      0.48     14531
weighted avg       0.43      0.49      0.46     14531

Epoch 2, Step 13000, Loss: 1.5022363662719727, F1: 0.482426261999261, Accuracy: 0.4923267497075218, Time Elapsed: 20924.88951396942 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.87      0.70       912
           1       0.00      0.00      0.00       885
           2       0.51      0.80      0.62       877
           3       0.61      0.31      0.41       897
           4       0.54      0.63      0.58       892
           5       0.59      0.34      0.43       862
           6       0.62      0.19      0.29       903
           7       0.63      0.20      0.31       889
           8       0.59      0.29      0.39       892
           9       0.60      0.19      0.29       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.46     14531
   macro avg       0.52      0.40      0.41     14531
weighted avg       0.47      0.46      0.43     14531

Epoch 2, Step 13600, Loss: 0.7715439200401306, F1: 0.408005638242221, Accuracy: 0.46280366113825616, Time Elapsed: 23985.199004888535 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.80      0.69       912
           1       0.59      0.94      0.72       885
           2       0.58      0.74      0.65       877
           3       0.00      0.00      0.00       897
           4       0.59      0.53      0.56       892
           5       0.62      0.04      0.08       862
           6       0.61      0.63      0.62       903
           7       0.62      0.32      0.42       889
           8       0.58      0.56      0.57       892
           9       0.44      0.86      0.58       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.53      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 2, Step 14500, Loss: 1.225121021270752, F1: 0.48163126070913714, Accuracy: 0.4923955680957952, Time Elapsed: 26357.601392269135 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.91      0.73       912
           1       0.60      0.78      0.68       885
           2       0.58      0.64      0.61       877
           3       0.61      0.45      0.52       897
           4       0.49      0.82      0.62       892
           5       0.56      0.59      0.58       862
           6       0.52      0.95      0.67       903
           7       0.62      0.62      0.62       889
           8       0.00      0.00      0.00       892
           9       0.59      0.40      0.48       876
          10       0.39      0.34      0.36      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.59      0.53     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 2, Step 15600, Loss: 0.607339084148407, F1: 0.5321683998201819, Accuracy: 0.5084990709517583, Time Elapsed: 29368.878165960312 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.52      0.57       912
           1       0.56      0.95      0.71       885
           2       0.00      0.00      0.00       877
           3       0.60      0.28      0.38       897
           4       0.53      0.20      0.29       892
           5       0.57      0.32      0.41       862
           6       0.63      0.37      0.47       903
           7       0.62      0.54      0.58       889
           8       0.59      0.65      0.62       892
           9       0.59      0.27      0.37       876
          10       0.39      0.58      0.47      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.43      0.44     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 2, Step 16500, Loss: 0.6573219895362854, F1: 0.44151852176319917, Accuracy: 0.4768426123460189, Time Elapsed: 31616.28035211563 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.46      0.52       912
           1       0.61      0.87      0.72       885
           2       0.60      0.63      0.62       877
           3       0.61      0.24      0.35       897
           4       0.45      0.56      0.50       892
           5       0.53      0.51      0.52       862
           6       0.63      0.47      0.54       903
           7       0.63      0.27      0.38       889
           8       0.00      0.00      0.00       892
           9       0.64      0.16      0.26       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.47     14531
   macro avg       0.52      0.43      0.44     14531
weighted avg       0.48      0.47      0.45     14531

Epoch 2, Step 16800, Loss: 0.37382131814956665, F1: 0.4414539899866878, Accuracy: 0.4724382354965247, Time Elapsed: 31662.540598154068 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.71      0.65       912
           1       0.60      0.91      0.72       885
           2       0.58      0.74      0.65       877
           3       0.58      0.74      0.65       897
           4       0.60      0.32      0.42       892
           5       0.61      0.14      0.23       862
           6       0.62      0.65      0.63       903
           7       0.59      0.31      0.41       889
           8       0.00      0.00      0.00       892
           9       0.51      0.60      0.55       876
          10       0.39      0.46      0.42      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.51      0.49     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 2, Step 20100, Loss: 0.7149640321731567, F1: 0.48513495060003586, Accuracy: 0.494735393297089, Time Elapsed: 41259.52907323837 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.32      0.42       912
           1       0.55      0.81      0.65       885
           2       0.55      0.51      0.53       877
           3       0.56      0.77      0.65       897
           4       0.60      0.24      0.35       892
           5       0.44      0.72      0.55       862
           6       0.62      0.48      0.54       903
           7       0.60      0.69      0.64       889
           8       0.00      0.00      0.00       892
           9       0.45      0.72      0.55       876
          10       0.39      0.40      0.40      5646

    accuracy                           0.48     14531
   macro avg       0.49      0.51      0.48     14531
weighted avg       0.46      0.48      0.45     14531

Epoch 2, Step 21300, Loss: 0.8250985145568848, F1: 0.47974950358074064, Accuracy: 0.4771867042873856, Time Elapsed: 43427.59052491188 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.78      0.68       912
           1       0.60      0.12      0.20       885
           2       0.60      0.51      0.55       877
           3       0.61      0.37      0.46       897
           4       0.57      0.41      0.48       892
           5       0.54      0.64      0.59       862
           6       0.62      0.67      0.65       903
           7       0.58      0.05      0.09       889
           8       0.57      0.75      0.65       892
           9       0.00      0.00      0.00       876
          10       0.39      0.55      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.44      0.44     14531
weighted avg       0.48      0.48      0.44     14531

Epoch 2, Step 24400, Loss: 3.601393222808838, F1: 0.4363260221168143, Accuracy: 0.479251255935586, Time Elapsed: 43903.775930166245 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.70      0.66       912
           1       0.60      0.89      0.72       885
           2       0.65      0.12      0.21       877
           3       0.59      0.60      0.60       897
           4       0.58      0.25      0.35       892
           5       0.54      0.37      0.44       862
           6       0.67      0.19      0.29       903
           7       0.63      0.37      0.47       889
           8       0.60      0.48      0.53       892
           9       0.00      0.00      0.00       876
          10       0.39      0.61      0.48      5646

    accuracy                           0.48     14531
   macro avg       0.54      0.42      0.43     14531
weighted avg       0.49      0.48      0.45     14531

Epoch 2, Step 26700, Loss: 1.175136685371399, F1: 0.4315486353473403, Accuracy: 0.47952652948867935, Time Elapsed: 46119.121415138245 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.47      0.54       912
           1       0.59      0.90      0.72       885
           2       0.57      0.75      0.65       877
           3       0.00      0.00      0.00       897
           4       0.50      0.69      0.58       892
           5       0.54      0.08      0.13       862
           6       0.62      0.73      0.67       903
           7       0.60      0.77      0.67       889
           8       0.58      0.63      0.61       892
           9       0.59      0.26      0.37       876
          10       0.39      0.44      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.52      0.49     14531
weighted avg       0.47      0.50      0.46     14531

Epoch 2, Step 27900, Loss: 0.6921783685684204, F1: 0.4863229527951567, Accuracy: 0.49652467139219597, Time Elapsed: 46329.99759697914 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.77      0.69       912
           1       0.61      0.47      0.53       885
           2       0.62      0.35      0.45       877
           3       0.00      0.00      0.00       897
           4       0.58      0.61      0.60       892
           5       0.57      0.50      0.53       862
           6       0.62      0.73      0.67       903
           7       0.61      0.69      0.65       889
           8       0.54      0.80      0.64       892
           9       0.55      0.59      0.57       876
          10       0.40      0.43      0.41      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.54      0.52     14531
weighted avg       0.48      0.51      0.49     14531

Epoch 3, Step 1700, Loss: 2.1024653911590576, F1: 0.5211201548093599, Accuracy: 0.5055398802560044, Time Elapsed: 322.42789602279663 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.57      0.95      0.72       912
           1       0.61      0.78      0.69       885
           2       0.00      0.00      0.00       877
           3       0.59      0.49      0.53       897
           4       0.52      0.06      0.10       892
           5       0.46      0.83      0.59       862
           6       0.62      0.21      0.32       903
           7       0.61      0.61      0.61       889
           8       0.58      0.33      0.42       892
           9       0.60      0.40      0.48       876
          10       0.39      0.49      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.50      0.47      0.45     14531
weighted avg       0.47      0.48      0.44     14531

Epoch 3, Step 2500, Loss: 1.0015186071395874, F1: 0.4451345245322029, Accuracy: 0.47766843300529904, Time Elapsed: 467.3242747783661 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.51      0.57       912
           1       0.58      0.89      0.70       885
           2       0.61      0.53      0.57       877
           3       0.59      0.66      0.63       897
           4       0.55      0.55      0.55       892
           5       0.57      0.10      0.17       862
           6       0.61      0.67      0.64       903
           7       0.60      0.62      0.61       889
           8       0.60      0.61      0.61       892
           9       0.00      0.00      0.00       876
          10       0.40      0.48      0.43      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.51      0.50     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 3, Step 4800, Loss: 0.7237018346786499, F1: 0.4978684683482289, Accuracy: 0.501479595347877, Time Elapsed: 915.535206079483 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.71      0.67       912
           1       0.62      0.58      0.60       885
           2       0.57      0.69      0.62       877
           3       0.61      0.48      0.54       897
           4       0.54      0.67      0.60       892
           5       0.57      0.37      0.45       862
           6       0.62      0.67      0.64       903
           7       0.60      0.62      0.61       889
           8       0.60      0.65      0.62       892
           9       0.00      0.00      0.00       876
          10       0.40      0.45      0.42      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.53      0.52     14531
weighted avg       0.48      0.51      0.49     14531

Epoch 3, Step 4900, Loss: 2.3260550498962402, F1: 0.5249479212306326, Accuracy: 0.5077420686807514, Time Elapsed: 934.9425818920135 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.70      0.65       885
           2       0.58      0.67      0.62       877
           3       0.57      0.77      0.65       897
           4       0.51      0.80      0.62       892
           5       0.56      0.48      0.52       862
           6       0.61      0.77      0.68       903
           7       0.60      0.80      0.68       889
           8       0.60      0.61      0.61       892
           9       0.59      0.54      0.56       876
          10       0.39      0.36      0.38      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.59      0.54     14531
weighted avg       0.47      0.51      0.49     14531

Epoch 3, Step 5700, Loss: 0.2966063618659973, F1: 0.5435894018660631, Accuracy: 0.5141421787901728, Time Elapsed: 1057.886193037033 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.79      0.69       885
           2       0.58      0.74      0.65       877
           3       0.56      0.77      0.65       897
           4       0.58      0.49      0.53       892
           5       0.57      0.43      0.49       862
           6       0.63      0.69      0.65       903
           7       0.59      0.85      0.69       889
           8       0.00      0.00      0.00       892
           9       0.59      0.62      0.60       876
          10       0.39      0.45      0.42      5646

    accuracy                           0.50     14531
   macro avg       0.46      0.53      0.49     14531
weighted avg       0.44      0.50      0.46     14531

Epoch 3, Step 5800, Loss: 0.7696855068206787, F1: 0.4886330591467163, Accuracy: 0.5014107769596036, Time Elapsed: 1072.548199892044 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.73      0.67       912
           1       0.61      0.71      0.66       885
           2       0.71      0.09      0.17       877
           3       0.59      0.37      0.46       897
           4       0.56      0.43      0.49       892
           5       0.56      0.16      0.25       862
           6       0.62      0.74      0.67       903
           7       0.45      0.94      0.61       889
           8       0.00      0.00      0.00       892
           9       0.49      0.65      0.56       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.48      0.45     14531
weighted avg       0.47      0.48      0.44     14531

Epoch 3, Step 7500, Loss: 1.0611329078674316, F1: 0.45051522138860567, Accuracy: 0.47808134333493907, Time Elapsed: 1341.1996879577637 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.19      0.29       912
           1       0.58      0.41      0.48       885
           2       0.57      0.68      0.62       877
           3       0.00      0.00      0.00       897
           4       0.58      0.41      0.48       892
           5       0.56      0.44      0.49       862
           6       0.58      0.84      0.69       903
           7       0.60      0.28      0.38       889
           8       0.61      0.30      0.40       892
           9       0.51      0.84      0.63       876
          10       0.38      0.52      0.44      5646

    accuracy                           0.47     14531
   macro avg       0.51      0.45      0.45     14531
weighted avg       0.47      0.47      0.44     14531

Epoch 3, Step 9500, Loss: 0.5014516711235046, F1: 0.4458729918482765, Accuracy: 0.46899731608285733, Time Elapsed: 1669.674332857132 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.17      0.26       912
           1       0.60      0.89      0.72       885
           2       0.60      0.46      0.52       877
           3       0.00      0.00      0.00       897
           4       0.58      0.52      0.55       892
           5       0.53      0.58      0.56       862
           6       0.62      0.11      0.18       903
           7       0.58      0.87      0.69       889
           8       0.59      0.45      0.51       892
           9       0.53      0.67      0.60       876
          10       0.38      0.50      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.47      0.46     14531
weighted avg       0.47      0.48      0.45     14531

Epoch 3, Step 12000, Loss: 1.0206608772277832, F1: 0.45717268023223206, Accuracy: 0.48007707659486615, Time Elapsed: 2061.496286869049 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.68      0.64       885
           2       0.60      0.56      0.58       877
           3       0.61      0.65      0.63       897
           4       0.55      0.74      0.63       892
           5       0.53      0.60      0.57       862
           6       0.61      0.49      0.54       903
           7       0.61      0.70      0.65       889
           8       0.60      0.53      0.56       892
           9       0.55      0.73      0.63       876
          10       0.39      0.40      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.55      0.53     14531
weighted avg       0.47      0.50      0.49     14531

Epoch 3, Step 12700, Loss: 0.605501115322113, F1: 0.5303812884128055, Accuracy: 0.5035441469960773, Time Elapsed: 2187.2541530132294 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.30      0.41       912
           1       0.60      0.88      0.72       885
           2       0.59      0.58      0.59       877
           3       0.00      0.00      0.00       897
           4       0.58      0.64      0.61       892
           5       0.56      0.28      0.37       862
           6       0.63      0.33      0.43       903
           7       0.61      0.45      0.52       889
           8       0.60      0.35      0.44       892
           9       0.51      0.84      0.63       876
          10       0.39      0.51      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.47      0.47     14531
weighted avg       0.48      0.48      0.46     14531

Epoch 3, Step 12800, Loss: 0.8983965516090393, F1: 0.4690717306076266, Accuracy: 0.4823480834078866, Time Elapsed: 2201.7373859882355 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.60      0.61       912
           1       0.00      0.00      0.00       885
           2       0.58      0.67      0.62       877
           3       0.59      0.60      0.59       897
           4       0.56      0.73      0.64       892
           5       0.51      0.67      0.58       862
           6       0.63      0.61      0.62       903
           7       0.59      0.73      0.65       889
           8       0.55      0.67      0.60       892
           9       0.62      0.20      0.30       876
          10       0.39      0.42      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.54      0.51     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 3, Step 13000, Loss: 1.7297009229660034, F1: 0.5102043234650413, Accuracy: 0.49652467139219597, Time Elapsed: 2231.5391330718994 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.88      0.71       912
           1       0.00      0.00      0.00       885
           2       0.56      0.73      0.63       877
           3       0.60      0.57      0.58       897
           4       0.57      0.57      0.57       892
           5       0.58      0.42      0.49       862
           6       0.64      0.52      0.58       903
           7       0.62      0.27      0.37       889
           8       0.60      0.48      0.53       892
           9       0.59      0.42      0.49       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.49      0.49     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 3, Step 13600, Loss: 0.5496973395347595, F1: 0.4898275989350683, Accuracy: 0.49108801871860164, Time Elapsed: 2323.1641578674316 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.63      0.48      0.55       885
           2       0.59      0.60      0.60       877
           3       0.60      0.42      0.50       897
           4       0.65      0.29      0.40       892
           5       0.53      0.23      0.32       862
           6       0.62      0.50      0.55       903
           7       0.63      0.31      0.41       889
           8       0.55      0.74      0.63       892
           9       0.53      0.76      0.62       876
          10       0.39      0.55      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.44      0.46     14531
weighted avg       0.48      0.48      0.46     14531

Epoch 3, Step 14000, Loss: 0.2803606390953064, F1: 0.45856249449903186, Accuracy: 0.4783566168880325, Time Elapsed: 2383.541666030884 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.84      0.71       912
           1       0.60      0.94      0.73       885
           2       0.57      0.78      0.66       877
           3       0.00      0.00      0.00       897
           4       0.57      0.42      0.48       892
           5       0.57      0.17      0.26       862
           6       0.62      0.60      0.61       903
           7       0.62      0.35      0.45       889
           8       0.60      0.63      0.61       892
           9       0.47      0.86      0.61       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.55      0.50     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 3, Step 14500, Loss: 0.5603703260421753, F1: 0.5027904406337028, Accuracy: 0.501686050512697, Time Elapsed: 2765.172590970993 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.49      0.55       885
           2       0.61      0.57      0.59       877
           3       0.58      0.36      0.45       897
           4       0.57      0.72      0.64       892
           5       0.83      0.01      0.02       862
           6       0.60      0.30      0.40       903
           7       0.59      0.78      0.67       889
           8       0.51      0.75      0.60       892
           9       0.61      0.39      0.48       876
          10       0.39      0.54      0.45      5646

    accuracy                           0.48     14531
   macro avg       0.54      0.45      0.44     14531
weighted avg       0.49      0.48      0.44     14531

Epoch 3, Step 15000, Loss: 1.048850655555725, F1: 0.44044901060219316, Accuracy: 0.47670497556947217, Time Elapsed: 2841.5325739383698 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.81      0.70       912
           1       0.33      0.00      0.00       885
           2       0.58      0.37      0.45       877
           3       0.00      0.00      0.00       897
           4       0.51      0.84      0.63       892
           5       0.54      0.51      0.53       862
           6       0.61      0.71      0.66       903
           7       0.62      0.43      0.51       889
           8       0.59      0.34      0.43       892
           9       0.62      0.45      0.52       876
          10       0.39      0.53      0.45      5646

    accuracy                           0.48     14531
   macro avg       0.49      0.45      0.44     14531
weighted avg       0.46      0.48      0.45     14531

Epoch 3, Step 15400, Loss: 0.6708773970603943, F1: 0.44456062438112975, Accuracy: 0.4797329846534994, Time Elapsed: 2901.4170999526978 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.57      0.92      0.71       912
           1       0.61      0.79      0.69       885
           2       0.60      0.52      0.56       877
           3       0.61      0.35      0.44       897
           4       0.57      0.66      0.61       892
           5       0.50      0.62      0.55       862
           6       0.56      0.85      0.68       903
           7       0.61      0.75      0.67       889
           8       0.00      0.00      0.00       892
           9       0.59      0.53      0.56       876
          10       0.39      0.36      0.38      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.58      0.53     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 3, Step 15600, Loss: 0.5172858834266663, F1: 0.5304261409186307, Accuracy: 0.5083614341752116, Time Elapsed: 2931.5337011814117 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.63      0.34      0.44       885
           2       0.62      0.45      0.52       877
           3       0.60      0.65      0.63       897
           4       0.58      0.29      0.39       892
           5       0.54      0.54      0.54       862
           6       0.61      0.77      0.68       903
           7       0.57      0.85      0.68       889
           8       0.49      0.82      0.62       892
           9       0.49      0.80      0.61       876
          10       0.39      0.40      0.40      5646

    accuracy                           0.49     14531
   macro avg       0.50      0.54      0.50     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 3, Step 17100, Loss: 0.7382040619850159, F1: 0.49990427854040986, Accuracy: 0.4921202945427018, Time Elapsed: 3162.4689099788666 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.77      0.68       885
           2       0.58      0.72      0.64       877
           3       0.59      0.64      0.62       897
           4       0.53      0.80      0.64       892
           5       0.46      0.75      0.57       862
           6       0.60      0.40      0.48       903
           7       0.60      0.53      0.56       889
           8       0.59      0.65      0.62       892
           9       0.55      0.74      0.63       876
          10       0.38      0.34      0.36      5646

    accuracy                           0.50     14531
   macro avg       0.50      0.58      0.53     14531
weighted avg       0.46      0.50      0.47     14531

Epoch 3, Step 17700, Loss: 2.3620033264160156, F1: 0.5260820372248621, Accuracy: 0.49659348978046935, Time Elapsed: 3254.3550000190735 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.62      0.43      0.51       885
           2       0.66      0.18      0.29       877
           3       0.61      0.42      0.50       897
           4       0.58      0.47      0.52       892
           5       0.56      0.30      0.39       862
           6       0.58      0.87      0.70       903
           7       0.60      0.45      0.51       889
           8       0.61      0.44      0.51       892
           9       0.51      0.02      0.05       876
          10       0.39      0.63      0.48      5646

    accuracy                           0.47     14531
   macro avg       0.52      0.38      0.41     14531
weighted avg       0.48      0.47      0.43     14531

Epoch 3, Step 18000, Loss: 1.4610944986343384, F1: 0.4053162641261761, Accuracy: 0.46617576216365014, Time Elapsed: 3302.6424050331116 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.62      0.50      0.56       885
           2       0.45      0.77      0.57       877
           3       0.62      0.51      0.56       897
           4       0.54      0.70      0.61       892
           5       0.52      0.56      0.54       862
           6       0.59      0.82      0.69       903
           7       0.60      0.54      0.57       889
           8       0.60      0.60      0.60       892
           9       0.55      0.11      0.18       876
          10       0.38      0.43      0.41      5646

    accuracy                           0.48     14531
   macro avg       0.50      0.50      0.48     14531
weighted avg       0.46      0.48      0.46     14531

Epoch 3, Step 18100, Loss: 0.5951346755027771, F1: 0.4803938342498613, Accuracy: 0.4811781708072397, Time Elapsed: 3318.62250995636 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.75      0.67       912
           1       0.60      0.90      0.72       885
           2       0.58      0.57      0.58       877
           3       0.59      0.61      0.60       897
           4       0.61      0.35      0.44       892
           5       0.59      0.13      0.21       862
           6       0.62      0.61      0.61       903
           7       0.62      0.29      0.40       889
           8       0.00      0.00      0.00       892
           9       0.49      0.67      0.57       876
          10       0.40      0.50      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.49      0.48     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 3, Step 20100, Loss: 0.9330406785011292, F1: 0.4764121006681774, Accuracy: 0.4926708416488886, Time Elapsed: 6500.071303129196 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.27      0.37       912
           1       0.62      0.67      0.64       885
           2       0.55      0.67      0.60       877
           3       0.60      0.47      0.53       897
           4       0.60      0.63      0.61       892
           5       0.53      0.44      0.48       862
           6       0.63      0.70      0.66       903
           7       0.00      0.00      0.00       889
           8       0.44      0.89      0.59       892
           9       0.71      0.09      0.16       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.48      0.46     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 3, Step 20400, Loss: 0.9301566481590271, F1: 0.4619915006974923, Accuracy: 0.4801458949831395, Time Elapsed: 6564.635300159454 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.72      0.66       885
           2       0.65      0.09      0.15       877
           3       0.56      0.83      0.67       897
           4       0.55      0.82      0.66       892
           5       0.54      0.30      0.39       862
           6       0.63      0.73      0.68       903
           7       0.58      0.83      0.68       889
           8       0.59      0.36      0.45       892
           9       0.53      0.05      0.09       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.48      0.44     14531
weighted avg       0.47      0.48      0.44     14531

Epoch 3, Step 20800, Loss: 1.2304741144180298, F1: 0.4431024358873451, Accuracy: 0.48496318216227374, Time Elapsed: 6651.057363986969 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.53      0.57       912
           1       0.56      0.96      0.71       885
           2       0.57      0.66      0.61       877
           3       0.55      0.73      0.63       897
           4       0.59      0.48      0.53       892
           5       0.53      0.65      0.58       862
           6       0.62      0.51      0.56       903
           7       0.58      0.78      0.67       889
           8       0.00      0.00      0.00       892
           9       0.54      0.79      0.64       876
          10       0.38      0.34      0.36      5646

    accuracy                           0.50     14531
   macro avg       0.50      0.58      0.53     14531
weighted avg       0.46      0.50      0.48     14531

Epoch 3, Step 21300, Loss: 0.523834228515625, F1: 0.5318585393561305, Accuracy: 0.503475328607804, Time Elapsed: 6724.694540977478 seconds
              precision    recall  f1-score   su

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.72      0.66       912
           1       0.00      0.00      0.00       885
           2       0.57      0.66      0.61       877
           3       0.59      0.54      0.56       897
           4       0.60      0.56      0.58       892
           5       0.57      0.37      0.44       862
           6       0.80      0.01      0.02       903
           7       0.58      0.83      0.69       889
           8       0.57      0.74      0.64       892
           9       0.58      0.35      0.43       876
          10       0.39      0.49      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.53      0.48      0.46     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 3, Step 21600, Loss: 1.645923376083374, F1: 0.4605074382026441, Accuracy: 0.4843438166678136, Time Elapsed: 6769.930688858032 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.89      0.72       885
           2       0.63      0.34      0.44       877
           3       0.62      0.43      0.51       897
           4       0.57      0.47      0.51       892
           5       0.57      0.35      0.43       862
           6       0.62      0.48      0.54       903
           7       0.61      0.68      0.64       889
           8       0.60      0.60      0.60       892
           9       0.53      0.08      0.13       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.44      0.45     14531
weighted avg       0.48      0.48      0.46     14531

Epoch 3, Step 22100, Loss: 0.9535820484161377, F1: 0.4528416295276323, Accuracy: 0.4819351730782465, Time Elapsed: 6845.834110021591 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.92      0.73       885
           2       0.60      0.36      0.45       877
           3       0.59      0.50      0.54       897
           4       0.58      0.57      0.58       892
           5       0.58      0.32      0.41       862
           6       0.62      0.69      0.65       903
           7       0.59      0.57      0.58       889
           8       0.60      0.02      0.03       892
           9       0.62      0.36      0.46       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.44      0.45     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 3, Step 23900, Loss: 0.9787629842758179, F1: 0.44551306176840755, Accuracy: 0.48152226274860643, Time Elapsed: 7115.374708890915 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.89      0.72       885
           2       0.59      0.48      0.53       877
           3       0.59      0.73      0.65       897
           4       0.50      0.90      0.65       892
           5       0.52      0.66      0.58       862
           6       0.60      0.70      0.65       903
           7       0.58      0.50      0.54       889
           8       0.58      0.04      0.07       892
           9       0.60      0.61      0.60       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.49     14531
   macro avg       0.50      0.54      0.49     14531
weighted avg       0.46      0.49      0.46     14531

Epoch 3, Step 24000, Loss: 0.20165862143039703, F1: 0.4886201556405083, Accuracy: 0.49335902553162203, Time Elapsed: 7129.814404010773 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.55      0.93      0.69       885
           2       0.59      0.54      0.56       877
           3       0.54      0.76      0.64       897
           4       0.56      0.77      0.65       892
           5       0.54      0.41      0.47       862
           6       0.60      0.66      0.63       903
           7       0.33      0.00      0.00       889
           8       0.61      0.32      0.42       892
           9       0.59      0.48      0.53       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.48      0.49      0.46     14531
weighted avg       0.45      0.48      0.45     14531

Epoch 3, Step 24100, Loss: 0.7528786063194275, F1: 0.45574730347477943, Accuracy: 0.48262335696097997, Time Elapsed: 7144.751101970673 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.82      0.70       912
           1       0.58      0.89      0.70       885
           2       0.62      0.21      0.31       877
           3       0.57      0.76      0.65       897
           4       0.60      0.32      0.41       892
           5       0.53      0.62      0.58       862
           6       0.62      0.39      0.48       903
           7       0.60      0.29      0.39       889
           8       0.57      0.39      0.46       892
           9       0.00      0.00      0.00       876
          10       0.39      0.51      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.47      0.47     14531
weighted avg       0.48      0.49      0.46     14531

Epoch 3, Step 26700, Loss: 1.131869912147522, F1: 0.46703145696028847, Accuracy: 0.48599545798637395, Time Elapsed: 7768.023874998093 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.52      0.95      0.68       912
           1       0.60      0.03      0.06       885
           2       0.59      0.62      0.60       877
           3       0.60      0.75      0.67       897
           4       0.00      0.00      0.00       892
           5       0.57      0.29      0.39       862
           6       0.61      0.81      0.69       903
           7       0.60      0.78      0.67       889
           8       0.59      0.52      0.55       892
           9       0.56      0.79      0.65       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.54      0.49     14531
weighted avg       0.47      0.50      0.46     14531

Epoch 3, Step 28200, Loss: 0.32810312509536743, F1: 0.4877589570631612, Accuracy: 0.49955268047622325, Time Elapsed: 7998.937278985977 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.53      0.95      0.68       912
           1       0.61      0.78      0.68       885
           2       0.00      0.00      0.00       877
           3       0.61      0.51      0.56       897
           4       0.63      0.13      0.22       892
           5       0.50      0.67      0.57       862
           6       0.60      0.23      0.34       903
           7       0.59      0.75      0.66       889
           8       0.60      0.44      0.50       892
           9       0.61      0.44      0.52       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.49      0.47     14531
weighted avg       0.48      0.49      0.46     14531

Epoch 4, Step 2500, Loss: 1.9618251323699951, F1: 0.4696424973358766, Accuracy: 0.4869589154222008, Time Elapsed: 405.0145580768585 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.59      0.73      0.66       885
           2       0.60      0.37      0.46       877
           3       0.66      0.22      0.33       897
           4       0.55      0.75      0.64       892
           5       0.56      0.51      0.54       862
           6       0.59      0.86      0.70       903
           7       0.59      0.79      0.67       889
           8       0.57      0.71      0.63       892
           9       0.56      0.59      0.58       876
          10       0.39      0.42      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.51     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 4, Step 2800, Loss: 0.3835756778717041, F1: 0.5089593307182763, Accuracy: 0.49941504369967654, Time Elapsed: 450.48042607307434 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.85      0.72       912
           1       0.61      0.32      0.42       885
           2       0.71      0.04      0.08       877
           3       0.58      0.74      0.65       897
           4       0.00      0.00      0.00       892
           5       0.55      0.47      0.51       862
           6       0.62      0.63      0.62       903
           7       0.61      0.32      0.42       889
           8       0.56      0.64      0.60       892
           9       0.62      0.54      0.58       876
          10       0.39      0.54      0.45      5646

    accuracy                           0.49     14531
   macro avg       0.53      0.46      0.46     14531
weighted avg       0.49      0.49      0.46     14531

Epoch 4, Step 3400, Loss: 0.5051282048225403, F1: 0.45973986022115376, Accuracy: 0.4888170119055812, Time Elapsed: 1380.722608089447 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.80      0.70       912
           1       0.60      0.81      0.69       885
           2       0.62      0.29      0.40       877
           3       0.61      0.63      0.62       897
           4       0.60      0.43      0.50       892
           5       0.55      0.40      0.46       862
           6       0.62      0.46      0.53       903
           7       0.60      0.64      0.62       889
           8       0.53      0.83      0.65       892
           9       0.00      0.00      0.00       876
          10       0.39      0.46      0.42      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.52      0.51     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 4, Step 3500, Loss: 0.6392044425010681, F1: 0.5066461987561042, Accuracy: 0.5006537746885968, Time Elapsed: 1396.461787223816 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.89      0.71       912
           1       0.61      0.59      0.60       885
           2       0.57      0.59      0.58       877
           3       0.59      0.10      0.17       897
           4       0.58      0.79      0.67       892
           5       0.55      0.48      0.51       862
           6       0.56      0.91      0.70       903
           7       0.60      0.78      0.67       889
           8       0.00      0.00      0.00       892
           9       0.59      0.15      0.24       876
          10       0.39      0.44      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.52      0.48     14531
weighted avg       0.47      0.50      0.46     14531

Epoch 4, Step 3700, Loss: 0.49406880140304565, F1: 0.4783511283145922, Accuracy: 0.49556121395636915, Time Elapsed: 1425.917073249817 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.84      0.70       912
           1       0.59      0.91      0.72       885
           2       0.58      0.57      0.58       877
           3       0.58      0.73      0.64       897
           4       0.58      0.77      0.66       892
           5       0.54      0.59      0.56       862
           6       0.61      0.76      0.68       903
           7       0.59      0.74      0.66       889
           8       0.00      0.00      0.00       892
           9       0.60      0.39      0.48       876
          10       0.39      0.35      0.37      5646

    accuracy                           0.52     14531
   macro avg       0.52      0.60      0.55     14531
weighted avg       0.47      0.52      0.49     14531

Epoch 4, Step 3800, Loss: 1.902572512626648, F1: 0.5493871699920868, Accuracy: 0.5199917417934072, Time Elapsed: 1441.5433101654053 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.40      0.48       885
           2       0.57      0.71      0.63       877
           3       0.60      0.33      0.42       897
           4       1.00      0.00      0.00       892
           5       0.55      0.36      0.43       862
           6       0.61      0.73      0.66       903
           7       0.62      0.23      0.33       889
           8       0.59      0.63      0.61       892
           9       0.59      0.60      0.59       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.47     14531
   macro avg       0.56      0.42      0.42     14531
weighted avg       0.50      0.47      0.44     14531

Epoch 4, Step 4100, Loss: 1.005047082901001, F1: 0.4223827648986119, Accuracy: 0.4714747780606978, Time Elapsed: 1488.5828659534454 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.62      0.32      0.42       885
           2       0.53      0.75      0.62       877
           3       0.62      0.51      0.56       897
           4       0.60      0.41      0.49       892
           5       0.52      0.51      0.51       862
           6       0.59      0.80      0.68       903
           7       0.56      0.88      0.68       889
           8       0.60      0.48      0.54       892
           9       0.59      0.66      0.62       876
          10       0.39      0.43      0.41      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.52      0.50     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 4, Step 4600, Loss: 1.6826964616775513, F1: 0.5036482438149223, Accuracy: 0.49308375197852866, Time Elapsed: 1565.969661951065 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.58      0.87      0.70       885
           2       0.55      0.72      0.62       877
           3       0.60      0.71      0.65       897
           4       0.60      0.48      0.53       892
           5       0.53      0.42      0.46       862
           6       0.62      0.49      0.54       903
           7       0.59      0.45      0.51       889
           8       0.60      0.55      0.57       892
           9       0.59      0.62      0.61       876
          10       0.38      0.44      0.41      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.52      0.51     14531
weighted avg       0.47      0.49      0.48     14531

Epoch 4, Step 4700, Loss: 1.3760157823562622, F1: 0.5092887180092626, Accuracy: 0.49260202326061525, Time Elapsed: 1581.7175641059875 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.53      0.90      0.67       885
           2       0.60      0.45      0.51       877
           3       0.57      0.75      0.65       897
           4       0.57      0.71      0.63       892
           5       0.54      0.17      0.26       862
           6       0.62      0.47      0.53       903
           7       0.59      0.82      0.69       889
           8       0.59      0.39      0.47       892
           9       0.59      0.42      0.49       876
          10       0.39      0.46      0.42      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.50      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 4, Step 4800, Loss: 0.5260170698165894, F1: 0.4837378718699988, Accuracy: 0.4884041015759411, Time Elapsed: 1597.4453308582306 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.57      0.42      0.49       885
           2       0.57      0.60      0.59       877
           3       0.59      0.33      0.43       897
           4       0.56      0.87      0.68       892
           5       0.54      0.24      0.33       862
           6       0.59      0.79      0.68       903
           7       0.59      0.28      0.38       889
           8       0.56      0.68      0.61       892
           9       0.59      0.61      0.60       876
          10       0.38      0.48      0.43      5646

    accuracy                           0.48     14531
   macro avg       0.50      0.48      0.47     14531
weighted avg       0.46      0.48      0.46     14531

Epoch 4, Step 4900, Loss: 0.42066439986228943, F1: 0.4729247818686045, Accuracy: 0.4813846259720597, Time Elapsed: 1613.5920372009277 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.90      0.72       912
           1       0.62      0.74      0.67       885
           2       0.62      0.31      0.41       877
           3       0.59      0.64      0.61       897
           4       0.54      0.79      0.65       892
           5       0.56      0.45      0.50       862
           6       0.00      0.00      0.00       903
           7       0.61      0.64      0.62       889
           8       0.59      0.48      0.53       892
           9       0.62      0.32      0.42       876
          10       0.39      0.45      0.42      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.52      0.50     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 4, Step 7400, Loss: 1.0868268013000488, F1: 0.5044813742018093, Accuracy: 0.4991397701465832, Time Elapsed: 2011.0215039253235 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.57      0.91      0.70       912
           1       0.60      0.46      0.52       885
           2       0.57      0.67      0.62       877
           3       0.60      0.49      0.54       897
           4       0.57      0.37      0.45       892
           5       0.58      0.08      0.14       862
           6       0.62      0.46      0.53       903
           7       0.61      0.56      0.58       889
           8       0.56      0.71      0.63       892
           9       0.00      0.00      0.00       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.47      0.47     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 4, Step 9400, Loss: 0.9717301726341248, F1: 0.4681057115094991, Accuracy: 0.48558254765673386, Time Elapsed: 2496.7974750995636 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.73      0.66       912
           1       0.61      0.51      0.55       885
           2       0.57      0.73      0.64       877
           3       0.58      0.18      0.27       897
           4       0.56      0.81      0.66       892
           5       0.53      0.62      0.57       862
           6       0.60      0.85      0.70       903
           7       0.65      0.27      0.38       889
           8       0.57      0.40      0.47       892
           9       0.00      0.00      0.00       876
          10       0.39      0.46      0.42      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.50      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 4, Step 10500, Loss: 0.5672615170478821, F1: 0.48450359858475095, Accuracy: 0.49053747161241484, Time Elapsed: 2666.4452290534973 seconds
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.18      0.28       912
           1       0.61      0.31      0.41       885
           2       0.57      0.56      0.57       877
           3       0.53      0.80      0.64       897
           4       0.59      0.52      0.56       892
           5       0.57      0.51      0.54       862
           6       0.00      0.00      0.00       903
           7       0.61      0.62      0.61       889
           8       0.55      0.72      0.62       892
           9       0.57      0.71      0.64       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.49      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 4, Step 11000, Loss: 1.7096818685531616, F1: 0.48124482781114336, Accuracy: 0.4850320005505471, Time Elapsed: 2743.5885450839996 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.73      0.68       912
           1       0.61      0.70      0.65       885
           2       0.59      0.58      0.58       877
           3       0.55      0.05      0.08       897
           4       0.56      0.78      0.65       892
           5       0.54      0.42      0.47       862
           6       0.00      0.00      0.00       903
           7       0.65      0.11      0.19       889
           8       0.60      0.51      0.55       892
           9       0.52      0.85      0.65       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.48      0.45     14531
weighted avg       0.47      0.48      0.45     14531

Epoch 4, Step 12800, Loss: 1.1605682373046875, F1: 0.4498296344523755, Accuracy: 0.4832427224554401, Time Elapsed: 3025.8068730831146 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.85      0.71       912
           1       0.00      0.00      0.00       885
           2       0.59      0.60      0.60       877
           3       0.59      0.61      0.60       897
           4       0.57      0.72      0.64       892
           5       0.50      0.70      0.58       862
           6       0.62      0.61      0.61       903
           7       0.61      0.60      0.60       889
           8       0.58      0.68      0.63       892
           9       0.61      0.23      0.33       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.55      0.52     14531
weighted avg       0.47      0.50      0.48     14531

Epoch 4, Step 13000, Loss: 0.7967206835746765, F1: 0.5178143996859259, Accuracy: 0.5010666850182369, Time Elapsed: 3056.109304189682 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.85      0.71       912
           1       0.58      0.85      0.69       885
           2       0.59      0.48      0.53       877
           3       0.60      0.64      0.62       897
           4       0.57      0.63      0.60       892
           5       0.50      0.67      0.57       862
           6       0.00      0.00      0.00       903
           7       0.61      0.62      0.61       889
           8       0.59      0.46      0.52       892
           9       0.60      0.44      0.51       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.55      0.52     14531
weighted avg       0.47      0.50      0.48     14531

Epoch 4, Step 13100, Loss: 0.8481056094169617, F1: 0.5227164651650801, Accuracy: 0.5022365976188837, Time Elapsed: 3071.296555042267 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.92      0.72       912
           1       0.60      0.37      0.46       885
           2       0.60      0.44      0.51       877
           3       0.57      0.54      0.56       897
           4       0.46      0.81      0.59       892
           5       0.53      0.21      0.31       862
           6       0.00      0.00      0.00       903
           7       0.61      0.68      0.64       889
           8       0.51      0.83      0.63       892
           9       0.61      0.28      0.38       876
          10       0.39      0.44      0.41      5646

    accuracy                           0.48     14531
   macro avg       0.50      0.50      0.47     14531
weighted avg       0.46      0.48      0.45     14531

Epoch 4, Step 13300, Loss: 0.27409040927886963, F1: 0.4725293342087611, Accuracy: 0.48110935241896635, Time Elapsed: 3103.1132349967957 seconds
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.51      0.90      0.65       912
           1       0.58      0.48      0.53       885
           2       0.48      0.83      0.61       877
           3       0.56      0.73      0.64       897
           4       0.57      0.32      0.41       892
           5       0.58      0.35      0.43       862
           6       0.00      0.00      0.00       903
           7       0.59      0.65      0.62       889
           8       0.59      0.50      0.54       892
           9       0.51      0.72      0.60       876
          10       0.39      0.38      0.38      5646

    accuracy                           0.48     14531
   macro avg       0.49      0.53      0.49     14531
weighted avg       0.45      0.48      0.46     14531

Epoch 4, Step 13500, Loss: 1.9610693454742432, F1: 0.4911802533496528, Accuracy: 0.4819351730782465, Time Elapsed: 3135.0621361732483 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.55      0.76      0.64       912
           1       0.00      0.00      0.00       885
           2       0.45      0.82      0.58       877
           3       0.61      0.37      0.46       897
           4       0.54      0.41      0.47       892
           5       0.53      0.47      0.50       862
           6       0.00      0.00      0.00       903
           7       0.62      0.26      0.36       889
           8       0.62      0.23      0.34       892
           9       0.58      0.33      0.42       876
          10       0.39      0.58      0.46      5646

    accuracy                           0.45     14531
   macro avg       0.44      0.38      0.38     14531
weighted avg       0.42      0.45      0.41     14531

Epoch 4, Step 13600, Loss: 0.6876226663589478, F1: 0.3843164123611992, Accuracy: 0.44766361571811986, Time Elapsed: 3150.5873510837555 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.86      0.69       912
           1       0.59      0.91      0.72       885
           2       0.60      0.51      0.55       877
           3       0.58      0.54      0.56       897
           4       0.48      0.78      0.59       892
           5       0.55      0.39      0.45       862
           6       0.00      0.00      0.00       903
           7       0.62      0.27      0.37       889
           8       0.60      0.51      0.55       892
           9       0.57      0.29      0.39       876
          10       0.39      0.45      0.41      5646

    accuracy                           0.48     14531
   macro avg       0.50      0.50      0.48     14531
weighted avg       0.47      0.48      0.46     14531

Epoch 4, Step 13700, Loss: 2.937188148498535, F1: 0.4813151063971786, Accuracy: 0.4843438166678136, Time Elapsed: 3165.8513810634613 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.88      0.72       912
           1       0.61      0.27      0.38       885
           2       0.61      0.56      0.58       877
           3       0.00      0.00      0.00       897
           4       0.52      0.92      0.66       892
           5       0.51      0.55      0.53       862
           6       0.63      0.55      0.59       903
           7       0.61      0.66      0.63       889
           8       0.59      0.57      0.58       892
           9       0.61      0.53      0.56       876
          10       0.39      0.43      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.51     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 4, Step 15400, Loss: 0.8876734972000122, F1: 0.5130822004214064, Accuracy: 0.5019613240657904, Time Elapsed: 3738.6622672080994 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.95      0.72       912
           1       0.59      0.92      0.72       885
           2       0.00      0.00      0.00       877
           3       0.66      0.17      0.27       897
           4       0.57      0.71      0.63       892
           5       0.55      0.34      0.42       862
           6       0.61      0.67      0.64       903
           7       0.61      0.70      0.65       889
           8       0.60      0.53      0.57       892
           9       0.59      0.66      0.62       876
          10       0.39      0.42      0.40      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.55      0.51     14531
weighted avg       0.48      0.51      0.48     14531

Epoch 4, Step 16500, Loss: 0.5261918902397156, F1: 0.5125768081221378, Accuracy: 0.5081549790103915, Time Elapsed: 4208.776690006256 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.62      0.88      0.73       885
           2       0.56      0.72      0.63       877
           3       0.63      0.46      0.53       897
           4       0.56      0.75      0.64       892
           5       0.48      0.76      0.59       862
           6       0.62      0.57      0.59       903
           7       0.62      0.52      0.57       889
           8       0.59      0.73      0.65       892
           9       0.60      0.48      0.53       876
          10       0.39      0.38      0.39      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.57      0.53     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 4, Step 17700, Loss: 2.3967323303222656, F1: 0.5318551862722173, Accuracy: 0.5062968825270112, Time Elapsed: 8241.734138250351 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.90      0.71       912
           1       0.00      0.00      0.00       885
           2       0.60      0.55      0.57       877
           3       0.55      0.80      0.65       897
           4       0.59      0.45      0.51       892
           5       0.56      0.39      0.46       862
           6       0.63      0.57      0.60       903
           7       0.60      0.45      0.51       889
           8       0.60      0.51      0.55       892
           9       0.57      0.39      0.46       876
          10       0.40      0.48      0.44      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.50      0.50     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 4, Step 18700, Loss: 1.1279642581939697, F1: 0.4970896482853075, Accuracy: 0.4958364875094625, Time Elapsed: 11288.87950706482 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.64      0.18      0.28       912
           1       0.55      0.95      0.70       885
           2       0.57      0.70      0.63       877
           3       0.53      0.85      0.65       897
           4       0.58      0.42      0.48       892
           5       0.52      0.61      0.57       862
           6       0.62      0.66      0.64       903
           7       0.60      0.79      0.68       889
           8       0.00      0.00      0.00       892
           9       0.60      0.65      0.62       876
          10       0.39      0.38      0.39      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.56      0.51     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 4, Step 21300, Loss: 0.7068644762039185, F1: 0.5125767014657276, Accuracy: 0.5029247815016172, Time Elapsed: 18327.10528612137 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.88      0.72       912
           1       0.59      0.13      0.22       885
           2       0.60      0.45      0.51       877
           3       0.58      0.61      0.60       897
           4       0.00      0.00      0.00       892
           5       0.53      0.37      0.44       862
           6       0.62      0.41      0.49       903
           7       0.61      0.69      0.65       889
           8       0.60      0.49      0.54       892
           9       0.59      0.70      0.64       876
          10       0.39      0.51      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.48      0.48     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 4, Step 23400, Loss: 0.8696884512901306, F1: 0.47682738960466936, Accuracy: 0.48922992223522127, Time Elapsed: 18651.918332099915 seconds
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.93      0.72       912
           1       0.57      0.92      0.70       885
           2       0.62      0.40      0.48       877
           3       0.60      0.70      0.65       897
           4       0.56      0.59      0.57       892
           5       0.54      0.61      0.57       862
           6       0.62      0.58      0.60       903
           7       0.75      0.00      0.01       889
           8       0.00      0.00      0.00       892
           9       0.53      0.84      0.65       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.49     14531
weighted avg       0.48      0.50      0.46     14531

Epoch 4, Step 25200, Loss: 0.829976499080658, F1: 0.48707307662862287, Accuracy: 0.4987268598169431, Time Elapsed: 18930.673279047012 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.84      0.70       912
           1       0.60      0.18      0.28       885
           2       0.60      0.48      0.53       877
           3       0.62      0.44      0.51       897
           4       0.56      0.76      0.65       892
           5       0.50      0.65      0.57       862
           6       0.00      0.00      0.00       903
           7       0.61      0.22      0.32       889
           8       0.54      0.78      0.64       892
           9       0.61      0.12      0.20       876
          10       0.39      0.52      0.44      5646

    accuracy                           0.47     14531
   macro avg       0.51      0.45      0.44     14531
weighted avg       0.47      0.47      0.44     14531

Epoch 4, Step 27000, Loss: 0.776713490486145, F1: 0.4388623367004261, Accuracy: 0.47408987681508497, Time Elapsed: 20881.937746047974 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.94      0.71       912
           1       0.00      0.00      0.00       885
           2       0.57      0.64      0.61       877
           3       0.55      0.80      0.65       897
           4       0.00      0.00      0.00       892
           5       0.56      0.35      0.43       862
           6       0.59      0.86      0.70       903
           7       0.60      0.80      0.69       889
           8       0.60      0.45      0.51       892
           9       0.58      0.78      0.67       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.46      0.55      0.49     14531
weighted avg       0.44      0.50      0.46     14531

Epoch 4, Step 28200, Loss: 0.2920841872692108, F1: 0.4882574771433657, Accuracy: 0.503475328607804, Time Elapsed: 21080.831884145737 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.78      0.68       912
           1       0.62      0.49      0.55       885
           2       0.46      0.82      0.59       877
           3       0.61      0.47      0.53       897
           4       0.59      0.37      0.45       892
           5       0.55      0.27      0.37       862
           6       0.61      0.82      0.70       903
           7       0.61      0.70      0.65       889
           8       0.00      0.00      0.00       892
           9       0.59      0.47      0.52       876
          10       0.39      0.45      0.42      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.51      0.50     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 5, Step 900, Loss: 1.2991607189178467, F1: 0.4966320473304725, Accuracy: 0.49335902553162203, Time Elapsed: 190.31378412246704 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.70      0.66       912
           1       0.61      0.90      0.73       885
           2       0.58      0.46      0.51       877
           3       0.61      0.36      0.45       897
           4       0.00      0.00      0.00       892
           5       0.56      0.20      0.30       862
           6       0.61      0.82      0.70       903
           7       0.59      0.84      0.70       889
           8       0.60      0.45      0.51       892
           9       0.61      0.50      0.55       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.50     14531
   macro avg       0.53      0.52      0.50     14531
weighted avg       0.48      0.50      0.48     14531

Epoch 5, Step 1400, Loss: 0.47996440529823303, F1: 0.5026784663555443, Accuracy: 0.504851696373271, Time Elapsed: 290.39527797698975 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.78      0.69       912
           1       0.57      0.87      0.69       885
           2       0.00      0.00      0.00       877
           3       0.55      0.56      0.55       897
           4       0.57      0.70      0.63       892
           5       0.56      0.56      0.56       862
           6       0.62      0.62      0.62       903
           7       0.60      0.73      0.66       889
           8       0.60      0.54      0.57       892
           9       0.64      0.36      0.46       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.56      0.53     14531
weighted avg       0.48      0.51      0.49     14531

Epoch 5, Step 1800, Loss: 0.46367090940475464, F1: 0.5304848911549975, Accuracy: 0.5105636225999587, Time Elapsed: 382.95721793174744 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.84      0.70       912
           1       0.61      0.63      0.62       885
           2       0.61      0.07      0.12       877
           3       0.59      0.72      0.65       897
           4       0.00      0.00      0.00       892
           5       0.55      0.50      0.52       862
           6       0.62      0.74      0.67       903
           7       0.63      0.34      0.44       889
           8       0.59      0.59      0.59       892
           9       0.64      0.38      0.47       876
          10       0.39      0.51      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.53      0.48      0.48     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 5, Step 3400, Loss: 0.6694774031639099, F1: 0.4754641154322295, Accuracy: 0.49411602780262887, Time Elapsed: 665.9505362510681 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.49      0.55       912
           1       0.60      0.91      0.73       885
           2       0.60      0.43      0.50       877
           3       0.59      0.74      0.66       897
           4       0.58      0.53      0.55       892
           5       0.56      0.56      0.56       862
           6       0.00      0.00      0.00       903
           7       0.59      0.25      0.35       889
           8       0.57      0.55      0.56       892
           9       0.60      0.49      0.54       876
          10       0.39      0.49      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.49      0.49     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 5, Step 4400, Loss: 1.72833251953125, F1: 0.4935534001866721, Accuracy: 0.4923955680957952, Time Elapsed: 821.286327123642 seconds
              precision    recall  f1-score   supp

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.79      0.68       912
           1       0.00      0.00      0.00       885
           2       0.57      0.68      0.62       877
           3       0.59      0.34      0.43       897
           4       0.54      0.91      0.67       892
           5       0.51      0.60      0.55       862
           6       0.55      0.90      0.68       903
           7       0.61      0.70      0.65       889
           8       0.51      0.78      0.62       892
           9       0.60      0.28      0.38       876
          10       0.40      0.35      0.37      5646

    accuracy                           0.50     14531
   macro avg       0.50      0.57      0.51     14531
weighted avg       0.46      0.50      0.47     14531

Epoch 5, Step 6700, Loss: 6.899417877197266, F1: 0.514591062758409, Accuracy: 0.5021677792306104, Time Elapsed: 1395.5634591579437 seconds
              precision    recall  f1-score   su

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.85      0.70       912
           1       0.61      0.54      0.57       885
           2       0.60      0.11      0.19       877
           3       0.60      0.36      0.45       897
           4       0.54      0.86      0.66       892
           5       0.54      0.37      0.44       862
           6       0.00      0.00      0.00       903
           7       0.61      0.68      0.64       889
           8       0.58      0.44      0.50       892
           9       0.64      0.24      0.35       876
          10       0.39      0.53      0.45      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.45      0.45     14531
weighted avg       0.48      0.48      0.45     14531

Epoch 5, Step 7400, Loss: 1.0815258026123047, F1: 0.4514510420358958, Accuracy: 0.4809028972541463, Time Elapsed: 1507.2648618221283 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.62      0.69      0.65       912
           1       0.61      0.88      0.73       885
           2       0.59      0.53      0.56       877
           3       0.60      0.01      0.02       897
           4       0.56      0.89      0.68       892
           5       0.54      0.45      0.49       862
           6       0.60      0.74      0.66       903
           7       0.61      0.64      0.62       889
           8       0.54      0.65      0.59       892
           9       0.00      0.00      0.00       876
          10       0.39      0.43      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.49     14531
weighted avg       0.48      0.50      0.47     14531

Epoch 5, Step 8400, Loss: 0.3225213587284088, F1: 0.49262256977651214, Accuracy: 0.5027871447250706, Time Elapsed: 1664.3945488929749 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.90      0.71       912
           1       0.67      0.30      0.41       885
           2       0.59      0.59      0.59       877
           3       0.59      0.48      0.53       897
           4       0.57      0.50      0.53       892
           5       0.53      0.06      0.10       862
           6       0.62      0.48      0.54       903
           7       0.59      0.72      0.65       889
           8       0.46      0.87      0.60       892
           9       0.00      0.00      0.00       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.49      0.46     14531
weighted avg       0.47      0.49      0.45     14531

Epoch 5, Step 9400, Loss: 0.8768149018287659, F1: 0.4645788055682319, Accuracy: 0.48523845571536717, Time Elapsed: 1822.0077080726624 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.67      0.04      0.07       885
           2       0.60      0.44      0.51       877
           3       0.60      0.27      0.37       897
           4       0.61      0.40      0.49       892
           5       0.58      0.45      0.51       862
           6       0.63      0.50      0.56       903
           7       0.60      0.50      0.54       889
           8       0.56      0.66      0.61       892
           9       0.58      0.74      0.65       876
          10       0.39      0.59      0.47      5646

    accuracy                           0.47     14531
   macro avg       0.53      0.42      0.43     14531
weighted avg       0.48      0.47      0.44     14531

Epoch 5, Step 14000, Loss: 0.3447560667991638, F1: 0.4333470722017944, Accuracy: 0.4724382354965247, Time Elapsed: 8692.845880031586 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.46      0.52       885
           2       0.60      0.47      0.53       877
           3       0.61      0.48      0.54       897
           4       0.59      0.69      0.64       892
           5       0.55      0.11      0.19       862
           6       0.56      0.03      0.05       903
           7       0.61      0.75      0.67       889
           8       0.55      0.75      0.63       892
           9       0.61      0.64      0.62       876
          10       0.39      0.55      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.51      0.45      0.44     14531
weighted avg       0.47      0.48      0.45     14531

Epoch 5, Step 15000, Loss: 1.1898478269577026, F1: 0.4405526815836472, Accuracy: 0.4811781708072397, Time Elapsed: 8853.676421165466 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.64      0.23      0.34       885
           2       0.58      0.61      0.60       877
           3       0.59      0.32      0.41       897
           4       0.58      0.67      0.62       892
           5       0.57      0.51      0.54       862
           6       0.61      0.68      0.65       903
           7       0.60      0.68      0.64       889
           8       0.59      0.56      0.58       892
           9       0.61      0.35      0.44       876
          10       0.39      0.53      0.45      5646

    accuracy                           0.49     14531
   macro avg       0.53      0.47      0.48     14531
weighted avg       0.48      0.49      0.47     14531

Epoch 5, Step 15100, Loss: 0.9679230451583862, F1: 0.478654747208609, Accuracy: 0.4864771867042874, Time Elapsed: 8868.778615951538 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.83      0.69       885
           2       0.58      0.70      0.63       877
           3       0.58      0.24      0.34       897
           4       0.57      0.84      0.68       892
           5       0.54      0.29      0.38       862
           6       0.61      0.74      0.67       903
           7       0.60      0.59      0.60       889
           8       0.60      0.54      0.57       892
           9       0.53      0.88      0.66       876
          10       0.38      0.40      0.39      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.55      0.51     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 5, Step 15200, Loss: 0.6519365906715393, F1: 0.5101418270246865, Accuracy: 0.49907095175830984, Time Elapsed: 8884.49589920044 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.69      0.65       885
           2       0.60      0.51      0.55       877
           3       0.55      0.13      0.21       897
           4       0.57      0.75      0.65       892
           5       0.52      0.55      0.53       862
           6       0.60      0.79      0.68       903
           7       0.63      0.41      0.50       889
           8       0.61      0.24      0.34       892
           9       0.59      0.77      0.67       876
          10       0.39      0.50      0.44      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.48      0.47     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 5, Step 15300, Loss: 0.519737184047699, F1: 0.4740538777591546, Accuracy: 0.4873718257518409, Time Elapsed: 8900.460785150528 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.95      0.72       912
           1       0.00      0.00      0.00       885
           2       0.57      0.67      0.61       877
           3       0.58      0.69      0.63       897
           4       0.58      0.70      0.63       892
           5       0.52      0.09      0.16       862
           6       0.63      0.60      0.61       903
           7       0.62      0.37      0.47       889
           8       0.58      0.28      0.38       892
           9       0.58      0.62      0.60       876
          10       0.39      0.48      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.50      0.48     14531
weighted avg       0.47      0.49      0.46     14531

Epoch 5, Step 16100, Loss: 2.014955997467041, F1: 0.47598527150629794, Accuracy: 0.4908127451655082, Time Elapsed: 9023.599661111832 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.59      0.93      0.72       885
           2       0.60      0.44      0.51       877
           3       0.59      0.59      0.59       897
           4       0.58      0.47      0.52       892
           5       0.49      0.80      0.61       862
           6       0.54      0.89      0.67       903
           7       0.60      0.71      0.65       889
           8       0.52      0.81      0.63       892
           9       0.48      0.86      0.62       876
          10       0.38      0.27      0.31      5646

    accuracy                           0.50     14531
   macro avg       0.49      0.61      0.53     14531
weighted avg       0.45      0.50      0.46     14531

Epoch 5, Step 17100, Loss: 0.7696090936660767, F1: 0.5297656843983867, Accuracy: 0.4991397701465832, Time Elapsed: 9175.815976142883 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.91      0.73       885
           2       0.55      0.81      0.65       877
           3       0.61      0.59      0.60       897
           4       0.56      0.78      0.65       892
           5       0.50      0.74      0.60       862
           6       0.61      0.67      0.64       903
           7       0.60      0.56      0.58       889
           8       0.56      0.73      0.63       892
           9       0.60      0.43      0.50       876
          10       0.39      0.34      0.36      5646

    accuracy                           0.51     14531
   macro avg       0.51      0.60      0.54     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 5, Step 17700, Loss: 2.5088906288146973, F1: 0.5405623042683901, Accuracy: 0.5100818938820453, Time Elapsed: 9269.217948198318 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.60      0.86      0.71       912
           1       0.00      0.00      0.00       885
           2       0.60      0.42      0.49       877
           3       0.57      0.25      0.35       897
           4       0.56      0.80      0.66       892
           5       0.59      0.33      0.43       862
           6       0.59      0.75      0.66       903
           7       0.55      0.86      0.67       889
           8       0.54      0.39      0.45       892
           9       0.56      0.73      0.63       876
          10       0.39      0.42      0.40      5646

    accuracy                           0.49     14531
   macro avg       0.50      0.53      0.50     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 5, Step 18600, Loss: 2.019392251968384, F1: 0.4959330245855872, Accuracy: 0.4943224829674489, Time Elapsed: 9558.344501972198 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.81      0.70       912
           1       0.00      0.00      0.00       885
           2       0.61      0.31      0.41       877
           3       0.56      0.42      0.48       897
           4       0.58      0.70      0.63       892
           5       0.57      0.32      0.41       862
           6       0.61      0.54      0.57       903
           7       0.57      0.81      0.67       889
           8       0.56      0.32      0.41       892
           9       0.57      0.65      0.61       876
          10       0.39      0.49      0.43      5646

    accuracy                           0.49     14531
   macro avg       0.51      0.49      0.48     14531
weighted avg       0.47      0.49      0.47     14531

Epoch 5, Step 18700, Loss: 0.5769649744033813, F1: 0.4843549453191533, Accuracy: 0.48950519578831464, Time Elapsed: 9574.183101177216 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.16      0.26       885
           2       0.59      0.64      0.61       877
           3       0.57      0.25      0.34       897
           4       0.58      0.84      0.69       892
           5       0.54      0.13      0.22       862
           6       0.61      0.67      0.64       903
           7       0.58      0.64      0.61       889
           8       0.61      0.33      0.43       892
           9       0.60      0.44      0.51       876
          10       0.39      0.57      0.46      5646

    accuracy                           0.47     14531
   macro avg       0.52      0.43      0.43     14531
weighted avg       0.47      0.47      0.44     14531

Epoch 5, Step 19200, Loss: 0.20762912929058075, F1: 0.433611571199636, Accuracy: 0.4743651503681784, Time Elapsed: 9654.703468084335 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.81      0.69       885
           2       0.62      0.25      0.35       877
           3       0.60      0.70      0.65       897
           4       0.57      0.88      0.69       892
           5       0.56      0.36      0.44       862
           6       0.61      0.76      0.68       903
           7       0.59      0.84      0.69       889
           8       0.58      0.38      0.46       892
           9       0.59      0.09      0.16       876
          10       0.39      0.47      0.43      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.50      0.48     14531
weighted avg       0.48      0.50      0.46     14531

Epoch 5, Step 20800, Loss: 1.2614498138427734, F1: 0.4761422544306692, Accuracy: 0.49501066685018236, Time Elapsed: 9908.832289934158 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.81      0.69       885
           2       0.61      0.55      0.58       877
           3       0.60      0.56      0.58       897
           4       0.59      0.37      0.45       892
           5       0.56      0.61      0.59       862
           6       0.61      0.75      0.68       903
           7       0.60      0.55      0.57       889
           8       0.59      0.62      0.60       892
           9       0.57      0.78      0.66       876
          10       0.39      0.42      0.41      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.55      0.53     14531
weighted avg       0.48      0.51      0.49     14531

Epoch 5, Step 21400, Loss: 0.8342468738555908, F1: 0.52816101884389, Accuracy: 0.5064345193035579, Time Elapsed: 10004.246562957764 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.61      0.75      0.67       885
           2       0.63      0.25      0.35       877
           3       0.60      0.59      0.59       897
           4       0.54      0.80      0.64       892
           5       0.56      0.56      0.56       862
           6       0.61      0.67      0.64       903
           7       0.60      0.82      0.69       889
           8       0.57      0.70      0.63       892
           9       0.59      0.34      0.43       876
          10       0.39      0.43      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.51     14531
weighted avg       0.47      0.50      0.48     14531

Epoch 5, Step 21500, Loss: 0.8159475922584534, F1: 0.5105444224718488, Accuracy: 0.4994838620879499, Time Elapsed: 10020.263107061386 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.58      0.40      0.48       885
           2       0.59      0.41      0.49       877
           3       0.61      0.45      0.52       897
           4       0.56      0.39      0.46       892
           5       0.54      0.60      0.57       862
           6       0.61      0.52      0.56       903
           7       0.60      0.76      0.67       889
           8       0.58      0.76      0.66       892
           9       0.67      0.00      0.00       876
          10       0.38      0.54      0.45      5646

    accuracy                           0.47     14531
   macro avg       0.52      0.44      0.44     14531
weighted avg       0.48      0.47      0.44     14531

Epoch 5, Step 22100, Loss: 0.9603506922721863, F1: 0.44164455609914205, Accuracy: 0.4743651503681784, Time Elapsed: 10185.635381937027 seconds
              precision    recall  f1-score 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.59      0.89      0.71       912
           1       0.59      0.94      0.73       885
           2       0.00      0.00      0.00       877
           3       0.61      0.27      0.38       897
           4       0.58      0.71      0.64       892
           5       0.57      0.35      0.44       862
           6       0.59      0.85      0.70       903
           7       0.62      0.36      0.46       889
           8       0.58      0.31      0.40       892
           9       0.58      0.78      0.66       876
          10       0.39      0.43      0.41      5646

    accuracy                           0.50     14531
   macro avg       0.52      0.54      0.50     14531
weighted avg       0.48      0.50      0.47     14531

Epoch 5, Step 23600, Loss: 5.432952404022217, F1: 0.5015000804698994, Accuracy: 0.501686050512697, Time Elapsed: 12717.574142217636 seconds
              precision    recall  f1-score   s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.78      0.68       885
           2       0.57      0.36      0.44       877
           3       0.59      0.30      0.39       897
           4       0.57      0.62      0.59       892
           5       0.53      0.20      0.30       862
           6       0.62      0.66      0.64       903
           7       0.61      0.52      0.56       889
           8       0.60      0.42      0.49       892
           9       0.62      0.40      0.48       876
          10       0.39      0.56      0.46      5646

    accuracy                           0.48     14531
   macro avg       0.52      0.44      0.46     14531
weighted avg       0.47      0.48      0.46     14531

Epoch 5, Step 23900, Loss: 1.994378924369812, F1: 0.4575584993969311, Accuracy: 0.47856307205285253, Time Elapsed: 13765.972583055496 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.86      0.71       885
           2       0.60      0.41      0.49       877
           3       0.59      0.78      0.67       897
           4       0.53      0.88      0.66       892
           5       0.52      0.72      0.61       862
           6       0.61      0.75      0.67       903
           7       0.61      0.56      0.58       889
           8       0.55      0.10      0.17       892
           9       0.60      0.71      0.65       876
          10       0.39      0.39      0.39      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.56      0.51     14531
weighted avg       0.47      0.50      0.47     14531

Epoch 5, Step 24000, Loss: 0.149493008852005, F1: 0.5089439126787878, Accuracy: 0.5036817837726241, Time Elapsed: 13786.780714035034 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.57      0.98      0.72       885
           2       0.60      0.52      0.56       877
           3       0.58      0.74      0.65       897
           4       0.58      0.84      0.68       892
           5       0.56      0.51      0.54       862
           6       0.59      0.86      0.70       903
           7       0.62      0.21      0.31       889
           8       0.60      0.30      0.40       892
           9       0.59      0.76      0.66       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.56      0.51     14531
weighted avg       0.48      0.51      0.47     14531

Epoch 5, Step 24100, Loss: 2.9777698516845703, F1: 0.5113873009944817, Accuracy: 0.5069850664097447, Time Elapsed: 13805.816635131836 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.63      0.84      0.72       912
           1       0.00      0.00      0.00       885
           2       0.59      0.45      0.51       877
           3       0.61      0.34      0.44       897
           4       0.59      0.68      0.63       892
           5       0.55      0.56      0.55       862
           6       0.60      0.82      0.69       903
           7       0.64      0.35      0.45       889
           8       0.55      0.72      0.63       892
           9       0.57      0.05      0.09       876
          10       0.40      0.51      0.45      5646

    accuracy                           0.49     14531
   macro avg       0.52      0.48      0.47     14531
weighted avg       0.48      0.49      0.46     14531

Epoch 5, Step 24400, Loss: 1.8231043815612793, F1: 0.46958597280053477, Accuracy: 0.4944601197439956, Time Elapsed: 15772.447652101517 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.58      0.93      0.71       912
           1       0.59      0.95      0.73       885
           2       0.59      0.63      0.61       877
           3       0.61      0.45      0.52       897
           4       0.56      0.85      0.68       892
           5       0.57      0.49      0.53       862
           6       0.61      0.82      0.70       903
           7       0.59      0.70      0.64       889
           8       0.58      0.58      0.58       892
           9       0.00      0.00      0.00       876
          10       0.39      0.33      0.36      5646

    accuracy                           0.52     14531
   macro avg       0.51      0.61      0.55     14531
weighted avg       0.47      0.52      0.49     14531

Epoch 5, Step 24500, Loss: 0.7092469930648804, F1: 0.5496940848718119, Accuracy: 0.5212304727823275, Time Elapsed: 15788.359540939331 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.57      0.97      0.72       885
           2       0.58      0.73      0.64       877
           3       0.60      0.66      0.63       897
           4       0.60      0.45      0.52       892
           5       0.55      0.40      0.47       862
           6       0.63      0.62      0.63       903
           7       0.57      0.73      0.64       889
           8       0.61      0.42      0.49       892
           9       0.58      0.71      0.64       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.51     14531
   macro avg       0.52      0.55      0.52     14531
weighted avg       0.47      0.51      0.48     14531

Epoch 5, Step 26000, Loss: 0.4485699534416199, F1: 0.5243253405153473, Accuracy: 0.505058151538091, Time Elapsed: 21130.747053146362 seconds
              precision    recall  f1-score   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       912
           1       0.60      0.91      0.72       885
           2       0.44      0.89      0.59       877
           3       0.58      0.75      0.66       897
           4       0.60      0.42      0.50       892
           5       0.54      0.40      0.46       862
           6       0.60      0.82      0.69       903
           7       0.57      0.85      0.68       889
           8       0.57      0.62      0.60       892
           9       0.60      0.56      0.58       876
          10       0.39      0.32      0.35      5646

    accuracy                           0.50     14531
   macro avg       0.50      0.59      0.53     14531
weighted avg       0.46      0.50      0.47     14531

Epoch 5, Step 26200, Loss: 1.2926077842712402, F1: 0.5298624927689641, Accuracy: 0.5041635124905375, Time Elapsed: 21170.266030073166 seconds
              precision    recall  f1-score  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.61      0.37      0.46       912
           1       0.61      0.85      0.71       885
           2       0.57      0.66      0.61       877
           3       0.00      0.00      0.00       897
           4       0.57      0.79      0.67       892
           5       0.51      0.18      0.26       862
           6       0.60      0.76      0.67       903
           7       0.60      0.82      0.69       889
           8       0.57      0.60      0.58       892
           9       0.58      0.59      0.59       876
          10       0.39      0.41      0.40      5646

    accuracy                           0.50     14531
   macro avg       0.51      0.55      0.51     14531
weighted avg       0.47      0.50      0.48     14531

Epoch 5, Step 27900, Loss: 0.4729008376598358, F1: 0.5131892583172074, Accuracy: 0.5039570573257174, Time Elapsed: 21428.494715213776 seconds
              precision    recall  f1-score  