In [1]:
import os
import random
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


from torch.utils.data import DataLoader, TensorDataset

from Hyperparameters.Embeddings.BertTokenEmbedder import BertTokenEmbedder
from Hyperparameters.Dataloader.EmbeddingDataset import EmbeddingDataset
from Hyperparameters.Dataloader.collate_fn import collate_fn
from Hyperparameters.Models.BertPreTrainedClassifier import BertPreTrainedClassifier
from Hyperparameters.Training.ActiveLearningLoop import active_learning_loop
from Hyperparameters.Training.ActiveLearningLoop import query_entropy
from Hyperparameters.Models.CustomLoss import OrdinalLoss

from Hyperparameters.Utils.Misc import get_device



Registering Model: SimpleModel (enforce_clean=False)
Git Info:
  User      : bbalfou <b_balfou@yahoo.com>
  Commit    : ba7bc29082098d103142682ee5eecebd634748d7
  Branch    : bruce
  File link : https://github.com/bennellis/CIL_Sentiment_analysis/blob/ba7bc29082098d103142682ee5eecebd634748d7/Hyperparameters\Models\ModelDummy.py


In [2]:

# Experiment configuration: every tunable value used by the cells below.

seed = 42

# Seed every RNG the notebook imports so that a Restart & Run All reproduces
# the same run. Without this, `seed` only reached train_test_split and the
# training / active-learning steps were left unseeded.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

## Model Parameters

model_name = "FacebookAI/roberta-base"
csv_path = "data/Sentiment/training.csv"
lr = 1e-5
class_order = [0, 1, 2]
# Learning rates passed to the classifier as pt_lr_top / pt_lr_mid / pt_lr_bot.
lr_top = 1e-5
lr_mid = 5e-6
lr_bot = 1e-6
# NOTE(review): `dropout` is not forwarded to any call in the visible cells --
# confirm whether the classifier should receive it.
dropout = 0.4

## Loss Parameters

temperature = 1.0
ce_weight = 0.2
margin = 0.0
use_cdw_ce = True
use_mae = False

# Previous configuration, kept for provenance only (same model_name, csv_path,
# seed, class_order and dropout):
#   lr = 1e-3, lr_top = 5e-5, lr_mid = 3e-5, lr_bot = 2e-5,
#   temperature = 0.5, ce_weight = 0.1

In [3]:
# Load the labelled sentences; the first CSV column is used as the index.
df = pd.read_csv(csv_path, index_col=0)

# Encode the sentiment strings as ordered integers.
# NOTE(review): labels are encoded as -1/0/1 while class_order in the config
# cell is [0, 1, 2] -- confirm that downstream code reconciles the two.
label_map = {'negative': -1, 'neutral': 0, 'positive': 1}
df['label_encoded'] = df['label'].map(label_map)

# Series.map yields NaN for any value that is not a key of label_map. Fail
# loudly here instead of letting NaN labels flow into the split and training.
unmapped_rows = df['label_encoded'].isna()
if unmapped_rows.any():
    unexpected_labels = sorted(df.loc[unmapped_rows, 'label'].astype(str).unique())
    raise ValueError(
        f"{int(unmapped_rows.sum())} rows in {csv_path!r} have labels outside "
        f"{sorted(label_map)}: {unexpected_labels}"
    )

In [4]:

# Stratified hold-out split of sentences and encoded labels (10% validation),
# made deterministic through `seed`.
# NOTE(review): none of the four outputs is used by the cells visible here --
# confirm they are still needed.
split_parts = train_test_split(
    df['sentence'],
    df['label_encoded'],
    test_size=0.1,
    random_state=seed,
    stratify=df['label_encoded'],
)
train_texts, val_texts, train_labels, val_labels = split_parts

In [5]:
# Encoded labels for the full dataset as a NumPy array.
labels = df['label_encoded'].to_numpy()

# Run the embedder's fit_transform over every sentence in the dataset.
embedder = BertTokenEmbedder(model_name)
all_sentences = df['sentence'].to_list()
features = embedder.fit_transform(all_sentences)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing batches:   0%|          | 0/3191 [00:00<?, ?it/s]

In [6]:


# Build the embedded dataset once and reuse it from an on-disk cache afterwards.

# Only the variable-length path is implemented; stop with an actionable message
# instead of a placeholder error.
if not embedder.is_variable_length:
    raise NotImplementedError(
        f"{type(embedder).__name__}.is_variable_length is False, but this notebook "
        "only implements the variable-length embedding path."
    )

feature_dataset = EmbeddingDataset(features, labels)

# The cache file is keyed by model name only.
# NOTE(review): changing the CSV or the label encoding does not invalidate it --
# delete the file manually when the data changes.
cache_name = model_name.replace("/", "_")
cache_path = "cache/" + cache_name
# There is no separator between the model name and "emb_dataset.pt"; the
# filename is kept as-is so caches written by earlier runs are still found.
emb_dataset_path = cache_path + "emb_dataset.pt"

if os.path.exists(emb_dataset_path):
    # weights_only=False unpickles arbitrary Python objects: only load cache
    # files that this notebook produced itself.
    embedded_feature_dataset = torch.load(emb_dataset_path, weights_only=False)
else:
    feature_dataloader = DataLoader(feature_dataset, batch_size=8, collate_fn=collate_fn)
    embedded_feature_dataset = embedder.embed_dataset(feature_dataloader)
    # Create whichever directory the cache file actually lives in.
    os.makedirs(os.path.dirname(emb_dataset_path), exist_ok=True)
    torch.save(embedded_feature_dataset, emb_dataset_path)

In [7]:
# Collect the classifier settings first so they can be read at a glance,
# then build the model from them.
# NOTE(review): `dropout` from the config cell is not among these arguments.
classifier_kwargs = dict(
    model_name=model_name,
    lr=lr,
    pt_lr_bot=lr_bot,
    pt_lr_mid=lr_mid,
    pt_lr_top=lr_top,
    class_order=class_order,
    ce_weight=ce_weight,
    temperature=temperature,
    frozen=True,
    custom_ll=True,
)
model = BertPreTrainedClassifier(**classifier_kwargs)

Some weights of RobertaModel were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:

# Build the ordinal loss from the loss parameters in the config cell and
# attach it to the model.
loss_kwargs = dict(
    temperature=temperature,
    ce_weight=ce_weight,
    use_mae=use_mae,
    use_cdw_ce=use_cdw_ce,
    margin=margin,
)
criterion = OrdinalLoss(**loss_kwargs)
model.set_criterion(criterion=criterion)

[INFO] Using device: NVIDIA GeForce RTX 3080


In [9]:

# Run the active-learning loop with entropy-based querying on the cached
# embedded dataset.
# NOTE(review): the recorded output of this cell ends in a KeyboardInterrupt,
# i.e. the run was stopped manually before finishing.
# NOTE(review): the recorded training bars show 63 batches for 1000 labelled
# samples (~16 per batch) even though batch_size=32 -- confirm how the loop
# applies batch_size.
device = get_device()
loop_settings = dict(
    max_rounds=1000,
    query_batch_size=1000,
    train_epochs_per_round=3,
    initial_label_count=1000,
    val_split=0.2,
    batch_size=32,
)
active_learning_loop(
    model,
    device,
    embedded_feature_dataset,
    query_entropy,
    **loop_settings,
)

[INFO] Using device: NVIDIA GeForce RTX 3080
1000, 80677, 1000


Training:   0%|          | 0/63 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/63 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/63 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/631 [00:00<?, ?batch/s]

Training:   0%|          | 0/125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/623 [00:00<?, ?batch/s]

Training:   0%|          | 0/188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/615 [00:00<?, ?batch/s]

Training:   0%|          | 0/250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/607 [00:00<?, ?batch/s]

Training:   0%|          | 0/313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/600 [00:00<?, ?batch/s]

Training:   0%|          | 0/375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/592 [00:00<?, ?batch/s]

Training:   0%|          | 0/438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/584 [00:00<?, ?batch/s]

Training:   0%|          | 0/500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/576 [00:00<?, ?batch/s]

Training:   0%|          | 0/563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/568 [00:00<?, ?batch/s]

Training:   0%|          | 0/625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/560 [00:00<?, ?batch/s]

Training:   0%|          | 0/688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/553 [00:00<?, ?batch/s]

Training:   0%|          | 0/750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/545 [00:00<?, ?batch/s]

Training:   0%|          | 0/813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/537 [00:00<?, ?batch/s]

Training:   0%|          | 0/875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/529 [00:00<?, ?batch/s]

Training:   0%|          | 0/938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/521 [00:00<?, ?batch/s]

Training:   0%|          | 0/1000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/514 [00:00<?, ?batch/s]

Training:   0%|          | 0/1063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/506 [00:00<?, ?batch/s]

Training:   0%|          | 0/1125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/498 [00:00<?, ?batch/s]

Training:   0%|          | 0/1188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/490 [00:00<?, ?batch/s]

Training:   0%|          | 0/1250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/482 [00:00<?, ?batch/s]

Training:   0%|          | 0/1313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/475 [00:00<?, ?batch/s]

Training:   0%|          | 0/1375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/467 [00:00<?, ?batch/s]

Training:   0%|          | 0/1438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/459 [00:00<?, ?batch/s]

Training:   0%|          | 0/1500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/451 [00:00<?, ?batch/s]

Training:   0%|          | 0/1563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/443 [00:00<?, ?batch/s]

Training:   0%|          | 0/1625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/435 [00:00<?, ?batch/s]

Training:   0%|          | 0/1688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/428 [00:00<?, ?batch/s]

Training:   0%|          | 0/1750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/420 [00:00<?, ?batch/s]

Training:   0%|          | 0/1813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/412 [00:00<?, ?batch/s]

Training:   0%|          | 0/1875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/404 [00:00<?, ?batch/s]

Training:   0%|          | 0/1938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/1938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/396 [00:00<?, ?batch/s]

Training:   0%|          | 0/2000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/389 [00:00<?, ?batch/s]

Training:   0%|          | 0/2063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/381 [00:00<?, ?batch/s]

Training:   0%|          | 0/2125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/373 [00:00<?, ?batch/s]

Training:   0%|          | 0/2188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/365 [00:00<?, ?batch/s]

Training:   0%|          | 0/2250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/357 [00:00<?, ?batch/s]

Training:   0%|          | 0/2313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/350 [00:00<?, ?batch/s]

Training:   0%|          | 0/2375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2375 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/342 [00:00<?, ?batch/s]

Training:   0%|          | 0/2438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2438 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/334 [00:00<?, ?batch/s]

Training:   0%|          | 0/2500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2500 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/326 [00:00<?, ?batch/s]

Training:   0%|          | 0/2563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2563 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/318 [00:00<?, ?batch/s]

Training:   0%|          | 0/2625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2625 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/310 [00:00<?, ?batch/s]

Training:   0%|          | 0/2688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2688 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/303 [00:00<?, ?batch/s]

Training:   0%|          | 0/2750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2750 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/295 [00:00<?, ?batch/s]

Training:   0%|          | 0/2813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2813 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/287 [00:00<?, ?batch/s]

Training:   0%|          | 0/2875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2875 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/279 [00:00<?, ?batch/s]

Training:   0%|          | 0/2938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/2938 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/271 [00:00<?, ?batch/s]

Training:   0%|          | 0/3000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3000 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/264 [00:00<?, ?batch/s]

Training:   0%|          | 0/3063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3063 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/256 [00:00<?, ?batch/s]

Training:   0%|          | 0/3125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3125 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/248 [00:00<?, ?batch/s]

Training:   0%|          | 0/3188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3188 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/240 [00:00<?, ?batch/s]

Training:   0%|          | 0/3250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3250 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Getting Logits:   0%|          | 0/232 [00:00<?, ?batch/s]

Training:   0%|          | 0/3313 [00:00<?, ?batch/s]

Evaluating:   0%|          | 0/1277 [00:00<?, ?batch/s]

Training:   0%|          | 0/3313 [00:00<?, ?batch/s]

KeyboardInterrupt: 