#### Imports


In [1]:
import pandas as pd
import torch
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from datasets import Dataset
from huggingface_hub import login
import logging
from transformers import (
    TrainingArguments,
    Trainer,
    AutoModelForSequenceClassification,
    # tokenizers
    AutoTokenizer,
    DebertaV2Tokenizer,
    DistilBertTokenizer,
    BertTokenizer,
    RobertaTokenizer,
    ElectraTokenizer,
    AlbertTokenizer,
    XLNetTokenizer,
    MobileBertTokenizer,
    # models
    DebertaV2ForSequenceClassification,
    DistilBertForSequenceClassification,
    BertForSequenceClassification,
    RobertaForSequenceClassification,
    ElectraForSequenceClassification,
    AlbertForSequenceClassification,
    XLNetForSequenceClassification,
    MobileBertForSequenceClassification,
)
from torch.nn import CrossEntropyLoss
# evaluation metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from collections import Counter

import transformers
print(transformers.__version__)
print(transformers.TrainingArguments)

# Cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

4.53.2
<class 'transformers.training_args.TrainingArguments'>


In [2]:
logging.basicConfig(filename='classification_nvidia.log', level=logging.INFO)
logging.info(f"Running on device: {device}")

In [3]:
# setting huggingface token
login(token=os.getenv("HUGGINGFACE_TOKEN"))

os.environ["HF_HOME"] = "huggingface_cache" 
os.environ["TRANSFORMERS_CACHE"] = "huggingface_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "huggingface_cache"

print("HF_HOME:", os.getenv("HF_HOME"))
print("TRANSFORMERS_CACHE:", os.getenv("TRANSFORMERS_CACHE"))
print("HUGGINGFACE_HUB_CACHE:", os.getenv("HUGGINGFACE_HUB_CACHE"))

logging.info(f"HF_HOME: {os.getenv('HF_HOME')}")
logging.info(f"TRANSFORMERS_CACHE: {os.getenv('TRANSFORMERS_CACHE')}")
logging.info(f"HUGGINGFACE_HUB_CACHE: {os.getenv('HUGGINGFACE_HUB_CACHE')}")

transformers.utils.hub.TRANSFORMERS_CACHE = "huggingface_cache"

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

HF_HOME: huggingface_cache
TRANSFORMERS_CACHE: huggingface_cache
HUGGINGFACE_HUB_CACHE: huggingface_cache


### LOADING SQLITE DB WITH RECORDS



In [4]:
import sqlite3
import json
import pandas as pd

DB_FILE = "chunks.db"
OUTPUT_FILE = "exported_chunks.jsonl"

# Connect to the database
conn = sqlite3.connect(DB_FILE)
cur = conn.cursor()

# Query all data from chunks table
cur.execute("SELECT text, label FROM chunks")
rows = cur.fetchall()

# Write to JSONL
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    for text, label in rows:
        obj = {"text": text}
        if label is not None:
            obj["label"] = label
        f.write(json.dumps(obj) + "\n")

conn.close()

print("Data exported to JSONL file.")


Data exported to JSONL file.


In [5]:
# Load the labeled chunks
with open("exported_chunks.jsonl", "r", encoding="utf-8") as f:
    labeled_chunks = [json.loads(line) for line in f]

data = pd.DataFrame(labeled_chunks)
labeled_count = data['label'].value_counts().to_dict()

# Get the first 9000 rows
data = data.head(9000)

# Remove rows with label == 11
data = data[data['label'] != 11]

# Print labeled count after removing label 11
labeled_count = data['label'].value_counts().to_dict()
print(f"Labeled chunks after removing label 11: {labeled_count}")

# Remove rows where label == 1 and text length < 100
data = data[~((data['label'] == 1) & (data['text'].str.len() < 100))]

# Print final labeled count
labeled_count = data['label'].value_counts().to_dict()
print(f"Final labeled chunks: {labeled_count}")


Labeled chunks after removing label 11: {1: 8199, 0: 800}
Final labeled chunks: {1: 5384, 0: 800}


##### Spliting data


In [6]:
# Loading the data
data['label'] = data['label'].astype(int)

# Train-Test Split using stratified sampling
train_df, test_df = train_test_split(data, test_size=0.2, stratify=data['label'], random_state=42)

# since there is a class imbalance, we will compute class weights
# to handle this in the loss function
labels = train_df["label"].values
# Compute class weights
classes = np.unique(labels)
weights = compute_class_weight(class_weight="balanced",
                            classes=classes,
                            y=labels)
class_weights = torch.tensor(weights, dtype=torch.float, device=device)
print("Class weights:", class_weights)

# Convert ing the DataFrames to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))

    Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (8.0) - (12.0)
    


Class weights: tensor([3.8648, 0.5743], device='cuda:0')


In [7]:

# ---- Tuning parameters ----
CONFIG = {
    "epochs": 2,
    "batch_size": 16,
    "batch_size_larger": 8, # Batch size for larger models
    "max_length": [128, 256, 512], # Max length of input sequences
    "learning_rate": 5e-5, # Learning rate for the optimizer
    "learning_rate_larger": 3e-5, # Learning rate for larger models
    "weight_decay": 0.01, # Weight decay for regularization
    "output_dir": "huggingface_cache/classification_models"
}

# ---- Model configurations ----
MODEL_CONFIGS = {
    "deberta": {
        "tokenizer_class": DebertaV2Tokenizer,
        "pretrained_model_name": "microsoft/deberta-v3-small", # params 55M
        "model_class": DebertaV2ForSequenceClassification
    },
    "distilbert": {
        "tokenizer_class": DistilBertTokenizer,
        "pretrained_model_name": "distilbert-base-uncased", # params 66M
        "model_class": DistilBertForSequenceClassification
    },
    "bert": {
        "tokenizer_class": BertTokenizer,
        "pretrained_model_name": "bert-base-uncased", # params 110M
        "model_class": BertForSequenceClassification
    },
    "roberta": {
        "tokenizer_class": RobertaTokenizer,
        "pretrained_model_name": "roberta-base", # params 125M
        "model_class": RobertaForSequenceClassification
    },
    "electra": {
        "tokenizer_class": ElectraTokenizer,
        "pretrained_model_name": "google/electra-small-discriminator", # params 14M
        "model_class": ElectraForSequenceClassification
    },
    "albert": {
        "tokenizer_class": AlbertTokenizer,
        "pretrained_model_name": "albert-base-v2", # params 11M
        "model_class": AlbertForSequenceClassification
    },
    "xlnet": {
        "tokenizer_class": XLNetTokenizer,
        "pretrained_model_name": "xlnet-base-cased", # params 110M
        "model_class": XLNetForSequenceClassification
    },
    "mobilebert": {
        "tokenizer_class": AutoTokenizer,
        "pretrained_model_name": "google/mobilebert-uncased", # params 25M
        "model_class": MobileBertForSequenceClassification
    },
    "albert-base-v1": {
        "tokenizer_class": AlbertTokenizer,
        "pretrained_model_name": "albert-base-v1", # params 12M
        "model_class": AlbertForSequenceClassification
    },
    "albert-large-v2": {
        "tokenizer_class": AlbertTokenizer,
        "pretrained_model_name": "albert-large-v2", # params 18M
        "model_class": AlbertForSequenceClassification
    },
    "albert-xlarge-v2": {
        "tokenizer_class": AlbertTokenizer,
        "pretrained_model_name": "albert-xlarge-v2", # params 60M
        "model_class": AlbertForSequenceClassification
    },
    "albert-xxlarge-v2": {
        "tokenizer_class": AlbertTokenizer,
        "pretrained_model_name": "albert-xxlarge-v2", # params 235M
        "model_class": AlbertForSequenceClassification
    },
    "bert-large-uncased": {
        "tokenizer_class": BertTokenizer,
        "pretrained_model_name": "bert-large-uncased", # params 340M
        "model_class": BertForSequenceClassification
    }
}


In [8]:

# ---- Metric function ----
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {'accuracy': acc, 'f1': f1, 'precision': precision, 'recall': recall}




In [9]:
# ---- Weighted Trainer ----
class WeightedLossTrainer(Trainer):
    def __init__(self, class_weights=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        loss_fct = CrossEntropyLoss(weight=self.class_weights)
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss



In [10]:

# ---- Function to train and evaluate ----
def train_and_evaluate(model_name, train_dataset, test_dataset, data, max_length):
    print(f"\n===== Training {model_name} =====")

    # Model + tokenizer
    cfg = MODEL_CONFIGS[model_name]
    tokenizer = cfg["tokenizer_class"].from_pretrained(cfg["pretrained_model_name"])
    model = cfg["model_class"].from_pretrained(
        cfg["pretrained_model_name"],
        num_labels=len(data['label'].unique())
    )

    # Tokenization
    def tokenize_fn(batch):
        return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=max_length)

    train_enc = train_dataset.map(tokenize_fn, batched=True)
    test_enc = test_dataset.map(tokenize_fn, batched=True)
    train_enc = train_enc.rename_column("label", "labels")
    test_enc = test_enc.rename_column("label", "labels")
    train_enc.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    test_enc.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

    # Training args
    training_args = TrainingArguments(
        output_dir=f"{CONFIG['output_dir']}/{model_name}",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=CONFIG["learning_rate"],
        per_device_train_batch_size=CONFIG["batch_size"],
        per_device_eval_batch_size=CONFIG["batch_size"],
        num_train_epochs=CONFIG["epochs"],
        weight_decay=CONFIG["weight_decay"],
        logging_dir=f"./logs_{model_name}",
        report_to="none",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        seed=42
    )

    # Trainer
    trainer = WeightedLossTrainer(
        model=model,
        args=training_args,
        train_dataset=train_enc,
        eval_dataset=test_enc,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        class_weights=class_weights
    )

    # Train + evaluate
    # time to train
    t_train_start = torch.cuda.Event(enable_timing=True)
    t_train_end = torch.cuda.Event(enable_timing=True)
    t_train_start.record()
    trainer.train()
    t_train_end.record()
    torch.cuda.synchronize()
    t_train_time = t_train_start.elapsed_time(t_train_end) / 1000  # convert to seconds
    print(f"Training time for {model_name}: {t_train_time:.2f} seconds")

    # Predictions
    t_pred_start = torch.cuda.Event(enable_timing=True)
    t_pred_end = torch.cuda.Event(enable_timing=True)
    t_pred_start.record()
    preds = trainer.predict(test_enc)
    t_pred_end.record()
    torch.cuda.synchronize()
    t_pred_time = t_pred_start.elapsed_time(t_pred_end) / 1000  # convert to seconds
    print(f"Prediction time for {model_name}: {t_pred_time:.2f} seconds")
    y_true = preds.label_ids
    y_pred = np.argmax(preds.predictions, axis=1)

    print(f"\nClassification Report for {model_name}:")
    print(classification_report(y_true, y_pred, digits=4))

    performance = {
        "model": model_name,
        "max_length": max_length,
        "train_time_sec": t_train_time,
        "pred_time_sec": t_pred_time,
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": precision_recall_fscore_support(y_true, y_pred, average='weighted')[2],
        "precision": precision_recall_fscore_support(y_true, y_pred, average='weighted')[0],
        "recall": precision_recall_fscore_support(y_true, y_pred, average='weighted')[1],
    }
    return performance

In [11]:
for model_name in MODEL_CONFIGS.keys():
    for max_len in CONFIG["max_length"]:
        # skip if this (model, max_len) already exists in CSV
        if os.path.exists("model_performance_nvidia.csv") and f"{model_name},{max_len}," in open("model_performance_nvidia.csv").read():
            print(f"Skipping {model_name} with max_length={max_len} (already in CSV).")
            continue

        print(f"\n--- Training {model_name} with max_length={max_len} ---")
        performance = train_and_evaluate(model_name, train_dataset, test_dataset, data, max_len)
        print(performance)
        # save performance to a csv file
        with open("model_performance_nvidia.csv", "a") as f:
            #headers if file is empty
            if os.stat("model_performance_nvidia.csv").st_size == 0:
                f.write("model,max_length,train_time_sec,pred_time_sec,accuracy,f1,precision,recall\n")
            f.write(",".join([str(performance[k]) for k in performance]) + "\n")



--- Training deberta with max_length=128 ---

===== Training deberta =====


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.322136,0.969281,0.968302,0.968735,0.969281
2,0.283200,0.284359,0.974131,0.973773,0.973694,0.974131


Training time for deberta: 477.04 seconds


Prediction time for deberta: 13.01 seconds

Classification Report for deberta:
              precision    recall  f1-score   support

           0     0.9267    0.8688    0.8968       160
           1     0.9807    0.9898    0.9852      1077

    accuracy                         0.9741      1237
   macro avg     0.9537    0.9293    0.9410      1237
weighted avg     0.9737    0.9741    0.9738      1237

{'model': 'deberta', 'max_length': 128, 'train_time_sec': 477.0391875, 'pred_time_sec': 13.0142890625, 'accuracy': 0.9741309620048505, 'f1': 0.9737734908012624, 'precision': 0.9736943079539011, 'recall': 0.9741309620048505}

--- Training deberta with max_length=256 ---

===== Training deberta =====


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.376166,0.972514,0.971463,0.972458,0.972514
2,0.300600,0.308724,0.976556,0.976061,0.976215,0.976556


Training time for deberta: 830.52 seconds


Prediction time for deberta: 28.20 seconds

Classification Report for deberta:
              precision    recall  f1-score   support

           0     0.9517    0.8625    0.9049       160
           1     0.9799    0.9935    0.9866      1077

    accuracy                         0.9766      1237
   macro avg     0.9658    0.9280    0.9458      1237
weighted avg     0.9762    0.9766    0.9761      1237

{'model': 'deberta', 'max_length': 256, 'train_time_sec': 830.5245, 'pred_time_sec': 28.2044921875, 'accuracy': 0.9765561843168957, 'f1': 0.9760607614155817, 'precision': 0.9762150847786284, 'recall': 0.9765561843168957}

--- Training deberta with max_length=512 ---

===== Training deberta =====


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.325287,0.974131,0.973388,0.973821,0.974131
2,0.258200,0.306613,0.977365,0.976714,0.977256,0.977365


Training time for deberta: 1877.73 seconds


Prediction time for deberta: 79.17 seconds

Classification Report for deberta:
              precision    recall  f1-score   support

           0     0.9714    0.8500    0.9067       160
           1     0.9781    0.9963    0.9871      1077

    accuracy                         0.9774      1237
   macro avg     0.9748    0.9231    0.9469      1237
weighted avg     0.9773    0.9774    0.9767      1237

{'model': 'deberta', 'max_length': 512, 'train_time_sec': 1877.734375, 'pred_time_sec': 79.1731484375, 'accuracy': 0.9773645917542442, 'f1': 0.9767141968592342, 'precision': 0.9772563689588809, 'recall': 0.9773645917542442}

--- Training distilbert with max_length=128 ---

===== Training distilbert =====


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.250081,0.974131,0.973699,0.973671,0.974131
2,0.234100,0.247255,0.974939,0.974769,0.974668,0.974939


Training time for distilbert: 248.81 seconds


Prediction time for distilbert: 8.01 seconds

Classification Report for distilbert:
              precision    recall  f1-score   support

           0     0.9161    0.8875    0.9016       160
           1     0.9834    0.9879    0.9856      1077

    accuracy                         0.9749      1237
   macro avg     0.9497    0.9377    0.9436      1237
weighted avg     0.9747    0.9749    0.9748      1237

{'model': 'distilbert', 'max_length': 128, 'train_time_sec': 248.805546875, 'pred_time_sec': 8.0120302734375, 'accuracy': 0.9749393694421988, 'f1': 0.9747694943429517, 'precision': 0.9746676026345088, 'recall': 0.9749393694421988}

--- Training distilbert with max_length=256 ---

===== Training distilbert =====


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.264911,0.976556,0.975919,0.976348,0.976556
2,0.238400,0.279296,0.974939,0.974484,0.974508,0.974939


Training time for distilbert: 392.08 seconds


Prediction time for distilbert: 13.07 seconds

Classification Report for distilbert:
              precision    recall  f1-score   support

           0     0.9645    0.8500    0.9037       160
           1     0.9781    0.9954    0.9867      1077

    accuracy                         0.9766      1237
   macro avg     0.9713    0.9227    0.9452      1237
weighted avg     0.9763    0.9766    0.9759      1237

{'model': 'distilbert', 'max_length': 256, 'train_time_sec': 392.075125, 'pred_time_sec': 13.0747568359375, 'accuracy': 0.9765561843168957, 'f1': 0.9759187557453016, 'precision': 0.9763478573394604, 'recall': 0.9765561843168957}

--- Training distilbert with max_length=512 ---

===== Training distilbert =====


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.27227,0.978173,0.97758,0.97806,0.978173
2,0.248100,0.282182,0.978981,0.978442,0.978864,0.978981


Training time for distilbert: 733.91 seconds


Prediction time for distilbert: 26.81 seconds

Classification Report for distilbert:
              precision    recall  f1-score   support

           0     0.9718    0.8625    0.9139       160
           1     0.9799    0.9963    0.9880      1077

    accuracy                         0.9790      1237
   macro avg     0.9759    0.9294    0.9510      1237
weighted avg     0.9789    0.9790    0.9784      1237

{'model': 'distilbert', 'max_length': 512, 'train_time_sec': 733.914625, 'pred_time_sec': 26.807705078125, 'accuracy': 0.978981406628941, 'f1': 0.9784421183262277, 'precision': 0.9788638654665281, 'recall': 0.978981406628941}
Skipping bert with max_length=128 (already in CSV).
Skipping bert with max_length=256 (already in CSV).
Skipping bert with max_length=512 (already in CSV).

--- Training roberta with max_length=128 ---

===== Training roberta =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.42169,0.967664,0.966104,0.967695,0.967664
2,0.353500,0.282394,0.976556,0.976266,0.976202,0.976556


Training time for roberta: 461.77 seconds


Prediction time for roberta: 13.71 seconds

Classification Report for roberta:
              precision    recall  f1-score   support

           0     0.9338    0.8812    0.9068       160
           1     0.9825    0.9907    0.9866      1077

    accuracy                         0.9766      1237
   macro avg     0.9581    0.9360    0.9467      1237
weighted avg     0.9762    0.9766    0.9763      1237

{'model': 'roberta', 'max_length': 128, 'train_time_sec': 461.76853125, 'pred_time_sec': 13.706326171875, 'accuracy': 0.9765561843168957, 'f1': 0.9762657386605518, 'precision': 0.9762016427433062, 'recall': 0.9765561843168957}

--- Training roberta with max_length=256 ---

===== Training roberta =====


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.478067,0.970089,0.968697,0.970275,0.970089
2,0.338600,0.26343,0.97979,0.979423,0.979544,0.97979


Training time for roberta: 757.37 seconds


Prediction time for roberta: 24.29 seconds

Classification Report for roberta:
              precision    recall  f1-score   support

           0     0.9592    0.8812    0.9186       160
           1     0.9826    0.9944    0.9885      1077

    accuracy                         0.9798      1237
   macro avg     0.9709    0.9378    0.9535      1237
weighted avg     0.9795    0.9798    0.9794      1237

{'model': 'roberta', 'max_length': 256, 'train_time_sec': 757.3684375, 'pred_time_sec': 24.287298828125, 'accuracy': 0.9797898140662894, 'f1': 0.9794225323382386, 'precision': 0.9795440527564181, 'recall': 0.9797898140662894}

--- Training roberta with max_length=512 ---

===== Training roberta =====


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.298596,0.972514,0.971724,0.972104,0.972514
2,0.310300,0.277755,0.980598,0.980274,0.980365,0.980598


Training time for roberta: 1449.71 seconds


Prediction time for roberta: 50.86 seconds

Classification Report for roberta:
              precision    recall  f1-score   support

           0     0.9595    0.8875    0.9221       160
           1     0.9835    0.9944    0.9889      1077

    accuracy                         0.9806      1237
   macro avg     0.9715    0.9410    0.9555      1237
weighted avg     0.9804    0.9806    0.9803      1237

{'model': 'roberta', 'max_length': 512, 'train_time_sec': 1449.71025, 'pred_time_sec': 50.8598359375, 'accuracy': 0.9805982215036378, 'f1': 0.9802740093184538, 'precision': 0.9803652874866222, 'recall': 0.9805982215036378}

--- Training electra with max_length=128 ---

===== Training electra =====


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.293449,0.970897,0.969784,0.970704,0.970897
2,0.273800,0.27704,0.971706,0.970936,0.971213,0.971706


Training time for electra: 105.04 seconds


Prediction time for electra: 4.41 seconds

Classification Report for electra:
              precision    recall  f1-score   support

           0     0.9433    0.8313    0.8837       160
           1     0.9754    0.9926    0.9839      1077

    accuracy                         0.9717      1237
   macro avg     0.9593    0.9119    0.9338      1237
weighted avg     0.9712    0.9717    0.9709      1237

{'model': 'electra', 'max_length': 128, 'train_time_sec': 105.0444609375, 'pred_time_sec': 4.405046875, 'accuracy': 0.9717057396928052, 'f1': 0.9709364293477776, 'precision': 0.9712126527963085, 'recall': 0.9717057396928052}

--- Training electra with max_length=256 ---

===== Training electra =====


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.284488,0.973323,0.972431,0.973117,0.973323
2,0.276000,0.267624,0.975748,0.975126,0.975452,0.975748


Training time for electra: 189.46 seconds


Prediction time for electra: 6.85 seconds

Classification Report for electra:
              precision    recall  f1-score   support

           0     0.9577    0.8500    0.9007       160
           1     0.9781    0.9944    0.9862      1077

    accuracy                         0.9757      1237
   macro avg     0.9679    0.9222    0.9434      1237
weighted avg     0.9755    0.9757    0.9751      1237

{'model': 'electra', 'max_length': 256, 'train_time_sec': 189.4565, 'pred_time_sec': 6.84640966796875, 'accuracy': 0.9757477768795473, 'f1': 0.9751255211456473, 'precision': 0.9754518651314984, 'recall': 0.9757477768795473}

--- Training electra with max_length=512 ---

===== Training electra =====


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.279696,0.970089,0.969276,0.969501,0.970089
2,0.276900,0.308584,0.971706,0.97076,0.971382,0.971706


Training time for electra: 526.51 seconds


Prediction time for electra: 20.03 seconds

Classification Report for electra:
              precision    recall  f1-score   support

           0     0.9562    0.8187    0.8822       160
           1     0.9736    0.9944    0.9839      1077

    accuracy                         0.9717      1237
   macro avg     0.9649    0.9066    0.9330      1237
weighted avg     0.9714    0.9717    0.9708      1237

{'model': 'electra', 'max_length': 512, 'train_time_sec': 526.513, 'pred_time_sec': 20.027138671875, 'accuracy': 0.9717057396928052, 'f1': 0.970759635088592, 'precision': 0.9713816203446165, 'recall': 0.9717057396928052}

--- Training albert with max_length=128 ---

===== Training albert =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.541814,0.922393,0.908325,0.926186,0.922393
2,0.513400,0.444349,0.941795,0.935867,0.9417,0.941795


Training time for albert: 479.51 seconds


Prediction time for albert: 20.56 seconds

Classification Report for albert:
              precision    recall  f1-score   support

           0     0.9400    0.5875    0.7231       160
           1     0.9420    0.9944    0.9675      1077

    accuracy                         0.9418      1237
   macro avg     0.9410    0.7910    0.8453      1237
weighted avg     0.9417    0.9418    0.9359      1237

{'model': 'albert', 'max_length': 128, 'train_time_sec': 479.50625, 'pred_time_sec': 20.561255859375, 'accuracy': 0.9417946645109135, 'f1': 0.9358673544449516, 'precision': 0.9416999592596779, 'recall': 0.9417946645109135}

--- Training albert with max_length=256 ---

===== Training albert =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.349967,0.960388,0.960009,0.959757,0.960388
2,0.426800,0.449939,0.971706,0.970293,0.972313,0.971706


Training time for albert: 926.66 seconds


Prediction time for albert: 38.97 seconds

Classification Report for albert:
              precision    recall  f1-score   support

           0     0.9921    0.7875    0.8780       160
           1     0.9694    0.9991    0.9840      1077

    accuracy                         0.9717      1237
   macro avg     0.9807    0.8933    0.9310      1237
weighted avg     0.9723    0.9717    0.9703      1237

{'model': 'albert', 'max_length': 256, 'train_time_sec': 926.660875, 'pred_time_sec': 38.9680546875, 'accuracy': 0.9717057396928052, 'f1': 0.9702925345470507, 'precision': 0.9723128280445642, 'recall': 0.9717057396928052}

--- Training albert with max_length=512 ---

===== Training albert =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.374696,0.970089,0.968896,0.969901,0.970089
2,0.335500,0.366208,0.975748,0.975051,0.975539,0.975748


Training time for albert: 1895.25 seconds


Prediction time for albert: 80.36 seconds

Classification Report for albert:
              precision    recall  f1-score   support

           0     0.9643    0.8438    0.9000       160
           1     0.9772    0.9954    0.9862      1077

    accuracy                         0.9757      1237
   macro avg     0.9707    0.9196    0.9431      1237
weighted avg     0.9755    0.9757    0.9751      1237

{'model': 'albert', 'max_length': 512, 'train_time_sec': 1895.24925, 'pred_time_sec': 80.358671875, 'accuracy': 0.9757477768795473, 'f1': 0.9750509252063224, 'precision': 0.9755388058204071, 'recall': 0.9757477768795473}

--- Training xlnet with max_length=128 ---

===== Training xlnet =====


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.381294,0.974939,0.97394,0.975184,0.974939
2,0.359300,0.305158,0.977365,0.976643,0.977383,0.977365


Training time for xlnet: 596.65 seconds


Prediction time for xlnet: 19.04 seconds

Classification Report for xlnet:
              precision    recall  f1-score   support

           0     0.9783    0.8438    0.9060       160
           1     0.9773    0.9972    0.9871      1077

    accuracy                         0.9774      1237
   macro avg     0.9778    0.9205    0.9466      1237
weighted avg     0.9774    0.9774    0.9766      1237

{'model': 'xlnet', 'max_length': 128, 'train_time_sec': 596.6463125, 'pred_time_sec': 19.0395703125, 'accuracy': 0.9773645917542442, 'f1': 0.9766434818679605, 'precision': 0.9773825336212518, 'recall': 0.9773645917542442}

--- Training xlnet with max_length=256 ---

===== Training xlnet =====


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.365717,0.965238,0.963009,0.966205,0.965238
2,0.348400,0.263828,0.977365,0.976986,0.977018,0.977365


Training time for xlnet: 1263.63 seconds


Prediction time for xlnet: 45.48 seconds

Classification Report for xlnet:
              precision    recall  f1-score   support

           0     0.9459    0.8750    0.9091       160
           1     0.9816    0.9926    0.9871      1077

    accuracy                         0.9774      1237
   macro avg     0.9638    0.9338    0.9481      1237
weighted avg     0.9770    0.9774    0.9770      1237

{'model': 'xlnet', 'max_length': 256, 'train_time_sec': 1263.63075, 'pred_time_sec': 45.47715234375, 'accuracy': 0.9773645917542442, 'f1': 0.9769863442048629, 'precision': 0.9770183807811498, 'recall': 0.9773645917542442}

--- Training xlnet with max_length=512 ---

===== Training xlnet =====


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.461371,0.966855,0.965424,0.966523,0.966855
2,0.393300,0.369984,0.975748,0.975126,0.975452,0.975748


Training time for xlnet: 3257.27 seconds


Prediction time for xlnet: 124.90 seconds

Classification Report for xlnet:
              precision    recall  f1-score   support

           0     0.9577    0.8500    0.9007       160
           1     0.9781    0.9944    0.9862      1077

    accuracy                         0.9757      1237
   macro avg     0.9679    0.9222    0.9434      1237
weighted avg     0.9755    0.9757    0.9751      1237

{'model': 'xlnet', 'max_length': 512, 'train_time_sec': 3257.2715, 'pred_time_sec': 124.9036328125, 'accuracy': 0.9757477768795473, 'f1': 0.9751255211456473, 'precision': 0.9754518651314984, 'recall': 0.9757477768795473}

--- Training mobilebert with max_length=128 ---

===== Training mobilebert =====


config.json:   0%|          | 0.00/847 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/147M [00:00<?, ?B/s]

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/147M [00:00<?, ?B/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.341277,0.971706,0.971192,0.971151,0.971706
2,8210.993000,0.334536,0.973323,0.972838,0.972829,0.973323


Training time for mobilebert: 265.17 seconds


Prediction time for mobilebert: 9.23 seconds

Classification Report for mobilebert:
              precision    recall  f1-score   support

           0     0.9320    0.8562    0.8925       160
           1     0.9789    0.9907    0.9848      1077

    accuracy                         0.9733      1237
   macro avg     0.9554    0.9235    0.9386      1237
weighted avg     0.9728    0.9733    0.9728      1237

{'model': 'mobilebert', 'max_length': 128, 'train_time_sec': 265.1703125, 'pred_time_sec': 9.2333017578125, 'accuracy': 0.973322554567502, 'f1': 0.9728377426864748, 'precision': 0.9728293922272504, 'recall': 0.973322554567502}

--- Training mobilebert with max_length=256 ---

===== Training mobilebert =====


Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.419742,0.968472,0.967517,0.967837,0.968472
2,5993.056000,0.330779,0.972514,0.971809,0.97204,0.972514


Training time for mobilebert: 479.89 seconds


Prediction time for mobilebert: 16.82 seconds

Classification Report for mobilebert:
              precision    recall  f1-score   support

           0     0.9437    0.8375    0.8874       160
           1     0.9763    0.9926    0.9843      1077

    accuracy                         0.9725      1237
   macro avg     0.9600    0.9150    0.9359      1237
weighted avg     0.9720    0.9725    0.9718      1237

{'model': 'mobilebert', 'max_length': 256, 'train_time_sec': 479.89309375, 'pred_time_sec': 16.823578125, 'accuracy': 0.9725141471301536, 'f1': 0.9718089239650671, 'precision': 0.9720398647964685, 'recall': 0.9725141471301536}

--- Training mobilebert with max_length=512 ---

===== Training mobilebert =====


Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,538.605713,0.909458,0.889746,0.911141,0.909458
2,9682.771000,0.563883,0.96443,0.963297,0.963546,0.96443


Training time for mobilebert: 1261.53 seconds


Prediction time for mobilebert: 47.25 seconds

Classification Report for mobilebert:
              precision    recall  f1-score   support

           0     0.9203    0.7937    0.8523       160
           1     0.9700    0.9898    0.9798      1077

    accuracy                         0.9644      1237
   macro avg     0.9451    0.8918    0.9161      1237
weighted avg     0.9635    0.9644    0.9633      1237

{'model': 'mobilebert', 'max_length': 512, 'train_time_sec': 1261.532875, 'pred_time_sec': 47.2532421875, 'accuracy': 0.9644300727566694, 'f1': 0.9632969000782238, 'precision': 0.9635464651233187, 'recall': 0.9644300727566694}

--- Training albert-base-v1 with max_length=128 ---

===== Training albert-base-v1 =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.424813,0.966047,0.965176,0.965216,0.966047
2,0.317000,0.40129,0.967664,0.967028,0.966936,0.967664


Training time for albert-base-v1: 328.85 seconds


Prediction time for albert-base-v1: 13.97 seconds

Classification Report for albert-base-v1:
              precision    recall  f1-score   support

           0     0.9110    0.8313    0.8693       160
           1     0.9753    0.9879    0.9815      1077

    accuracy                         0.9677      1237
   macro avg     0.9431    0.9096    0.9254      1237
weighted avg     0.9669    0.9677    0.9670      1237

{'model': 'albert-base-v1', 'max_length': 128, 'train_time_sec': 328.8515, 'pred_time_sec': 13.9677998046875, 'accuracy': 0.967663702506063, 'f1': 0.9670283901469479, 'precision': 0.96693605156422, 'recall': 0.967663702506063}

--- Training albert-base-v1 with max_length=256 ---

===== Training albert-base-v1 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.423654,0.965238,0.964504,0.96439,0.965238
2,0.325900,0.359128,0.975748,0.974975,0.975653,0.975748


Training time for albert-base-v1: 625.30 seconds


Prediction time for albert-base-v1: 24.91 seconds

Classification Report for albert-base-v1:
              precision    recall  f1-score   support

           0     0.9710    0.8375    0.8993       160
           1     0.9763    0.9963    0.9862      1077

    accuracy                         0.9757      1237
   macro avg     0.9737    0.9169    0.9428      1237
weighted avg     0.9757    0.9757    0.9750      1237

{'model': 'albert-base-v1', 'max_length': 256, 'train_time_sec': 625.3001875, 'pred_time_sec': 24.90740625, 'accuracy': 0.9757477768795473, 'f1': 0.9749751591442434, 'precision': 0.9756530250590102, 'recall': 0.9757477768795473}

--- Training albert-base-v1 with max_length=512 ---

===== Training albert-base-v1 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.396304,0.968472,0.967418,0.96791,0.968472
2,0.308900,0.278369,0.977365,0.97692,0.97704,0.977365


Training time for albert-base-v1: 1308.87 seconds


Prediction time for albert-base-v1: 52.46 seconds

Classification Report for albert-base-v1:
              precision    recall  f1-score   support

           0     0.9521    0.8688    0.9085       160
           1     0.9808    0.9935    0.9871      1077

    accuracy                         0.9774      1237
   macro avg     0.9664    0.9311    0.9478      1237
weighted avg     0.9770    0.9774    0.9769      1237

{'model': 'albert-base-v1', 'max_length': 512, 'train_time_sec': 1308.87075, 'pred_time_sec': 52.46413671875, 'accuracy': 0.9773645917542442, 'f1': 0.9769198731028634, 'precision': 0.9770398097549116, 'recall': 0.9773645917542442}

--- Training albert-large-v2 with max_length=128 ---

===== Training albert-large-v2 =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/71.5M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-large-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.634853,0.902991,0.879941,0.902714,0.902991
2,0.711700,0.433383,0.932094,0.929496,0.928593,0.932094


Training time for albert-large-v2: 1408.74 seconds


Prediction time for albert-large-v2: 59.23 seconds

Classification Report for albert-large-v2:
              precision    recall  f1-score   support

           0     0.7836    0.6562    0.7143       160
           1     0.9501    0.9731    0.9615      1077

    accuracy                         0.9321      1237
   macro avg     0.8669    0.8147    0.8379      1237
weighted avg     0.9286    0.9321    0.9295      1237

{'model': 'albert-large-v2', 'max_length': 128, 'train_time_sec': 1408.73625, 'pred_time_sec': 59.23441015625, 'accuracy': 0.9320937752627324, 'f1': 0.9294960644437404, 'precision': 0.9285930464971179, 'recall': 0.9320937752627324}

--- Training albert-large-v2 with max_length=256 ---

===== Training albert-large-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-large-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690005,0.870655,0.810454,0.75804,0.870655
2,0.716700,0.689622,0.870655,0.810454,0.75804,0.870655


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training time for albert-large-v2: 2764.15 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction time for albert-large-v2: 117.35 seconds

Classification Report for albert-large-v2:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       160
           1     0.8707    1.0000    0.9309      1077

    accuracy                         0.8707      1237
   macro avg     0.4353    0.5000    0.4654      1237
weighted avg     0.7580    0.8707    0.8105      1237

{'model': 'albert-large-v2', 'max_length': 256, 'train_time_sec': 2764.15225, 'pred_time_sec': 117.3475859375, 'accuracy': 0.8706548100242523, 'f1': 0.8104539588557645, 'precision': 0.7580397982183668, 'recall': 0.8706548100242523}

--- Training albert-large-v2 with max_length=512 ---

===== Training albert-large-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-large-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.461458,0.936944,0.932068,0.93392,0.936944
2,0.586600,0.429393,0.955538,0.953468,0.954179,0.955538


Training time for albert-large-v2: 5588.21 seconds


Prediction time for albert-large-v2: 238.23 seconds

Classification Report for albert-large-v2:
              precision    recall  f1-score   support

           0     0.9070    0.7312    0.8097       160
           1     0.9612    0.9889    0.9748      1077

    accuracy                         0.9555      1237
   macro avg     0.9341    0.8601    0.8923      1237
weighted avg     0.9542    0.9555    0.9535      1237

{'model': 'albert-large-v2', 'max_length': 512, 'train_time_sec': 5588.2065, 'pred_time_sec': 238.229984375, 'accuracy': 0.9555375909458367, 'f1': 0.9534683372622854, 'precision': 0.9541789390960591, 'recall': 0.9555375909458367}

--- Training albert-xlarge-v2 with max_length=128 ---

===== Training albert-xlarge-v2 =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/236M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.690866,0.870655,0.810454,0.75804,0.870655
2,0.715200,0.689503,0.870655,0.810454,0.75804,0.870655


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training time for albert-xlarge-v2: 4883.73 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction time for albert-xlarge-v2: 252.47 seconds

Classification Report for albert-xlarge-v2:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       160
           1     0.8707    1.0000    0.9309      1077

    accuracy                         0.8707      1237
   macro avg     0.4353    0.5000    0.4654      1237
weighted avg     0.7580    0.8707    0.8105      1237

{'model': 'albert-xlarge-v2', 'max_length': 128, 'train_time_sec': 4883.7335, 'pred_time_sec': 252.47365625, 'accuracy': 0.8706548100242523, 'f1': 0.8104539588557645, 'precision': 0.7580397982183668, 'recall': 0.8706548100242523}

--- Training albert-xlarge-v2 with max_length=256 ---

===== Training albert-xlarge-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.694994,0.129345,0.029628,0.01673,0.129345
2,0.738300,0.689611,0.870655,0.810454,0.75804,0.870655


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training time for albert-xlarge-v2: 9559.82 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction time for albert-xlarge-v2: 501.68 seconds

Classification Report for albert-xlarge-v2:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       160
           1     0.8707    1.0000    0.9309      1077

    accuracy                         0.8707      1237
   macro avg     0.4353    0.5000    0.4654      1237
weighted avg     0.7580    0.8707    0.8105      1237

{'model': 'albert-xlarge-v2', 'max_length': 256, 'train_time_sec': 9559.815, 'pred_time_sec': 501.6791875, 'accuracy': 0.8706548100242523, 'f1': 0.8104539588557645, 'precision': 0.7580397982183668, 'recall': 0.8706548100242523}

--- Training albert-xlarge-v2 with max_length=512 ---

===== Training albert-xlarge-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.681307,0.870655,0.810454,0.75804,0.870655
2,0.724600,0.68136,0.870655,0.810454,0.75804,0.870655


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training time for albert-xlarge-v2: 16844.53 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction time for albert-xlarge-v2: 788.57 seconds

Classification Report for albert-xlarge-v2:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       160
           1     0.8707    1.0000    0.9309      1077

    accuracy                         0.8707      1237
   macro avg     0.4353    0.5000    0.4654      1237
weighted avg     0.7580    0.8707    0.8105      1237

{'model': 'albert-xlarge-v2', 'max_length': 512, 'train_time_sec': 16844.526, 'pred_time_sec': 788.5704375, 'accuracy': 0.8706548100242523, 'f1': 0.8104539588557645, 'precision': 0.7580397982183668, 'recall': 0.8706548100242523}

--- Training albert-xxlarge-v2 with max_length=128 ---

===== Training albert-xxlarge-v2 =====


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/710 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/893M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xxlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.344984,0.957154,0.954866,0.956237,0.957154
2,0.499800,0.448747,0.971706,0.97076,0.971382,0.971706


Training time for albert-xxlarge-v2: 8921.32 seconds


Prediction time for albert-xxlarge-v2: 491.27 seconds

Classification Report for albert-xxlarge-v2:
              precision    recall  f1-score   support

           0     0.9562    0.8187    0.8822       160
           1     0.9736    0.9944    0.9839      1077

    accuracy                         0.9717      1237
   macro avg     0.9649    0.9066    0.9330      1237
weighted avg     0.9714    0.9717    0.9708      1237

{'model': 'albert-xxlarge-v2', 'max_length': 128, 'train_time_sec': 8921.315, 'pred_time_sec': 491.27440625, 'accuracy': 0.9717057396928052, 'f1': 0.970759635088592, 'precision': 0.9713816203446165, 'recall': 0.9717057396928052}

--- Training albert-xxlarge-v2 with max_length=256 ---

===== Training albert-xxlarge-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xxlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.691153,0.870655,0.810454,0.75804,0.870655
2,0.705900,0.689785,0.870655,0.810454,0.75804,0.870655


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training time for albert-xxlarge-v2: 15023.97 seconds


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction time for albert-xxlarge-v2: 764.08 seconds

Classification Report for albert-xxlarge-v2:
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       160
           1     0.8707    1.0000    0.9309      1077

    accuracy                         0.8707      1237
   macro avg     0.4353    0.5000    0.4654      1237
weighted avg     0.7580    0.8707    0.8105      1237

{'model': 'albert-xxlarge-v2', 'max_length': 256, 'train_time_sec': 15023.971, 'pred_time_sec': 764.084875, 'accuracy': 0.8706548100242523, 'f1': 0.8104539588557645, 'precision': 0.7580397982183668, 'recall': 0.8706548100242523}

--- Training albert-xxlarge-v2 with max_length=512 ---

===== Training albert-xxlarge-v2 =====


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-xxlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.435945,0.92401,0.92421,0.92442,0.92401
2,0.578000,0.414999,0.870655,0.88433,0.916262,0.870655


Training time for albert-xxlarge-v2: 9261.66 seconds


Prediction time for albert-xxlarge-v2: 527.08 seconds

Classification Report for albert-xxlarge-v2:
              precision    recall  f1-score   support

           0     0.7037    0.7125    0.7081       160
           1     0.9572    0.9554    0.9563      1077

    accuracy                         0.9240      1237
   macro avg     0.8305    0.8340    0.8322      1237
weighted avg     0.9244    0.9240    0.9242      1237

{'model': 'albert-xxlarge-v2', 'max_length': 512, 'train_time_sec': 9261.659, 'pred_time_sec': 527.0794375, 'accuracy': 0.9240097008892482, 'f1': 0.9242103841297016, 'precision': 0.9244195725119191, 'recall': 0.9240097008892482}

--- Training bert-large-uncased with max_length=128 ---

===== Training bert-large-uncased =====


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.207506,0.967664,0.968158,0.969003,0.967664
2,0.358200,0.247776,0.967664,0.967749,0.967845,0.967664


Training time for bert-large-uncased: 375.89 seconds


Prediction time for bert-large-uncased: 11.77 seconds

Classification Report for bert-large-uncased:
              precision    recall  f1-score   support

           0     0.8488    0.9125    0.8795       160
           1     0.9869    0.9759    0.9813      1077

    accuracy                         0.9677      1237
   macro avg     0.9178    0.9442    0.9304      1237
weighted avg     0.9690    0.9677    0.9682      1237

{'model': 'bert-large-uncased', 'max_length': 128, 'train_time_sec': 375.89234375, 'pred_time_sec': 11.772083984375, 'accuracy': 0.967663702506063, 'f1': 0.9681575155604993, 'precision': 0.9690025925703306, 'recall': 0.967663702506063}

--- Training bert-large-uncased with max_length=256 ---

===== Training bert-large-uncased =====


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.288586,0.977365,0.976714,0.977256,0.977365
2,0.300000,0.241077,0.97979,0.979539,0.979518,0.97979


Training time for bert-large-uncased: 649.37 seconds


Prediction time for bert-large-uncased: 23.58 seconds

Classification Report for bert-large-uncased:
              precision    recall  f1-score   support

           0     0.9470    0.8938    0.9196       160
           1     0.9843    0.9926    0.9884      1077

    accuracy                         0.9798      1237
   macro avg     0.9657    0.9432    0.9540      1237
weighted avg     0.9795    0.9798    0.9795      1237

{'model': 'bert-large-uncased', 'max_length': 256, 'train_time_sec': 649.3740625, 'pred_time_sec': 23.580330078125, 'accuracy': 0.9797898140662894, 'f1': 0.979539429879786, 'precision': 0.9795182399238865, 'recall': 0.9797898140662894}

--- Training bert-large-uncased with max_length=512 ---

===== Training bert-large-uncased =====


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4947 [00:00<?, ? examples/s]

Map:   0%|          | 0/1237 [00:00<?, ? examples/s]

  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.27876,0.971706,0.971743,0.971783,0.971706
2,0.279400,0.205149,0.97979,0.979763,0.979739,0.97979


Training time for bert-large-uncased: 1213.27 seconds


Prediction time for bert-large-uncased: 47.68 seconds

Classification Report for bert-large-uncased:
              precision    recall  f1-score   support

           0     0.9245    0.9187    0.9216       160
           1     0.9879    0.9889    0.9884      1077

    accuracy                         0.9798      1237
   macro avg     0.9562    0.9538    0.9550      1237
weighted avg     0.9797    0.9798    0.9798      1237

{'model': 'bert-large-uncased', 'max_length': 512, 'train_time_sec': 1213.265, 'pred_time_sec': 47.68050390625, 'accuracy': 0.9797898140662894, 'f1': 0.9797628257971375, 'precision': 0.9797385510679781, 'recall': 0.9797898140662894}
