<a href="https://colab.research.google.com/github/bogus1aw/text-classification-benchmark/blob/main/M_herBERT_PolEmo2_0_raw.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HerBERT benchmark for PolEmo2.0 raw dataset 

In [None]:
# check available GPU
!nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv

name, driver_version, memory.total [MiB]
Tesla T4, 460.32.03, 15109 MiB


In [None]:
%%capture
# NOTE(review): both packages are installed unpinned. Later cells call
# `datasets.load_metric` and pass `evaluation_strategy=` to TrainingArguments;
# presumably newer releases rename or drop these — confirm and pin the versions
# this notebook was run with.
!pip install datasets transformers

In [None]:
from torch import cuda
# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced again in the visible cells — confirm it is still needed.
device = 'cuda' if cuda.is_available() else 'cpu'
# Bare last expression so the notebook displays whether CUDA is available.
cuda.is_available()

True

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
import datetime
import time
import math
import shutil

In [None]:
def load_corpora_to_dataframe(corpora):
  """Read a whitespace-tokenised corpus file into a DataFrame.

  Every non-blank line is split on whitespace: the last token is taken as the
  label and the remaining tokens, re-joined with single spaces, as the text.
  Blank (or whitespace-only) lines are skipped.

  Args:
    corpora: path of the corpus file to read.

  Returns:
    pandas.DataFrame with two columns, 'texts' and 'labels', one row per
    non-blank line, in file order.
  """
  labels, texts = [], []
  # The context manager closes the handle deterministically; the encoding is
  # explicit so non-ASCII text does not depend on the platform default.
  with open(corpora, encoding='utf-8') as corpus_file:
    for line in corpus_file:
      content = line.split()
      if content:
        labels.append(content[-1])
        texts.append(" ".join(content[:-1]))
  return pd.DataFrame({'texts': texts, 'labels': labels})


# 1. load the corpora
# 2. create the training and test fractions
# 3. sample a fixed number of examples per class for each fraction
# 4. create the train and validation sets
# 5. create a fresh model for every run (the tokenizer can be reused)
# 6. tokenize and encode the train and test sets


In [None]:
import torch
# from transformers import AutoTokenizer, RobertaForSequenceClassification, EvalPrediction
from transformers import HerbertTokenizer, RobertaForSequenceClassification, EvalPrediction


# Earlier variant using the "allegro/herbert-base-cased" checkpoint, kept for reference:
# tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-base-cased")
# model = RobertaForSequenceClassification.from_pretrained("allegro/herbert-base-cased", num_labels=4)
# The tokenizer is created once and shared by every run.
tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
# NOTE(review): benchmark() loads its own fresh copy of this checkpoint for each run,
# so this module-level `model` is not the one being trained there — confirm whether
# it is still needed.
model = RobertaForSequenceClassification.from_pretrained("allegro/herbert-klej-cased-v1", num_labels=4)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1037897.0, style=ProgressStyle(descriptâ€¦




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=590648.0, style=ProgressStyle(descriptiâ€¦




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=300.0, style=ProgressStyle(description_â€¦




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=341.0, style=ProgressStyle(description_â€¦




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=547.0, style=ProgressStyle(description_â€¦




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=499534190.0, style=ProgressStyle(descriâ€¦




Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

In [None]:
from sklearn import preprocessing
encoder = preprocessing.LabelEncoder()

def build_databases(train_data, dev_data, test_data):
  """Tokenize three splits and wrap them as torch datasets.

  Args:
    train_data, dev_data, test_data: DataFrames with a 'texts' column (strings)
      and a 'labels' column (class names).

  Returns:
    (train_dataset, val_dataset, test_dataset): torch Datasets whose items are
    dicts holding the tokenizer outputs plus an integer 'labels' tensor.

  Uses the module-level `tokenizer` and `encoder`.
  """
  # build pyTorch dataset
  import torch

  max_length = 200

  def encode_texts(frame):
    # Each split is padded to its own longest sequence, capped at max_length tokens.
    return tokenizer(frame['texts'].to_list(), truncation=True, padding=True, max_length=max_length)

  train_encodings = encode_texts(train_data)
  val_encodings = encode_texts(dev_data)
  test_encodings = encode_texts(test_data)

  # encode labels
  raw_train_labels = train_data['labels'].to_list()
  raw_val_labels = dev_data['labels'].to_list()
  raw_test_labels = test_data['labels'].to_list()
  # Fit the encoder ONCE, on the label names of all three splits, and only
  # transform afterwards. Re-fitting per split (fit_transform on each) can give
  # the same class different ids in different splits whenever their label sets
  # differ, which silently corrupts validation/test accuracy.
  encoder.fit(raw_train_labels + raw_val_labels + raw_test_labels)
  train_labels = encoder.transform(raw_train_labels)
  val_labels = encoder.transform(raw_val_labels)
  test_labels = encoder.transform(raw_test_labels)

  class wikiDataset(torch.utils.data.Dataset):
      """Pairs tokenizer encodings with integer labels, indexed per example."""

      def __init__(self, encodings, labels):
          self.encodings = encodings
          self.labels = labels

      def __getitem__(self, idx):
          # One tensor per tokenizer output field, plus the label.
          item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
          item['labels'] = torch.tensor(self.labels[idx])
          return item

      def __len__(self):
          return len(self.labels)

  train_dataset = wikiDataset(train_encodings, train_labels)
  val_dataset = wikiDataset(val_encodings, val_labels)
  test_dataset = wikiDataset(test_encodings, test_labels)
  return train_dataset, val_dataset, test_dataset


In [None]:
from transformers import Trainer, TrainingArguments

# Hyperparameters shared by every Trainer built in get_trainer().
# NOTE(review): warmup_steps=100 looks large for the small-sample runs. With one
# example per class and four classes there is a single optimizer step per epoch
# at batch size 16, so four epochs never leave the warmup phase (assuming the
# default warmup schedule) — confirm this is intended.
training_args = TrainingArguments(
    output_dir='./results',          # checkpoint directory; benchmark() deletes it after each run
    evaluation_strategy = "epoch",   # evaluate on the validation set once per epoch
    num_train_epochs=4,              # number of training epochs per run
    per_device_train_batch_size=16,  # training batch size per device
    per_device_eval_batch_size=64,   # evaluation batch size per device
    warmup_steps=100,                # learning-rate warmup steps (see the note above)
    weight_decay=0.01,               # weight-decay coefficient
    logging_dir='./logs',            # directory for training logs
    logging_steps=10,                # report the training loss every 10 steps
    load_best_model_at_end=True,     # after training, reload the best checkpoint ...
    metric_for_best_model="accuracy" # ... as ranked by the accuracy from compute_metrics()
)

from datasets import load_metric
import numpy as np
metric = load_metric('accuracy')

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `metric`.

    Args:
        eval_pred: pair of (per-class scores, reference label ids); the scores
            are reduced to a predicted class id with an argmax over axis 1.

    Returns:
        Whatever `metric.compute` returns for those predictions and references.
    """
    class_scores, references = eval_pred
    predicted_ids = np.argmax(class_scores, axis=1)
    return metric.compute(predictions=predicted_ids, references=references)

def get_trainer(model, train_dataset, val_dataset):
  """Create a Trainer for `model` on the given train/validation datasets.

  The shared module-level `training_args`, `tokenizer` and `compute_metrics`
  are wired in, so every run uses the same settings.
  """
  trainer_kwargs = dict(
      model=model,                      # the instantiated 🤗 Transformers model to be trained
      args=training_args,               # training arguments, defined above
      train_dataset=train_dataset,      # training dataset
      eval_dataset=val_dataset,         # evaluation dataset
      tokenizer=tokenizer,
      compute_metrics=compute_metrics,
  )
  return Trainer(**trainer_kwargs)


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1362.0, style=ProgressStyle(descriptionâ€¦




In [None]:
def write_to_logs(values):
  """Append `values` verbatim (no newline is added) to the file at `metrice_path`."""
  with open(metrice_path, 'a') as log_file:
    log_file.write(values)

In [None]:
 timestamp = datetime.datetime.now().replace(microsecond=0).isoformat().replace(':', '-')
metrice_path = '/content/drive/MyDrive/metrics/hebert_PolEmo2.0_raw2.0' + timestamp + '.txt'
fig_path = '/content/drive/MyDrive/figures/'
dataset_path = '/content/drive/MyDrive/master_datasets/dataset_conll/'


no_samples_per_class = [1, 3, 5, 8, 10, 20, 30, 60, 100, 200] ###### FINALL SAMPLES LIST  
repetitions = 3

domains = [
           ('all', 'MDT-A'),
           ('hotels', 'SDT-H'),
           ('medicine', 'SDT-M'),
           ('products', 'SDT-P'),
           ('reviews', 'SDT-R')
           ]


In [None]:
def benchmark(train, dev, test, n_sample, domain, repeat=None):
  """Fine-tune a fresh model on a per-class sample and score it on `test`.

  Args:
    train, dev, test: DataFrames with 'texts' and 'labels' columns.
    n_sample: nominal number of examples per class; 90% (at least 1) is drawn
      from `train` and 10% (at least 1) from `dev`, per label, with replacement.
    domain: domain name, used only in the log line.
    repeat: repetition index, used only in the log line. When omitted, the
      module-level `repeat` (the driver loop variable) is used if it exists,
      which keeps existing five-argument calls working.

  Returns:
    (accuraccy, train_time, eval_time): three single-element lists holding the
    test accuracy, the training wall time in seconds and the prediction runtime
    reported by the trainer.

  Side effects: prints the result line, appends it to the log via
  write_to_logs(), and deletes the trainer's checkpoint directory.
  """
  if repeat is None:
    # Backward-compatible fallback: previously this function always read the global.
    repeat = globals().get('repeat')

  accuraccy, train_time, eval_time = [], [], []

  # A fresh model per run, so no weights leak between runs.
  model = RobertaForSequenceClassification.from_pretrained("allegro/herbert-klej-cased-v1", num_labels=4) ######## Change num of labels !!!!!
  n_train_per_class = max(math.floor(n_sample * 0.9), 1)
  n_dev_per_class = max(math.floor(n_sample * 0.1), 1)
  train_fraction = train.groupby(['labels']).sample(n=n_train_per_class, replace=True)
  dev_fraction = dev.groupby(['labels']).sample(n=n_dev_per_class, replace=True)
  train_dataset, val_dataset, test_dataset = build_databases(train_fraction, dev_fraction, test)
  trainer = get_trainer(model=model, train_dataset=train_dataset, val_dataset=val_dataset)

  try:
    train_time_start = time.time()
    trainer.train()
    train_elapsed_time = time.time() - train_time_start
    train_time.append(train_elapsed_time)

    metrics = trainer.predict(test_dataset)
    accuraccy.append(metrics.metrics['eval_accuracy'])
    eval_time.append(metrics.metrics['eval_runtime'])
    to_save = f'domiain {domain} n_samples_per_class={n_sample}, repeat={repeat}, time_elapsed={train_elapsed_time}, {metrics.metrics}\n'
    print(to_save)
    write_to_logs(to_save)
  finally:
    # Delete checkpoint files even when training or prediction fails, so a
    # failed run cannot leave stale checkpoints behind; a missing directory
    # is not an error.
    shutil.rmtree(trainer.args.output_dir, ignore_errors=True)

  return accuraccy, train_time, eval_time

In [None]:
print(metrice_path)

# One row per (domain, repetition); columns are the per-class sample sizes.
accuracy_rows, train_time_rows, eval_time_rows = [], [], []

for repeat in range(repetitions):
  for domain, ix_name in domains:
    # Terminate the header with a newline so it does not fuse with the first
    # result line that benchmark() appends to the same log file.
    write_to_logs('%%%%%%%%%%%%%%%% domain: ' + domain + '\n')
    CORPORA_TRAIN = dataset_path + domain + '.text.train.txt'
    CORPORA_DEV = dataset_path + domain + '.text.dev.txt'
    CORPORA_TEST = dataset_path + domain + '.text.test.txt'

    train = load_corpora_to_dataframe(CORPORA_TRAIN)
    dev = load_corpora_to_dataframe(CORPORA_DEV)
    test = load_corpora_to_dataframe(CORPORA_TEST)

    accu_list, train_t, eval_t = [], [], []

    for n_sample in no_samples_per_class:
      accuraccy, train_time, eval_time = benchmark(train, dev, test, n_sample, domain)
      accu_list.append(np.mean(accuraccy))
      train_t.append(np.mean(train_time))
      eval_t.append(np.mean(eval_time))

    # Collect plain rows and build the frames once after the loops, instead of
    # growing a DataFrame inside the loop.
    row_name = ix_name + '_R_' + str(repeat)
    accuracy_rows.append(pd.Series(accu_list, index=no_samples_per_class, name=row_name))
    train_time_rows.append(pd.Series(train_t, index=no_samples_per_class, name=row_name))
    eval_time_rows.append(pd.Series(eval_t, index=no_samples_per_class, name=row_name))

# Summary tables: accuracy, training time and evaluation time.
results_a = pd.DataFrame(accuracy_rows)
results_t = pd.DataFrame(train_time_rows)
results_e = pd.DataFrame(eval_time_rows)

 

/content/drive/MyDrive/metrics/hebert_PolEmo2.0_raw2.02021-02-22T01-43-26.txt


Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.358141,0.5,0.0591,67.669
2,No log,1.358606,0.5,0.045,88.885
3,No log,1.35962,0.5,0.046,86.979
4,No log,1.361152,0.5,0.0495,80.869


domiain all n_samples_per_class=1, repeat=0, time_elapsed=116.78080677986145, {'eval_loss': 1.388495922088623, 'eval_accuracy': 0.21097560975609755, 'eval_runtime': 9.7251, 'eval_samples_per_second': 84.318}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.461183,0.0,0.0437,91.522
2,No log,1.46009,0.0,0.0485,82.44
3,No log,1.457978,0.0,0.0472,84.761
4,No log,1.454741,0.0,0.0473,84.486


domiain all n_samples_per_class=3, repeat=0, time_elapsed=120.78051519393921, {'eval_loss': 1.3671376705169678, 'eval_accuracy': 0.2926829268292683, 'eval_runtime': 10.458, 'eval_samples_per_second': 78.409}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439666,0.25,0.0345,115.802
2,No log,1.43957,0.0,0.0389,102.921
3,No log,1.439163,0.0,0.0362,110.559
4,No log,1.438475,0.0,0.0378,105.878


domiain all n_samples_per_class=5, repeat=0, time_elapsed=128.03509736061096, {'eval_loss': 1.4087063074111938, 'eval_accuracy': 0.1975609756097561, 'eval_runtime': 10.7566, 'eval_samples_per_second': 76.232}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439109,0.5,0.0427,93.608
2,No log,1.432603,0.5,0.0436,91.798
3,No log,1.421985,0.5,0.0427,93.723
4,No log,1.408566,0.5,0.0431,92.769


domiain all n_samples_per_class=8, repeat=0, time_elapsed=131.74066185951233, {'eval_loss': 1.3872382640838623, 'eval_accuracy': 0.23414634146341465, 'eval_runtime': 11.2522, 'eval_samples_per_second': 72.875}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.388692,0.25,0.0531,75.391
2,No log,1.380318,0.25,0.0487,82.163
3,No log,1.367878,0.25,0.0505,79.208
4,1.409200,1.350596,0.25,0.0731,54.682


domiain all n_samples_per_class=10, repeat=0, time_elapsed=134.6136133670807, {'eval_loss': 1.4349567890167236, 'eval_accuracy': 0.13902439024390245, 'eval_runtime': 11.568, 'eval_samples_per_second': 70.885}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.405628,0.25,0.107,74.753
2,1.386200,1.383923,0.375,0.1003,79.733
3,1.386200,1.347862,0.25,0.1005,79.577
4,1.289600,1.303191,0.25,0.0985,81.243


domiain all n_samples_per_class=20, repeat=0, time_elapsed=140.18014669418335, {'eval_loss': 1.3554974794387817, 'eval_accuracy': 0.27926829268292686, 'eval_runtime': 11.5021, 'eval_samples_per_second': 71.291}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.380577,0.25,0.1395,86.008
2,1.395000,1.304811,0.416667,0.1415,84.794
3,1.314100,1.213,0.583333,0.14,85.727
4,1.314100,1.072612,0.666667,0.1395,86.032


domiain all n_samples_per_class=30, repeat=0, time_elapsed=127.60110712051392, {'eval_loss': 1.103852391242981, 'eval_accuracy': 0.6109756097560975, 'eval_runtime': 11.3561, 'eval_samples_per_second': 72.208}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4079,1.332958,0.5,0.3277,73.245
2,1.3188,1.073363,0.625,0.3135,76.549
3,0.9241,0.751946,0.75,0.3079,77.935
4,0.7057,0.426709,0.875,0.326,73.624


domiain all n_samples_per_class=60, repeat=0, time_elapsed=131.5050666332245, {'eval_loss': 0.6195520162582397, 'eval_accuracy': 0.7707317073170732, 'eval_runtime': 11.0151, 'eval_samples_per_second': 74.443}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3563,1.231084,0.6,0.5528,72.365
2,1.0252,0.761877,0.6,0.5282,75.727
3,0.5871,0.575584,0.75,0.5375,74.422
4,0.2642,1.044864,0.675,0.5817,68.769


domiain all n_samples_per_class=100, repeat=0, time_elapsed=139.65147614479065, {'eval_loss': 0.4746408462524414, 'eval_accuracy': 0.8256097560975609, 'eval_runtime': 10.6074, 'eval_samples_per_second': 77.304}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.0189,0.816688,0.6625,1.0994,72.764
2,0.4624,0.687399,0.7875,1.0643,75.165
3,0.3632,0.595227,0.825,1.1019,72.6
4,0.1075,0.518442,0.85,1.0677,74.928


domiain all n_samples_per_class=200, repeat=0, time_elapsed=153.70500707626343, {'eval_loss': 0.5273145437240601, 'eval_accuracy': 0.8268292682926829, 'eval_runtime': 10.5847, 'eval_samples_per_second': 77.47}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.406825,0.25,0.0474,84.437
2,No log,1.405892,0.25,0.0556,71.907
3,No log,1.403973,0.5,0.0498,80.345
4,No log,1.400992,0.5,0.0692,57.775


domiain hotels n_samples_per_class=1, repeat=0, time_elapsed=109.64822721481323, {'eval_loss': 1.470782995223999, 'eval_accuracy': 0.13670886075949368, 'eval_runtime': 5.3577, 'eval_samples_per_second': 73.726}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.405961,0.25,0.0371,107.824
2,No log,1.405231,0.25,0.037,108.23
3,No log,1.403816,0.25,0.0387,103.49
4,No log,1.401784,0.25,0.0396,100.981


domiain hotels n_samples_per_class=3, repeat=0, time_elapsed=134.38909554481506, {'eval_loss': 1.3659207820892334, 'eval_accuracy': 0.25063291139240507, 'eval_runtime': 5.3896, 'eval_samples_per_second': 73.29}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439616,0.25,0.0476,83.973
2,No log,1.438308,0.25,0.0497,80.563
3,No log,1.435704,0.25,0.0489,81.786
4,No log,1.431837,0.25,0.0491,81.395


domiain hotels n_samples_per_class=5, repeat=0, time_elapsed=129.16426491737366, {'eval_loss': 1.4175728559494019, 'eval_accuracy': 0.1518987341772152, 'eval_runtime': 5.3468, 'eval_samples_per_second': 73.875}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.428001,0.5,0.0447,89.435
2,No log,1.419727,0.5,0.0436,91.813
3,No log,1.405723,0.5,0.0442,90.499
4,No log,1.387291,0.5,0.0429,93.329


domiain hotels n_samples_per_class=8, repeat=0, time_elapsed=131.09098625183105, {'eval_loss': 1.42399263381958, 'eval_accuracy': 0.2430379746835443, 'eval_runtime': 5.4115, 'eval_samples_per_second': 72.992}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.416809,0.25,0.0476,84.007
2,No log,1.408452,0.25,0.0479,83.566
3,No log,1.395365,0.25,0.0459,87.125
4,1.434200,1.378989,0.5,0.0456,87.675


domiain hotels n_samples_per_class=10, repeat=0, time_elapsed=119.20806646347046, {'eval_loss': 1.37730073928833, 'eval_accuracy': 0.34430379746835443, 'eval_runtime': 5.3603, 'eval_samples_per_second': 73.69}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.393609,0.375,0.1088,73.552
2,1.389900,1.361024,0.375,0.0992,80.625
3,1.389900,1.307788,0.625,0.1024,78.115
4,1.285800,1.234982,0.75,0.1015,78.808


domiain hotels n_samples_per_class=20, repeat=0, time_elapsed=135.84454727172852, {'eval_loss': 1.2465801239013672, 'eval_accuracy': 0.5746835443037974, 'eval_runtime': 5.3157, 'eval_samples_per_second': 74.308}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.36089,0.416667,0.1686,71.16
2,1.391900,1.281228,0.583333,0.1622,73.964
3,1.310900,1.185818,0.583333,0.1583,75.805
4,1.310900,1.03946,0.666667,0.1674,71.704


domiain hotels n_samples_per_class=30, repeat=0, time_elapsed=137.27091574668884, {'eval_loss': 1.0706162452697754, 'eval_accuracy': 0.6987341772151898, 'eval_runtime': 5.302, 'eval_samples_per_second': 74.5}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4105,1.337584,0.416667,0.3332,72.033
2,1.3249,1.091064,0.541667,0.3157,76.025
3,0.8398,0.789219,0.708333,0.314,76.445
4,0.6332,0.838216,0.666667,0.3296,72.816


domiain hotels n_samples_per_class=60, repeat=0, time_elapsed=138.30836725234985, {'eval_loss': 0.7749429941177368, 'eval_accuracy': 0.6658227848101266, 'eval_runtime': 5.1941, 'eval_samples_per_second': 76.048}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3348,1.193639,0.7,0.5751,69.548
2,0.9423,0.54811,0.825,0.5331,75.033
3,0.43,0.571217,0.775,0.5328,75.072
4,0.304,0.336106,0.875,0.5695,70.238


domiain hotels n_samples_per_class=100, repeat=0, time_elapsed=138.68035697937012, {'eval_loss': 0.41386091709136963, 'eval_accuracy': 0.8126582278481013, 'eval_runtime': 5.1222, 'eval_samples_per_second': 77.116}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,0.9183,0.586043,0.775,1.1114,71.983
2,0.3082,0.53055,0.825,1.0577,75.632
3,0.2178,0.547824,0.8375,1.1117,71.963
4,0.0556,0.651933,0.85,1.0635,75.221


domiain hotels n_samples_per_class=200, repeat=0, time_elapsed=154.65048909187317, {'eval_loss': 0.6584933400154114, 'eval_accuracy': 0.8126582278481013, 'eval_runtime': 5.0266, 'eval_samples_per_second': 78.582}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.42943,0.0,0.0557,71.849
2,No log,1.428205,0.0,0.0571,70.013
3,No log,1.425797,0.0,0.0557,71.776
4,No log,1.422297,0.0,0.052,76.904


domiain medicine n_samples_per_class=1, repeat=0, time_elapsed=129.53111481666565, {'eval_loss': 1.470704436302185, 'eval_accuracy': 0.18654434250764526, 'eval_runtime': 4.4786, 'eval_samples_per_second': 73.014}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.460031,0.0,0.0467,85.681
2,No log,1.459214,0.0,0.0498,80.381
3,No log,1.457566,0.0,0.0487,82.13
4,No log,1.455078,0.0,0.0473,84.635


domiain medicine n_samples_per_class=3, repeat=0, time_elapsed=122.96328973770142, {'eval_loss': 1.3705888986587524, 'eval_accuracy': 0.3547400611620795, 'eval_runtime': 4.4186, 'eval_samples_per_second': 74.005}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.32385,0.25,0.0444,90.043
2,No log,1.322567,0.25,0.048,83.36
3,No log,1.319909,0.25,0.0482,82.92
4,No log,1.315837,0.25,0.0479,83.579


domiain medicine n_samples_per_class=5, repeat=0, time_elapsed=127.81290459632874, {'eval_loss': 1.3991862535476685, 'eval_accuracy': 0.23853211009174313, 'eval_runtime': 4.4614, 'eval_samples_per_second': 73.295}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.436737,0.0,0.0513,78.004
2,No log,1.429152,0.0,0.0531,75.273
3,No log,1.416781,0.0,0.0513,78.002
4,No log,1.400532,0.0,0.0508,78.668


domiain medicine n_samples_per_class=8, repeat=0, time_elapsed=130.669349193573, {'eval_loss': 1.3175863027572632, 'eval_accuracy': 0.26299694189602446, 'eval_runtime': 4.4588, 'eval_samples_per_second': 73.338}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.402624,0.25,0.0414,96.569
2,No log,1.389965,0.25,0.0427,93.768
3,No log,1.36961,0.25,0.0411,97.286
4,1.427800,1.342696,0.25,0.041,97.614


domiain medicine n_samples_per_class=10, repeat=0, time_elapsed=135.69518661499023, {'eval_loss': 1.4153190851211548, 'eval_accuracy': 0.20489296636085627, 'eval_runtime': 4.4601, 'eval_samples_per_second': 73.317}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.357672,0.25,0.1084,73.817
2,1.375400,1.328873,0.375,0.107,74.748
3,1.375400,1.27991,0.75,0.1002,79.805
4,1.267900,1.21756,0.75,0.0995,80.432


domiain medicine n_samples_per_class=20, repeat=0, time_elapsed=123.37501096725464, {'eval_loss': 1.314335584640503, 'eval_accuracy': 0.4648318042813456, 'eval_runtime': 4.4519, 'eval_samples_per_second': 73.451}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.378149,0.333333,0.1687,71.118
2,1.399600,1.289061,0.5,0.162,74.097
3,1.318400,1.183137,0.666667,0.1662,72.19
4,1.318400,1.028352,0.666667,0.1697,70.713


domiain medicine n_samples_per_class=30, repeat=0, time_elapsed=138.45610523223877, {'eval_loss': 1.1922376155853271, 'eval_accuracy': 0.6972477064220184, 'eval_runtime': 4.3664, 'eval_samples_per_second': 74.889}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4099,1.337053,0.375,0.335,71.633
2,1.3219,1.092196,0.666667,0.3208,74.818
3,0.9454,0.786871,0.75,0.3182,75.435
4,0.7943,0.698628,0.75,0.3321,72.257


domiain medicine n_samples_per_class=60, repeat=0, time_elapsed=138.87468814849854, {'eval_loss': 0.8146778345108032, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 4.2951, 'eval_samples_per_second': 76.133}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3264,1.184859,0.6,0.5712,70.028
2,0.9871,0.662861,0.725,0.5374,74.436
3,0.5242,0.449265,0.775,0.5336,74.957
4,0.2795,0.954952,0.6,0.5723,69.892


domiain medicine n_samples_per_class=100, repeat=0, time_elapsed=138.2398841381073, {'eval_loss': 0.6134612560272217, 'eval_accuracy': 0.7339449541284404, 'eval_runtime': 4.221, 'eval_samples_per_second': 77.469}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.037,0.724621,0.7125,1.107,72.269
2,0.5229,0.437205,0.825,1.0496,76.223
3,0.2728,0.593906,0.825,1.1022,72.58
4,0.0639,0.426724,0.9,1.0728,74.568


domiain medicine n_samples_per_class=200, repeat=0, time_elapsed=154.94330286979675, {'eval_loss': 0.5052164793014526, 'eval_accuracy': 0.8776758409785933, 'eval_runtime': 4.2379, 'eval_samples_per_second': 77.162}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.443964,0.0,0.0532,75.125
2,No log,1.443179,0.0,0.063,63.458
3,No log,1.441593,0.0,0.0503,79.549
4,No log,1.439421,0.0,0.0538,74.348


domiain products n_samples_per_class=1, repeat=0, time_elapsed=129.57369375228882, {'eval_loss': 1.5651975870132446, 'eval_accuracy': 0.041666666666666664, 'eval_runtime': 0.6604, 'eval_samples_per_second': 72.678}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.443455,0.25,0.0351,113.87
2,No log,1.441285,0.25,0.0406,98.404
3,No log,1.437124,0.25,0.0385,103.953
4,No log,1.431006,0.25,0.0374,107.043


domiain products n_samples_per_class=3, repeat=0, time_elapsed=126.90430903434753, {'eval_loss': 1.4811333417892456, 'eval_accuracy': 0.041666666666666664, 'eval_runtime': 0.657, 'eval_samples_per_second': 73.056}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.445538,0.5,0.0476,83.991
2,No log,1.444605,0.25,0.0494,81.045
3,No log,1.442913,0.25,0.0505,79.134
4,No log,1.440559,0.25,0.0506,79.048


domiain products n_samples_per_class=5, repeat=0, time_elapsed=128.17705059051514, {'eval_loss': 1.5553652048110962, 'eval_accuracy': 0.020833333333333332, 'eval_runtime': 0.6563, 'eval_samples_per_second': 73.14}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.392622,0.0,0.0482,83.012
2,No log,1.389194,0.0,0.0409,97.812
3,No log,1.38447,0.0,0.0426,93.974
4,No log,1.380576,0.0,0.0441,90.735


domiain products n_samples_per_class=8, repeat=0, time_elapsed=134.00170993804932, {'eval_loss': 1.5079900026321411, 'eval_accuracy': 0.0625, 'eval_runtime': 0.6728, 'eval_samples_per_second': 71.34}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.459293,0.5,0.038,105.174
2,No log,1.451724,0.5,0.039,102.493
3,No log,1.439137,0.25,0.0402,99.593
4,No log,1.425515,0.25,0.0427,93.768


domiain products n_samples_per_class=10, repeat=0, time_elapsed=136.966166973114, {'eval_loss': 1.74907386302948, 'eval_accuracy': 0.020833333333333332, 'eval_runtime': 0.6695, 'eval_samples_per_second': 71.696}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.36458,0.375,0.0819,97.684
2,No log,1.358174,0.375,0.0788,101.536
3,1.341500,1.361621,0.375,0.0818,97.758
4,1.341500,1.387848,0.25,0.0782,102.346


domiain products n_samples_per_class=20, repeat=0, time_elapsed=141.98556566238403, {'eval_loss': 1.1876739263534546, 'eval_accuracy': 0.5625, 'eval_runtime': 0.661, 'eval_samples_per_second': 72.617}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439589,0.25,0.1624,73.872
2,1.370200,1.439453,0.25,0.1675,71.659
3,1.370200,1.493102,0.166667,0.1583,75.792
4,1.224200,1.594452,0.416667,0.1614,74.365


domiain products n_samples_per_class=30, repeat=0, time_elapsed=125.39853239059448, {'eval_loss': 1.195607304573059, 'eval_accuracy': 0.25, 'eval_runtime': 0.6678, 'eval_samples_per_second': 71.874}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4318,1.444815,0.291667,0.3157,76.016
2,1.2009,1.581801,0.375,0.3072,78.121
3,1.0872,1.771036,0.458333,0.3102,77.369
4,0.9594,1.779419,0.458333,0.3182,75.43


domiain products n_samples_per_class=60, repeat=0, time_elapsed=142.96581387519836, {'eval_loss': 1.0509508848190308, 'eval_accuracy': 0.7291666666666666, 'eval_runtime': 0.6377, 'eval_samples_per_second': 75.265}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3603,1.514546,0.2,0.5489,72.868
2,1.0795,1.790059,0.3,0.5476,73.044
3,0.7522,1.598663,0.35,0.5391,74.199
4,0.4689,1.496853,0.625,0.5596,71.476


domiain products n_samples_per_class=100, repeat=0, time_elapsed=142.477881193161, {'eval_loss': 0.6606875061988831, 'eval_accuracy': 0.7708333333333334, 'eval_runtime': 0.6449, 'eval_samples_per_second': 74.428}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.1053,1.743729,0.4125,1.1424,70.027
2,0.5612,1.225292,0.625,1.0729,74.563
3,0.1452,1.798836,0.5125,1.0998,72.739
4,0.0515,1.767499,0.5875,1.12,71.431


domiain products n_samples_per_class=200, repeat=0, time_elapsed=161.32974433898926, {'eval_loss': 0.5667858123779297, 'eval_accuracy': 0.8541666666666666, 'eval_runtime': 0.6311, 'eval_samples_per_second': 76.058}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.42427,0.333333,0.0258,116.382
2,No log,1.422208,0.333333,0.027,111.216
3,No log,1.418241,0.333333,0.0296,101.31
4,No log,1.412444,0.333333,0.0307,97.563


domiain reviews n_samples_per_class=1, repeat=0, time_elapsed=117.5131664276123, {'eval_loss': 1.3604148626327515, 'eval_accuracy': 0.36, 'eval_runtime': 0.674, 'eval_samples_per_second': 74.189}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.578997,0.0,0.0399,75.247
2,No log,1.574619,0.0,0.0349,85.843
3,No log,1.565853,0.0,0.0323,92.833
4,No log,1.552721,0.0,0.0401,74.808


domiain reviews n_samples_per_class=3, repeat=0, time_elapsed=121.06590700149536, {'eval_loss': 1.571073293685913, 'eval_accuracy': 0.0, 'eval_runtime': 0.6955, 'eval_samples_per_second': 71.89}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.507921,0.0,0.0356,84.238
2,No log,1.504487,0.0,0.0371,80.821
3,No log,1.497536,0.0,0.0379,79.076
4,No log,1.487531,0.333333,0.0367,81.649


domiain reviews n_samples_per_class=5, repeat=0, time_elapsed=109.44407653808594, {'eval_loss': 1.4510514736175537, 'eval_accuracy': 0.3, 'eval_runtime': 0.6956, 'eval_samples_per_second': 71.882}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.362964,0.666667,0.0314,95.41
2,No log,1.348676,0.666667,0.0307,97.612
3,No log,1.324812,0.666667,0.0301,99.514
4,No log,1.293619,0.666667,0.0323,92.783


domiain reviews n_samples_per_class=8, repeat=0, time_elapsed=143.69496393203735, {'eval_loss': 1.3195005655288696, 'eval_accuracy': 0.44, 'eval_runtime': 0.6822, 'eval_samples_per_second': 73.295}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.446671,0.0,0.0327,91.843
2,No log,1.426358,0.0,0.0318,94.398
3,No log,1.391212,0.333333,0.0325,92.426
4,No log,1.345725,0.333333,0.0324,92.734


domiain reviews n_samples_per_class=10, repeat=0, time_elapsed=117.59122681617737, {'eval_loss': 1.3297404050827026, 'eval_accuracy': 0.3, 'eval_runtime': 0.7256, 'eval_samples_per_second': 68.907}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.324803,0.166667,0.0768,78.113
2,No log,1.278116,0.166667,0.0756,79.408
3,1.318700,1.215451,0.333333,0.0822,73.016
4,1.318700,1.154832,0.333333,0.0779,77.037


domiain reviews n_samples_per_class=20, repeat=0, time_elapsed=137.97349905967712, {'eval_loss': 1.2557663917541504, 'eval_accuracy': 0.38, 'eval_runtime': 0.673, 'eval_samples_per_second': 74.299}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.377288,0.333333,0.0933,96.415
2,1.328200,1.277479,0.333333,0.0968,93.017
3,1.328200,1.167426,0.333333,0.0939,95.881
4,1.163800,1.086265,0.333333,0.0951,94.606


domiain reviews n_samples_per_class=30, repeat=0, time_elapsed=149.21026229858398, {'eval_loss': 1.3282934427261353, 'eval_accuracy': 0.48, 'eval_runtime': 0.6969, 'eval_samples_per_second': 71.749}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4599,1.309835,0.555556,0.2278,79.024
2,1.2187,1.099038,0.611111,0.2308,77.999
3,1.0929,1.019083,0.5,0.2275,79.125
4,0.9745,0.913331,0.5,0.2332,77.176


domiain reviews n_samples_per_class=60, repeat=0, time_elapsed=141.02639985084534, {'eval_loss': 1.1157448291778564, 'eval_accuracy': 0.52, 'eval_runtime': 0.6806, 'eval_samples_per_second': 73.465}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3482,1.140528,0.5,0.4184,71.693
2,1.0725,1.028953,0.366667,0.4135,72.556
3,0.7694,0.676516,0.866667,0.4008,74.845
4,0.4593,0.400806,0.866667,0.4183,71.726


domiain reviews n_samples_per_class=100, repeat=0, time_elapsed=131.75580024719238, {'eval_loss': 0.6051061749458313, 'eval_accuracy': 0.84, 'eval_runtime': 0.6753, 'eval_samples_per_second': 74.042}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.1087,1.033062,0.433333,0.8495,70.629
2,0.5669,0.469685,0.816667,0.7961,75.372
3,0.265,0.738197,0.766667,0.8273,72.522
4,0.0753,0.609273,0.766667,0.828,72.468


domiain reviews n_samples_per_class=200, repeat=0, time_elapsed=159.37267088890076, {'eval_loss': 0.557488203048706, 'eval_accuracy': 0.8, 'eval_runtime': 0.7215, 'eval_samples_per_second': 69.296}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.41793,0.25,0.0488,81.967
2,No log,1.417454,0.25,0.0638,62.68
3,No log,1.416458,0.25,0.0576,69.452
4,No log,1.414506,0.25,0.0517,77.343


domiain all n_samples_per_class=1, repeat=1, time_elapsed=119.75889301300049, {'eval_loss': 1.5145604610443115, 'eval_accuracy': 0.14634146341463414, 'eval_runtime': 11.0042, 'eval_samples_per_second': 74.517}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.461183,0.0,0.0461,86.838
2,No log,1.46009,0.0,0.0476,84.105
3,No log,1.457978,0.0,0.0507,78.952
4,No log,1.454741,0.0,0.0499,80.172


domiain all n_samples_per_class=3, repeat=1, time_elapsed=121.17298316955566, {'eval_loss': 1.3671376705169678, 'eval_accuracy': 0.2926829268292683, 'eval_runtime': 11.2096, 'eval_samples_per_second': 73.152}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439666,0.25,0.0371,107.9
2,No log,1.43957,0.0,0.037,108.017
3,No log,1.439163,0.0,0.0402,99.598
4,No log,1.438475,0.0,0.0361,110.777


domiain all n_samples_per_class=5, repeat=1, time_elapsed=128.76265621185303, {'eval_loss': 1.4087063074111938, 'eval_accuracy': 0.1975609756097561, 'eval_runtime': 11.266, 'eval_samples_per_second': 72.785}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439109,0.5,0.0437,91.62
2,No log,1.432603,0.5,0.043,92.95
3,No log,1.421985,0.5,0.0409,97.839
4,No log,1.408566,0.5,0.0439,91.181


domiain all n_samples_per_class=8, repeat=1, time_elapsed=131.0605447292328, {'eval_loss': 1.3872382640838623, 'eval_accuracy': 0.23414634146341465, 'eval_runtime': 11.4945, 'eval_samples_per_second': 71.338}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.388692,0.25,0.0524,76.265
2,No log,1.380318,0.25,0.0497,80.452
3,No log,1.367878,0.25,0.0496,80.704
4,1.409200,1.350596,0.25,0.0524,76.365


domiain all n_samples_per_class=10, repeat=1, time_elapsed=134.65541315078735, {'eval_loss': 1.4349567890167236, 'eval_accuracy': 0.13902439024390245, 'eval_runtime': 11.4101, 'eval_samples_per_second': 71.866}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.405628,0.25,0.106,75.455
2,1.386200,1.383923,0.375,0.1033,77.443
3,1.386200,1.347862,0.25,0.823,9.721
4,1.289600,1.303191,0.25,0.0965,82.906


domiain all n_samples_per_class=20, repeat=1, time_elapsed=139.8826198577881, {'eval_loss': 1.3554974794387817, 'eval_accuracy': 0.27926829268292686, 'eval_runtime': 10.9805, 'eval_samples_per_second': 74.678}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.380577,0.25,0.1392,86.228
2,1.395000,1.304811,0.416667,0.1404,85.478
3,1.314100,1.213,0.583333,0.1436,83.571
4,1.314100,1.072612,0.666667,0.1401,85.633


domiain all n_samples_per_class=30, repeat=1, time_elapsed=128.8550055027008, {'eval_loss': 1.103852391242981, 'eval_accuracy': 0.6109756097560975, 'eval_runtime': 11.1242, 'eval_samples_per_second': 73.713}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4079,1.332958,0.5,0.3247,73.915
2,1.3188,1.073363,0.625,0.3176,75.56
3,0.9241,0.751946,0.75,0.3082,77.872
4,0.7057,0.426709,0.875,0.3184,75.382


domiain all n_samples_per_class=60, repeat=1, time_elapsed=132.65745067596436, {'eval_loss': 0.6195520162582397, 'eval_accuracy': 0.7707317073170732, 'eval_runtime': 11.132, 'eval_samples_per_second': 73.661}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3563,1.231084,0.6,0.5448,73.422
2,1.0252,0.761877,0.6,0.5282,75.728
3,0.5871,0.575584,0.75,0.5377,74.385
4,0.2642,1.044864,0.675,0.56,71.429


domiain all n_samples_per_class=100, repeat=1, time_elapsed=138.59173893928528, {'eval_loss': 0.4746408462524414, 'eval_accuracy': 0.8256097560975609, 'eval_runtime': 10.7181, 'eval_samples_per_second': 76.506}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.0189,0.816688,0.6625,1.09,73.398
2,0.4624,0.687399,0.7875,1.0576,75.646
3,0.3632,0.595227,0.825,1.1096,72.1
4,0.1075,0.518442,0.85,1.0696,74.791


domiain all n_samples_per_class=200, repeat=1, time_elapsed=154.53856444358826, {'eval_loss': 0.5273145437240601, 'eval_accuracy': 0.8268292682926829, 'eval_runtime': 10.5671, 'eval_samples_per_second': 77.599}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.406825,0.25,0.0463,86.441
2,No log,1.405892,0.25,0.07,57.117
3,No log,1.403973,0.5,0.0507,78.93
4,No log,1.400992,0.5,0.0529,75.673


domiain hotels n_samples_per_class=1, repeat=1, time_elapsed=112.04147028923035, {'eval_loss': 1.470782995223999, 'eval_accuracy': 0.13670886075949368, 'eval_runtime': 5.4621, 'eval_samples_per_second': 72.317}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.405961,0.25,0.0351,113.979
2,No log,1.405231,0.25,0.0373,107.22
3,No log,1.403816,0.25,0.0387,103.42
4,No log,1.401784,0.25,0.0379,105.53


domiain hotels n_samples_per_class=3, repeat=1, time_elapsed=135.39200401306152, {'eval_loss': 1.3659207820892334, 'eval_accuracy': 0.25063291139240507, 'eval_runtime': 5.4715, 'eval_samples_per_second': 72.192}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.439616,0.25,0.0466,85.886
2,No log,1.438308,0.25,0.0521,76.804
3,No log,1.435704,0.25,0.0495,80.752
4,No log,1.431837,0.25,0.0511,78.22


domiain hotels n_samples_per_class=5, repeat=1, time_elapsed=128.40781617164612, {'eval_loss': 1.4175728559494019, 'eval_accuracy': 0.1518987341772152, 'eval_runtime': 5.4439, 'eval_samples_per_second': 72.559}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.428001,0.5,0.0431,92.884
2,No log,1.419727,0.5,0.0454,88.02
3,No log,1.405723,0.5,0.0467,85.731
4,No log,1.387291,0.5,0.0439,91.105


domiain hotels n_samples_per_class=8, repeat=1, time_elapsed=133.01650023460388, {'eval_loss': 1.42399263381958, 'eval_accuracy': 0.2430379746835443, 'eval_runtime': 5.4898, 'eval_samples_per_second': 71.952}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.416809,0.25,0.0489,81.789
2,No log,1.408452,0.25,0.0456,87.737
3,No log,1.395365,0.25,0.0456,87.802
4,1.434200,1.378989,0.5,0.0447,89.407


domiain hotels n_samples_per_class=10, repeat=1, time_elapsed=119.30197286605835, {'eval_loss': 1.37730073928833, 'eval_accuracy': 0.34430379746835443, 'eval_runtime': 5.4748, 'eval_samples_per_second': 72.149}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.393609,0.375,0.111,72.079
2,1.389900,1.361024,0.375,0.0959,83.424
3,1.389900,1.307788,0.625,0.104,76.954
4,1.285800,1.234982,0.75,0.1074,74.519


domiain hotels n_samples_per_class=20, repeat=1, time_elapsed=134.6605625152588, {'eval_loss': 1.2465801239013672, 'eval_accuracy': 0.5746835443037974, 'eval_runtime': 5.2443, 'eval_samples_per_second': 75.32}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.36089,0.416667,0.1208,99.341
2,1.391900,1.281228,0.583333,0.1707,70.295
3,1.310900,1.185818,0.583333,0.167,71.872
4,1.310900,1.03946,0.666667,0.1724,69.601


domiain hotels n_samples_per_class=30, repeat=1, time_elapsed=137.80446815490723, {'eval_loss': 1.0706162452697754, 'eval_accuracy': 0.6987341772151898, 'eval_runtime': 5.2851, 'eval_samples_per_second': 74.738}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4105,1.337584,0.416667,0.3324,72.197
2,1.3249,1.091064,0.541667,0.3156,76.039
3,0.8398,0.789219,0.708333,0.3105,77.284
4,0.6332,0.838216,0.666667,0.3335,71.971


domiain hotels n_samples_per_class=60, repeat=1, time_elapsed=137.70158910751343, {'eval_loss': 0.7749429941177368, 'eval_accuracy': 0.6658227848101266, 'eval_runtime': 5.1835, 'eval_samples_per_second': 76.203}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3348,1.193639,0.7,0.5764,69.398
2,0.9423,0.54811,0.825,0.5396,74.13
3,0.43,0.571217,0.775,0.5374,74.433
4,0.304,0.336106,0.875,0.5767,69.36


domiain hotels n_samples_per_class=100, repeat=1, time_elapsed=138.17340326309204, {'eval_loss': 0.41386091709136963, 'eval_accuracy': 0.8126582278481013, 'eval_runtime': 5.0821, 'eval_samples_per_second': 77.724}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,0.9183,0.586043,0.775,1.1143,71.794
2,0.3082,0.53055,0.825,1.0546,75.859
3,0.2178,0.547824,0.8375,1.112,71.943
4,0.0556,0.651933,0.85,1.0728,74.569


domiain hotels n_samples_per_class=200, repeat=1, time_elapsed=154.7972228527069, {'eval_loss': 0.6584933400154114, 'eval_accuracy': 0.8126582278481013, 'eval_runtime': 5.0819, 'eval_samples_per_second': 77.727}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.42943,0.0,0.0518,77.253
2,No log,1.428205,0.0,0.051,78.4
3,No log,1.425797,0.0,0.0727,54.987
4,No log,1.422297,0.0,0.0774,51.654


domiain medicine n_samples_per_class=1, repeat=1, time_elapsed=130.37807059288025, {'eval_loss': 1.470704436302185, 'eval_accuracy': 0.18654434250764526, 'eval_runtime': 4.4933, 'eval_samples_per_second': 72.775}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.460031,0.0,0.0451,88.658
2,No log,1.459214,0.0,0.0505,79.26
3,No log,1.457566,0.0,0.0483,82.858
4,No log,1.455078,0.0,0.0506,79.106


domiain medicine n_samples_per_class=3, repeat=1, time_elapsed=122.23682403564453, {'eval_loss': 1.3705888986587524, 'eval_accuracy': 0.3547400611620795, 'eval_runtime': 4.5352, 'eval_samples_per_second': 72.103}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.32385,0.25,0.0465,85.986
2,No log,1.322567,0.25,0.0503,79.576
3,No log,1.319909,0.25,0.0491,81.524
4,No log,1.315837,0.25,0.0505,79.163


domiain medicine n_samples_per_class=5, repeat=1, time_elapsed=128.80579686164856, {'eval_loss': 1.3991862535476685, 'eval_accuracy': 0.23853211009174313, 'eval_runtime': 4.5141, 'eval_samples_per_second': 72.439}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.436737,0.0,0.0513,77.93
2,No log,1.429152,0.0,0.0522,76.587
3,No log,1.416781,0.0,0.0507,78.878
4,No log,1.400532,0.0,0.0539,74.23


domiain medicine n_samples_per_class=8, repeat=1, time_elapsed=131.86017179489136, {'eval_loss': 1.3175863027572632, 'eval_accuracy': 0.26299694189602446, 'eval_runtime': 4.506, 'eval_samples_per_second': 72.569}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.402624,0.25,0.0419,95.454
2,No log,1.389965,0.25,0.0422,94.805
3,No log,1.36961,0.25,0.0396,100.957
4,1.427800,1.342696,0.25,0.041,97.596


domiain medicine n_samples_per_class=10, repeat=1, time_elapsed=134.00989985466003, {'eval_loss': 1.4153190851211548, 'eval_accuracy': 0.20489296636085627, 'eval_runtime': 4.4802, 'eval_samples_per_second': 72.988}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.357672,0.25,0.1075,74.439
2,1.375400,1.328873,0.375,0.1014,78.861
3,1.375400,1.27991,0.75,0.0969,82.597
4,1.267900,1.21756,0.75,0.1001,79.938


domiain medicine n_samples_per_class=20, repeat=1, time_elapsed=124.19617009162903, {'eval_loss': 1.314335584640503, 'eval_accuracy': 0.4648318042813456, 'eval_runtime': 4.4898, 'eval_samples_per_second': 72.831}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.378149,0.333333,0.1684,71.243
2,1.399600,1.289061,0.5,0.1623,73.946
3,1.318400,1.183137,0.666667,0.1651,72.673
4,1.318400,1.028352,0.666667,0.1716,69.929


domiain medicine n_samples_per_class=30, repeat=1, time_elapsed=135.9996771812439, {'eval_loss': 1.1922376155853271, 'eval_accuracy': 0.6972477064220184, 'eval_runtime': 4.3167, 'eval_samples_per_second': 75.752}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.4099,1.337053,0.375,0.3294,72.856
2,1.3219,1.092196,0.666667,0.3196,75.096
3,0.9454,0.786871,0.75,0.3217,74.602
4,0.7943,0.698628,0.75,0.3252,73.794


domiain medicine n_samples_per_class=60, repeat=1, time_elapsed=139.10473108291626, {'eval_loss': 0.8146778345108032, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 4.367, 'eval_samples_per_second': 74.88}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.3264,1.184859,0.6,0.5685,70.363
2,0.9871,0.662861,0.725,0.5413,73.899
3,0.5242,0.449265,0.775,0.5304,75.412
4,0.2795,0.954952,0.6,0.5563,71.907


domiain medicine n_samples_per_class=100, repeat=1, time_elapsed=138.37388706207275, {'eval_loss': 0.6134612560272217, 'eval_accuracy': 0.7339449541284404, 'eval_runtime': 4.2586, 'eval_samples_per_second': 76.785}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,1.037,0.724621,0.7125,1.1076,72.228
2,0.5229,0.437205,0.825,1.0536,75.931
3,0.2728,0.593906,0.825,1.1127,71.897
4,0.0639,0.426724,0.9,1.0764,74.323


domiain medicine n_samples_per_class=200, repeat=1, time_elapsed=153.32962250709534, {'eval_loss': 0.5052164793014526, 'eval_accuracy': 0.8776758409785933, 'eval_runtime': 4.1971, 'eval_samples_per_second': 77.911}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.443964,0.0,0.0475,84.218
2,No log,1.443179,0.0,0.0499,80.097
3,No log,1.441593,0.0,0.0846,47.296
4,No log,1.439421,0.0,0.0521,76.758


domiain products n_samples_per_class=1, repeat=1, time_elapsed=132.22044968605042, {'eval_loss': 1.5651975870132446, 'eval_accuracy': 0.041666666666666664, 'eval_runtime': 0.6772, 'eval_samples_per_second': 70.875}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.443455,0.25,0.0342,117.019
2,No log,1.441285,0.25,0.0409,97.726
3,No log,1.437124,0.25,0.0409,97.766
4,No log,1.431006,0.25,0.0392,102.025


domiain products n_samples_per_class=3, repeat=1, time_elapsed=124.70336437225342, {'eval_loss': 1.4811333417892456, 'eval_accuracy': 0.041666666666666664, 'eval_runtime': 0.6742, 'eval_samples_per_second': 71.19}



Some weights of the model checkpoint at allegro/herbert-klej-cased-v1 were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allegro/herbert-klej-cased-v1 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,1.445538,0.5,0.048,83.304
2,No log,1.444605,0.25,0.0499,80.126
