# Few shot Fine Tuning on Cola Data Set - Baseline

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd '/content/drive/MyDrive/LLM/llm_finetuning/notebooks'
!ls

/content/drive/MyDrive/LLM/llm_finetuning/notebooks
pbft_cola_baseline.ipynb  pre_trained_opt_with_inference.ipynb	vanilla_cola_baseline.ipynb
pbft_mnli_baseline.ipynb  results


In [3]:
curr_filename = "vanilla_cola_baseline"

In [4]:
!pip install -q transformers accelerate bitsandbytes datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m49.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
from datasets import load_dataset, ClassLabel
from sklearn.metrics import accuracy_score
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, AdamW, AutoConfig
import numpy as np
import pandas as pd
import torch

# Set seed, load COLA dataset

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

in_domain_data = load_dataset("glue", "cola")


# Define model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

config = AutoConfig.from_pretrained("facebook/opt-125m", num_labels=2, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1)
model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)

# Function to load and parse out-of-domain COLA dataset
'''
\cite: https://github.com/uds-lsv/llmft/blob/main/task_utils.py
'''
def load_cola_ood_dataset(path, label=None, cache_dir=None):
    data_files = {"validation": path}
    dataset = load_dataset("csv", data_files=data_files, sep="\t", column_names=[
                           'code', 'label', 'annotation', 'sentence'], cache_dir=cache_dir)
    dataset = dataset["validation"]

    # cola-ood comes without indices, so we add them
    indices = list(range(len(dataset)))
    dataset = dataset.add_column(name="idx", column=indices)

    subset = "cola-ood"

    if label is not None:  # filter dataset based on label
        dataset = dataset.filter(
            lambda example: example["label"] == label)
        subset = f"{subset}-{'acceptable' if label == 1 else 'unacceptable'}"

    return dataset, subset


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}


def manipulate_inputs_for_cola_with_prompt(inputs):
    # Add pattern/prompts
    #inputs = tokenizer(["Yes or No?" + sentence for sentence in inputs["sentence"]], truncation=True, padding="max_length", max_length=128)
    inputs = tokenizer(inputs["sentence"], truncation=True, padding="max_length", max_length=128)

    return inputs

# augment in-domain-data
in_domain_data = in_domain_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)

# augment out of domain data
eval_ood_data, _ = load_cola_ood_dataset(path='../datafiles/dev.tsv')
eval_ood_data = eval_ood_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)


# Define parameters for training experiments (per reference paper)

few_shot_sample_size = [2, 16, 32, 64, 128]  # number of examples for each class
num_epochs = 40
batch_size = 32
learning_rate = 1e-5
weight_decay = 0.
warmup_ratio = 0.1
num_runs = 10
optimizer = AdamW(model.parameters(), lr=learning_rate) # AdamW optimizer



results_df = pd.DataFrame(columns=["n", "run", "in_domain_accuracy", "out_of_domain_accuracy"])

for n in few_shot_sample_size:
    for run_idx in range(num_runs):  # repeat 10 times for each n
        # re-iniialize model for each run
        model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)
        optimizer = AdamW(model.parameters(), lr=learning_rate)

        # Select n random examples for each class from the original data
        indices_yes = np.where(np.array(in_domain_data["train"]["label"]) == 0)[0]
        indices_no = np.where(np.array(in_domain_data["train"]["label"]) == 1)[0]
        indices_yes = np.random.choice(indices_yes, n, replace=False)
        indices_no = np.random.choice(indices_no, n, replace=False)
        indices = np.concatenate([indices_yes, indices_no])

        # Select the examples for the new training set
        train_dataset = in_domain_data["train"].select(indices)

        # Define training config
        # Total steps = (#samples/batch size) * epochs
        total_steps = (len(train_dataset) // batch_size) * num_epochs

        training_args = TrainingArguments(
            output_dir = "./results",
            overwrite_output_dir = True,
            num_train_epochs = num_epochs,
            per_device_train_batch_size = batch_size,
            learning_rate = learning_rate,
            weight_decay = weight_decay,
            save_steps = 10_000,
            save_total_limit = 2,
            warmup_steps = int(warmup_ratio * total_steps),
        )

        # Define the trainer
        trainer = Trainer(
            model = model,
            args=training_args,
            train_dataset = train_dataset,
            compute_metrics = compute_metrics,
            optimizers=(optimizer, None),
        )

        # Train the model
        trainer.train()

        # Evaluate in-domain performance
        print(f"Evaluating in-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=in_domain_data["validation"])

        # Store the in-domain accuracy
        in_domain_accuracy = eval_results["eval_accuracy"]

        # Print the in-domain evaluation results
        for key, value in eval_results.items():
            print(f"In-domain {key}: {value}")

        # Evaluate out-of-domain performance
        print(f"Evaluating out-of-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=eval_ood_data)

        # Store the out-of-domain accuracy
        out_of_domain_accuracy = eval_results["eval_accuracy"]

        # Print the out-of-domain evaluation results
        for key, value in eval_results.items():
            print(f"Out-of-domain {key}: {value}")

        # Add the results to the DataFrame
        new_row = pd.DataFrame({
            "n": [n],
            "run": [run_idx],
            "in_domain_accuracy": [in_domain_accuracy],
            "out_of_domain_accuracy": [out_of_domain_accuracy]
        })
        results_df = pd.concat([results_df, new_row], ignore_index=True)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/251k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/37.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/37.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/251M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/516 [00:00<?, ? examples/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.782650351524353
In-domain eval_accuracy: 0.673058485139022
In-domain eval_runtime: 6.393
In-domain eval_samples_per_second: 163.148
In-domain eval_steps_per_second: 20.491
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7729288339614868
Out-of-domain eval_accuracy: 0.6802325581395349
Out-of-domain eval_runtime: 3.242
Out-of-domain eval_samples_per_second: 159.16
Out-of-domain eval_steps_per_second: 20.049
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.5112
In-domain eval_samples_per_second: 160.186
In-domain eval_steps_per_second: 20.119
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.2802
Out-of-domain eval_samples_per_second: 157.307
Out-of-domain eval_steps_per_second: 19.816
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.6435
In-domain eval_samples_per_second: 156.996
In-domain eval_steps_per_second: 19.719
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.3532
Out-of-domain eval_samples_per_second: 153.882
Out-of-domain eval_steps_per_second: 19.384
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.7996
In-domain eval_samples_per_second: 153.392
In-domain eval_steps_per_second: 19.266
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.4313
Out-of-domain eval_samples_per_second: 150.38
Out-of-domain eval_steps_per_second: 18.943
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 6.9626
In-domain eval_samples_per_second: 149.801
In-domain eval_steps_per_second: 18.815
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.5244
Out-of-domain eval_samples_per_second: 146.409
Out-of-domain eval_steps_per_second: 18.443
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 7.1234
In-domain eval_samples_per_second: 146.42
In-domain eval_steps_per_second: 18.39
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.5963
Out-of-domain eval_samples_per_second: 143.482
Out-of-domain eval_steps_per_second: 18.074
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 7.2969
In-domain eval_samples_per_second: 142.938
In-domain eval_steps_per_second: 17.953
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.6631
Out-of-domain eval_samples_per_second: 140.865
Out-of-domain eval_steps_per_second: 17.745
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 7.2103
In-domain eval_samples_per_second: 144.655
In-domain eval_steps_per_second: 18.169
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.6042
Out-of-domain eval_samples_per_second: 143.166
Out-of-domain eval_steps_per_second: 18.034
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 7.1006
In-domain eval_samples_per_second: 146.889
In-domain eval_steps_per_second: 18.449
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.5662
Out-of-domain eval_samples_per_second: 144.692
Out-of-domain eval_steps_per_second: 18.227
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.8257512450218201
In-domain eval_accuracy: 0.5225311601150527
In-domain eval_runtime: 7.0821
In-domain eval_samples_per_second: 147.273
In-domain eval_steps_per_second: 18.497
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8057412505149841
Out-of-domain eval_accuracy: 0.5465116279069767
Out-of-domain eval_runtime: 3.5573
Out-of-domain eval_samples_per_second: 145.054
Out-of-domain eval_steps_per_second: 18.272
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1666
In-domain eval_samples_per_second: 145.535
In-domain eval_steps_per_second: 18.279
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6056
Out-of-domain eval_samples_per_second: 143.112
Out-of-domain eval_steps_per_second: 18.028
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1622
In-domain eval_samples_per_second: 145.625
In-domain eval_steps_per_second: 18.29
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.5983
Out-of-domain eval_samples_per_second: 143.401
Out-of-domain eval_steps_per_second: 18.064
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1681
In-domain eval_samples_per_second: 145.506
In-domain eval_steps_per_second: 18.275
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6062
Out-of-domain eval_samples_per_second: 143.085
Out-of-domain eval_steps_per_second: 18.024
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1809
In-domain eval_samples_per_second: 145.246
In-domain eval_steps_per_second: 18.243
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6188
Out-of-domain eval_samples_per_second: 142.588
Out-of-domain eval_steps_per_second: 17.962
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1973
In-domain eval_samples_per_second: 144.916
In-domain eval_steps_per_second: 18.201
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6178
Out-of-domain eval_samples_per_second: 142.63
Out-of-domain eval_steps_per_second: 17.967
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1537
In-domain eval_samples_per_second: 145.799
In-domain eval_steps_per_second: 18.312
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6098
Out-of-domain eval_samples_per_second: 142.944
Out-of-domain eval_steps_per_second: 18.006
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1321
In-domain eval_samples_per_second: 146.24
In-domain eval_steps_per_second: 18.368
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.5964
Out-of-domain eval_samples_per_second: 143.475
Out-of-domain eval_steps_per_second: 18.073
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1395
In-domain eval_samples_per_second: 146.089
In-domain eval_steps_per_second: 18.349
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.5804
Out-of-domain eval_samples_per_second: 144.118
Out-of-domain eval_steps_per_second: 18.154
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1446
In-domain eval_samples_per_second: 145.985
In-domain eval_steps_per_second: 18.336
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.5916
Out-of-domain eval_samples_per_second: 143.67
Out-of-domain eval_steps_per_second: 18.098
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.8661156892776489
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1741
In-domain eval_samples_per_second: 145.384
In-domain eval_steps_per_second: 18.26
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.8563407063484192
Out-of-domain eval_accuracy: 0.5310077519379846
Out-of-domain eval_runtime: 3.6065
Out-of-domain eval_samples_per_second: 143.073
Out-of-domain eval_steps_per_second: 18.023
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.0942018032073975
In-domain eval_accuracy: 0.573346116970278
In-domain eval_runtime: 7.1576
In-domain eval_samples_per_second: 145.718
In-domain eval_steps_per_second: 18.302
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.0560842752456665
Out-of-domain eval_accuracy: 0.5852713178294574
Out-of-domain eval_runtime: 3.5776
Out-of-domain eval_samples_per_second: 144.23
Out-of-domain eval_steps_per_second: 18.169
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.2083
In-domain eval_samples_per_second: 144.694
In-domain eval_steps_per_second: 18.173
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.6141
Out-of-domain eval_samples_per_second: 142.775
Out-of-domain eval_steps_per_second: 17.985
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1949
In-domain eval_samples_per_second: 144.964
In-domain eval_steps_per_second: 18.207
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.616
Out-of-domain eval_samples_per_second: 142.7
Out-of-domain eval_steps_per_second: 17.976
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1948
In-domain eval_samples_per_second: 144.966
In-domain eval_steps_per_second: 18.208
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.6009
Out-of-domain eval_samples_per_second: 143.299
Out-of-domain eval_steps_per_second: 18.051
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1357
In-domain eval_samples_per_second: 146.167
In-domain eval_steps_per_second: 18.358
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5834
Out-of-domain eval_samples_per_second: 143.999
Out-of-domain eval_steps_per_second: 18.139
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1634
In-domain eval_samples_per_second: 145.601
In-domain eval_steps_per_second: 18.287
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5948
Out-of-domain eval_samples_per_second: 143.541
Out-of-domain eval_steps_per_second: 18.082
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1339
In-domain eval_samples_per_second: 146.203
In-domain eval_steps_per_second: 18.363
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5893
Out-of-domain eval_samples_per_second: 143.76
Out-of-domain eval_steps_per_second: 18.109
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1611
In-domain eval_samples_per_second: 145.648
In-domain eval_steps_per_second: 18.293
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5982
Out-of-domain eval_samples_per_second: 143.406
Out-of-domain eval_steps_per_second: 18.065
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1869
In-domain eval_samples_per_second: 145.126
In-domain eval_steps_per_second: 18.228
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5921
Out-of-domain eval_samples_per_second: 143.647
Out-of-domain eval_steps_per_second: 18.095
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.9714317917823792
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1692
In-domain eval_samples_per_second: 145.484
In-domain eval_steps_per_second: 18.273
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.9183535575866699
Out-of-domain eval_accuracy: 0.5484496124031008
Out-of-domain eval_runtime: 3.5852
Out-of-domain eval_samples_per_second: 143.927
Out-of-domain eval_steps_per_second: 18.13
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.4793576002120972
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 7.1539
In-domain eval_samples_per_second: 145.795
In-domain eval_steps_per_second: 18.312
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.3956272602081299
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 3.5925
Out-of-domain eval_samples_per_second: 143.633
Out-of-domain eval_steps_per_second: 18.093
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1634
In-domain eval_samples_per_second: 145.602
In-domain eval_steps_per_second: 18.288
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5838
Out-of-domain eval_samples_per_second: 143.982
Out-of-domain eval_steps_per_second: 18.137
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1533
In-domain eval_samples_per_second: 145.806
In-domain eval_steps_per_second: 18.313
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.595
Out-of-domain eval_samples_per_second: 143.532
Out-of-domain eval_steps_per_second: 18.081
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1532
In-domain eval_samples_per_second: 145.808
In-domain eval_steps_per_second: 18.313
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5834
Out-of-domain eval_samples_per_second: 143.998
Out-of-domain eval_steps_per_second: 18.139
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1756
In-domain eval_samples_per_second: 145.354
In-domain eval_steps_per_second: 18.256
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5855
Out-of-domain eval_samples_per_second: 143.912
Out-of-domain eval_steps_per_second: 18.128
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1608
In-domain eval_samples_per_second: 145.654
In-domain eval_steps_per_second: 18.294
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5911
Out-of-domain eval_samples_per_second: 143.69
Out-of-domain eval_steps_per_second: 18.1
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.178
In-domain eval_samples_per_second: 145.305
In-domain eval_steps_per_second: 18.25
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5925
Out-of-domain eval_samples_per_second: 143.632
Out-of-domain eval_steps_per_second: 18.093
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.171
In-domain eval_samples_per_second: 145.448
In-domain eval_steps_per_second: 18.268
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5833
Out-of-domain eval_samples_per_second: 144.003
Out-of-domain eval_steps_per_second: 18.14
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1427
In-domain eval_samples_per_second: 146.023
In-domain eval_steps_per_second: 18.34
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5898
Out-of-domain eval_samples_per_second: 143.739
Out-of-domain eval_steps_per_second: 18.107
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.94387686252594
In-domain eval_accuracy: 0.588686481303931
In-domain eval_runtime: 7.1366
In-domain eval_samples_per_second: 146.147
In-domain eval_steps_per_second: 18.356
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.8603342771530151
Out-of-domain eval_accuracy: 0.5717054263565892
Out-of-domain eval_runtime: 3.5994
Out-of-domain eval_samples_per_second: 143.358
Out-of-domain eval_steps_per_second: 18.059
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.445613145828247
In-domain eval_accuracy: 0.6308724832214765
In-domain eval_runtime: 7.1678
In-domain eval_samples_per_second: 145.511
In-domain eval_steps_per_second: 18.276
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.3669300079345703
Out-of-domain eval_accuracy: 0.624031007751938
Out-of-domain eval_runtime: 3.5833
Out-of-domain eval_samples_per_second: 144.002
Out-of-domain eval_steps_per_second: 18.14
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1408
In-domain eval_samples_per_second: 146.063
In-domain eval_steps_per_second: 18.345
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.5681
Out-of-domain eval_samples_per_second: 144.617
Out-of-domain eval_steps_per_second: 18.217
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1603
In-domain eval_samples_per_second: 145.664
In-domain eval_steps_per_second: 18.295
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.5836
Out-of-domain eval_samples_per_second: 143.989
Out-of-domain eval_steps_per_second: 18.138
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1684
In-domain eval_samples_per_second: 145.499
In-domain eval_steps_per_second: 18.275
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.6002
Out-of-domain eval_samples_per_second: 143.327
Out-of-domain eval_steps_per_second: 18.055
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1685
In-domain eval_samples_per_second: 145.498
In-domain eval_steps_per_second: 18.274
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.6039
Out-of-domain eval_samples_per_second: 143.179
Out-of-domain eval_steps_per_second: 18.036
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1729
In-domain eval_samples_per_second: 145.408
In-domain eval_steps_per_second: 18.263
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.5763
Out-of-domain eval_samples_per_second: 144.285
Out-of-domain eval_steps_per_second: 18.175
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1539
In-domain eval_samples_per_second: 145.795
In-domain eval_steps_per_second: 18.312
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.5862
Out-of-domain eval_samples_per_second: 143.885
Out-of-domain eval_steps_per_second: 18.125
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1718
In-domain eval_samples_per_second: 145.431
In-domain eval_steps_per_second: 18.266
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.6182
Out-of-domain eval_samples_per_second: 142.611
Out-of-domain eval_steps_per_second: 17.965
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1694
In-domain eval_samples_per_second: 145.48
In-domain eval_steps_per_second: 18.272
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.6015
Out-of-domain eval_samples_per_second: 143.272
Out-of-domain eval_steps_per_second: 18.048
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.298306941986084
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 7.1744
In-domain eval_samples_per_second: 145.378
In-domain eval_steps_per_second: 18.259
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.285783052444458
Out-of-domain eval_accuracy: 0.6201550387596899
Out-of-domain eval_runtime: 3.6063
Out-of-domain eval_samples_per_second: 143.083
Out-of-domain eval_steps_per_second: 18.024
Out-of-domain epoch: 40.0


In [6]:
# Save the DataFrame to a CSV file
results_df.to_csv(f'../Results/{curr_filename}.csv', sep = ',', index=False)

In [None]:
# disconnect runtime
from google.colab import runtime
runtime.unassign()