# Few shot Fine Tuning on Cola Data Set - Baseline

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd '/content/drive/MyDrive/LLM/llm_finetuning/notebooks'
!ls

/content/drive/MyDrive/LLM/llm_finetuning/notebooks
pbft_cola_baseline.ipynb	      results				vanilla_mnli_baseline.ipynb
pbft_mnli_baseline.ipynb	      vanilla_cola_baseline_350M.ipynb
pre_trained_opt_with_inference.ipynb  vanilla_cola_baseline.ipynb


In [3]:
curr_filename = "vanilla_cola_baseline_350M"

In [4]:
!pip install -q transformers accelerate bitsandbytes datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.9/388.9 kB[0m [31m45.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
# 350M Version 1 04/19/2024

from datasets import load_dataset, ClassLabel
from sklearn.metrics import accuracy_score
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, AdamW, AutoConfig
import numpy as np
import pandas as pd
import torch

# Set seed, load COLA dataset

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

in_domain_data = load_dataset("glue", "cola")


# Define model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

config = AutoConfig.from_pretrained("facebook/opt-350m", num_labels=2, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1)
model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-350m", config=config)

# Function to load and parse out-of-domain COLA dataset
'''
\cite: https://github.com/uds-lsv/llmft/blob/main/task_utils.py
'''
def load_cola_ood_dataset(path, label=None, cache_dir=None):
    data_files = {"validation": path}
    dataset = load_dataset("csv", data_files=data_files, sep="\t", column_names=[
                           'code', 'label', 'annotation', 'sentence'], cache_dir=cache_dir)
    dataset = dataset["validation"]

    # cola-ood comes without indices, so we add them
    indices = list(range(len(dataset)))
    dataset = dataset.add_column(name="idx", column=indices)

    subset = "cola-ood"

    if label is not None:  # filter dataset based on label
        dataset = dataset.filter(
            lambda example: example["label"] == label)
        subset = f"{subset}-{'acceptable' if label == 1 else 'unacceptable'}"

    return dataset, subset


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}


def manipulate_inputs_for_cola_with_prompt(inputs):
    # Add pattern/prompts
    #inputs = tokenizer(["Yes or No?" + sentence for sentence in inputs["sentence"]], truncation=True, padding="max_length", max_length=128)
    inputs = tokenizer(inputs["sentence"], truncation=True, padding="max_length", max_length=128)

    return inputs

# augment in-domain-data
in_domain_data = in_domain_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)

# augment out of domain data
eval_ood_data, _ = load_cola_ood_dataset(path='../datafiles/dev.tsv')
eval_ood_data = eval_ood_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)


# Define parameters for training experiments (per reference paper)

few_shot_sample_size = [2, 16, 32, 64, 128]  # number of examples for each class
num_epochs = 40
batch_size = 32
learning_rate = 1e-5
weight_decay = 0.
warmup_ratio = 0.1
num_runs = 10
optimizer = AdamW(model.parameters(), lr=learning_rate) # AdamW optimizer



results_df = pd.DataFrame(columns=["n", "run", "in_domain_accuracy", "out_of_domain_accuracy"])

for n in few_shot_sample_size:
    for run_idx in range(num_runs):  # repeat 10 times for each n
        # re-iniialize model for each run
        model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-350m", config=config)
        optimizer = AdamW(model.parameters(), lr=learning_rate)

        # Select n random examples for each class from the original data
        indices_yes = np.where(np.array(in_domain_data["train"]["label"]) == 0)[0]
        indices_no = np.where(np.array(in_domain_data["train"]["label"]) == 1)[0]
        indices_yes = np.random.choice(indices_yes, n, replace=False)
        indices_no = np.random.choice(indices_no, n, replace=False)
        indices = np.concatenate([indices_yes, indices_no])

        # Select the examples for the new training set
        train_dataset = in_domain_data["train"].select(indices)

        # Define training config
        # Total steps = (#samples/batch size) * epochs
        total_steps = (len(train_dataset) // batch_size) * num_epochs

        training_args = TrainingArguments(
            output_dir = "./results",
            overwrite_output_dir = True,
            num_train_epochs = num_epochs,
            per_device_train_batch_size = batch_size,
            learning_rate = learning_rate,
            weight_decay = weight_decay,
            save_steps = 10_000,
            save_total_limit = 2,
            warmup_steps = int(warmup_ratio * total_steps),
        )

        # Define the trainer
        trainer = Trainer(
            model = model,
            args=training_args,
            train_dataset = train_dataset,
            compute_metrics = compute_metrics,
            optimizers=(optimizer, None),
        )

        # Train the model
        trainer.train()

        # Evaluate in-domain performance
        print(f"Evaluating in-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=in_domain_data["validation"])

        # Store the in-domain accuracy
        in_domain_accuracy = eval_results["eval_accuracy"]

        # Print the in-domain evaluation results
        for key, value in eval_results.items():
            print(f"In-domain {key}: {value}")

        # Evaluate out-of-domain performance
        print(f"Evaluating out-of-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=eval_ood_data)

        # Store the out-of-domain accuracy
        out_of_domain_accuracy = eval_results["eval_accuracy"]

        # Print the out-of-domain evaluation results
        for key, value in eval_results.items():
            print(f"Out-of-domain {key}: {value}")

        # Add the results to the DataFrame
        new_row = pd.DataFrame({
            "n": [n],
            "run": [run_idx],
            "in_domain_accuracy": [in_domain_accuracy],
            "out_of_domain_accuracy": [out_of_domain_accuracy]
        })
        results_df = pd.concat([results_df, new_row], ignore_index=True)



vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/516 [00:00<?, ? examples/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 2.1852316856384277
In-domain eval_accuracy: 0.3998082454458293
In-domain eval_runtime: 21.6904
In-domain eval_samples_per_second: 48.086
In-domain eval_steps_per_second: 6.04
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.945597767829895
Out-of-domain eval_accuracy: 0.43410852713178294
Out-of-domain eval_runtime: 10.9612
Out-of-domain eval_samples_per_second: 47.075
Out-of-domain eval_steps_per_second: 5.93
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.2066
In-domain eval_samples_per_second: 44.944
In-domain eval_steps_per_second: 5.645
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.7274
Out-of-domain eval_samples_per_second: 44.0
Out-of-domain eval_steps_per_second: 5.543
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.0837
In-domain eval_samples_per_second: 45.183
In-domain eval_steps_per_second: 5.675
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.3863
Out-of-domain eval_samples_per_second: 45.318
Out-of-domain eval_steps_per_second: 5.709
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.2882
In-domain eval_samples_per_second: 44.787
In-domain eval_steps_per_second: 5.625
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.526
Out-of-domain eval_samples_per_second: 44.768
Out-of-domain eval_steps_per_second: 5.639
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.1442
In-domain eval_samples_per_second: 45.065
In-domain eval_steps_per_second: 5.66
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.4798
Out-of-domain eval_samples_per_second: 44.949
Out-of-domain eval_steps_per_second: 5.662
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.2349
In-domain eval_samples_per_second: 44.889
In-domain eval_steps_per_second: 5.638
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.5026
Out-of-domain eval_samples_per_second: 44.86
Out-of-domain eval_steps_per_second: 5.651
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.1705
In-domain eval_samples_per_second: 45.014
In-domain eval_steps_per_second: 5.654
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.4437
Out-of-domain eval_samples_per_second: 45.09
Out-of-domain eval_steps_per_second: 5.68
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.214
In-domain eval_samples_per_second: 44.93
In-domain eval_steps_per_second: 5.643
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.4922
Out-of-domain eval_samples_per_second: 44.9
Out-of-domain eval_steps_per_second: 5.656
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.2383
In-domain eval_samples_per_second: 44.883
In-domain eval_steps_per_second: 5.637
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.4867
Out-of-domain eval_samples_per_second: 44.922
Out-of-domain eval_steps_per_second: 5.659
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 1.6794339418411255
In-domain eval_accuracy: 0.46116970278044106
In-domain eval_runtime: 23.1707
In-domain eval_samples_per_second: 45.014
In-domain eval_steps_per_second: 5.654
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...
Out-of-domain eval_loss: 1.6530039310455322
Out-of-domain eval_accuracy: 0.4903100775193798
Out-of-domain eval_runtime: 11.4748
Out-of-domain eval_samples_per_second: 44.968
Out-of-domain eval_steps_per_second: 5.665
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.1855
In-domain eval_samples_per_second: 44.985
In-domain eval_steps_per_second: 5.65
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.456
Out-of-domain eval_samples_per_second: 45.042
Out-of-domain eval_steps_per_second: 5.674
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.1688
In-domain eval_samples_per_second: 45.018
In-domain eval_steps_per_second: 5.654
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.479
Out-of-domain eval_samples_per_second: 44.951
Out-of-domain eval_steps_per_second: 5.662
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.2048
In-domain eval_samples_per_second: 44.948
In-domain eval_steps_per_second: 5.645
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.4887
Out-of-domain eval_samples_per_second: 44.914
Out-of-domain eval_steps_per_second: 5.658
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.2254
In-domain eval_samples_per_second: 44.908
In-domain eval_steps_per_second: 5.64
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.5096
Out-of-domain eval_samples_per_second: 44.832
Out-of-domain eval_steps_per_second: 5.647
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.242
In-domain eval_samples_per_second: 44.876
In-domain eval_steps_per_second: 5.636
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.4946
Out-of-domain eval_samples_per_second: 44.891
Out-of-domain eval_steps_per_second: 5.655
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.1969
In-domain eval_samples_per_second: 44.963
In-domain eval_steps_per_second: 5.647
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.4599
Out-of-domain eval_samples_per_second: 45.026
Out-of-domain eval_steps_per_second: 5.672
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.1731
In-domain eval_samples_per_second: 45.009
In-domain eval_steps_per_second: 5.653
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.464
Out-of-domain eval_samples_per_second: 45.01
Out-of-domain eval_steps_per_second: 5.67
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.2257
In-domain eval_samples_per_second: 44.907
In-domain eval_steps_per_second: 5.64
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.4763
Out-of-domain eval_samples_per_second: 44.962
Out-of-domain eval_steps_per_second: 5.664
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.2126
In-domain eval_samples_per_second: 44.932
In-domain eval_steps_per_second: 5.643
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.484
Out-of-domain eval_samples_per_second: 44.932
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 1.5088019371032715
In-domain eval_accuracy: 0.4851390220517737
In-domain eval_runtime: 23.2628
In-domain eval_samples_per_second: 44.836
In-domain eval_steps_per_second: 5.631
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...
Out-of-domain eval_loss: 1.4837182760238647
Out-of-domain eval_accuracy: 0.5058139534883721
Out-of-domain eval_runtime: 11.4869
Out-of-domain eval_samples_per_second: 44.921
Out-of-domain eval_steps_per_second: 5.659
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.4610368013381958
In-domain eval_accuracy: 0.5560882070949185
In-domain eval_runtime: 23.1847
In-domain eval_samples_per_second: 44.987
In-domain eval_steps_per_second: 5.65
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.3606958389282227
Out-of-domain eval_accuracy: 0.5736434108527132
Out-of-domain eval_runtime: 11.4721
Out-of-domain eval_samples_per_second: 44.979
Out-of-domain eval_steps_per_second: 5.666
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.2123
In-domain eval_samples_per_second: 44.933
In-domain eval_steps_per_second: 5.644
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4846
Out-of-domain eval_samples_per_second: 44.93
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.2208
In-domain eval_samples_per_second: 44.917
In-domain eval_steps_per_second: 5.641
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4782
Out-of-domain eval_samples_per_second: 44.955
Out-of-domain eval_steps_per_second: 5.663
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1761
In-domain eval_samples_per_second: 45.003
In-domain eval_steps_per_second: 5.652
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4795
Out-of-domain eval_samples_per_second: 44.95
Out-of-domain eval_steps_per_second: 5.662
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1597
In-domain eval_samples_per_second: 45.035
In-domain eval_steps_per_second: 5.656
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4596
Out-of-domain eval_samples_per_second: 45.028
Out-of-domain eval_steps_per_second: 5.672
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1772
In-domain eval_samples_per_second: 45.001
In-domain eval_steps_per_second: 5.652
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4545
Out-of-domain eval_samples_per_second: 45.048
Out-of-domain eval_steps_per_second: 5.675
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1667
In-domain eval_samples_per_second: 45.021
In-domain eval_steps_per_second: 5.655
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4514
Out-of-domain eval_samples_per_second: 45.06
Out-of-domain eval_steps_per_second: 5.676
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1899
In-domain eval_samples_per_second: 44.977
In-domain eval_steps_per_second: 5.649
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4656
Out-of-domain eval_samples_per_second: 45.004
Out-of-domain eval_steps_per_second: 5.669
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1619
In-domain eval_samples_per_second: 45.031
In-domain eval_steps_per_second: 5.656
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4644
Out-of-domain eval_samples_per_second: 45.009
Out-of-domain eval_steps_per_second: 5.67
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 1.9770126342773438
In-domain eval_accuracy: 0.49760306807286675
In-domain eval_runtime: 23.1765
In-domain eval_samples_per_second: 45.002
In-domain eval_steps_per_second: 5.652
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...
Out-of-domain eval_loss: 1.979759693145752
Out-of-domain eval_accuracy: 0.5077519379844961
Out-of-domain eval_runtime: 11.4931
Out-of-domain eval_samples_per_second: 44.896
Out-of-domain eval_steps_per_second: 5.656
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.9496732950210571
In-domain eval_accuracy: 0.5637583892617449
In-domain eval_runtime: 23.1919
In-domain eval_samples_per_second: 44.973
In-domain eval_steps_per_second: 5.649
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 2.011420249938965
Out-of-domain eval_accuracy: 0.5503875968992248
Out-of-domain eval_runtime: 11.4805
Out-of-domain eval_samples_per_second: 44.946
Out-of-domain eval_steps_per_second: 5.662
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.2347
In-domain eval_samples_per_second: 44.89
In-domain eval_steps_per_second: 5.638
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4785
Out-of-domain eval_samples_per_second: 44.954
Out-of-domain eval_steps_per_second: 5.663
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1626
In-domain eval_samples_per_second: 45.029
In-domain eval_steps_per_second: 5.656
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4384
Out-of-domain eval_samples_per_second: 45.111
Out-of-domain eval_steps_per_second: 5.683
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1887
In-domain eval_samples_per_second: 44.979
In-domain eval_steps_per_second: 5.649
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4812
Out-of-domain eval_samples_per_second: 44.943
Out-of-domain eval_steps_per_second: 5.661
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1793
In-domain eval_samples_per_second: 44.997
In-domain eval_steps_per_second: 5.652
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4833
Out-of-domain eval_samples_per_second: 44.935
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1959
In-domain eval_samples_per_second: 44.965
In-domain eval_steps_per_second: 5.648
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4737
Out-of-domain eval_samples_per_second: 44.973
Out-of-domain eval_steps_per_second: 5.665
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.2304
In-domain eval_samples_per_second: 44.898
In-domain eval_steps_per_second: 5.639
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4951
Out-of-domain eval_samples_per_second: 44.889
Out-of-domain eval_steps_per_second: 5.655
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1825
In-domain eval_samples_per_second: 44.991
In-domain eval_steps_per_second: 5.651
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4676
Out-of-domain eval_samples_per_second: 44.996
Out-of-domain eval_steps_per_second: 5.668
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1952
In-domain eval_samples_per_second: 44.966
In-domain eval_steps_per_second: 5.648
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4639
Out-of-domain eval_samples_per_second: 45.011
Out-of-domain eval_steps_per_second: 5.67
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 1.6461914777755737
In-domain eval_accuracy: 0.6279961649089166
In-domain eval_runtime: 23.1731
In-domain eval_samples_per_second: 45.009
In-domain eval_steps_per_second: 5.653
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...
Out-of-domain eval_loss: 1.5185779333114624
Out-of-domain eval_accuracy: 0.6453488372093024
Out-of-domain eval_runtime: 11.4747
Out-of-domain eval_samples_per_second: 44.969
Out-of-domain eval_steps_per_second: 5.665
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.37613582611084
In-domain eval_accuracy: 0.6423777564717162
In-domain eval_runtime: 23.2027
In-domain eval_samples_per_second: 44.952
In-domain eval_steps_per_second: 5.646
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.2837653160095215
Out-of-domain eval_accuracy: 0.6569767441860465
Out-of-domain eval_runtime: 11.4845
Out-of-domain eval_samples_per_second: 44.93
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2548
In-domain eval_samples_per_second: 44.851
In-domain eval_steps_per_second: 5.633
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4869
Out-of-domain eval_samples_per_second: 44.921
Out-of-domain eval_steps_per_second: 5.659
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2226
In-domain eval_samples_per_second: 44.913
In-domain eval_steps_per_second: 5.641
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4848
Out-of-domain eval_samples_per_second: 44.929
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2298
In-domain eval_samples_per_second: 44.899
In-domain eval_steps_per_second: 5.639
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.5258
Out-of-domain eval_samples_per_second: 44.769
Out-of-domain eval_steps_per_second: 5.64
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2336
In-domain eval_samples_per_second: 44.892
In-domain eval_steps_per_second: 5.638
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4999
Out-of-domain eval_samples_per_second: 44.87
Out-of-domain eval_steps_per_second: 5.652
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2317
In-domain eval_samples_per_second: 44.896
In-domain eval_steps_per_second: 5.639
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4958
Out-of-domain eval_samples_per_second: 44.886
Out-of-domain eval_steps_per_second: 5.654
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.1961
In-domain eval_samples_per_second: 44.964
In-domain eval_steps_per_second: 5.647
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4734
Out-of-domain eval_samples_per_second: 44.974
Out-of-domain eval_steps_per_second: 5.665
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.205
In-domain eval_samples_per_second: 44.947
In-domain eval_steps_per_second: 5.645
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4848
Out-of-domain eval_samples_per_second: 44.929
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2002
In-domain eval_samples_per_second: 44.956
In-domain eval_steps_per_second: 5.646
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.4774
Out-of-domain eval_samples_per_second: 44.958
Out-of-domain eval_steps_per_second: 5.663
Out-of-domain epoch: 40.0


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 2.4027559757232666
In-domain eval_accuracy: 0.5944391179290508
In-domain eval_runtime: 23.2154
In-domain eval_samples_per_second: 44.927
In-domain eval_steps_per_second: 5.643
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 2.5247931480407715
Out-of-domain eval_accuracy: 0.5910852713178295
Out-of-domain eval_runtime: 11.484
Out-of-domain eval_samples_per_second: 44.932
Out-of-domain eval_steps_per_second: 5.66
Out-of-domain epoch: 40.0


In [7]:
# Group the results by 'n' and compute the average and maximum performance for each group
grouped_results = results_df.groupby('n')

average_in_domain_accuracy = grouped_results['in_domain_accuracy'].mean()
maximum_in_domain_accuracy = grouped_results['in_domain_accuracy'].max()

average_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].mean()
maximum_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].max()

# Print the results for each 'n'
for n in few_shot_sample_size:
    print(f"For n={n}:")
    print(f"Average in-domain accuracy: {average_in_domain_accuracy[n]}")
    print(f"Maximum in-domain accuracy: {maximum_in_domain_accuracy[n]}")
    print(f"Average out-of-domain accuracy: {average_out_of_domain_accuracy[n]}")
    print(f"Maximum out-of-domain accuracy: {maximum_out_of_domain_accuracy[n]}")
    print("\n")

# Compute and print the overall average and maximum performance
print("Overall:")
print(f"Average in-domain accuracy: {results_df['in_domain_accuracy'].mean()}")
print(f"Maximum in-domain accuracy: {results_df['in_domain_accuracy'].max()}")
print(f"Average out-of-domain accuracy: {results_df['out_of_domain_accuracy'].mean()}")
print(f"Maximum out-of-domain accuracy: {results_df['out_of_domain_accuracy'].max()}")


For n=2:
Average in-domain accuracy: 0.4550335570469799
Maximum in-domain accuracy: 0.46116970278044106
Average out-of-domain accuracy: 0.48468992248062015
Maximum out-of-domain accuracy: 0.4903100775193798


For n=16:
Average in-domain accuracy: 0.4851390220517738
Maximum in-domain accuracy: 0.4851390220517737
Average out-of-domain accuracy: 0.5058139534883721
Maximum out-of-domain accuracy: 0.5058139534883721


For n=32:
Average in-domain accuracy: 0.503451581975072
Maximum in-domain accuracy: 0.5560882070949185
Average out-of-domain accuracy: 0.5143410852713178
Maximum out-of-domain accuracy: 0.5736434108527132


For n=64:
Average in-domain accuracy: 0.6215723873441995
Maximum in-domain accuracy: 0.6279961649089166
Average out-of-domain accuracy: 0.6358527131782946
Maximum out-of-domain accuracy: 0.6453488372093024


For n=128:
Average in-domain accuracy: 0.5992329817833173
Maximum in-domain accuracy: 0.6423777564717162
Average out-of-domain accuracy: 0.5976744186046512
Maximum out-

In [8]:
# Save the DataFrame to a CSV file
curr_filename = "vanilla_cola_baseline_350M"
results_df.to_csv(f'../Results/{curr_filename}.csv', sep = ',', index=False)

In [None]:
# disconnect runtime
from google.colab import runtime
runtime.unassign()