# Few shot Fine Tuning on Cola Data Set - Baseline

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd '/content/drive/MyDrive/LLM/llm_finetuning/notebooks'
!ls

/content/drive/MyDrive/LLM/llm_finetuning/notebooks
pbft_cola_baseline.ipynb	      vanilla_cola_adaptive_v2.ipynb
pbft_mnli_baseline.ipynb	      vanilla_cola_baseline_350M.ipynb
pre_trained_opt_with_inference.ipynb  vanilla_cola_baseline.ipynb
results				      vanilla_mnli_baseline.ipynb
vanilla_cola_adaptive.ipynb


In [4]:
curr_filename = "vanilla_cola_adaptive_2Layer_LR"

In [5]:
!pip install -q transformers accelerate bitsandbytes datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.9/388.9 kB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
# Version 2 04/19/2024

from datasets import load_dataset, ClassLabel
from sklearn.metrics import accuracy_score
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer, AdamW, AutoConfig
import numpy as np
import pandas as pd
import torch

# Set seed, load COLA dataset

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

in_domain_data = load_dataset("glue", "cola")


# Define model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

config = AutoConfig.from_pretrained("facebook/opt-125m", num_labels=2, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1)
model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)



# Layer-wise Learning Rate Adjustment and Layer Freezing
def create_optimizer(model):
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = []
    base_lr = 1e-5
    for name, param in param_optimizer:
        split_name = name.split('.')
        if 'layer' in split_name:
            layer_idx = int(split_name[1])
            # Adjust learning rates: lower layers have lower learning rates
            layer_lr = base_lr * (0.95 ** (12 - layer_idx))
            if layer_idx < 10:  # Freeze all except the last two layers
                param.requires_grad = False
                layer_lr = 0  # No learning rate for frozen layers
            optimizer_grouped_parameters.append({'params': [param], 'lr': layer_lr})
        else:
            # Non-layer parameters use the base learning rate
            optimizer_grouped_parameters.append({'params': [param], 'lr': base_lr})
    optimizer = AdamW(optimizer_grouped_parameters)
    return optimizer


# Function to load and parse out-of-domain COLA dataset
'''
\cite: https://github.com/uds-lsv/llmft/blob/main/task_utils.py
'''
def load_cola_ood_dataset(path, label=None, cache_dir=None):
    data_files = {"validation": path}
    dataset = load_dataset("csv", data_files=data_files, sep="\t", column_names=[
                           'code', 'label', 'annotation', 'sentence'], cache_dir=cache_dir)
    dataset = dataset["validation"]

    # cola-ood comes without indices, so we add them
    indices = list(range(len(dataset)))
    dataset = dataset.add_column(name="idx", column=indices)

    subset = "cola-ood"

    if label is not None:  # filter dataset based on label
        dataset = dataset.filter(
            lambda example: example["label"] == label)
        subset = f"{subset}-{'acceptable' if label == 1 else 'unacceptable'}"

    return dataset, subset


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}


def manipulate_inputs_for_cola_with_prompt(inputs):
    # Add pattern/prompts
    #inputs = tokenizer(["Yes or No?" + sentence for sentence in inputs["sentence"]], truncation=True, padding="max_length", max_length=128)
    inputs = tokenizer(inputs["sentence"], truncation=True, padding="max_length", max_length=128)

    return inputs

# augment in-domain-data
in_domain_data = in_domain_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)

# augment out of domain data
eval_ood_data, _ = load_cola_ood_dataset(path='../datafiles/dev.tsv')
eval_ood_data = eval_ood_data.map(manipulate_inputs_for_cola_with_prompt, batched=True)


# Define parameters for training experiments (per reference paper)

few_shot_sample_size = [2, 16, 32, 64, 128]  # number of examples for each class
num_epochs = 40
batch_size = 32
learning_rate = 1e-5
weight_decay = 0.
warmup_ratio = 0.1
num_runs = 10
optimizer = AdamW(model.parameters(), lr=learning_rate) # AdamW optimizer



results_df = pd.DataFrame(columns=["n", "run", "in_domain_accuracy", "out_of_domain_accuracy"])

for n in few_shot_sample_size:
    for run_idx in range(num_runs):  # repeat 10 times for each n
        # re-iniialize model for each run
        model = AutoModelForSequenceClassification.from_pretrained("facebook/opt-125m", config=config)
        #optimizer = AdamW(model.parameters(), lr=learning_rate)
        optimizer = create_optimizer(model)  # Call create_optimizer function to handle layer-wise rates and freezing

        # Freeze Layers

        for name, param in model.named_parameters():
            parts = name.split('.')
            # Find the part that is a digit and represents the layer index
            for part in parts:
                if part.isdigit():  # Check if the part is a digit
                    layer_index = int(part)
                    if layer_index < 10:  # Freeze all except the last two layers
                         param.requires_grad = False
                    break  # Once you find the index and process it, break out of the loop





        # Select n random examples for each class from the original data
        indices_yes = np.where(np.array(in_domain_data["train"]["label"]) == 0)[0]
        indices_no = np.where(np.array(in_domain_data["train"]["label"]) == 1)[0]
        indices_yes = np.random.choice(indices_yes, n, replace=False)
        indices_no = np.random.choice(indices_no, n, replace=False)
        indices = np.concatenate([indices_yes, indices_no])

        # Select the examples for the new training set
        train_dataset = in_domain_data["train"].select(indices)

        # Define training config
        # Total steps = (#samples/batch size) * epochs
        total_steps = (len(train_dataset) // batch_size) * num_epochs

        training_args = TrainingArguments(
            output_dir = "./results",
            overwrite_output_dir = True,
            num_train_epochs = num_epochs,
            per_device_train_batch_size = batch_size,
            learning_rate = learning_rate,
            weight_decay = weight_decay,
            save_steps = 10_000,
            save_total_limit = 2,
            warmup_steps = int(warmup_ratio * total_steps),
        )

        # Define the trainer
        trainer = Trainer(
            model = model,
            args=training_args,
            train_dataset = train_dataset,
            compute_metrics = compute_metrics,
            optimizers=(optimizer, None),
        )

        # Train the model
        trainer.train()

        # Evaluate in-domain performance
        print(f"Evaluating in-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=in_domain_data["validation"])

        # Store the in-domain accuracy
        in_domain_accuracy = eval_results["eval_accuracy"]

        # Print the in-domain evaluation results
        for key, value in eval_results.items():
            print(f"In-domain {key}: {value}")

        # Evaluate out-of-domain performance
        print(f"Evaluating out-of-domain performance for n={n}...")
        eval_results = trainer.evaluate(eval_dataset=eval_ood_data)

        # Store the out-of-domain accuracy
        out_of_domain_accuracy = eval_results["eval_accuracy"]

        # Print the out-of-domain evaluation results
        for key, value in eval_results.items():
            print(f"Out-of-domain {key}: {value}")

        # Add the results to the DataFrame
        new_row = pd.DataFrame({
            "n": [n],
            "run": [run_idx],
            "in_domain_accuracy": [in_domain_accuracy],
            "out_of_domain_accuracy": [out_of_domain_accuracy]
        })
        results_df = pd.concat([results_df, new_row], ignore_index=True)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/251k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/37.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/37.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/251M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/516 [00:00<?, ? examples/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.6838909983634949
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 6.3563
In-domain eval_samples_per_second: 164.089
In-domain eval_steps_per_second: 20.609
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6587336659431458
Out-of-domain eval_accuracy: 0.5949612403100775
Out-of-domain eval_runtime: 3.2009
Out-of-domain eval_samples_per_second: 161.203
Out-of-domain eval_steps_per_second: 20.307
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 6.4286
In-domain eval_samples_per_second: 162.244
In-domain eval_steps_per_second: 20.378
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.2835
Out-of-domain eval_samples_per_second: 157.149
Out-of-domain eval_steps_per_second: 19.796
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 6.5579
In-domain eval_samples_per_second: 159.045
In-domain eval_steps_per_second: 19.976
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.3302
Out-of-domain eval_samples_per_second: 154.946
Out-of-domain eval_steps_per_second: 19.518
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 6.6573
In-domain eval_samples_per_second: 156.669
In-domain eval_steps_per_second: 19.678
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.3579
Out-of-domain eval_samples_per_second: 153.666
Out-of-domain eval_steps_per_second: 19.357
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 6.7788
In-domain eval_samples_per_second: 153.862
In-domain eval_steps_per_second: 19.325
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.4249
Out-of-domain eval_samples_per_second: 150.659
Out-of-domain eval_steps_per_second: 18.978
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 6.9065
In-domain eval_samples_per_second: 151.016
In-domain eval_steps_per_second: 18.968
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.4793
Out-of-domain eval_samples_per_second: 148.305
Out-of-domain eval_steps_per_second: 18.682
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 7.0147
In-domain eval_samples_per_second: 148.687
In-domain eval_steps_per_second: 18.675
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.5273
Out-of-domain eval_samples_per_second: 146.287
Out-of-domain eval_steps_per_second: 18.428
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 7.1413
In-domain eval_samples_per_second: 146.051
In-domain eval_steps_per_second: 18.344
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.6194
Out-of-domain eval_samples_per_second: 142.565
Out-of-domain eval_steps_per_second: 17.959
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 7.2321
In-domain eval_samples_per_second: 144.218
In-domain eval_steps_per_second: 18.114
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.6324
Out-of-domain eval_samples_per_second: 142.054
Out-of-domain eval_steps_per_second: 17.894
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=2...


In-domain eval_loss: 0.727360725402832
In-domain eval_accuracy: 0.49952061361457334
In-domain eval_runtime: 7.1166
In-domain eval_samples_per_second: 146.558
In-domain eval_steps_per_second: 18.408
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=2...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7110629677772522
Out-of-domain eval_accuracy: 0.5406976744186046
Out-of-domain eval_runtime: 3.5402
Out-of-domain eval_samples_per_second: 145.756
Out-of-domain eval_steps_per_second: 18.361
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.0377
In-domain eval_samples_per_second: 148.202
In-domain eval_steps_per_second: 18.614
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5267
Out-of-domain eval_samples_per_second: 146.312
Out-of-domain eval_steps_per_second: 18.431
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1152
In-domain eval_samples_per_second: 146.588
In-domain eval_steps_per_second: 18.411
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5946
Out-of-domain eval_samples_per_second: 143.55
Out-of-domain eval_steps_per_second: 18.083
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1314
In-domain eval_samples_per_second: 146.255
In-domain eval_steps_per_second: 18.369
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5725
Out-of-domain eval_samples_per_second: 144.437
Out-of-domain eval_steps_per_second: 18.195
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.0707
In-domain eval_samples_per_second: 147.509
In-domain eval_steps_per_second: 18.527
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5557
Out-of-domain eval_samples_per_second: 145.121
Out-of-domain eval_steps_per_second: 18.281
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.0839
In-domain eval_samples_per_second: 147.236
In-domain eval_steps_per_second: 18.493
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5894
Out-of-domain eval_samples_per_second: 143.757
Out-of-domain eval_steps_per_second: 18.109
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.092
In-domain eval_samples_per_second: 147.066
In-domain eval_steps_per_second: 18.471
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5575
Out-of-domain eval_samples_per_second: 145.047
Out-of-domain eval_steps_per_second: 18.271
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1431
In-domain eval_samples_per_second: 146.015
In-domain eval_steps_per_second: 18.339
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5837
Out-of-domain eval_samples_per_second: 143.986
Out-of-domain eval_steps_per_second: 18.138
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1007
In-domain eval_samples_per_second: 146.886
In-domain eval_steps_per_second: 18.449
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5694
Out-of-domain eval_samples_per_second: 144.56
Out-of-domain eval_steps_per_second: 18.21
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1116
In-domain eval_samples_per_second: 146.662
In-domain eval_steps_per_second: 18.421
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.5783
Out-of-domain eval_samples_per_second: 144.203
Out-of-domain eval_steps_per_second: 18.165
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=16...


In-domain eval_loss: 0.7067421674728394
In-domain eval_accuracy: 0.5302013422818792
In-domain eval_runtime: 7.1032
In-domain eval_samples_per_second: 146.836
In-domain eval_steps_per_second: 18.442
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=16...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6959239840507507
Out-of-domain eval_accuracy: 0.560077519379845
Out-of-domain eval_runtime: 3.556
Out-of-domain eval_samples_per_second: 145.107
Out-of-domain eval_steps_per_second: 18.279
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.6934338212013245
In-domain eval_accuracy: 0.5522531160115053
In-domain eval_runtime: 7.1117
In-domain eval_samples_per_second: 146.66
In-domain eval_steps_per_second: 18.42
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6805055141448975
Out-of-domain eval_accuracy: 0.5949612403100775
Out-of-domain eval_runtime: 3.5623
Out-of-domain eval_samples_per_second: 144.849
Out-of-domain eval_steps_per_second: 18.246
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.0831
In-domain eval_samples_per_second: 147.252
In-domain eval_steps_per_second: 18.495
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5566
Out-of-domain eval_samples_per_second: 145.083
Out-of-domain eval_steps_per_second: 18.276
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.093
In-domain eval_samples_per_second: 147.047
In-domain eval_steps_per_second: 18.469
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5613
Out-of-domain eval_samples_per_second: 144.893
Out-of-domain eval_steps_per_second: 18.252
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.0918
In-domain eval_samples_per_second: 147.072
In-domain eval_steps_per_second: 18.472
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5561
Out-of-domain eval_samples_per_second: 145.101
Out-of-domain eval_steps_per_second: 18.278
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.0953
In-domain eval_samples_per_second: 146.998
In-domain eval_steps_per_second: 18.463
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5614
Out-of-domain eval_samples_per_second: 144.887
Out-of-domain eval_steps_per_second: 18.251
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.0982
In-domain eval_samples_per_second: 146.938
In-domain eval_steps_per_second: 18.455
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5683
Out-of-domain eval_samples_per_second: 144.609
Out-of-domain eval_steps_per_second: 18.216
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1166
In-domain eval_samples_per_second: 146.56
In-domain eval_steps_per_second: 18.408
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5774
Out-of-domain eval_samples_per_second: 144.239
Out-of-domain eval_steps_per_second: 18.17
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.1111
In-domain eval_samples_per_second: 146.671
In-domain eval_steps_per_second: 18.422
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5978
Out-of-domain eval_samples_per_second: 143.422
Out-of-domain eval_steps_per_second: 18.067
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.091
In-domain eval_samples_per_second: 147.088
In-domain eval_steps_per_second: 18.474
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5614
Out-of-domain eval_samples_per_second: 144.887
Out-of-domain eval_steps_per_second: 18.251
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=32...


In-domain eval_loss: 0.7187260985374451
In-domain eval_accuracy: 0.5273250239693192
In-domain eval_runtime: 7.126
In-domain eval_samples_per_second: 146.365
In-domain eval_steps_per_second: 18.383
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=32...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7078965902328491
Out-of-domain eval_accuracy: 0.5290697674418605
Out-of-domain eval_runtime: 3.5924
Out-of-domain eval_samples_per_second: 143.635
Out-of-domain eval_steps_per_second: 18.094
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7570077776908875
In-domain eval_accuracy: 0.5091083413231065
In-domain eval_runtime: 7.1014
In-domain eval_samples_per_second: 146.872
In-domain eval_steps_per_second: 18.447
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.7534480690956116
Out-of-domain eval_accuracy: 0.49806201550387597
Out-of-domain eval_runtime: 3.5637
Out-of-domain eval_samples_per_second: 144.793
Out-of-domain eval_steps_per_second: 18.239
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.0852
In-domain eval_samples_per_second: 147.208
In-domain eval_steps_per_second: 18.489
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5715
Out-of-domain eval_samples_per_second: 144.475
Out-of-domain eval_steps_per_second: 18.199
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.0942
In-domain eval_samples_per_second: 147.022
In-domain eval_steps_per_second: 18.466
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5779
Out-of-domain eval_samples_per_second: 144.219
Out-of-domain eval_steps_per_second: 18.167
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1188
In-domain eval_samples_per_second: 146.513
In-domain eval_steps_per_second: 18.402
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.574
Out-of-domain eval_samples_per_second: 144.374
Out-of-domain eval_steps_per_second: 18.187
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1144
In-domain eval_samples_per_second: 146.604
In-domain eval_steps_per_second: 18.413
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5475
Out-of-domain eval_samples_per_second: 145.453
Out-of-domain eval_steps_per_second: 18.323
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1027
In-domain eval_samples_per_second: 146.846
In-domain eval_steps_per_second: 18.444
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5653
Out-of-domain eval_samples_per_second: 144.729
Out-of-domain eval_steps_per_second: 18.231
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1045
In-domain eval_samples_per_second: 146.807
In-domain eval_steps_per_second: 18.439
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5925
Out-of-domain eval_samples_per_second: 143.631
Out-of-domain eval_steps_per_second: 18.093
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1166
In-domain eval_samples_per_second: 146.559
In-domain eval_steps_per_second: 18.408
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5851
Out-of-domain eval_samples_per_second: 143.927
Out-of-domain eval_steps_per_second: 18.13
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.1222
In-domain eval_samples_per_second: 146.443
In-domain eval_steps_per_second: 18.393
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5847
Out-of-domain eval_samples_per_second: 143.945
Out-of-domain eval_steps_per_second: 18.133
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=64...


In-domain eval_loss: 0.7031458616256714
In-domain eval_accuracy: 0.5618408437200384
In-domain eval_runtime: 7.106
In-domain eval_samples_per_second: 146.777
In-domain eval_steps_per_second: 18.435
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=64...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6854815483093262
Out-of-domain eval_accuracy: 0.5775193798449613
Out-of-domain eval_runtime: 3.5543
Out-of-domain eval_samples_per_second: 145.178
Out-of-domain eval_steps_per_second: 18.288
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7339271903038025
In-domain eval_accuracy: 0.5838926174496645
In-domain eval_runtime: 7.0955
In-domain eval_samples_per_second: 146.994
In-domain eval_steps_per_second: 18.462
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.6999451518058777
Out-of-domain eval_accuracy: 0.5968992248062015
Out-of-domain eval_runtime: 3.5719
Out-of-domain eval_samples_per_second: 144.461
Out-of-domain eval_steps_per_second: 18.198
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1097
In-domain eval_samples_per_second: 146.702
In-domain eval_steps_per_second: 18.426
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5811
Out-of-domain eval_samples_per_second: 144.089
Out-of-domain eval_steps_per_second: 18.151
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1026
In-domain eval_samples_per_second: 146.848
In-domain eval_steps_per_second: 18.444
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5576
Out-of-domain eval_samples_per_second: 145.042
Out-of-domain eval_steps_per_second: 18.271
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.0961
In-domain eval_samples_per_second: 146.983
In-domain eval_steps_per_second: 18.461
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5555
Out-of-domain eval_samples_per_second: 145.128
Out-of-domain eval_steps_per_second: 18.282
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.105
In-domain eval_samples_per_second: 146.798
In-domain eval_steps_per_second: 18.438
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5666
Out-of-domain eval_samples_per_second: 144.675
Out-of-domain eval_steps_per_second: 18.225
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1349
In-domain eval_samples_per_second: 146.184
In-domain eval_steps_per_second: 18.361
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.563
Out-of-domain eval_samples_per_second: 144.822
Out-of-domain eval_steps_per_second: 18.243
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1141
In-domain eval_samples_per_second: 146.61
In-domain eval_steps_per_second: 18.414
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5859
Out-of-domain eval_samples_per_second: 143.898
Out-of-domain eval_steps_per_second: 18.127
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1145
In-domain eval_samples_per_second: 146.603
In-domain eval_steps_per_second: 18.413
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5726
Out-of-domain eval_samples_per_second: 144.434
Out-of-domain eval_steps_per_second: 18.194
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1036
In-domain eval_samples_per_second: 146.827
In-domain eval_steps_per_second: 18.441
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-125m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5821
Out-of-domain eval_samples_per_second: 144.051
Out-of-domain eval_steps_per_second: 18.146
Out-of-domain epoch: 40.0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


Evaluating in-domain performance for n=128...


In-domain eval_loss: 0.7503437399864197
In-domain eval_accuracy: 0.5714285714285714
In-domain eval_runtime: 7.1126
In-domain eval_samples_per_second: 146.641
In-domain eval_steps_per_second: 18.418
In-domain epoch: 40.0
Evaluating out-of-domain performance for n=128...
Out-of-domain eval_loss: 0.735813319683075
Out-of-domain eval_accuracy: 0.5833333333333334
Out-of-domain eval_runtime: 3.5715
Out-of-domain eval_samples_per_second: 144.478
Out-of-domain eval_steps_per_second: 18.2
Out-of-domain epoch: 40.0


In [8]:
# Group the results by 'n' and compute the average and maximum performance for each group
grouped_results = results_df.groupby('n')

average_in_domain_accuracy = grouped_results['in_domain_accuracy'].mean()
maximum_in_domain_accuracy = grouped_results['in_domain_accuracy'].max()

average_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].mean()
maximum_out_of_domain_accuracy = grouped_results['out_of_domain_accuracy'].max()

# Print the results for each 'n'
for n in few_shot_sample_size:
    print(f"For n={n}:")
    print(f"Average in-domain accuracy: {average_in_domain_accuracy[n]}")
    print(f"Maximum in-domain accuracy: {maximum_in_domain_accuracy[n]}")
    print(f"Average out-of-domain accuracy: {average_out_of_domain_accuracy[n]}")
    print(f"Maximum out-of-domain accuracy: {maximum_out_of_domain_accuracy[n]}")
    print("\n")

# Compute and print the overall average and maximum performance
print("Overall:")
print(f"Average in-domain accuracy: {results_df['in_domain_accuracy'].mean()}")
print(f"Maximum in-domain accuracy: {results_df['in_domain_accuracy'].max()}")
print(f"Average out-of-domain accuracy: {results_df['out_of_domain_accuracy'].mean()}")
print(f"Maximum out-of-domain accuracy: {results_df['out_of_domain_accuracy'].max()}")


For n=2:
Average in-domain accuracy: 0.5067114093959731
Maximum in-domain accuracy: 0.5714285714285714
Average out-of-domain accuracy: 0.546124031007752
Maximum out-of-domain accuracy: 0.5949612403100775


For n=16:
Average in-domain accuracy: 0.5302013422818792
Maximum in-domain accuracy: 0.5302013422818792
Average out-of-domain accuracy: 0.560077519379845
Maximum out-of-domain accuracy: 0.560077519379845


For n=32:
Average in-domain accuracy: 0.5298178331735379
Maximum in-domain accuracy: 0.5522531160115053
Average out-of-domain accuracy: 0.5356589147286822
Maximum out-of-domain accuracy: 0.5949612403100775


For n=64:
Average in-domain accuracy: 0.5565675934803452
Maximum in-domain accuracy: 0.5618408437200384
Average out-of-domain accuracy: 0.5695736434108527
Maximum out-of-domain accuracy: 0.5775193798449613


For n=128:
Average in-domain accuracy: 0.5726749760306806
Maximum in-domain accuracy: 0.5838926174496645
Average out-of-domain accuracy: 0.5846899224806201
Maximum out-of-d

In [9]:
# Save the DataFrame to a CSV file
curr_filename = "vanilla_cola_adaptive_125M_2Layer_LearningRate"
results_df.to_csv(f'../Results/{curr_filename}.csv', sep = ',', index=False)

In [None]:
# disconnect runtime
from google.colab import runtime
runtime.unassign()