In [1]:
# Mount the google drive
from google.colab import drive
import os
import pandas as pd

### GLOBALS ###
BASE_PATH = ''
DATA_PATH = ''
MODEL_PATH = ''

def mount_drive():
    '''Mounts google drive and sets base/data paths'''
    drive.mount('/content/drive')

    global BASE_PATH
    global DATA_PATH
    global MODEL_PATH

    # Set up file paths
    BASE_PATH = '/content/drive/MyDrive/bert/'
    DATA_PATH = os.path.join(BASE_PATH, 'data')
    MODEL_PATH = os.path.join(BASE_PATH, 'models')

# Mount the drive
mount_drive()

print(f"BASE_PATH = {BASE_PATH} exists: {os.path.isdir(BASE_PATH)}")
print(f"DATA_PATH = {DATA_PATH} exists: {os.path.isdir(DATA_PATH)}")
print(f"MODEL_PATH = {MODEL_PATH} exists: {os.path.isdir(MODEL_PATH)}")

Mounted at /content/drive
BASE_PATH = /content/drive/MyDrive/bert/ exists: True
DATA_PATH = /content/drive/MyDrive/bert/data exists: True
MODEL_PATH = /content/drive/MyDrive/bert/models exists: True


## Load dataset

In [2]:
# Adapted from HuggingFace documentation
# https://colab.research.google.com/github/huggingface/notebooks/blob/master/examples/text_classification.ipynb#scrollTo=a5hBlsrHIrJL

! pip install datasets transformers

Collecting datasets
[?25l  Downloading https://files.pythonhosted.org/packages/46/1a/b9f9b3bfef624686ae81c070f0a6bb635047b17cdb3698c7ad01281e6f9a/datasets-1.6.2-py3-none-any.whl (221kB)
[K     |████████████████████████████████| 225kB 5.4MB/s 
[?25hCollecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 6.7MB/s 
Collecting xxhash
[?25l  Downloading https://files.pythonhosted.org/packages/7d/4f/0a862cad26aa2ed7a7cd87178cbbfa824fc1383e472d63596a0d018374e7/xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243kB)
[K     |████████████████████████████████| 245kB 21.4MB/s 
[?25hCollecting huggingface-hub<0.1.0
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting fsspec
[?25l  Downloadin

In [3]:
from datasets import load_dataset, load_metric
import numpy as np

# GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
task = "cola"
batch_size = 32
dataset = load_dataset("glue", task)
metric = load_metric('glue', task)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=7777.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=4473.0, style=ProgressStyle(description…


Downloading and preparing dataset glue/cola (download: 368.14 KiB, generated: 596.73 KiB, post-processed: Unknown size, total: 964.86 KiB) to /root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=376971.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1848.0, style=ProgressStyle(description…




In [4]:
# Instantiate tokenizer

from transformers import AutoTokenizer
    
tokenizer = AutoTokenizer.from_pretrained('roberta-base', use_fast=True)
print(tokenizer)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…


PreTrainedTokenizerFast(name_or_path='roberta-base', vocab_size=50265, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=False)})


In [5]:
# Set up preprocessing for the various tasks
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mnli-mm": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}

# sentence1_key, sentence2_key = task_to_keys[task]

# def preprocess_function(examples):
#     if sentence2_key is None:
#         return tokenizer(examples[sentence1_key], truncation=True)
#     return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True)



In [18]:
# encoded_dataset = dataset.map(preprocess_function, batched=True)

HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




## Set up training and metrics config

In [9]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    if task != "stsb":
        predictions = np.argmax(predictions, axis=1)
    else:
        predictions = predictions[:, 0]
    return metric.compute(predictions=predictions, references=labels)

def get_trainer(model, task, batch_size=32, epochs=1):
    # Prepare dataset for the given task
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mnli-mm": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }

    def preprocess_function(examples):
        if sentence2_key is None:
            return tokenizer(examples[sentence1_key], truncation=True)
        return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True)

    def compute_metrics(eval_pred):
        predictions, labels = eval_pred
        if task != "stsb":
            predictions = np.argmax(predictions, axis=1)
        else:
            predictions = predictions[:, 0]
        return metric.compute(predictions=predictions, references=labels)
    
    
    sentence1_key, sentence2_key = task_to_keys[task]
    dataset = load_dataset("glue", task)
    encoded_dataset = dataset.map(preprocess_function, batched=True)
    metric = load_metric('glue', task)

    metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
    args = TrainingArguments(
        "test-glue",
        evaluation_strategy = "epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model=metric_name,
    )


    validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
    trainer = Trainer(
        model,
        args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset[validation_key],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    return trainer

## Instatiate new model head and set task

In [7]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

inorder_path = os.path.join(MODEL_PATH, 'in_order/0/base_model')
reverse_path = os.path.join(MODEL_PATH, 'reverse_order/0/base_model')
random_path = os.path.join(MODEL_PATH, 'random_order_1/0/base_model')
num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2

model_inorder = AutoModelForSequenceClassification.from_pretrained(inorder_path, num_labels=num_labels)
model_reverse = AutoModelForSequenceClassification.from_pretrained(reverse_path, num_labels=num_labels)
model_random = AutoModelForSequenceClassification.from_pretrained(random_path, num_labels=num_labels)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/base_model and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/reverse_order/0/base_model and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/random_order_1/0/base_model and are newly initializ

In [15]:
# Train inorder model
trainer_inorder = get_trainer(model_inorder, task='cola', batch_size=32, epochs=10)
loss_inorder = trainer_inorder.train()

Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.614481,0.0,1.98,526.758
2,0.612200,0.621396,0.081515,1.9427,536.889
3,0.612200,0.637677,0.045217,1.9198,543.285
4,0.522600,0.685651,0.076797,2.0081,519.389
5,0.522600,0.754324,0.087862,2.0038,520.498
6,0.416900,0.850426,0.119416,1.9601,532.122
7,0.416900,0.922855,0.132775,2.0095,519.04
8,0.332300,0.943866,0.145452,1.9184,543.696
9,0.332300,1.03048,0.148642,1.9866,525.006
10,0.284300,1.068755,0.139567,1.9325,539.714


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [13]:
 # Train reverse model
trainer_reverse = get_trainer(model_reverse, task='cola', batch_size=32, epochs=10)
loss_reverse = trainer_reverse.train()

Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.61459,0.0,2.1314,489.358
2,0.609900,0.619926,0.046335,2.106,495.241
3,0.609900,0.623068,0.078847,2.0661,504.811
4,0.533800,0.685006,0.14305,2.1616,482.517
5,0.533800,0.722196,0.129605,2.0939,498.104
6,0.433600,0.798816,0.160154,2.0899,499.068
7,0.433600,0.877912,0.105095,2.1224,491.413
8,0.351900,0.936641,0.126424,2.0919,498.599
9,0.351900,1.013865,0.118035,2.1072,494.959
10,0.296700,1.041444,0.127868,2.0611,506.05


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [14]:
# Train random model
trainer_random = get_trainer(model_random, task='cola', batch_size=32, epochs=10)
loss_random = trainer_random.train()

Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.618595,0.0,2.0412,510.985
2,0.613700,0.617532,0.049701,2.0546,507.643
3,0.613700,0.628364,0.095661,2.0341,512.757
4,0.542100,0.677519,0.122445,2.0251,515.04
5,0.542100,0.69972,0.146831,2.0607,506.144
6,0.448200,0.762374,0.142113,2.0357,512.349
7,0.448200,0.888546,0.145589,2.1059,495.268
8,0.363600,0.896922,0.117859,2.0646,505.177
9,0.363600,0.96524,0.129256,2.0643,505.244
10,0.317400,1.010797,0.129394,2.1037,495.783


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [30]:
def train_on_task(task, epochs):
    inorder_path = os.path.join(MODEL_PATH, 'in_order/0/full_model')
    reverse_path = os.path.join(MODEL_PATH, 'reverse_order/0/full_model')
    random_path = os.path.join(MODEL_PATH, 'random_order_1/0/full_model')
    num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2

    model_inorder = AutoModelForSequenceClassification.from_pretrained(inorder_path, num_labels=num_labels)
    model_reverse = AutoModelForSequenceClassification.from_pretrained(reverse_path, num_labels=num_labels)
    model_random = AutoModelForSequenceClassification.from_pretrained(random_path, num_labels=num_labels)

    models = [model_inorder, model_reverse, model_random]
    names = ['in order', 'reverse', 'random']
    test_losses = []
    for model, name in zip(models, names):
        print(f"TRAINING MODEL: {name}; TASK: {task}")
        trainer = get_trainer(model, task=task, batch_size=32, epochs=epochs)
        trainer.train()
        test_loss = trainer.evaluate()
        test_losses.append(test_loss)

    return test_losses

In [23]:
test_losses = train_on_task('mrpc')

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/base_model and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/reverse_order/0/base_model and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/random_order_1/0/base_model and are newly initializ

TRAINING MODEL: in order; TASK: mrpc
Downloading and preparing dataset glue/mrpc (download: 1.43 MiB, generated: 1.43 MiB, post-processed: Unknown size, total: 2.85 MiB) to /root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.603033,0.681373,0.804217,0.9772,417.525
2,No log,0.612643,0.708333,0.819423,0.9264,440.393
3,No log,0.65492,0.696078,0.798046,0.9197,443.632
4,No log,0.683586,0.696078,0.792642,0.974,418.895
5,0.526300,0.89944,0.713235,0.813397,0.9279,439.692
6,0.526300,1.064876,0.625,0.711864,0.9917,411.399
7,0.526300,1.092541,0.664216,0.763385,0.9531,428.091
8,0.526300,1.264695,0.637255,0.723881,0.9806,416.092
9,0.199900,1.340027,0.644608,0.732965,0.9762,417.961
10,0.199900,1.427367,0.634804,0.722533,0.9361,435.848


TRAINING MODEL: reverse; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.602464,0.688725,0.801252,0.944,432.221
2,No log,0.605979,0.70098,0.816817,0.9488,430.024
3,No log,0.590399,0.693627,0.8,1.0292,396.415
4,No log,0.677736,0.696078,0.793333,1.0372,393.376
5,0.536700,0.793199,0.693627,0.784854,1.0262,397.595
6,0.536700,1.15462,0.571078,0.633124,1.0464,389.896
7,0.536700,1.057534,0.625,0.710775,1.0611,384.502
8,0.536700,1.157054,0.64951,0.738574,1.084,376.383
9,0.209600,1.373545,0.590686,0.661258,1.0503,388.461
10,0.209600,1.25953,0.642157,0.731618,1.0782,378.408


TRAINING MODEL: random; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.591858,0.691176,0.807339,1.0209,399.655
2,No log,0.580337,0.698529,0.798691,1.1025,370.065
3,No log,0.610307,0.710784,0.81388,1.0777,378.569
4,No log,0.660906,0.703431,0.8,1.0825,376.921
5,0.533400,0.798685,0.683824,0.787479,1.06,384.889
6,0.533400,1.00297,0.632353,0.716981,1.133,360.111
7,0.533400,1.113978,0.639706,0.729282,1.1043,369.469
8,0.533400,1.154595,0.659314,0.766387,1.0836,376.511
9,0.213900,1.317697,0.612745,0.703008,1.0062,405.476
10,0.213900,1.321677,0.654412,0.748663,0.9764,417.846


In [24]:
test_losses

[{'epoch': 10.0,
  'eval_accuracy': 0.7132352941176471,
  'eval_f1': 0.8133971291866028,
  'eval_loss': 0.8994396328926086,
  'eval_mem_cpu_alloc_delta': 102400,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 94361088,
  'eval_runtime': 0.9813,
  'eval_samples_per_second': 415.786},
 {'epoch': 10.0,
  'eval_accuracy': 0.7009803921568627,
  'eval_f1': 0.8168168168168168,
  'eval_loss': 0.6059792041778564,
  'eval_mem_cpu_alloc_delta': 57344,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': -24576,
  'eval_mem_gpu_peaked_delta': 92554240,
  'eval_runtime': 1.1061,
  'eval_samples_per_second': 368.878},
 {'epoch': 10.0,
  'eval_accuracy': 0.7107843137254902,
  'eval_f1': 0.8138801261829653,
  'eval_loss': 0.6103067398071289,
  'eval_mem_cpu_alloc_delta': 266240,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 91964416,
  'eval_runtime': 1.0053,
  'eval_samples_per_second': 4

In [32]:
results_rte = train_on_task(task='rte', epochs=5)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model and are newly

TRAINING MODEL: in order; TASK: rte
Downloading and preparing dataset glue/rte (download: 680.81 KiB, generated: 1.83 MiB, post-processed: Unknown size, total: 2.49 MiB) to /root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=697150.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.706175,0.476534,1.0551,262.543
2,No log,0.696053,0.490975,1.0238,270.57
3,No log,0.723023,0.519856,1.0253,270.159
4,No log,0.734179,0.545126,0.9953,278.3
5,No log,0.762484,0.523466,0.9963,278.041


TRAINING MODEL: reverse; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.702709,0.462094,1.0184,272.003
2,No log,0.699465,0.487365,0.9795,282.796
3,No log,0.714801,0.523466,1.0129,273.483
4,No log,0.726934,0.523466,1.0173,272.289
5,No log,0.745496,0.537906,1.0106,274.104


TRAINING MODEL: random; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.692789,0.505415,1.0334,268.044
2,No log,0.698156,0.480144,1.0137,273.249
3,No log,0.712718,0.541516,0.9975,277.706
4,No log,0.722921,0.581227,1.0566,262.172
5,No log,0.754616,0.559567,1.0111,273.958


In [33]:
results_rte

[{'epoch': 5.0,
  'eval_accuracy': 0.5451263537906137,
  'eval_loss': 0.7341788411140442,
  'eval_mem_cpu_alloc_delta': 102400,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 359825920,
  'eval_runtime': 1.0048,
  'eval_samples_per_second': 275.678},
 {'epoch': 5.0,
  'eval_accuracy': 0.5379061371841155,
  'eval_loss': 0.7454956769943237,
  'eval_mem_cpu_alloc_delta': 61440,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': -87040,
  'eval_mem_gpu_peaked_delta': 359825920,
  'eval_runtime': 1.022,
  'eval_samples_per_second': 271.028},
 {'epoch': 5.0,
  'eval_accuracy': 0.5812274368231047,
  'eval_loss': 0.7229210734367371,
  'eval_mem_cpu_alloc_delta': 110592,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': -87040,
  'eval_mem_gpu_peaked_delta': 359825920,
  'eval_runtime': 1.031,
  'eval_samples_per_second': 268.682}]

In [36]:
results_stsb = train_on_task(task='stsb', epochs=5)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model and are newly

TRAINING MODEL: in order; TASK: stsb
Downloading and preparing dataset glue/stsb (download: 784.05 KiB, generated: 1.09 MiB, post-processed: Unknown size, total: 1.86 MiB) to /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=802872.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.510661,0.103267,0.098103,5.5592,269.823
2,No log,2.550126,0.191457,0.175682,5.5001,272.724
3,1.936900,2.743069,0.208522,0.201027,5.5165,271.912
4,1.936900,2.65894,0.214224,0.207991,5.4183,276.84
5,1.936900,2.640794,0.218463,0.213059,5.4885,273.298


TRAINING MODEL: reverse; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.560408,0.116696,0.104829,5.6164,267.077
2,No log,2.671336,0.167193,0.156258,5.5008,272.689
3,1.937900,2.785782,0.184328,0.187928,5.4574,274.854
4,1.937900,2.525399,0.200583,0.20317,5.4546,274.999
5,1.937900,2.64256,0.200601,0.20597,5.4163,276.94


TRAINING MODEL: random; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.517604,0.101058,0.094084,5.6255,266.641
2,No log,2.547838,0.174475,0.161885,5.512,272.136
3,1.939400,2.824066,0.210795,0.216028,5.5146,272.007
4,1.939400,2.746681,0.195234,0.195279,5.5365,270.93
5,1.939400,2.781202,0.212873,0.215492,5.4867,273.389


In [37]:
results_stsb

[{'epoch': 5.0,
  'eval_loss': 2.640793561935425,
  'eval_mem_cpu_alloc_delta': 28672,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 99479552,
  'eval_pearson': 0.2184629525136646,
  'eval_runtime': 5.436,
  'eval_samples_per_second': 275.936,
  'eval_spearmanr': 0.21305851339261145},
 {'epoch': 5.0,
  'eval_loss': 2.6425604820251465,
  'eval_mem_cpu_alloc_delta': 208896,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 118625280,
  'eval_pearson': 0.2006008984472332,
  'eval_runtime': 5.645,
  'eval_samples_per_second': 265.723,
  'eval_spearmanr': 0.20596969711894447},
 {'epoch': 5.0,
  'eval_loss': 2.7812018394470215,
  'eval_mem_cpu_alloc_delta': 4096,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 99479552,
  'eval_pearson': 0.21287316685279353,
  'eval_runtime': 5.3628,
  'eval_samples_per_second': 279.705,
  'eval_spearmanr': 0.2

In [38]:
results_wnli = train_on_task(task='wnli', epochs=5)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order/0/full_model and are newly

TRAINING MODEL: in order; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.697925,0.535211,0.3651,194.441
2,No log,0.706914,0.422535,0.3499,202.912
3,No log,0.706603,0.464789,0.3537,200.714
4,No log,0.722604,0.309859,0.3413,208.058
5,No log,0.720747,0.352113,0.3243,218.944


TRAINING MODEL: reverse; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.700712,0.492958,0.3575,198.623
2,No log,0.708242,0.422535,0.3364,211.085
3,No log,0.706025,0.507042,0.349,203.414
4,No log,0.721738,0.28169,0.3481,203.977
5,No log,0.719198,0.352113,0.3364,211.05


TRAINING MODEL: random; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.696027,0.56338,0.335,211.954
2,No log,0.699426,0.535211,0.3263,217.606
3,No log,0.701842,0.521127,0.3573,198.734
4,No log,0.723889,0.394366,0.3196,222.152
5,No log,0.714617,0.366197,0.3489,203.5


In [39]:
results_wnli

[{'epoch': 5.0,
  'eval_accuracy': 0.5352112676056338,
  'eval_loss': 0.6979249119758606,
  'eval_mem_cpu_alloc_delta': 237568,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 115801088,
  'eval_runtime': 0.3907,
  'eval_samples_per_second': 181.748},
 {'epoch': 5.0,
  'eval_accuracy': 0.5070422535211268,
  'eval_loss': 0.7060251832008362,
  'eval_mem_cpu_alloc_delta': 8192,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': 0,
  'eval_mem_gpu_peaked_delta': 115801088,
  'eval_runtime': 0.3768,
  'eval_samples_per_second': 188.407},
 {'epoch': 5.0,
  'eval_accuracy': 0.5633802816901409,
  'eval_loss': 0.696027398109436,
  'eval_mem_cpu_alloc_delta': 204800,
  'eval_mem_cpu_peaked_delta': 0,
  'eval_mem_gpu_alloc_delta': -28672,
  'eval_mem_gpu_peaked_delta': 115801088,
  'eval_runtime': 0.3328,
  'eval_samples_per_second': 213.315}]

In [40]:
def train_on_task_singlepass(task, epochs):
    inorder_path = os.path.join(MODEL_PATH, 'in_order_singlepass/0/full_model')
    reverse_path = os.path.join(MODEL_PATH, 'reverse_order_singlepass/0/full_model')
    random_path = os.path.join(MODEL_PATH, 'random_order_singlepass/0/full_model')
    num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2

    model_inorder = AutoModelForSequenceClassification.from_pretrained(inorder_path, num_labels=num_labels)
    model_reverse = AutoModelForSequenceClassification.from_pretrained(reverse_path, num_labels=num_labels)
    model_random = AutoModelForSequenceClassification.from_pretrained(random_path, num_labels=num_labels)

    models = [model_inorder, model_reverse, model_random]
    names = ['in order', 'reverse', 'random']
    test_losses = []
    for model, name in zip(models, names):
        print(f"TRAINING MODEL: {name}; TASK: {task}")
        trainer = get_trainer(model, task=task, batch_size=32, epochs=epochs)
        trainer.train()
        test_loss = trainer.evaluate()
        test_losses.append(test_loss)

    return test_losses

In [41]:
singelpass_results_cola = train_on_task_singlepass(task='cola', epochs=5)
print(singelpass_results_cola)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/fu

TRAINING MODEL: in order; TASK: cola


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.615133,0.0,3.6452,286.126
2,0.615000,0.616393,0.0,3.5327,295.243
3,0.615000,0.630786,0.06664,3.1912,326.839
4,0.560200,0.646745,0.091092,3.259,320.036
5,0.560200,0.673747,0.091402,3.4942,298.494


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


TRAINING MODEL: reverse; TASK: cola


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.61466,0.0,3.444,302.844
2,0.614300,0.619842,-0.004094,3.6127,288.708
3,0.614300,0.646236,0.106116,3.5311,295.375
4,0.557100,0.659293,0.06813,3.566,292.482
5,0.557100,0.691312,0.083008,3.597,289.965


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


TRAINING MODEL: random; TASK: cola


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.615712,0.0,3.5879,290.696
2,0.615700,0.619172,-0.028362,3.5762,291.651
3,0.615700,0.641672,0.055189,3.5814,291.223
4,0.562100,0.661491,0.109112,3.5417,294.494
5,0.562100,0.685414,0.105392,3.5146,296.763


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


[{'eval_loss': 0.6737474799156189, 'eval_matthews_correlation': 0.09140240193207116, 'eval_runtime': 3.5608, 'eval_samples_per_second': 292.91, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 155648, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 39500288}, {'eval_loss': 0.646236002445221, 'eval_matthews_correlation': 0.1061157598919786, 'eval_runtime': 3.5207, 'eval_samples_per_second': 296.247, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 73728, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 49686528}, {'eval_loss': 0.6614912748336792, 'eval_matthews_correlation': 0.10911226324641782, 'eval_runtime': 3.6214, 'eval_samples_per_second': 288.009, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 98304, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 39500288}]


In [45]:
for result in singelpass_results_cola:
    print(result['eval_matthews_correlation'])

0.09140240193207116
0.1061157598919786
0.10911226324641782


In [46]:
singelpass_results_mrpc = train_on_task_singlepass(task='mrpc', epochs=5)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/fu

TRAINING MODEL: in order; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.599479,0.693627,0.808576,1.5126,269.738
2,No log,0.610973,0.688725,0.807284,1.4829,275.128
3,No log,0.602282,0.691176,0.801887,1.5015,271.722
4,No log,0.642489,0.70098,0.799342,1.5326,266.222
5,0.546800,0.673523,0.705882,0.79798,1.5366,265.517


TRAINING MODEL: reverse; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.612091,0.683824,0.797488,1.5127,269.712
2,No log,0.608797,0.688725,0.804916,1.5135,269.581
3,No log,0.62932,0.683824,0.799378,1.45,281.377
4,No log,0.640477,0.693627,0.797407,1.4553,280.352
5,0.561900,0.666288,0.691176,0.788591,1.498,272.355


TRAINING MODEL: random; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.602978,0.688725,0.809023,1.5283,266.955
2,No log,0.610946,0.688725,0.804916,1.5163,269.079
3,No log,0.617015,0.703431,0.814132,1.5421,264.568
4,No log,0.625029,0.698529,0.799347,1.5161,269.117
5,0.556600,0.657158,0.691176,0.784247,1.5244,267.643


In [47]:
print(singelpass_results_mrpc)

[{'eval_loss': 0.6735231876373291, 'eval_accuracy': 0.7058823529411765, 'eval_f1': 0.7979797979797979, 'eval_runtime': 1.5592, 'eval_samples_per_second': 261.675, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 28672, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 124743168}, {'eval_loss': 0.6404773592948914, 'eval_accuracy': 0.6936274509803921, 'eval_f1': 0.7974068071312804, 'eval_runtime': 1.5298, 'eval_samples_per_second': 266.707, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 53248, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 164343808}, {'eval_loss': 0.6170152425765991, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8141321044546851, 'eval_runtime': 1.5478, 'eval_samples_per_second': 263.594, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 49152, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 134505472}]


In [48]:
singelpass_results_rte = train_on_task_singlepass(task='rte', epochs=5)
print(singelpass_results_rte)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/fu

TRAINING MODEL: in order; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.69535,0.490975,1.1135,248.767
2,No log,0.702802,0.480144,1.0607,261.138
3,No log,0.697317,0.527076,1.0645,260.226
4,No log,0.697122,0.563177,1.0641,260.324
5,No log,0.718448,0.530686,1.093,253.43


TRAINING MODEL: reverse; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.702298,0.472924,1.1052,250.641
2,No log,0.695684,0.487365,1.1187,247.606
3,No log,0.694146,0.548736,1.0604,261.223
4,No log,0.693793,0.534296,1.0579,261.828
5,No log,0.706939,0.555957,1.016,272.628


TRAINING MODEL: random; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.696956,0.505415,1.0503,263.735
2,No log,0.688356,0.523466,1.062,260.816
3,No log,0.689139,0.566787,1.1273,245.726
4,No log,0.691567,0.559567,1.1076,250.094
5,No log,0.715731,0.509025,1.077,257.203


[{'eval_loss': 0.6971224546432495, 'eval_accuracy': 0.5631768953068592, 'eval_runtime': 1.0927, 'eval_samples_per_second': 253.503, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 155648, 'eval_mem_gpu_alloc_delta': -87040, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 359825920}, {'eval_loss': 0.7069389224052429, 'eval_accuracy': 0.555956678700361, 'eval_runtime': 1.1351, 'eval_samples_per_second': 244.026, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 98304, 'eval_mem_gpu_alloc_delta': -87040, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 359825920}, {'eval_loss': 0.6891393065452576, 'eval_accuracy': 0.5667870036101083, 'eval_runtime': 1.1136, 'eval_samples_per_second': 248.735, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 110592, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 359825920}]


In [49]:
singelpass_results_stsb = train_on_task_singlepass(task='stsb', epochs=5)
print(singelpass_results_stsb)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/fu

TRAINING MODEL: in order; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.472524,0.113437,0.102394,5.4906,273.195
2,No log,2.539297,0.166749,0.155141,5.4308,276.204
3,1.968300,2.743228,0.182632,0.174604,5.5477,270.382
4,1.968300,2.667387,0.181204,0.172396,5.478,273.824
5,1.968300,2.703138,0.18341,0.175073,5.4164,276.938


TRAINING MODEL: reverse; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.561248,0.118239,0.105764,5.6494,265.515
2,No log,2.475914,0.17373,0.166759,5.5949,268.099
3,1.926200,2.790334,0.185534,0.185838,5.5799,268.824
4,1.926200,2.688552,0.181377,0.179867,5.6297,266.446
5,1.926200,2.746424,0.190735,0.192192,5.484,273.525


TRAINING MODEL: random; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.535425,0.114512,0.106846,5.3351,281.155
2,No log,2.488205,0.162678,0.151675,5.5041,272.525
3,1.969200,2.87977,0.17364,0.173022,5.5249,271.496
4,1.969200,2.783651,0.185562,0.18639,5.5817,268.733
5,1.969200,2.690807,0.19559,0.197029,5.5085,272.307


[{'eval_loss': 2.7031376361846924, 'eval_pearson': 0.18340969123501255, 'eval_spearmanr': 0.17507347653633998, 'eval_runtime': 5.4633, 'eval_samples_per_second': 274.561, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': -81920, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 98496512}, {'eval_loss': 2.746424436569214, 'eval_pearson': 0.19073469176132674, 'eval_spearmanr': 0.1921921876231848, 'eval_runtime': 5.4399, 'eval_samples_per_second': 275.742, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 45056, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 98496512}, {'eval_loss': 2.6908068656921387, 'eval_pearson': 0.1955904623471409, 'eval_spearmanr': 0.19702931195347756, 'eval_runtime': 5.5336, 'eval_samples_per_second': 271.071, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 49152, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 98496512}]


In [50]:
singelpass_results_wnli = train_on_task_singlepass(task='wnli', epochs=5)
print(singelpass_results_wnli)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_singlepass/0/fu

TRAINING MODEL: in order; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.69222,0.56338,0.3564,199.187
2,No log,0.706121,0.450704,0.3714,191.181
3,No log,0.702505,0.521127,0.3836,185.094
4,No log,0.719291,0.380282,0.3791,187.27
5,No log,0.713524,0.309859,0.387,183.451


TRAINING MODEL: reverse; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.692245,0.56338,0.3449,205.841
2,No log,0.703489,0.478873,0.3487,203.63
3,No log,0.701715,0.521127,0.3527,201.307
4,No log,0.721082,0.43662,0.3944,180.014
5,No log,0.713485,0.366197,0.3493,203.249


TRAINING MODEL: random; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.693808,0.535211,0.3533,200.977
2,No log,0.710846,0.394366,0.3388,209.585
3,No log,0.701264,0.521127,0.3382,209.945
4,No log,0.722652,0.43662,0.3216,220.793
5,No log,0.716858,0.338028,0.3474,204.379


[{'eval_loss': 0.692219614982605, 'eval_accuracy': 0.5633802816901409, 'eval_runtime': 0.3456, 'eval_samples_per_second': 205.45, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 241664, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 115801088}, {'eval_loss': 0.692245364189148, 'eval_accuracy': 0.5633802816901409, 'eval_runtime': 0.3474, 'eval_samples_per_second': 204.37, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': -4096, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 4096, 'eval_mem_gpu_peaked_delta': 115801088}, {'eval_loss': 0.6938076615333557, 'eval_accuracy': 0.5352112676056338, 'eval_runtime': 0.3843, 'eval_samples_per_second': 184.74, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 139264, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 115801088}]


In [51]:
def train_on_task_exp(task, epochs):
    path = os.path.join(MODEL_PATH, 'in_order_exp/0/full_model')
    num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2

    model_exp = AutoModelForSequenceClassification.from_pretrained(path, num_labels=num_labels)

    models = [model_exp]
    names = ['exp']
    test_losses = []
    for model, name in zip(models, names):
        print(f"TRAINING MODEL: {name}; TASK: {task}")
        trainer = get_trainer(model, task=task, batch_size=32, epochs=epochs)
        trainer.train()
        test_loss = trainer.evaluate()
        test_losses.append(test_loss)

    return test_losses

In [55]:
exp_results_cola = train_on_task_exp(task='cola', epochs=5)
print(exp_results_cola)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model and a

TRAINING MODEL: exp; TASK: cola


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Matthews Correlation,Runtime,Samples Per Second
1,No log,0.613271,0.0,3.8251,272.674
2,0.607700,0.618933,0.026843,3.937,264.924
3,0.607700,0.647186,0.088568,4.013,259.905
4,0.521700,0.657865,0.161985,3.9032,267.219
5,0.521700,0.68944,0.18747,3.8923,267.966


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


[{'eval_loss': 0.6894397139549255, 'eval_matthews_correlation': 0.18746985483123144, 'eval_runtime': 3.8298, 'eval_samples_per_second': 272.337, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 196608, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 39500288}]


In [56]:
exp_results_mrpc = train_on_task_exp(task='mrpc', epochs=5)
print(exp_results_mrpc)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model and a

TRAINING MODEL: exp; TASK: mrpc


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,F1,Runtime,Samples Per Second
1,No log,0.590902,0.703431,0.818591,1.6883,241.656
2,No log,0.62002,0.678922,0.758748,1.6523,246.929
3,No log,0.615533,0.70098,0.811728,1.6365,249.314
4,No log,0.65813,0.654412,0.747764,1.6814,242.655
5,0.529200,0.700195,0.64951,0.738574,1.6859,242.012


[{'eval_loss': 0.5909015536308289, 'eval_accuracy': 0.7034313725490197, 'eval_f1': 0.8185907046476761, 'eval_runtime': 1.6702, 'eval_samples_per_second': 244.288, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 167936, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 100000768}]


In [57]:
exp_results_rte = train_on_task_exp(task='rte', epochs=5)
print(exp_results_rte)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model and a

TRAINING MODEL: exp; TASK: rte


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.697919,0.476534,1.2353,224.23
2,No log,0.696094,0.501805,1.1511,240.649
3,No log,0.709925,0.541516,1.1654,237.688
4,No log,0.724113,0.555957,1.2315,224.938
5,No log,0.752898,0.516245,1.1878,233.209


[{'eval_loss': 0.7241127490997314, 'eval_accuracy': 0.555956678700361, 'eval_runtime': 1.2136, 'eval_samples_per_second': 228.243, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': -8192, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 8192, 'eval_mem_gpu_peaked_delta': 359825920}]


In [58]:
exp_results_stsb = train_on_task_exp(task='stsb', epochs=5)
print(exp_results_stsb)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model and a

TRAINING MODEL: exp; TASK: stsb


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Pearson,Spearmanr,Runtime,Samples Per Second
1,No log,2.561773,0.106592,0.09792,5.8948,254.461
2,No log,2.451912,0.206879,0.192142,5.7882,259.147
3,1.917200,2.660355,0.231141,0.237403,5.9035,254.086
4,1.917200,2.77803,0.207228,0.209399,5.8378,256.948
5,1.917200,2.638678,0.22445,0.228063,5.9519,252.022


[{'eval_loss': 2.6603548526763916, 'eval_pearson': 0.2311413759651688, 'eval_spearmanr': 0.23740309977747343, 'eval_runtime': 5.9774, 'eval_samples_per_second': 250.947, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 49152, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 100462592}]


In [59]:
exp_results_wnli = train_on_task_exp(task='wnli', epochs=5)
print(exp_results_wnli)

Some weights of the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/bert/models/in_order_exp/0/full_model and a

TRAINING MODEL: exp; TASK: wnli


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.688745,0.56338,0.369,192.394
2,No log,0.697371,0.521127,0.367,193.45
3,No log,0.699918,0.521127,0.3878,183.092
4,No log,0.718271,0.380282,0.3912,181.485
5,No log,0.715347,0.352113,0.3735,190.092


[{'eval_loss': 0.6887449622154236, 'eval_accuracy': 0.5633802816901409, 'eval_runtime': 0.3842, 'eval_samples_per_second': 184.793, 'epoch': 5.0, 'eval_mem_cpu_alloc_delta': 196608, 'eval_mem_gpu_alloc_delta': 0, 'eval_mem_cpu_peaked_delta': 0, 'eval_mem_gpu_peaked_delta': 115801088}]
