In [None]:
!pip install transformers
!pip install datasets
!pip install --upgrade --no-cache-dir gdown==4.5.4

!gdown 18oZZ4jqRK-uF-Nz6ftRdgNjKix88hrnO
!unzip data_and_models.zip && rm data_and_models.zip

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 4.6 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 72.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 93.9 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.23.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.6.1-py3-none-any.whl (441 kB)
[K     |████████████████████████████████| 441 kB 4.8 MB/

In [None]:
import random
import time

from datasets import load_metric
import numpy as np
import pandas as pd
import pickle


def compute_metrics(eval_preds):
    """Accuracy callback handed to the Hugging Face ``Trainer``.

    The previous implementation called ``load_metric("accuracy")`` on every
    evaluation pass, which re-resolves the metric script each epoch and depends
    on an API that later ``datasets`` releases removed. Plain accuracy is
    computed directly with NumPy instead; the returned key and value are the
    same as the ``accuracy`` metric produced.

    Args:
        eval_preds: ``(logits, labels)`` pair; the class axis of ``logits`` is
            the last one and ``labels`` holds the integer class ids.

    Returns:
        ``{"accuracy": fraction_of_correct_predictions}`` as a plain float.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    # Element-wise comparison, then the mean of the boolean hits.
    accuracy = float(np.mean(predictions == np.asarray(labels)))
    return {"accuracy": accuracy}

# Wall-clock reference for the total-runtime report printed in the timing cell.
start = time.time()
# Folder created by unzipping data_and_models.zip; holds the corpus CSV and the pickled baseline.
directory = "./data_and_models/"
# Full target corpus as a DataFrame; compute() reads its "text" and "topic_<task>" columns.
all_df = pd.read_csv(directory+"target_corpus.csv")

import torch
class PSCDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    ``encodings`` is a mapping from field name to a per-example sequence
    (as returned by the tokenizer); ``labels`` is a parallel sequence of
    class ids. Each item is a dict of tensors with an extra ``labels`` entry.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The dataset size is defined by the label list.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

import sklearn
def top_k_accuracy(top_k, predictions, labels):
  """Share of rows whose true label is among the ``top_k`` best-scored classes.

  ``predictions`` holds one score vector per example (position = class id) and
  ``labels`` the matching true class ids. Scores are ranked in descending
  order; equal scores keep the lower class id first.
  """
  assert len(predictions) == len(labels)
  hits = 0
  for row, scores in enumerate(predictions):
    # Stable sort on the negated score gives a descending ranking.
    ranked = sorted(enumerate(scores), key=lambda pair: -pair[1])
    if any(cls == labels[row] for cls, _ in ranked[:top_k]):
      hits += 1
  return hits / len(predictions)

import transformers
from transformers import RobertaTokenizerFast
from transformers import RobertaForSequenceClassification, TrainingArguments, Trainer
# Shared tokenizer for every fine-tuning run (roberta-base vocabulary).
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')


# Row labels of the results table, in the same order compute() aggregates its score lists.
metrics = ["Top-1 accuracy/F1 micro", "Top-3 accuracy", "Top-5 accuracy", "Balanced accuracy", "F1 macro"]
# compute() performs one full split + fine-tuning + evaluation per seed.
seeds = [11, 12, 13, 14, 15]
# Passed to TrainingArguments as num_train_epochs.
epochs = 20

# Per-task settings. The key is the suffix used for the pickled baseline files
# ("logistic_model_<key>.pkl", "tfidf_<key>.pkl") and the "topic_<key>" column of all_df.
tasks = {
    "44": {
        "number_of_labels": 42,  # size of the classification head; the logged run reports 42 classes for this task
         "label_column": 1,  # index of this task's label within each row of target_corpus.csv
    },
    "8": {
        "number_of_labels": 8,  # size of the classification head for the coarse task
        "label_column": 2,  # index of this task's label within each row of target_corpus.csv
    }
}

def compute(task):
  """Fine-tune RoBERTa on the target corpus and compare it with the pickled baseline.

  For every seed in ``seeds`` the corpus is split into train/dev/test (625
  examples each for dev and test), a fresh ``roberta-base`` classifier is
  fine-tuned, and both the fine-tuned model and the pre-trained
  TF-IDF + logistic-regression baseline are scored on the test split.

  Args:
    task: Key into ``tasks``. It is also the suffix of the baseline files
      (``logistic_model_<task>.pkl``, ``tfidf_<task>.pkl``) and of the
      ``topic_<task>`` column of ``all_df``.

  Returns:
    Dict mapping each name in ``metrics`` to
    ``[baseline_mean, baseline_std, experiment_mean, experiment_std]``,
    aggregated over all seeds.
  """
  # Function-scope imports, hoisted out of the per-seed loop.
  import csv
  import sklearn.metrics  # explicit, instead of relying on another import loading the submodule
  from sklearn.model_selection import train_test_split

  t1, t3, t5, ba, f1 = [], [], [], [], []
  baseline_t1, baseline_t3, baseline_t5, baseline_ba, baseline_f1 = [], [], [], [], []

  ###### load the cross-domain classifier
  # SECURITY: pickle.load can execute arbitrary code. These files come from the
  # downloaded archive; only run this against an archive you trust.
  with open(directory + "logistic_model_" + task + ".pkl", "rb") as doc:
    model = pickle.load(doc)
  with open(directory + "tfidf_" + task + ".pkl", "rb") as doc:
    cf_tokenizer = pickle.load(doc)

  def normalize(topic):
    # Baseline class names use spaces/hyphens where the corpus column uses dots.
    return topic.replace(" ", ".").replace("-", ".")

  # Normalized class name -> column index of model.predict_proba.
  class_mapper = {normalize(topic): i for i, topic in enumerate(model.classes_)}

  # The corpus and its label -> id mapping do not depend on the seed, so they
  # are read once per task instead of once per seed.
  label_column = tasks[task]["label_column"]
  classes = {}
  texts = []
  labels = []
  with open(directory + "target_corpus.csv") as doc:
    reader = csv.reader(doc)
    next(reader)  # skip the header row
    for row in reader:
      # Ids are handed out in order of first appearance in the file.
      labels.append(classes.setdefault(row[label_column], len(classes)))
      texts.append(row[0])
  print("# classes", len(classes))

  for seed in seeds:
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)

    X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=625, random_state=seed)
    X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size=625, random_state=seed)
    print(len(X_train), len(X_dev), len(X_test))
    print("# classes in train", len(set(y_train)))
    print("# classes in dev", len(set(y_dev)))
    print("# classes in test", len(set(y_test)))

    mlength = 512
    train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=mlength)
    dev_encodings = tokenizer(X_dev, truncation=True, padding=True, max_length=mlength)
    test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=mlength)

    train_dataset = PSCDataset(train_encodings, y_train)
    dev_dataset = PSCDataset(dev_encodings, y_dev)
    test_dataset = PSCDataset(test_encodings, y_test)

    training_args = TrainingArguments(
        output_dir="./results",          # output directory
        num_train_epochs=epochs,         # total number of training epochs
        per_device_train_batch_size=16,  # batch size per device during training
        per_device_eval_batch_size=64,   # batch size for evaluation
        warmup_steps=0,                  # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        logging_dir='./logs',            # directory for storing logs
        logging_steps=10,
        learning_rate=2e-5,
        save_strategy="epoch",
        evaluation_strategy="epoch",
        load_best_model_at_end=True,
        seed=seed,
    )

    def model_init():
      # Factory so that every seed starts from freshly loaded pre-trained weights.
      return RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=tasks[task]["number_of_labels"])

    trainer = Trainer(
        model_init=model_init,               # factory producing the model to be trained
        args=training_args,                  # training arguments, defined above
        train_dataset=train_dataset,         # training dataset
        eval_dataset=dev_dataset,            # evaluation dataset
        compute_metrics=compute_metrics,     # accuracy on the dev set after each epoch
    )

    trainer.train()
    predictions = trainer.predict(test_dataset)
    lm_preds = np.argmax(predictions.predictions, axis=-1)

    # Fine-tuned model; label ids are the first-appearance ids built above.
    t1.append(top_k_accuracy(1, predictions.predictions, test_dataset.labels))
    t3.append(top_k_accuracy(3, predictions.predictions, test_dataset.labels))
    t5.append(top_k_accuracy(5, predictions.predictions, test_dataset.labels))
    ba.append(sklearn.metrics.balanced_accuracy_score(test_dataset.labels, lm_preds))
    f1.append(sklearn.metrics.f1_score(test_dataset.labels, lm_preds, average="macro"))

    # Baseline on the same test texts; label ids here follow model.classes_.
    # NOTE(review): rows are matched back by exact text, so a text that occurs
    # more than once in the corpus would pull in extra rows - confirm that the
    # corpus texts are unique.
    df = all_df[all_df["text"].isin(X_test)]
    X = df['text']
    Y = list(df["topic_" + task].transform(lambda x: class_mapper[x]))

    Xtfidf = cf_tokenizer.transform(X)

    baseline_preds = [class_mapper[normalize(topic)] for topic in model.predict(Xtfidf)]
    policy_probs = model.predict_proba(Xtfidf)

    baseline_t1.append(top_k_accuracy(1, policy_probs, Y))
    baseline_t3.append(top_k_accuracy(3, policy_probs, Y))
    baseline_t5.append(top_k_accuracy(5, policy_probs, Y))
    baseline_ba.append(sklearn.metrics.balanced_accuracy_score(Y, baseline_preds))
    baseline_f1.append(sklearn.metrics.f1_score(Y, baseline_preds, average="macro"))

  result = {}
  for metric, baseline, experiment in zip(metrics, [baseline_t1, baseline_t3, baseline_t5, baseline_ba, baseline_f1], [t1, t3, t5, ba, f1]):
    # The experiment's spread must come from the experiment scores; it was
    # previously computed from the baseline scores by mistake.
    result[metric] = [np.mean(baseline), np.std(baseline), np.mean(experiment), np.std(experiment)]
  return result

# Run the whole experiment for every configured task; each compute() call
# fine-tunes one model per seed, so this is the long-running part of the notebook.
results = {}
for task in tasks:
  result = compute(task)
  results[task] = result

# `results` is a nested dict, so np.save stores it as a pickled 0-d object array;
# read it back with np.load(path, allow_pickle=True).item().
np.save( directory + "table_1_results.npy", results)


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

# classes 42
2915 625 625
# classes in train 42
# classes in dev 36
# classes in test 35


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13",
    "14": "LABEL_14",
    "15": "LABEL_15",
    "16": "LABEL_16",
    "17": "LABEL_17",
    "18": "LABEL_18",
    "19": "LABEL_19",
    "20": "LABEL_20",
    "21": "LABEL_21",
    "22": "LABEL_22",
    "23": "LABEL_23",
    "24": "LABEL_24",
    "25": "LABEL_25",

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some 

Epoch,Training Loss,Validation Loss,Accuracy
1,2.4303,2.288778,0.4176
2,1.9171,1.934722,0.5168
3,1.5825,1.823273,0.5424
4,1.3624,1.823743,0.5488
5,0.9723,1.889241,0.5232
6,0.848,1.888951,0.5344
7,0.5739,2.010903,0.5328
8,0.4092,1.98929,0.5536
9,0.3174,2.029555,0.5632
10,0.2309,2.128701,0.5328


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
  if sys.path[0] == '':


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving m

# classes 42
2915 625 625
# classes in train 42
# classes in dev 38
# classes in test 36


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "

Epoch,Training Loss,Validation Loss,Accuracy
1,2.4401,2.280676,0.4352
2,2.1084,2.000825,0.4736
3,1.4616,1.840411,0.512
4,1.2014,1.78463,0.528
5,0.9437,1.797061,0.5472
6,0.6154,1.897894,0.5328
7,0.6419,1.956028,0.5392
8,0.2293,2.03653,0.544
9,0.3249,2.070697,0.5376
10,0.1992,2.198223,0.5312


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru



# classes 42
2915 625 625
# classes in train 41
# classes in dev 38
# classes in test 37


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "

Epoch,Training Loss,Validation Loss,Accuracy
1,2.5073,2.389546,0.4032
2,1.9702,2.135563,0.472
3,1.7043,1.997115,0.496
4,1.453,2.0351,0.48
5,0.9569,2.062247,0.4864
6,0.6945,2.15573,0.4864
7,0.5929,2.221344,0.5088
8,0.4993,2.261892,0.4944
9,0.4328,2.324211,0.5056
10,0.2564,2.473519,0.4928


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

# classes 42
2915 625 625
# classes in train 42
# classes in dev 35
# classes in test 37


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "

Epoch,Training Loss,Validation Loss,Accuracy
1,2.5149,2.50049,0.3648
2,2.0622,2.101844,0.464
3,1.5385,1.987228,0.472
4,1.2171,1.94286,0.5024
5,0.8703,1.969932,0.4896
6,0.7309,2.034905,0.4896
7,0.6832,2.094531,0.4992
8,0.3393,2.186576,0.488
9,0.2957,2.145506,0.5184
10,0.2354,2.275206,0.4928


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru



# classes 42
2915 625 625
# classes in train 42
# classes in dev 38
# classes in test 35


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "

Epoch,Training Loss,Validation Loss,Accuracy
1,2.592,2.406589,0.424
2,1.8191,2.11586,0.4624
3,1.7895,1.973715,0.4896
4,1.187,1.987817,0.4832
5,1.018,2.02603,0.4832
6,0.6837,2.087702,0.4896
7,0.5114,2.160427,0.4848
8,0.4005,2.265558,0.4912
9,0.2759,2.363732,0.4912
10,0.1687,2.525493,0.48


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


# classes 8
2915 625 625
# classes in train 8
# classes in dev 8
# classes in test 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,

Epoch,Training Loss,Validation Loss,Accuracy
1,1.3199,1.205588,0.5776
2,0.9789,1.116255,0.6192
3,0.8163,1.130859,0.6336
4,0.8216,1.280719,0.632
5,0.4968,1.309494,0.6288
6,0.1466,1.554289,0.6096
7,0.1655,1.672504,0.6256
8,0.1786,1.918652,0.6288
9,0.0383,2.086765,0.6384
10,0.116,2.2943,0.6256


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

# classes 8
2915 625 625
# classes in train 8
# classes in dev 8
# classes in test 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,

Epoch,Training Loss,Validation Loss,Accuracy
1,1.1399,1.186527,0.608
2,0.9094,1.059664,0.616
3,0.6856,1.133391,0.6304
4,0.6073,1.20822,0.6352
5,0.3167,1.424255,0.6192
6,0.1826,1.555377,0.632
7,0.1907,1.705964,0.624
8,0.098,1.995148,0.6336
9,0.0072,2.262979,0.6176
10,0.0135,2.442546,0.608


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

# classes 8
2915 625 625
# classes in train 8
# classes in dev 8
# classes in test 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,

Epoch,Training Loss,Validation Loss,Accuracy
1,1.2681,1.286879,0.5488
2,0.961,1.170761,0.5904
3,0.6919,1.191083,0.6256
4,0.6424,1.341316,0.6224
5,0.3129,1.541527,0.624
6,0.2846,1.650567,0.5984
7,0.2486,1.889238,0.6064
8,0.1233,2.147868,0.6
9,0.1619,2.279927,0.6128
10,0.0433,2.516886,0.6144


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

# classes 8
2915 625 625
# classes in train 8
# classes in dev 8
# classes in test 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,

Epoch,Training Loss,Validation Loss,Accuracy
1,1.3711,1.301919,0.5328
2,1.0813,1.196107,0.5968
3,0.6924,1.225822,0.6144
4,0.5353,1.311677,0.5984
5,0.348,1.458576,0.5984
6,0.1663,1.601807,0.6144
7,0.1594,1.825113,0.5968
8,0.1392,2.099366,0.608
9,0.0974,2.326327,0.5824
10,0.0113,2.486365,0.5904


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

# classes 8
2915 625 625
# classes in train 8
# classes in dev 8
# classes in test 8


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,

Epoch,Training Loss,Validation Loss,Accuracy
1,1.2929,1.18713,0.6032
2,1.0512,1.085045,0.6336
3,0.7895,1.104119,0.632
4,0.6917,1.240097,0.6064
5,0.3469,1.456409,0.6144
6,0.2584,1.631499,0.6352
7,0.2917,1.943097,0.6144
8,0.0631,2.06987,0.6304
9,0.1509,2.32331,0.6096
10,0.007,2.446229,0.6128


***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-183
Configuration saved in ./results/checkpoint-183/config.json
Model weights saved in ./results/checkpoint-183/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-366
Configuration saved in ./results/checkpoint-366/config.json
Model weights saved in ./results/checkpoint-366/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-549
Configuration saved in ./results/checkpoint-549/config.json
Model weights saved in ./results/checkpoint-549/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 625
  Batch size = 64
Saving model checkpoint to ./results/checkpoint-732
Configuration saved in ./results/checkpoint-732/config.json
Model weights saved in ./results/checkpoint-732/pytorch_model.bin
***** Ru

In [None]:
def preprocess_result(result):
  """Format one metric's aggregated scores as two LaTeX table cells.

  Args:
    result: ``[baseline_mean, baseline_std, experiment_mean, experiment_std]``.

  Returns:
    ``"<baseline> (<std>) &  <experiment> (<std>)"`` with every number shown
    with three decimals. The experiment cell is wrapped in a LaTeX bold command
    when its mean is strictly higher than the baseline mean; otherwise the
    baseline cell is wrapped.
  """
  # Fixed three-decimal formatting replaces the manual zero padding, which was
  # only right for values in [0, 10).
  base_mean, base_std, exp_mean, exp_std = (f"{round(value, 3):.3f}" for value in result)
  baseline_cell = f"{base_mean} ({base_std})"
  experiment_cell = f"{exp_mean} ({exp_std})"

  # The bold group is closed inside its own cell. Previously, when the baseline
  # won, the closing brace was emitted after the " & " column separator, which
  # is invalid in a LaTeX table row.
  if result[2] > result[0]:
    experiment_cell = "\\textbf{" + experiment_cell + "}"
  else:
    baseline_cell = "\\textbf{" + baseline_cell + "}"

  # Two spaces after "&" keep the layout of the previously generated rows.
  return baseline_cell + " &  " + experiment_cell

# Emit one LaTeX table row per metric: the metric name followed by the
# formatted baseline/experiment cells of every task, ended by a row break.
for metric in metrics:
  cells = [preprocess_result(results[task][metric]) for task in tasks]
  row = " & ".join([metric] + cells)
  print(row + "\\\\")

Top-1 accuracy/F1 micro & 0.414 (0.009) &  \textbf{0.527 (0.009)} & 0.515 (0.006) &  \textbf{0.631 (0.006)}\\
Top-3 accuracy & 0.656 (0.008) &  \textbf{0.744 (0.008)} & 0.819 (0.003) &  \textbf{0.904 (0.003)}\\
Top-5 accuracy & 0.752 (0.004) &  \textbf{0.828 (0.004)} & 0.921 (0.008) &  \textbf{0.969 (0.008)}\\
Balanced accuracy & 0.309 (0.030) &  \textbf{0.357 (0.030)} & 0.454 (0.014) &  \textbf{0.580 (0.014)}\\
F1 macro & 0.294 (0.025) &  \textbf{0.328 (0.025)} & 0.449 (0.014) &  \textbf{0.574 (0.014)}\\


In [None]:
# `start` was taken at the top of the training cell, so the figure below does
# not include the install/download cell.
end = time.time()
# Floor division of floats stays a float, hence output such as "262.0 minutes".
print(f"The program took {(end - start) // 60} minutes in total.")

The program took 262.0 minutes in total.


In [None]:
# Colab-only: release the runtime once everything above has finished.
# NOTE(review): this presumably ends the session, so files that were not copied
# elsewhere (e.g. table_1_results.npy) would be lost - confirm before relying on it.
from google.colab import runtime
runtime.unassign()