<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#imports-and-functions" data-toc-modified-id="imports-and-functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>imports and functions</a></span></li><li><span><a href="#experiments" data-toc-modified-id="experiments-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>experiments</a></span><ul class="toc-item"><li><span><a href="#12" data-toc-modified-id="12-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>12</a></span></li><li><span><a href="#13" data-toc-modified-id="13-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>13</a></span></li><li><span><a href="#14" data-toc-modified-id="14-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>14</a></span></li><li><span><a href="#15" data-toc-modified-id="15-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>15</a></span></li><li><span><a href="#16" data-toc-modified-id="16-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>16</a></span></li></ul></li></ul></div>

# imports and functions

In [None]:
import collections
import pandas as pd
import os
import sys
import json
import matplotlib.pyplot as plt
from tqdm.auto import tqdm, trange

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torch.optim import lr_scheduler
# import torchmetrics

import datasets
from datasets import load_metric
from transformers import AutoConfig, AutoTokenizer, BertModel, RobertaModel
from transformers import BertForSequenceClassification
from transformers import TrainingArguments, Trainer

from sklearn.metrics import mean_squared_error, accuracy_score, precision_recall_fscore_support


In [None]:
# Explicitly set the TOKENIZERS_PARALLELISM flag to "true" for this process.
# Background discussion: https://github.com/huggingface/transformers/issues/5486
# (use "false" here instead to switch it off)
os.environ.update(TOKENIZERS_PARALLELISM="true")

In [None]:
# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
# Task registry: task name -> value later used as `num_labels` by train_baseline.
with open('../../data/xslue/tasks.json') as f:
    tasks = json.loads(f.read())
tasks

{'CrowdFlower': 13,
 'DailyDialog': 7,
 'EmoBank_Valence': 1,
 'EmoBank_Arousal': 1,
 'EmoBank_Dominance': 1,
 'HateOffensive': 3,
 'PASTEL_age': 8,
 'PASTEL_country': 2,
 'PASTEL_education': 10,
 'PASTEL_ethnic': 10,
 'PASTEL_gender': 3,
 'PASTEL_politics': 3,
 'PASTEL_tod': 5,
 'SARC': 2,
 'SarcasmGhosh': 2,
 'SentiTreeBank': 1,
 'ShortHumor': 2,
 'ShortJokeKaggle': 2,
 'ShortRomance': 2,
 'StanfordPoliteness': 1,
 'TroFi': 2,
 'VUA': 2}

In [None]:
class MyDataset(Dataset):
    """Map-style dataset over a tab-separated file with `text` and `label` columns.

    The whole file is read and tokenized once in the constructor; `__getitem__`
    then only wraps the pre-computed encodings and the label in tensors.

    Args:
        tsv_file: Path to a TSV file that has at least a `text` and a `label` column.
        tokenizer_name: Checkpoint name handed to `AutoTokenizer.from_pretrained`.
            Defaults to "bert-base-uncased", the value that used to be hard-coded.
        max_length: Truncation length handed to the tokenizer. Defaults to 128,
            the value that used to be hard-coded.
    """

    def __init__(self, tsv_file, tokenizer_name="bert-base-uncased", max_length=128):
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        # Rows with a missing value in any column are discarded; the index is
        # rebuilt so that positions 0..len-1 line up with the encodings below.
        self.df = pd.read_csv(tsv_file, sep='\t').dropna().reset_index(drop=True)
        # padding=True pads every example to the longest sequence in this file
        # (after truncation to `max_length`).
        self.encodings = self.tokenizer(
            self.df['text'].tolist(),
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        if self.df['label'].dtype == 'float64':
            self.df['label'] = self.df['label'].astype('float32')
        self.labels = self.df['label'].tolist()

    def __len__(self):
        """Number of rows kept after dropping incomplete ones."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the tokenizer fields plus `labels` for row `idx` as tensors.

        `idx` may be an int or a scalar tensor. The label is wrapped in a
        one-element list, so `labels` has shape (1,).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        item = {key: torch.tensor(values[idx]) for key, values in self.encodings.items()}
        item["labels"] = torch.tensor([self.labels[idx]])
        return item


In [None]:
# Correlation metrics from the `datasets` metric hub.
# NOTE(review): neither object is referenced by train_baseline in the visible part of this notebook.
pearsonr, spearmanr = (load_metric(metric_name) for metric_name in ("pearsonr", "spearmanr"))

In [None]:
# Per-device batch size; train_baseline passes it to TrainingArguments for both
# training and evaluation.
# GPU memory figures noted while choosing it:
#   batch size 32 -> 6617 - 6680 MB
#   batch size 48 -> 7894 (presumably MB as well)
#   batch size 64 -> out of memory
batch_size = 32

In [None]:
def _make_compute_metrics(num_labels):
    """Build the `compute_metrics` callback that matches the task's label count.

    Args:
        num_labels: 1 selects RMSE; 2 selects accuracy plus binary
            precision/recall/F1; anything else selects accuracy plus
            macro-averaged precision/recall/F1.

    Returns:
        A function taking the Trainer's evaluation prediction object and
        returning a dict of metric name -> value.
    """
    if num_labels == 1:
        def compute_metrics(pred):
            predictions, labels = pred
            # sqrt(MSE) rather than `squared=False`: that flag is deprecated in
            # newer scikit-learn releases, while this form works on all of them.
            rmse = np.sqrt(mean_squared_error(labels, predictions))
            return {"rmse": rmse}
        return compute_metrics

    # The two classification cases differ only in how per-class scores are averaged.
    average = 'binary' if num_labels == 2 else 'macro'

    def compute_metrics(pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=average)
        acc = accuracy_score(labels, preds)
        return {
            'accuracy': acc,
            'f1': f1,
            'precision': precision,
            'recall': recall
        }
    return compute_metrics


def train_baseline(task, freeze_bert=False):
    """Fine-tune a single-task `bert-base-uncased` classifier on one task.

    Reads the train and dev splits from `../../data/xslue/processed/`, trains for
    5 epochs with the Hugging Face `Trainer`, evaluates on the dev split after
    every epoch and writes checkpoints and logs under
    `../../result/baselines/<task>` (`<task>_freezed` when `freeze_bert` is set).

    The test split is intentionally not loaded: nothing in this function
    evaluates on it, so reading and tokenizing it was wasted work.

    Args:
        task: Key into the module-level `tasks` mapping; the mapped value is used
            as `num_labels` (1 is scored with RMSE, otherwise classification metrics).
        freeze_bert: When True, every parameter under `model.bert` has
            `requires_grad` switched off, so only the remaining parameters train.

    Returns:
        None.
    """
    import gc  # only needed here, so imported at function scope

    # Collect objects left over from a previous call (model, trainer) so that
    # their GPU memory can actually be handed back by empty_cache().
    gc.collect()
    torch.cuda.empty_cache()

    num_labels = tasks[task]

    data_folder = '../../data/xslue'
    train_dataset = MyDataset(f'{data_folder}/processed/train/{task}.tsv')
    valid_dataset = MyDataset(f'{data_folder}/processed/dev/{task}.tsv')

    singletaskbert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
    if freeze_bert:
        for param in singletaskbert.bert.parameters():
            param.requires_grad = False

    result_folder = '../../result'
    run_name = task + '_freezed' if freeze_bert else task
    output_dir = f"{result_folder}/baselines/{run_name}"
    training_args = TrainingArguments(
        output_dir=output_dir,                   # checkpoints go here
        num_train_epochs=5,                      # total number of training epochs
        per_device_train_batch_size=batch_size,  # batch size per device during training
        per_device_eval_batch_size=batch_size,   # batch size for evaluation
        warmup_steps=500,                        # number of warmup steps for learning rate scheduler
        weight_decay=0.01,                       # strength of weight decay
        logging_dir=f"{output_dir}/logs",        # directory for storing logs
        evaluation_strategy="epoch",             # evaluate at the end of every epoch
        save_total_limit=1,
        save_strategy='epoch',                   # must match evaluation_strategy for load_best_model_at_end
        load_best_model_at_end=True,             # decide on loss
    )

    trainer = Trainer(
        model=singletaskbert,                    # the instantiated Transformers model to be trained
        args=training_args,                      # training arguments, defined above
        train_dataset=train_dataset,             # training dataset
        eval_dataset=valid_dataset,              # evaluation dataset
        compute_metrics=_make_compute_metrics(num_labels),  # the callback that computes metrics of interest
    )
    trainer.train()

# experiments

## 12

In [None]:
# Task at position 12 of `tasks` ('PASTEL_tod' in the listing printed above), full fine-tuning.
task = list(tasks)[12]
train_baseline(task, freeze_bert=False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2961,1.299696,0.470036,0.266865,0.404191,0.292907
2,1.2644,1.298557,0.47148,0.271082,0.441297,0.295143
3,1.1017,1.368624,0.429122,0.307349,0.328053,0.309603
4,0.728,1.682379,0.427196,0.306962,0.323563,0.310037
5,0.4482,2.05661,0.419495,0.313106,0.323609,0.311995


***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../../result/baselines/PASTEL_tod/checkpoint-1039
Configuration saved in ../../result/baselines/PASTEL_tod/checkpoint-1039/config.json
Model weights saved in ../../result/baselines/PASTEL_tod/checkpoint-1039/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../../result/baselines/PASTEL_tod/checkpoint-2078
Configuration saved in ../../result/baselines/PASTEL_tod/checkpoint-2078/config.json
Model weights saved in ../../result/baselines/PASTEL_tod/checkpoint-2078/pytorch_model.bin
Deleting older checkpoint [../../result/baselines/PASTEL_tod/checkpoint-1039] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
Saving model checkpoint to ../../result/baselines/PASTEL_tod/che

In [None]:
# Task at position 12 of `tasks` ('PASTEL_tod' in the listing printed above), BERT encoder frozen.
task = list(tasks)[12]
train_baseline(task, freeze_bert=True)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/j

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_emb

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.4265,1.421628,0.401444,0.184231,0.165445,0.237363
2,1.4222,1.416608,0.407702,0.184889,0.17316,0.239852
3,1.4145,1.413576,0.409386,0.186414,0.173394,0.241145
4,1.4217,1.413384,0.41083,0.18486,0.17741,0.241053
5,1.4153,1.41203,0.408664,0.18541,0.174045,0.240421


***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../../result/baselines/PASTEL_tod_freezed/checkpoint-1039
Configuration saved in ../../result/baselines/PASTEL_tod_freezed/checkpoint-1039/config.json
Model weights saved in ../../result/baselines/PASTEL_tod_freezed/checkpoint-1039/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ../../result/baselines/PASTEL_tod_freezed/checkpoint-2078
Configuration saved in ../../result/baselines/PASTEL_tod_freezed/checkpoint-2078/config.json
Model weights saved in ../../result/baselines/PASTEL_tod_freezed/checkpoint-2078/pytorch_model.bin
Deleting older checkpoint [../../result/baselines/PASTEL_tod_freezed/checkpoint-1039] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 4155
  Batch size = 32
  _warn_

## 13

In [None]:
# Task at position 13 of `tasks` ('SARC' in the listing printed above), full fine-tuning.
task = list(tasks)[13]
train_baseline(task, freeze_bert=False)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/j

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5605,0.555968,0.714375,0.724823,0.69826,0.753487
2,0.4875,0.569209,0.720288,0.726626,0.709497,0.744604
3,0.3537,0.669095,0.712274,0.716726,0.704768,0.729097
4,0.2059,0.887727,0.702801,0.715671,0.685013,0.749201
5,0.1242,1.239568,0.70284,0.711281,0.690645,0.733188


***** Running Evaluation *****
  Num examples = 51410
  Batch size = 32
Saving model checkpoint to ../../result/baselines/SARC/checkpoint-6427
Configuration saved in ../../result/baselines/SARC/checkpoint-6427/config.json
Model weights saved in ../../result/baselines/SARC/checkpoint-6427/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 51410
  Batch size = 32
Saving model checkpoint to ../../result/baselines/SARC/checkpoint-12854
Configuration saved in ../../result/baselines/SARC/checkpoint-12854/config.json
Model weights saved in ../../result/baselines/SARC/checkpoint-12854/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 51410
  Batch size = 32
Saving model checkpoint to ../../result/baselines/SARC/checkpoint-19281
Configuration saved in ../../result/baselines/SARC/checkpoint-19281/config.json
Model weights saved in ../../result/baselines/SARC/checkpoint-19281/pytorch_model.bin
Deleting older checkpoint [../../result/baselines/SARC/checkpoint-12854] 

In [None]:
# Task at position 13 of `tasks` ('SARC' in the listing printed above), BERT encoder frozen.
task = list(tasks)[13]
train_baseline(task, freeze_bert=True)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/j

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /home/jz17d/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.12.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6801,0.672663,0.592531,0.603032,0.587035,0.619925


***** Running Evaluation *****
  Num examples = 51410
  Batch size = 32
Saving model checkpoint to ../../result/baselines/SARC_freezed/checkpoint-6427
Configuration saved in ../../result/baselines/SARC_freezed/checkpoint-6427/config.json
Model weights saved in ../../result/baselines/SARC_freezed/checkpoint-6427/pytorch_model.bin


## 14

In [None]:
# Task at position 14 of `tasks` ('SarcasmGhosh' in the listing printed above), full fine-tuning.
task = list(tasks)[14]
train_baseline(task, freeze_bert=False)

In [None]:
# Task at position 14 of `tasks` ('SarcasmGhosh' in the listing printed above), BERT encoder frozen.
task = list(tasks)[14]
train_baseline(task, freeze_bert=True)

## 15

In [None]:
# Task at position 15 of `tasks` ('SentiTreeBank' in the listing printed above), full fine-tuning.
task = list(tasks)[15]
train_baseline(task, freeze_bert=False)

In [None]:
# Task at position 15 of `tasks` ('SentiTreeBank' in the listing printed above), BERT encoder frozen.
task = list(tasks)[15]
train_baseline(task, freeze_bert=True)

## 16

In [None]:
# Task at position 16 of `tasks` ('ShortHumor' in the listing printed above), full fine-tuning.
task = list(tasks)[16]
train_baseline(task, freeze_bert=False)

In [None]:
# Task at position 16 of `tasks` ('ShortHumor' in the listing printed above), BERT encoder frozen.
task = list(tasks)[16]
train_baseline(task, freeze_bert=True)