In [1]:
import collections
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm, trange
# import torchmetrics

import datasets
from datasets import load_metric
from transformers import AutoConfig, AutoTokenizer, BertModel, RobertaModel
from transformers import BertForSequenceClassification
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding

In [2]:
# Background: https://github.com/huggingface/transformers/issues/5486
# Tokenizer parallelism is left enabled for this notebook; use "false" as the
# value below to switch it off.
os.environ.update(TOKENIZERS_PARALLELISM="true")

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [4]:
# Task manifest: maps each task name to its label count (summed later in the
# notebook to size the classifier head).
with open('../data/xslue/tasks.json') as f:
    tasks = json.loads(f.read())
tasks

{'CrowdFlower': 13,
 'DailyDialog': 7,
 'EmoBank_Valence': 1,
 'EmoBank_Arousal': 1,
 'EmoBank_Dominance': 1,
 'HateOffensive': 3,
 'PASTEL_age': 8,
 'PASTEL_country': 2,
 'PASTEL_education': 10,
 'PASTEL_ethnic': 10,
 'PASTEL_gender': 3,
 'PASTEL_politics': 3,
 'PASTEL_tod': 5,
 'SARC': 2,
 'SarcasmGhosh': 2,
 'SentiTreeBank': 1,
 'ShortHumor': 2,
 'ShortJokeKaggle': 2,
 'ShortRomance': 2,
 'StanfordPoliteness': 1,
 'TroFi': 2,
 'VUA': 2}

In [5]:
# Tasks merged into the combined classifier. The number after each name is the
# size of that task's train split; entries that are commented out are excluded
# from this run.
selected_task = [
    'PASTEL_country',       # 33224
    # 'SARC',               # 205645
    'SarcasmGhosh',         # 39780
    'ShortHumor',           # 37801
    # 'ShortJokeKaggle',    # 406682
    # 'ShortRomance',       # 1902
    # 'TroFi',              # 3335
    'VUA',                  # 15157
]


In [6]:
class MyMultitaskDataset(Dataset):
    """Several task TSV files concatenated into one dataset with a flat label space.

    Each task's labels are shifted by the total number of labels of the tasks
    listed before it, so every (task, label) pair gets its own class index in
    ``[0, num_label)``.

    Args:
        selected_task: Ordered list of task names; each must have a file
            ``{data_dir}/{split}/{task}.tsv`` with ``text`` and ``label`` columns.
        split: Name of the sub-directory to read (e.g. ``'train'``).
        tokenizer: Callable applied to the text in ``__getitem__``; it is called
            with ``truncation=True, padding=True, max_length=128`` and must
            return a mapping that supports item assignment.
        label_counts: Optional mapping ``task name -> number of labels``. When
            omitted, the notebook-level ``tasks`` mapping is used.
        data_dir: Root directory that contains the per-split folders.

    Attributes:
        tasks: The task names, in the order given.
        num_task: Number of selected tasks.
        num_label: Total number of labels across the selected tasks.
        df: The concatenated frame with the shifted ``label`` column.
    """

    def __init__(self, selected_task, split, tokenizer,
                 label_counts=None, data_dir='../data/xslue/processed'):
        self.tokenizer = tokenizer
        self.tasks = selected_task
        self.num_task = len(self.tasks)
        if label_counts is None:
            # Fall back to the manifest loaded at notebook level.
            label_counts = tasks
        self.num_label = sum(label_counts[task] for task in self.tasks)

        # All task files share the same columns, so they can be stacked into one frame.
        dfs = []
        offset = 0
        for task in self.tasks:
            tsv_file = f'{data_dir}/{split}/{task}.tsv'
            df = pd.read_csv(tsv_file, sep='\t')
            df = df.dropna().reset_index(drop=True)
            # Shift by the number of labels already claimed by earlier tasks.
            # (A fixed stride of 2 per task would collide as soon as one task
            # has more than two labels.)
            # NOTE(review): assumes raw labels of a task are 0..n-1 — confirm.
            df['label'] = df['label'] + offset
            dfs.append(df)
            offset += label_counts[task]

        self.df = pd.concat(dfs).reset_index(drop=True)

    def __len__(self):
        """Return the total number of rows across all selected tasks."""
        return len(self.df)

    def __getitem__(self, idx):
        """Tokenize the row(s) at ``idx`` and attach their labels.

        Args:
            idx: Positional index accepted by ``DataFrame.iloc`` (an int, a
                list of ints, ...) or a tensor, which is converted via
                ``tolist()`` first.

        Returns:
            The tokenizer output with an extra ``"label"`` entry. The label is
            always a list: one element for a single row, one element per row
            for a multi-row selection.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        rows = self.df.iloc[idx]

        # A multi-row selection yields Series columns; a single row yields scalars.
        text = rows['text']
        if isinstance(text, pd.Series):
            text = text.tolist()
        label = rows['label']
        if isinstance(label, pd.Series):
            label = label.tolist()
        else:
            label = [label]

        item = self.tokenizer(text, truncation=True, padding=True, max_length=128)
        item["label"] = label
        return item


In [7]:
# Per-device batch size, used for both training and evaluation.
# Observed GPU memory use by batch size (bs):
#   bs 32 -> 6617 - 6680 MB
#   bs 48 -> 7894 (presumably MB as well; the original note gave no unit)
#   bs 64 -> out of memory
batch_size = 32

In [8]:
# Shared bert-base-uncased tokenizer; the dataset objects built later in the
# notebook receive it and use it to encode the text column.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [None]:
# Run 1: train only the classification head on top of a frozen BERT encoder.
freeze_bert = True

torch.cuda.empty_cache()
train_dataset = MyMultitaskDataset(selected_task, split='train', tokenizer=tokenizer)
test_dataset = MyMultitaskDataset(selected_task, split='test', tokenizer=tokenizer)  # loaded but not passed to the Trainer below
valid_dataset = MyMultitaskDataset(selected_task, split='dev', tokenizer=tokenizer)
# One output unit per (task, label) pair of the selected tasks.
num_labels = sum(tasks[task] for task in selected_task)

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
if freeze_bert:
    # Stop gradient updates for every encoder parameter; parameters outside
    # `model.bert` stay trainable.
    for param in model.bert.parameters():
        param.requires_grad = False

result_folder = '../../result'
model_name = 'combined_classifer'
# Checkpoints and logs of the frozen and unfrozen runs go to separate folders.
run_dir = f"{result_folder}/{model_name+'_freeze' if freeze_bert else model_name}"
training_args = TrainingArguments(
    output_dir=run_dir,                      # output directory
    num_train_epochs=5,                      # total number of training epochs
    per_device_train_batch_size=batch_size,  # batch size per device during training
    per_device_eval_batch_size=batch_size,   # batch size for evaluation
    warmup_steps=500,                        # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                       # strength of weight decay
    logging_dir=f"{run_dir}/logs",           # directory for storing logs
    evaluation_strategy="epoch",             # evaluate at the end of every epoch
    save_total_limit=1,
    save_strategy='epoch',                   # checkpoint at the same cadence as evaluation
    load_best_model_at_end=True,             # decide on loss
)


def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for one evaluation pass.

    Args:
        pred: Object handed over by the Trainer; this function reads its
            ``label_ids`` and ``predictions`` attributes.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    # The dataset returns each label wrapped in a one-element list, so the
    # collected labels may carry a trailing singleton axis; flatten them so
    # they line up with the 1-D argmax predictions.
    labels = np.asarray(pred.label_ids).reshape(-1)
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


trainer = Trainer(
    model=model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=valid_dataset,          # evaluation dataset
    tokenizer=tokenizer,                 # reuse the tokenizer the datasets were built with
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)
trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss


In [None]:
# Run 2: fine-tune the whole model (encoder parameters stay trainable).
freeze_bert = False

torch.cuda.empty_cache()
train_dataset = MyMultitaskDataset(selected_task, split='train', tokenizer=tokenizer)
test_dataset = MyMultitaskDataset(selected_task, split='test', tokenizer=tokenizer)  # loaded but not passed to the Trainer below
valid_dataset = MyMultitaskDataset(selected_task, split='dev', tokenizer=tokenizer)
# One output unit per (task, label) pair of the selected tasks.
num_labels = sum(tasks[task] for task in selected_task)

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
if freeze_bert:
    # Not taken in this run; kept so the cell differs from the frozen run only
    # in the flag above.
    for param in model.bert.parameters():
        param.requires_grad = False

result_folder = '../../result'
model_name = 'combined_classifer'
# Checkpoints and logs of the frozen and unfrozen runs go to separate folders.
run_dir = f"{result_folder}/{model_name+'_freeze' if freeze_bert else model_name}"
training_args = TrainingArguments(
    output_dir=run_dir,                      # output directory
    num_train_epochs=5,                      # total number of training epochs
    per_device_train_batch_size=batch_size,  # batch size per device during training
    per_device_eval_batch_size=batch_size,   # batch size for evaluation
    warmup_steps=500,                        # number of warmup steps for learning rate scheduler
    weight_decay=0.01,                       # strength of weight decay
    logging_dir=f"{run_dir}/logs",           # directory for storing logs
    evaluation_strategy="epoch",             # evaluate at the end of every epoch
    save_total_limit=1,
    save_strategy='epoch',                   # checkpoint at the same cadence as evaluation
    load_best_model_at_end=True,             # decide on loss
)


def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for one evaluation pass.

    Args:
        pred: Object handed over by the Trainer; this function reads its
            ``label_ids`` and ``predictions`` attributes.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    # The dataset returns each label wrapped in a one-element list, so the
    # collected labels may carry a trailing singleton axis; flatten them so
    # they line up with the 1-D argmax predictions.
    labels = np.asarray(pred.label_ids).reshape(-1)
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


trainer = Trainer(
    model=model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=valid_dataset,          # evaluation dataset
    tokenizer=tokenizer,                 # reuse the tokenizer the datasets were built with
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)
trainer.train()