In [None]:
%%capture
!pip install transformers
!pip install wandb
# !pip install pytorch-lightning

# MTL DataAsTask approach
- Pretrain Bert on all data and small output heads to make it learn a shared representation of disagreement
- Create heads for different tasks: misogyny, sexism, aggressive language, ...
  - USP: Run one input sentence for different heads, to understand the disagreement across topics
  - Fine-tune the last layers, built on top of the fixed, pre-fine-tuned BERT
  - In our case: Different datasets for the tasks 
  - &rarr; 4 heads
- Loss:
  - BCE Loss for hard label and percentage of Soft Label
  - Example: 2 of 6 annotator labeled 1:
    - Hard label 0: Soft_label_1: 0.33 
  - $Loss = (BCELoss(HardLabel) + BCELoss(SoftLabel1) * 2 )/2$
- Output Layers
  - Here: Just a few linear layers with tanh and dropout modules
- Only train output layers


Further adjustments for the DataAsTask model:
- Use more complex heads, e.g. MultiHeadAttention
- More datasets for the different heads
- Different choice of heads
  - A combination of KL-divergence loss and BCELoss, both on soft labels, worked fine as well and focuses more on the disagreement than on the hard label
- Better fine tuning with respect to LR, Optimizer, Loss (weights) to improve on unbalanced datasets 



**Results**: 


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import wandb
wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msheuschk[0m ([33mcapture_disagreement[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
from drive.MyDrive.cicl_data.helpers import read_data
# from drive.MyDrive.cicl_data.code import CustomLabelDataset

In [None]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_scheduler
# from datasets import Dataset
import torch.nn.functional as Fun
from torch.utils.data import Dataset, random_split, DataLoader
from torch.optim import AdamW
import torch.nn as nn

from tqdm.notebook import tqdm

# from pytorch_lightning.trainer.supporters import CombinedLoader

In [None]:
seed = 14
torch.manual_seed(seed)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
data_dict = read_data()
# df_all = pd.concat([data_dict[k] for k in data_dict.keys()])

In [None]:
def extract_soft_labels(row):
  """Return the element at position/key 1 of a soft-label entry.

  NOTE(review): presumably `row` is the per-example `soft_list` and element 1
  is the share of annotators who chose label 1 -- confirm against `read_data`.
  """
  share_of_ones = row[1]
  return share_of_ones

In [None]:
for k in data_dict.keys():
  data_dict[k]["sl_1s"] = data_dict[k]["soft_list"].apply(extract_soft_labels)

In [None]:
num_epochs = 100
batch_size = 64

run = wandb.init(
    project="MTL_DaT",
    config={
        "epochs": num_epochs,
        "batch_size": batch_size,
        "device": device,
        "Seed": seed
        },
      save_code = True,
      tags = ["mulitBert", "MTL", "task_headers", "heads_with2_linLayer", "4_heads", "CE_Loss", "onlySoft"]
      )

### Pretrained model

In [None]:
from transformers import DistilBertModel, DistilBertTokenizer

In [None]:
# Maybe load from wandb in future
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-multilingual-cased') # bert-base-multilingual-cased  / lanwuwei/GigaBERT-v4-Arabic-and-English

In [None]:
class MTLModel(nn.Module):
  """Shared encoder with one hard-label and one soft-label head per task.

  Every task ("HS", "MD", "Abu", "Mis") owns two independent heads. Both read
  the encoder's hidden state at sequence position 0 and emit one sigmoid
  probability per example.
  """

  # Upper-cased task key -> infix used in the head attribute names.
  _TASK_INFIX = {"HS": "hs", "MD": "md", "ABU": "abu", "MIS": "mis"}

  def __init__(self, base_model):
    """
    Args:
      base_model: encoder module; called with `input_ids`/`attention_mask`,
        its output is indexed with `[0]` to get the hidden states (width 768).
    """
    super().__init__()
    self.bert = base_model
    self.dropout = nn.Dropout(0.2)
    self.act = nn.Tanh()

    # The attribute names end up in the state-dict keys, so they (and the
    # creation order, which fixes the random initialisation) stay unchanged.
    self.linear_hsH = self._make_head()
    self.linear_hsS = self._make_head()

    self.linear_mdH = self._make_head()
    self.linear_mdS = self._make_head()

    self.linear_abuH = self._make_head()
    self.linear_abuS = self._make_head()

    self.linear_misH = self._make_head()
    self.linear_misS = self._make_head()

    self.sigmoid = nn.Sigmoid()

  @staticmethod
  def _make_head():
    """Build one output head: 768 -> 768 -> 384 -> 1 with tanh and dropout."""
    return nn.Sequential(
        nn.Linear(768, 768), nn.Tanh(), nn.Dropout(0.2),
        nn.Linear(768, 384), nn.Tanh(),
        nn.Linear(384, 1))

  def forward(self, input_ids, attention_mask, task):
    """Run the encoder and the two heads of `task`.

    Args:
      input_ids: token ids passed to the encoder.
      attention_mask: attention mask passed to the encoder.
      task: one of "HS", "MD", "Abu", "Mis" (case-insensitive).

    Returns:
      Tuple `(hard, soft)` of flattened float64 tensors with the sigmoid
      output of the hard-label head and of the soft-label head.

    Raises:
      ValueError: if `task` is not one of the known tasks.
    """
    task_key = task.upper()
    if task_key not in self._TASK_INFIX:
      # Previously an unknown task surfaced as an UnboundLocalError further down.
      raise ValueError(f"Unknown task {task!r}; expected one of HS, MD, Abu, Mis")
    infix = self._TASK_INFIX[task_key]

    encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    hidden_state = encoded[0]
    first_token = hidden_state[:, 0]  # representation at sequence position 0

    # TODO: Exchange to attention modules
    x_h = getattr(self, f"linear_{infix}H")(first_token)
    x_s = getattr(self, f"linear_{infix}S")(first_token)

    x_h = torch.flatten(self.sigmoid(x_h))
    x_s = torch.flatten(self.sigmoid(x_s))

    return x_h.to(dtype=torch.float64), x_s.to(dtype=torch.float64)

In [None]:
base_model = DistilBertModel.from_pretrained('distilbert-base-multilingual-cased')
model = MTLModel(base_model)

Some weights of the model checkpoint at distilbert-base-multilingual-cased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
import os

In [None]:
# pretrained_weights = torch.load('/content/drive/MyDrive/cicl_data/model_params/rom5000_1H2N_BCE_05HL.pt') # , map_location=torch.device('cpu'))
art_name = 'model_param:v10'
weight_dir = f'/content/drive/MyDrive/cicl_data/model_params/{art_name}'

if not os.path.exists(weight_dir):
  artifact = run.use_artifact(f"MTL_DBert/{art_name}")  # 2H1N_LRcos_SoftLoss*2  CE=0.408
  weight_dir = artifact.download(weight_dir)

In [None]:
# map_location lets a checkpoint that was saved on GPU load on a CPU-only runtime.
# strict=False: keys that do not match between checkpoint and model are skipped
# instead of raising.
pretrained_state = torch.load(f"{weight_dir}/model.pt", map_location=device)
model.load_state_dict(pretrained_state, strict=False)
model = model.to(device)

In [None]:
# Initialize headers with pretrained linear layer (if needed)
# import copy
"""
with torch.no_grad():
  for name, param in model.named_parameters():
    if "linear_" in name:
      if ".weight" in name:
        param = torch.nn.Parameter(copy.deepcopy(pretrained_weights['linear.weight']))
      elif ".bias" in name:
        param = copy.deepcopy(pretrained_weights['linear.bias'])
      param.requires_grad = False
      print(name)
"""

'\nwith torch.no_grad():\n  for name, param in model.named_parameters():\n    if "linear_" in name:\n      if ".weight" in name:\n        param = torch.nn.Parameter(copy.deepcopy(pretrained_weights[\'linear.weight\']))\n      elif ".bias" in name:\n        param = copy.deepcopy(pretrained_weights[\'linear.bias\'])\n      param.requires_grad = False\n      print(name)\n'

## Prepare Data

In [None]:
class CustomLabelDataset(Dataset):
    """Tokenized texts plus hard and soft labels taken from one DataFrame.

    Expects the columns "text", "soft_list", "hard_label" and "sl_1s".
    """

    def __init__(self, df_all):
        self.text = list(map(self.tokenize_func, df_all["text"]))
        self.soft_labels = df_all["soft_list"]
        self.hard_labels = df_all["hard_label"]
        self.hard_labels_1h = Fun.one_hot(torch.tensor(df_all['hard_label'].values))
        self.soft_labels_1s = df_all["sl_1s"] # 0.33 of soft labels like {"1": 0.33, "0": 0.67}

    def __len__(self):
        return len(self.text)

    def tokenize_func(self, text):
        """Tokenize one text with the notebook-level `tokenizer`, padded/truncated to 240 tokens."""
        return tokenizer(text, padding="max_length", truncation=True, max_length=240, add_special_tokens=True)

    def __getitem__(self, idx):
        """Return `(inputs, hard_one_hot, soft_list, hard_label, soft_label_1)` for position `idx`."""
        encoded = self.text[idx]
        inputs = {"attention_mask": torch.tensor(encoded["attention_mask"]),
                  "input_ids": torch.tensor(encoded["input_ids"])}
        # `idx` is a position handed out by the DataLoader. `.iloc` keeps the
        # lookup positional even when the DataFrame index is not 0..n-1;
        # plain `series[idx]` would look `idx` up as an index *label*.
        return (inputs,
                self.hard_labels_1h[idx],
                torch.tensor(self.soft_labels.iloc[idx]),
                torch.tensor(self.hard_labels.iloc[idx]),
                torch.tensor(self.soft_labels_1s.iloc[idx]))


In [None]:
# Build one shuffled training DataLoader per task and count all training examples.
task_keys = ["HS", "MD", "Abu", "Mis"]
batch_size = 64
train_dataloaders = {}
train_size = 0
for key, train_frame in data_dict.items():
  dataset = CustomLabelDataset(train_frame)
  train_size += len(dataset)

  dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

  # Map the dataset name to its task key; the first matching marker wins and
  # a name that matches no marker is left out.
  for marker, task_name in (("HS-", "HS"), ("MD-", "MD"), ("Conv", "Abu"), ("MIS", "Mis")):
    if marker in key:
      train_dataloaders[task_name] = dataloader
      break

# train_dataloader = CombinedLoader(dataloaders, mode="max_size_cycle")
# train_size = len(train_dataloader)

In [None]:
# Evaluation data: one un-shuffled DataLoader per task, built from the "dev" split.
data_dict_dev = read_data("dev")

dev_batch_size = 64
dev_dataloaders = {}

for key, dev_frame in data_dict_dev.items():
  # CustomLabelDataset reads the "sl_1s" column, so add it before wrapping the frame.
  dev_frame["sl_1s"] = dev_frame["soft_list"].apply(extract_soft_labels)
  dev_dataset = CustomLabelDataset(dev_frame)
  dev_dataloader = DataLoader(dev_dataset, batch_size=dev_batch_size)

  # Dataset name -> task key; the first matching marker wins.
  for marker, task_name in (("HS-", "HS"), ("MD-", "MD"), ("Conv", "Abu"), ("MIS", "Mis")):
    if marker in key:
      dev_dataloaders[task_name] = dev_dataloader
      break
  
# Use dict instead of CombinedLoader for evaluation
# dev_dataloader = CombinedLoader(dev_dataloaders, mode="max_size_cycle")


In [None]:
data_dict_dev.keys(), dev_dataloaders.keys()

(dict_keys(['ArMIS', 'MD-Agreement', 'ConvAbuse', 'HS-Brexit']),
 dict_keys(['Mis', 'MD', 'Abu', 'HS']))

## Optimization

In [None]:
# Loss
# nll_loss = nn.CrossEntropyLoss()
# kl_loss = nn.KLDivLoss(reduction="batchmean")
# loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.BCELoss()

## Before Training

[]

In [None]:
all_tasks = ['Mis', 'MD', 'Abu', 'HS']

In [None]:
# from drive.MyDrive.cicl_data.helpers import ce_eval_func
def ce_eval_func_task(model, eval_dataloader, task, epsilon=1e-12, device="cuda"):
  """Mean cross-entropy between soft labels and the soft-head output for one task.

  Args:
    model: called as `model(input_ids, attention_mask=..., task=...)`; the
      second element of its return value is used as the probability of label 1.
    eval_dataloader: iterable of batches with a `.dataset` attribute; a batch
      holds the input dict at index 0 and the soft-label pairs at index 2.
    task: task key forwarded to the model.
    epsilon: probabilities are clipped to `[epsilon, 1 - epsilon]`.
    device: device the batch tensors are moved to.

  Returns:
    The summed cross-entropy divided by `len(eval_dataloader.dataset)`, as a float.

  Raises:
    ValueError: if the dataset is empty (previously a bare ZeroDivisionError).
  """
  n_examples = len(eval_dataloader.dataset)
  if n_examples == 0:
    raise ValueError("Cannot compute cross-entropy on an empty evaluation dataset")

  model.eval()
  cross_error = 0
  with torch.no_grad():
    for batch in eval_dataloader:
      input_ids = batch[0]["input_ids"].to(device, dtype=torch.long)
      attention_mask = batch[0]["attention_mask"].to(device, dtype=torch.long)
      soft_labels = batch[2].to(device, dtype=torch.float64)

      _, pred = model(input_ids, attention_mask=attention_mask, task=task)

      # Turn P(label=1) into the two-column distribution [P(0), P(1)].
      pred = pred.reshape(len(pred), 1)
      probabilities = torch.cat((1-pred, pred), dim=-1)

      predictions = torch.clip(probabilities, epsilon, 1. - epsilon)
      # The extra 1e-9 is kept so scores stay comparable with earlier runs.
      cross_error += -torch.sum(soft_labels * torch.log(predictions + 1e-9))
  return float(cross_error / n_examples)


def ce_eval_func(model, eval_dataloader, epsilon=1e-12, device="cuda"):
  """Per-task mean cross-entropy for a dict `{task: dataloader}`.

  Returns:
    List with one score per task, in the dict's iteration order.
  """
  return [
      ce_eval_func_task(model, task_loader, task, epsilon=epsilon, device=device)
      for task, task_loader in eval_dataloader.items()
  ]




In [None]:
# Baseline: dev cross-entropy of every task head before any head is trained.
all_ce_before = []
for current_task in all_tasks:
  ce_before = ce_eval_func_task(model, dev_dataloaders[current_task], current_task, device=device)
  all_ce_before.append(ce_before)
  print(f"{current_task} - CE before training: {ce_before}")
  wandb.log({f"eval/ce_before_{current_task}": ce_before})

mean_ce_before = np.array(all_ce_before).mean()
wandb.log({"eval/ce_before_training": mean_ce_before})
print(f"Mean CE before training: {mean_ce_before}")

Mis - CE before training: 0.6914898645714476
MD - CE before training: 0.6797025561632969
Abu - CE before training: 0.6466916819287633
HS - CE before training: 0.7137583381067459
Mean CE before training: 0.6829106101925635


## Training

In [None]:
from torch.optim import Adam
from transformers import get_cosine_with_hard_restarts_schedule_with_warmup

In [None]:
# Optimizer
# Upper bound on epochs per task; the training loop may stop earlier.
num_epochs = 100
# Learning rate, also read by the per-task training loop.
LR = 1e-5
# NOTE(review): the training loop assigns a fresh AdamW to `optimizer` for every
# task, so this Adam instance is replaced before it performs a step.
optimizer = Adam(model.parameters(), lr=LR)

# num_training_steps = num_epochs * len(train_dataloader)
#lr_scheduler = get_scheduler( name="constant", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

In [None]:
def enable_grad_of_head(model, task):
  """Make only the parameters of one task trainable.

  A parameter stays trainable when its name contains `_<task in lower case>`;
  every other parameter of `model` gets `requires_grad = False`.
  """
  head_marker = f"_{task.lower()}"
  for param_name, param in model.named_parameters():
    param.requires_grad = head_marker in param_name

In [None]:
# Train the heads one task after another. Each task starts from the best
# checkpoint of the previous task and updates only its own two heads.
for iteration, task in enumerate(all_tasks):
  # Train
  print(f"Start training: {task}")
  last_ce = 10
  smallest_ce = 10
  eval_counter = False
  task_dataloader = train_dataloaders[task]

  # Load best model of last task
  if iteration != 0:
    model.load_state_dict(torch.load('model.pt'))

  # Freeze everything except the heads of the current task and hand only the
  # trainable parameters to the optimizer, so frozen weights can never be
  # touched by weight decay.
  enable_grad_of_head(model, task)
  optimizer = AdamW([p for p in model.parameters() if p.requires_grad], lr=LR)

  # 5% of all steps are warm-up. The scheduler expects the TOTAL number of
  # steps (warm-up included); passing only the remaining 95% made the learning
  # rate drop to 0 for the last 5% of the steps.
  epoch_len = len(task_dataloader)
  total_steps = num_epochs * epoch_len
  warmup_steps = total_steps - int(total_steps * 0.95)
  lr_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps, num_cycles=num_epochs/10)

  # Log the running loss twice per epoch; at least 1 to avoid a modulo by
  # zero when the loader has a single batch.
  log_n_batches = max(1, epoch_len // 2)

  for e in range(num_epochs):
    model.train()
    loss_batches = 0
    epoch_loss = 0

    for i, batch in enumerate(task_dataloader):
      optimizer.zero_grad()

      input_ids = batch[0]["input_ids"].to(device, dtype=torch.long)
      attention_mask = batch[0]["attention_mask"].to(device, dtype=torch.long)
      hard_label = batch[3].to(device, dtype=torch.float64)
      soft_labels_1 = batch[4].to(device, dtype=torch.float64)

      # predict
      pred_hl, pred_sl = model(input_ids=input_ids, attention_mask=attention_mask, task=task)

      # Loss = (2 * BCE(soft) + BCE(hard)) / 2
      loss = loss_fn(pred_sl, soft_labels_1)*2
      loss += loss_fn(pred_hl, hard_label)
      loss = loss/2

      loss.backward()
      optimizer.step()
      lr_scheduler.step()

      # Log
      loss_batches += loss.item()
      epoch_loss += loss.item()

      if i % log_n_batches == 0:
        if i != 0:
          wandb.log({f"train/loss_over_batches_{task}": loss_batches/log_n_batches})
        loss_batches = 0

    # Mean over all batches of the epoch (the last loop index is one less
    # than the number of batches).
    epoch_loss /= max(1, epoch_len)
    print(f"{task} - Epoch [{e+1}/{num_epochs}] mean loss: {epoch_loss:>6f}")
    wandb.log({f"train/epoch_loss_{task}": epoch_loss})

    # Eval error
    ce = ce_eval_func_task(model, dev_dataloaders[task], task, device=device)
    print(f"{task} - Epoch [{e+1}/{num_epochs}] Mean CE  : {ce:>6f}")
    wandb.log({f"eval/epoch_ce_{task}": {task: ce}})

    # Simple early stopping: stop at the second rise of the eval CE that
    # happens without a new best score in between.
    if ce > last_ce:
      if eval_counter is True:
        print("Interrupt: Eval Error is raising")
        break
      eval_counter = True
    elif ce < smallest_ce:
      torch.save(model.state_dict(), f'model.pt')
      print(f"{task} - Epoch [{e+1}/{num_epochs}] Save model state")
      eval_counter = False
      smallest_ce = ce

    last_ce = ce
  print(f"{task}: Smallest CE: {smallest_ce}")
  print("-----------------------------------------")
  print("\n \n")



Start training: Mis
Mis - Epoch [1/100] mean loss: 0.791987
Mis - Epoch [1/100] Mean CE  : 0.615513
Mis - Epoch [1/100] Save model state
Mis - Epoch [2/100] mean loss: 0.776747
Mis - Epoch [2/100] Mean CE  : 0.616267
Mis - Epoch [3/100] mean loss: 0.778867
Mis - Epoch [3/100] Mean CE  : 0.617622
Interrupt: Eval Error is raising
Mis: Smallest CE: 0.615513131456999
-----------------------------------------

 

Start training: MD
MD - Epoch [1/100] mean loss: 0.845701
MD - Epoch [1/100] Mean CE  : 0.583503
MD - Epoch [1/100] Save model state
MD - Epoch [2/100] mean loss: 0.752860
MD - Epoch [2/100] Mean CE  : 0.544265
MD - Epoch [2/100] Save model state
MD - Epoch [3/100] mean loss: 0.652463
MD - Epoch [3/100] Mean CE  : 0.528059
MD - Epoch [3/100] Save model state
MD - Epoch [4/100] mean loss: 0.599361
MD - Epoch [4/100] Mean CE  : 0.531350
MD - Epoch [5/100] mean loss: 0.581121
MD - Epoch [5/100] Mean CE  : 0.532356
Interrupt: Eval Error is raising
MD: Smallest CE: 0.5280594349869607
--

Model dependent improvements:


## Evaluation

In [None]:
# Rebuild the model from the best checkpoint written by the training loop.
# NOTE: `model_best` wraps the same `base_model` instance as `model`, so
# loading the checkpoint also overwrites the encoder weights seen by `model`.
model_best = MTLModel(base_model)
# map_location keeps the load working when the runtime's device differs from
# the one the checkpoint was saved on.
model_best.load_state_dict(torch.load('model.pt', map_location=device))
model_best = model_best.to(device)

In [None]:
# Final cross-entropy: mean of the per-task dev scores of the best checkpoint.
cross_error = ce_eval_func(model_best, dev_dataloaders, device=device)
mean_dev_ce = np.array(cross_error).mean()
print(f"CE error: {mean_dev_ce}")
wandb.log({"dev/ce": mean_dev_ce})

CE error: 0.42402365965746025


In [None]:
# Inspect predictions against soft labels on the first HS dev batch only.
# The predictions come from `model_best`, so that is the model that has to be
# in eval mode (dropout off).
model_best.eval()
cross_error = 0
epsilon = 1e-12
for i, batch in enumerate(dev_dataloaders["HS"]):
  input_ids = batch[0]["input_ids"].to(device, dtype = torch.long)
  attention_mask = batch[0]["attention_mask"].to(device, dtype = torch.long)
  soft_labels = batch[2].to(device)

  with torch.no_grad():
    _, pred = model_best(input_ids, attention_mask=attention_mask, task="HS")
  # Two-column distribution [P(0), P(1)] from the soft-head output.
  pred = pred.reshape(len(pred), 1)
  probabilities = torch.cat((1-pred, pred), dim=-1)

  predictions = torch.clip(probabilities, epsilon, 1. - epsilon)
  cross_error += -torch.sum(soft_labels * torch.log(predictions + 1e-9))
  break  # one batch is enough for a visual check



In [None]:
print(predictions)
print(soft_labels)

tensor([[0.9606, 0.0394],
        [0.9262, 0.0738],
        [0.7549, 0.2451],
        [0.9601, 0.0399],
        [0.9644, 0.0356],
        [0.9528, 0.0472],
        [0.9640, 0.0360],
        [0.8209, 0.1791],
        [0.7713, 0.2287],
        [0.8993, 0.1007],
        [0.8088, 0.1912],
        [0.8553, 0.1447],
        [0.9236, 0.0764],
        [0.5227, 0.4773],
        [0.8554, 0.1446],
        [0.9593, 0.0407],
        [0.7671, 0.2329],
        [0.7185, 0.2815],
        [0.9144, 0.0856],
        [0.9609, 0.0391],
        [0.9374, 0.0626],
        [0.8648, 0.1352],
        [0.9681, 0.0319],
        [0.8568, 0.1432],
        [0.9384, 0.0616],
        [0.8917, 0.1083],
        [0.7087, 0.2913],
        [0.8558, 0.1442],
        [0.6817, 0.3183],
        [0.9553, 0.0447],
        [0.9663, 0.0337],
        [0.9444, 0.0556],
        [0.7789, 0.2211],
        [0.8864, 0.1136],
        [0.9617, 0.0383],
        [0.9393, 0.0607],
        [0.2098, 0.7902],
        [0.9725, 0.0275],
        [0.9

In [None]:
# Size the reshape by `pred` itself; `pred_hl` still held the last *training*
# batch, whose length need not match this dev batch.
# NOTE(review): `pred` is the soft-head output of the inspection cell, so this
# shows soft predictions despite the `pred_hl` name -- confirm the intent.
pred_hl = pred.reshape(len(pred), 1)
pred_hl = torch.cat((1-pred_hl, pred_hl), dim=-1)
print(pred_hl)

### Finish

In [None]:
# torch.save(model.state_dict(), 'model.pt')
# model.load_state_dict(torch.load(PATH), strict=False)
artifact = wandb.Artifact(name='model_param', type='model')
artifact.add_file(local_path="model.pt")
run.log_artifact(artifact);

In [None]:
wandb.finish()

## TSV files - Practice phase

In [None]:
import os
import csv

In [None]:
# Write one <dataset>_results.tsv per dev dataset: hard prediction, P(0), P(1).
filepaths = ["/content/ArMIS_results.tsv", "/content/ConvAbuse_results.tsv", "/content/HS-Brexit_results.tsv", "/content/MD-Agreement_results.tsv"]
epsilon = 1e-12

for fp in filepaths:
  if os.path.exists(fp):
    os.remove(fp)

model_best.eval()
for key in data_dict_dev.keys():
  data_dict_dev[key]["sl_1s"] = data_dict_dev[key]["soft_list"].apply(extract_soft_labels)
  tsv_dataset = CustomLabelDataset(data_dict_dev[key])
  tsv_dataloader = DataLoader(tsv_dataset, shuffle=False, batch_size=1)
  filepath_write = f"/content/{key}_results.tsv"
  if "HS-" in key:
    task = "HS"
  elif "MD-" in key:
    task = "MD"
  elif "Conv" in key:
    task = "Abu"
  elif "MIS" in key:
    task = "Mis"
  else:
    # Without this, an unmapped dataset would silently reuse the previous task.
    raise ValueError(f"No task head is mapped to dataset {key!r}")

  with open(filepath_write, 'w', newline='') as tsvfile:
      writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
      for batch in tqdm(tsv_dataloader, desc=key):
        # CustomLabelDataset yields only input_ids and attention_mask, and
        # MTLModel.forward takes no token_type_ids.
        input_ids = batch[0]["input_ids"].to(device, dtype = torch.long)
        attention_mask = batch[0]["attention_mask"].to(device, dtype = torch.long)

        with torch.no_grad():
          # The model returns (hard-head, soft-head) probabilities.
          pred_hard, pred_soft = model_best(input_ids, attention_mask=attention_mask, task=task)
        pred_soft = pred_soft.reshape(len(pred_soft), 1)
        probability = torch.cat((1-pred_soft, pred_soft), dim=-1)
        # Hard label from the hard head, as in the evaluation-phase cell.
        prediction = torch.round(pred_hard)
        probability = torch.clip(probability, epsilon, 1. - epsilon) # Really necessary?
        writer.writerow([int(prediction[0].item()), probability[0][0].item(), probability[0][1].item()])


In [None]:
from zipfile import ZipFile

filepath = "res.zip" 

if os.path.exists(filepath):
    os.remove(filepath)

# The TSV files are written to /content. Read them from there and store them
# under their bare file names (`arcname`), so the cell does not depend on the
# working directory and the archive gets no "content/" folder.
result_names = ["MD-Agreement_results.tsv", "ArMIS_results.tsv", "HS-Brexit_results.tsv", "ConvAbuse_results.tsv"]
with ZipFile(filepath, 'w') as zipObj:
  for result_name in result_names:
    zipObj.write(os.path.join("/content", result_name), arcname=result_name)

In [None]:
from google.colab import files
files.download("res.zip")

### TSV Files - Evaluation Phase

In [None]:
def read_data_test(usage="train", data_dir='/content/drive/MyDrive/cicl_data/'):
  """Load the four datasets of one split into DataFrames.

  Reads `<data_dir>/<name>_dataset/<name>_<usage>.json` for each of
  'ArMIS', 'MD-Agreement', 'ConvAbuse' and 'HS-Brexit'.

  Args:
    usage: split suffix used in the file name, e.g. 'train', 'dev' or 'test'.
    data_dir: folder that contains the `<name>_dataset` sub-folders.

  Returns:
    Dict `{dataset name: DataFrame}` with one row per JSON entry, plus the
    columns "data_set" (dataset name) and "soft_list" (values of "soft_label").
  """
  # Local imports: the notebook's import cells do not import `json`.
  import json
  import os

  datasets = ['ArMIS','MD-Agreement','ConvAbuse', 'HS-Brexit']

  def extract_soft_labels(row):
    return list(row.values())

  def transform_data(data, name):
    df = pd.DataFrame(data[name]).transpose()
    # errors='ignore' leaves the column untouched when it cannot be cast to int.
    df = df.astype({"hard_label": int}, errors='ignore')
    df['data_set'] = name
    df["soft_list"] = df["soft_label"].apply(extract_soft_labels)
    return df

  data = dict()
  for current_dataset in datasets:
    current_file = os.path.join(data_dir, current_dataset + '_dataset', current_dataset + '_' + usage + '.json')
    # `with` closes the file handle again.
    with open(current_file, 'r', encoding='UTF-8') as json_file:
      data[current_dataset] = json.load(json_file)

  return {name: transform_data(data, name) for name in datasets}

In [None]:
class TestLabelDataset(Dataset):
    """Label-free dataset: tokenized texts of the "text" column only."""

    def __init__(self, df_all):
        self.text = [self.tokenize_func(raw_text) for raw_text in df_all["text"]]

    def __len__(self):
        return len(self.text)

    def tokenize_func(self, text):
        """Tokenize one text with the notebook-level `tokenizer`, padded/truncated to `MaxLen`."""
        return tokenizer(text, truncation=True, max_length=MaxLen, padding="max_length", add_special_tokens=True)

    def __getitem__(self, idx):
        """Return the attention mask and input ids at position `idx` as tensors."""
        encoded = self.text[idx]
        return {field: torch.tensor(encoded[field])
                for field in ("attention_mask", "input_ids")}


In [None]:
data_dict_test = read_data_test("test")

In [None]:
MaxLen = 240

In [None]:
# Remove result files left over from an earlier run.
# NOTE(review): the following cell starts with the same cleanup, so this cell
# looks redundant -- confirm before removing it.
filepaths = ["/content/ArMIS_results.tsv", "/content/ConvAbuse_results.tsv", "/content/HS-Brexit_results.tsv", "/content/MD-Agreement_results.tsv"]
epsilon = 1e-12

for fp in filepaths:
  if os.path.exists(fp):
    os.remove(fp)

In [None]:
# Write one <dataset>_results.tsv per test dataset: hard prediction, P(0), P(1).
filepaths = ["/content/ArMIS_results.tsv", "/content/ConvAbuse_results.tsv", "/content/HS-Brexit_results.tsv", "/content/MD-Agreement_results.tsv"]
epsilon = 1e-12

for fp in filepaths:
  if os.path.exists(fp):
    os.remove(fp)

model_best.eval()
for key in data_dict_test.keys():
  tsv_dataset = TestLabelDataset(data_dict_test[key])
  tsv_dataloader = DataLoader(tsv_dataset, shuffle=False, batch_size=1)
  filepath_write = f"/content/{key}_results.tsv"
  
  if "HS-" in key:
    task = "HS"
  elif "MD-" in key:
    task = "MD"
  elif "Conv" in key:
    task = "Abu"
  elif "MIS" in key:
    task = "Mis"
  else:
    # Without this, an unmapped dataset would silently reuse the previous task.
    raise ValueError(f"No task head is mapped to dataset {key!r}")

  with open(filepath_write, 'w', newline='') as tsvfile:
      writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
      for batch in tqdm(tsv_dataloader, desc=key):
        input_ids = batch["input_ids"].to(device, dtype = torch.long)
        attention_mask = batch["attention_mask"].to(device, dtype = torch.long)

        with torch.no_grad():
          # The best checkpoint is held in `model_best`; `best_model` was never defined.
          predH, predS = model_best(input_ids, attention_mask=attention_mask, task=task)
        # batch_size is 1, so predS has one element and this yields [P(0), P(1)].
        probability = torch.cat((1-predS, predS), dim=-1)
        prediction = torch.round(predH)
        probability = torch.clip(probability, epsilon, 1. - epsilon)
        writer.writerow([int(prediction[0].item()), probability[0].item(), probability[1].item()])


In [None]:
from zipfile import ZipFile

filepath = "res_test.zip" 

if os.path.exists(filepath):
    os.remove(filepath)

# The TSV files are written to /content. Read them from there and store them
# under their bare file names (`arcname`), so the cell does not depend on the
# working directory and the archive gets no "content/" folder.
result_names = ["MD-Agreement_results.tsv", "ArMIS_results.tsv", "HS-Brexit_results.tsv", "ConvAbuse_results.tsv"]
with ZipFile(filepath, 'w') as zipObj:
  for result_name in result_names:
    zipObj.write(os.path.join("/content", result_name), arcname=result_name)

In [None]:
from google.colab import files
files.download(filepath)