# Post-Processing: fine-tuning BERTu on the COMBO NLI subset

In [1]:
%pip install datasets transformers scikit-learn pandas torch simpletransformers scipy wandb

[0mCollecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)
Collecting simpletransformers
  Downloading simpletransformers-0.64.5-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting wandb
  Downloading wandb-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... 

In [2]:
import os
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid (it is documented to make
# CUDA kernel launches synchronous, which slows training) — confirm it is still wanted for full runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Both variables are set in this cell, ahead of the cell that imports torch/simpletransformers;
# presumably so the libraries see them when they load — TODO confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "true"

In [3]:
from simpletransformers.classification import (
    ClassificationModel, ClassificationArgs
)
import pandas as pd
import logging
import torch

In [4]:
# Record whether torch can see a CUDA device; the flag is passed as `use_cuda`
# when the ClassificationModel is built further down. Displayed here as a quick check.
cuda_available = torch.cuda.is_available()
cuda_available

True

In [5]:
# Data and output locations. The defaults are the original absolute paths; set
# NLI_DATA_DIR / NLI_OUTPUT_DIR in the environment to run on another machine
# without editing the notebook.
ROOT_CSV_PATH = os.environ.get('NLI_DATA_DIR', '/home/nli/data')
OUTPUT_PATH = os.environ.get('NLI_OUTPUT_DIR', '/home/nli/outputs')

In [6]:
# Load the two semicolon-delimited CSV splits from ROOT_CSV_PATH.
# Note the naming: the "dev" frame is the one later passed to train_model as
# training data, and the "eval" frame is used for evaluation.
dev_csv_path = os.path.join(ROOT_CSV_PATH, 'combo_nli_new_dev_subset.csv')
eval_csv_path = os.path.join(ROOT_CSV_PATH, 'combo_nli_new_eval_subset.csv')

df_snli_mt_dev = pd.read_csv(dev_csv_path, delimiter=";", encoding='utf-8')
df_snli_mt_eval = pd.read_csv(eval_csv_path, delimiter=";", encoding='utf-8')

In [7]:
# Shuffle every row of both splits and renumber the index from 0.
# A fixed random_state makes the row order identical on every Restart & Run All;
# without it each run produced a different permutation.
df_snli_mt_dev = df_snli_mt_dev.sample(frac=1, random_state=42).reset_index(drop=True)
df_snli_mt_eval = df_snli_mt_eval.sample(frac=1, random_state=42).reset_index(drop=True)

In [8]:
# Pick the leftover index columns by name instead of by position.
# `columns[:2]` only worked on the first run: re-running this cell after the drop
# below would have selected the two real text columns and deleted them.
# Selecting by the 'Unnamed' prefix yields the same Index on a fresh frame and an
# empty Index once the columns are already gone.
is_unnamed = df_snli_mt_dev.columns.str.startswith('Unnamed')
cols_to_drop = df_snli_mt_dev.columns[is_unnamed]
cols_to_drop

Index(['Unnamed: 0.1', 'Unnamed: 0'], dtype='object')

In [9]:
# Remove the columns selected in `cols_to_drop` from both splits.
# The same selection (taken from the dev frame) is applied to the eval frame.
df_snli_mt_dev = df_snli_mt_dev.drop(columns=cols_to_drop)
df_snli_mt_eval = df_snli_mt_eval.drop(columns=cols_to_drop)

In [10]:
# Preview the first rows of the dev frame after the column drop above.
df_snli_mt_dev.head(5)

Unnamed: 0,premise,hypothesis,labels
0,iva jidher ukoll lili għalkemm ir-rwoli tan-ni...,In-nisa f'dawn il-jiem qed jagħmlu ħafna aktar...,neutral
1,It-temp kiber frisk biżżejjed li ma kellhomx g...,It-temp kien 72 grad.,neutral
2,'Imma... it-tnejn intom...' Daniel ħadlu rasu.,‘Imma s-sebgħa minnkom’ qal Danjel.,contradiction
3,hekk ftit differenti u ridt nagħmel din il-ħaġ...,Ma ridt nagħmel xejn bir-riċiklaġġ.,contradiction
4,U m’għandniex injoraw lill-plejers.,Il-plejers m'għandhomx jintesew.,entailment


In [11]:
# Rename the three remaining columns positionally to text_a / text_b / labels
# on both splits. Each frame gets its own list so they do not share one object.
nli_column_names = ["text_a", "text_b", "labels"]
df_snli_mt_dev.columns = list(nli_column_names)
df_snli_mt_eval.columns = list(nli_column_names)

In [12]:
def map_to_num(label):
  """Encode an NLI label as an integer class id.

  'entailment' maps to 0 and 'contradiction' maps to 2. Every other value
  falls through to 1, so 'neutral' and any unexpected input (missing values,
  misspellings, already-encoded integers) are all encoded as 1.
  """
  return 0 if label == 'entailment' else 2 if label == 'contradiction' else 1

def map_to_label(num):
  """Decode an integer class id back to its NLI label string.

  0 maps to "entailment" and 2 maps to "contradiction". Every other value,
  including 1, is reported as "neutral".
  """
  return "entailment" if num == 0 else "contradiction" if num == 2 else "neutral"

In [13]:
# Encode the dev split's string labels as integer class ids.
# Guard against re-running this cell: map_to_num sends every value that is not
# 'entailment' or 'contradiction' to 1, so applying it to labels that are already
# integers would silently relabel the whole split as class 1.
if not pd.api.types.is_integer_dtype(df_snli_mt_dev['labels']):
    dev_labels = [map_to_num(x) for x in df_snli_mt_dev['labels'].to_list()]
    df_snli_mt_dev['labels'] = dev_labels
df_snli_mt_dev["labels"] = df_snli_mt_dev["labels"].astype(int)

In [14]:
# Encode the eval split's string labels as integer class ids.
# Guard against re-running this cell: map_to_num sends every value that is not
# 'entailment' or 'contradiction' to 1, so applying it to labels that are already
# integers would silently relabel the whole split as class 1.
if not pd.api.types.is_integer_dtype(df_snli_mt_eval['labels']):
    eval_labels = [map_to_num(x) for x in df_snli_mt_eval['labels'].to_list()]
    df_snli_mt_eval['labels'] = eval_labels
df_snli_mt_eval["labels"] = df_snli_mt_eval["labels"].astype(int)

In [15]:
# Spot-check the renamed columns and the integer-encoded labels.
df_snli_mt_dev.head(3)

Unnamed: 0,text_a,text_b,labels
0,iva jidher ukoll lili għalkemm ir-rwoli tan-ni...,In-nisa f'dawn il-jiem qed jagħmlu ħafna aktar...,1
1,It-temp kiber frisk biżżejjed li ma kellhomx g...,It-temp kien 72 grad.,1
2,'Imma... it-tnejn intom...' Daniel ħadlu rasu.,‘Imma s-sebgħa minnkom’ qal Danjel.,2


In [16]:
# (rows, columns) of the frame that is passed to train_model as training data.
df_snli_mt_dev.shape

(92000, 3)

In [17]:
# (rows, columns) of the frame that is used for evaluation.
df_snli_mt_eval.shape

(23000, 3)

In [18]:
# Training configuration for the classifier, grouped by concern.
model_args = ClassificationArgs()

# Optimisation.
model_args.num_train_epochs = 4
model_args.learning_rate = 5e-5
model_args.train_batch_size = 8
model_args.gradient_accumulation_steps = 4
model_args.fp16 = True
model_args.max_seq_length = 512
# The seed is not set, so runs are not repeatable; uncomment to fix it.
# model_args.manual_seed = 4

# Multiprocessing is switched off for training, evaluation and decoding.
model_args.use_multiprocessing = False
model_args.use_multiprocessing_for_evaluation = False
model_args.use_multiprocessed_decoding = False

# Output directory and input caching.
model_args.output_dir = OUTPUT_PATH
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True

# Evaluation during training and checkpointing.
# NOTE(review): with 92,000 training rows, batch size 8 and 4 accumulation steps
# there are roughly 2,875 optimizer steps per epoch (~11,500 over 4 epochs), so the
# 20,000 and 50,000 step thresholds below look unreachable — confirm that
# step-based evaluation and checkpointing are meant to be effectively off.
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 20000
model_args.evaluate_during_training_verbose = True
model_args.save_eval_checkpoints = False
model_args.save_steps = 50000

# Weights & Biases run metadata.
model_args.wandb_project = 'dissertation'
model_args.wandb_kwargs = {'name': "COMBO_NLI (WILLIAMS METHOD) SUBSET"}

In [19]:
# Build a 3-class "bert" classifier from the MLRS/BERTu checkpoint, using the
# arguments configured above and the GPU only if one was detected.
model = ClassificationModel(
    "bert",
    "MLRS/BERTu",
    num_labels=3,
    args=model_args,
    use_cuda=cuda_available,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at MLRS/BERTu and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score

def f1_multiclass(labels, preds):
    """Return the macro-averaged F1 score of `preds` against the true `labels`."""
    macro_f1 = f1_score(y_true=labels, y_pred=preds, average='macro')
    return macro_f1

def recall_multiclass(labels, preds):
    """Return the macro-averaged recall of `preds` against the true `labels`."""
    macro_recall = recall_score(y_true=labels, y_pred=preds, average='macro')
    return macro_recall

def precision_multiclass(labels, preds):
    """Return the macro-averaged precision of `preds` against the true `labels`."""
    macro_precision = precision_score(y_true=labels, y_pred=preds, average='macro')
    return macro_precision

In [None]:
# Fine-tune on the "dev" frame, evaluating against the "eval" frame.
# The keyword arguments pass the extra metric functions under the given names.
model.train_model(
    df_snli_mt_dev,
    eval_df=df_snli_mt_eval,
    precision=precision_multiclass,
    f1=f1_multiclass,
    recall=recall_multiclass,
    acc=accuracy_score,
)

In [None]:
# Evaluate the model on the eval frame with the same extra metrics that were
# passed to train_model.
extra_metrics = {
    "precision": precision_multiclass,
    "f1": f1_multiclass,
    "recall": recall_multiclass,
    "acc": accuracy_score,
}
result, model_outputs, wrong_predictions = model.eval_model(df_snli_mt_eval, **extra_metrics)

In [None]:
# Show the evaluation results returned by eval_model in the previous cell.
result

In [None]:
# Accuracy as a percentage: one minus the share of wrong predictions among all outputs.
# NOTE(review): `result` presumably already carries this value under "acc", since
# accuracy_score was passed to eval_model as `acc` — confirm and prefer that if so.
error_rate = len(wrong_predictions) / len(model_outputs)
print(f"Accuracy: {100 * (1 - error_rate)}%")

In [None]:
# Manual check on one premise/hypothesis pair
# (roughly: "A boy playing with a ball in the middle of a street" / "A boy asleep in bed").
sentence_pair = [
    "Tifel jilgħab bil-ballun f'nofs ta' triq",
    "Tifel rieqed fis-sodda",
]
predictions, raw_outputs = model.predict([sentence_pair])
map_to_label(predictions[0])

In [None]:
# Manual check on one premise/hypothesis pair
# (roughly: "A boy playing with a ball in the middle of a street" / "A dog in the street").
sentence_pair = [
    "Tifel jilgħab bil-ballun f'nofs ta' triq",
    "Kelb fit-triq",
]
predictions, raw_outputs = model.predict([sentence_pair])
map_to_label(predictions[0])