In [None]:
# Install the notebook's third-party dependencies into the running kernel's
# environment (`%pip` targets the kernel, unlike `!pip`).
# NOTE(review): versions are unpinned, so results may differ between installs.
%pip install datasets transformers scikit-learn pandas torch simpletransformers scipy wandb

In [None]:
import os
# Process-wide environment switches, set before the ML libraries are imported
# in the cells below.
os.environ.update(
    CUDA_LAUNCH_BLOCKING="1",
    TOKENIZERS_PARALLELISM="true",
)

In [None]:
# --- Filesystem layout -------------------------------------------------------
# Every location is derived from a root so that moving the project only
# requires editing the root constants.
ROOT_CSV_PATH = '/home/nli/data'
OUTPUT_PATH = '/home/nli/outputs'
TRAINED_MODELS_ROOT = '/home/nli/trained_models'

# --- Model checkpoints -------------------------------------------------------
# Directories holding the fine-tuned checkpoints (trailing slash preserved).
MODEL_SNLI_FULL = f'{TRAINED_MODELS_ROOT}/snli_full/'
MODEL_SNLI_LITE = f'{TRAINED_MODELS_ROOT}/snli_subset/'
MODEL_MNLI_LITE = f'{TRAINED_MODELS_ROOT}/mnli_subset/'
MODEL_COMBONLI = f'{TRAINED_MODELS_ROOT}/combo_nli_new_subset/'
# Model identifier of the baseline (not a local directory).
MODEL_BASELINE = 'MLRS/BERTu'

# --- Evaluation sets ---------------------------------------------------------
MNLI_EVAL = f'{ROOT_CSV_PATH}/unique_mnli_eval.csv'
SNLI_EVAL = f'{ROOT_CSV_PATH}/unique_snli_eval.csv'

In [None]:
from simpletransformers.classification import (
    ClassificationModel, ClassificationArgs
)
import pandas as pd
import logging
import torch

# Result of torch.cuda.is_available(); forwarded as `use_cuda` when the
# baseline model is constructed further down.
cuda_available = torch.cuda.is_available()


In [None]:
def map_to_num(label):
  """Convert a textual NLI label to its integer class id.

  'entailment' -> 0, 'contradiction' -> 2; every other value (including
  'neutral' and anything unrecognised) -> 1.
  """
  for known_label, class_id in (('entailment', 0), ('contradiction', 2)):
    if label == known_label:
      return class_id
  # Fallback bucket: anything that is not one of the two labels above.
  return 1

def map_to_label(num):
  """Convert an integer class id back to its textual NLI label.

  0 -> "entailment", 2 -> "contradiction"; every other value -> "neutral".
  """
  if num == 0:
    return "entailment"
  return "contradiction" if num == 2 else "neutral"

In [None]:
def format_df(df):
    """Return a copy of ``df`` with the column layout used for evaluation here.

    The two leftover index columns ('Unnamed: 0.1' and 'Unnamed: 0') are
    removed, the remaining three columns are renamed positionally to
    ``text_a``, ``text_b`` and ``labels``, and the textual labels are
    converted to integer class ids with ``map_to_num``.

    The input frame is left untouched, so calling this function repeatedly
    on the same object (e.g. when re-running a cell) is safe.

    Args:
        df: Raw frame containing the two index columns plus exactly three
            further columns (presumably premise, hypothesis and label, in
            that order - verify against the CSV files).

    Returns:
        A new DataFrame with columns ``text_a``, ``text_b``, ``labels``
        where ``labels`` has an integer dtype.

    Raises:
        KeyError: If either index column is missing.
        ValueError: If the frame does not have exactly three columns left
            after dropping the index columns.
    """
    # drop() without inplace returns a new frame, so the caller's data is
    # never modified by the renaming / relabelling below.
    formatted = df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
    formatted.columns = ["text_a", "text_b", "labels"]
    formatted["labels"] = formatted["labels"].map(map_to_num).astype(int)
    return formatted

In [None]:
# Both evaluation CSVs are read with the same options (';' separator, UTF-8)
# and passed through format_df.
csv_read_options = dict(delimiter=";", encoding='utf-8')
mnli_eval = format_df(pd.read_csv(MNLI_EVAL, **csv_read_options))
snli_eval = format_df(pd.read_csv(SNLI_EVAL, **csv_read_options))

In [None]:
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score

def f1_multiclass(labels, preds):
    """Return the macro-averaged F1 score of ``preds`` against ``labels``."""
    return f1_score(y_true=labels, y_pred=preds, average="macro")

def recall_multiclass(labels, preds):
    """Return the macro-averaged recall of ``preds`` against ``labels``."""
    return recall_score(y_true=labels, y_pred=preds, average="macro")

def precision_multiclass(labels, preds):
    """Return the macro-averaged precision of ``preds`` against ``labels``."""
    return precision_score(y_true=labels, y_pred=preds, average="macro")

In [None]:
# Load the fine-tuned checkpoints. `use_cuda` is passed explicitly (as it is
# for the baseline model) because simpletransformers defaults it to True and
# refuses to construct the model when no CUDA device is available.
snli_full_model = ClassificationModel('bert', MODEL_SNLI_FULL, use_cuda=cuda_available)
snli_lite_model = ClassificationModel('bert', MODEL_SNLI_LITE, use_cuda=cuda_available)
mnli_lite_model = ClassificationModel('bert', MODEL_MNLI_LITE, use_cuda=cuda_available)
combonli_model = ClassificationModel('bert', MODEL_COMBONLI, use_cuda=cuda_available)


In [None]:
# Arguments for the baseline model. Only evaluation is run in this notebook,
# so the training-related values below are presumably carried over from the
# training setup - verify before relying on them.
model_args = ClassificationArgs()

# Optimisation
model_args.num_train_epochs = 4
model_args.learning_rate = 5e-5
model_args.train_batch_size = 8
model_args.gradient_accumulation_steps = 4
model_args.fp16 = True
model_args.max_seq_length = 512

# Multiprocessing is switched off everywhere (set once; the original cell
# assigned `use_multiprocessing` twice).
model_args.use_multiprocessing = False
model_args.use_multiprocessing_for_evaluation = False
model_args.use_multiprocessed_decoding = False

# Output / caching behaviour
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.output_dir = OUTPUT_PATH

# Experiment tracking
model_args.wandb_project = 'dissertation'

# NOTE(review): no seed is fixed; if any part of the baseline is randomly
# initialised its scores will vary between runs - confirm whether the line
# below should be enabled.
# model_args.manual_seed = 4

# Evaluation / checkpoint cadence during training
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 50000
model_args.evaluate_during_training_verbose = True
model_args.save_eval_checkpoints = False
model_args.save_steps = 250000

# Baseline: the MODEL_BASELINE checkpoint with a 3-way classification setup.
baseline_model = ClassificationModel(
    'bert',
    MODEL_BASELINE,
    num_labels=3,
    args=model_args,
    use_cuda=cuda_available,
)

In [None]:
def evaluate_model(model_name, dataset, model, eval_data):
    """Evaluate ``model`` on ``eval_data`` and return the metrics as one row.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        dataset: Label stored in the ``dataset`` column of the result.
        model: Object exposing ``eval_model(eval_data, **metric_functions)``
            that returns a ``(result, model_outputs, wrong_predictions)``
            tuple.
        eval_data: Evaluation data forwarded unchanged to ``eval_model``.

    Returns:
        A single-row DataFrame holding the entries of ``result`` followed by
        the ``model`` and ``dataset`` columns.
    """
    extra_metrics = {
        "precision": precision_multiclass,
        "f1": f1_multiclass,
        "recall": recall_multiclass,
        "acc": accuracy_score,
    }
    # Only the metrics mapping is needed; the raw outputs and the list of
    # wrong predictions are discarded.
    metrics, _, _ = model.eval_model(eval_data, **extra_metrics)
    return pd.DataFrame([metrics]).assign(model=model_name, dataset=dataset)

In [None]:
# Accumulates one single-row metrics frame per (model, dataset) evaluation.
# Re-run this cell before re-running the evaluation cells below; otherwise
# rows are appended a second time.
df_list = []

In [None]:
# Evaluate snli_full_model on both evaluation sets and queue the metric rows.
df_list.extend(
    evaluate_model("SNLI Full", dataset_label, snli_full_model, eval_frame)
    for dataset_label, eval_frame in (
        ("SNLI Dataset", snli_eval),
        ("MNLI Dataset", mnli_eval),
    )
)

In [None]:
# Evaluate snli_lite_model on both evaluation sets and queue the metric rows.
df_list.extend(
    evaluate_model("SNLI Lite", dataset_label, snli_lite_model, eval_frame)
    for dataset_label, eval_frame in (
        ("SNLI Dataset", snli_eval),
        ("MNLI Dataset", mnli_eval),
    )
)

In [None]:
# Evaluate mnli_lite_model on both evaluation sets and queue the metric rows.
df_list.extend(
    evaluate_model("MNLI Lite", dataset_label, mnli_lite_model, eval_frame)
    for dataset_label, eval_frame in (
        ("SNLI Dataset", snli_eval),
        ("MNLI Dataset", mnli_eval),
    )
)


In [None]:
# Evaluate combonli_model on both evaluation sets and queue the metric rows.
df_list.extend(
    evaluate_model("ComboNLI", dataset_label, combonli_model, eval_frame)
    for dataset_label, eval_frame in (
        ("SNLI Dataset", snli_eval),
        ("MNLI Dataset", mnli_eval),
    )
)

In [None]:
# Evaluate baseline_model on both evaluation sets and queue the metric rows.
df_list.extend(
    evaluate_model("BERTu", dataset_label, baseline_model, eval_frame)
    for dataset_label, eval_frame in (
        ("SNLI Dataset", snli_eval),
        ("MNLI Dataset", mnli_eval),
    )
)

In [None]:
# Stack the per-run rows into one table. Every frame in df_list has index 0,
# so ignore_index=True gives each row a unique 0..n-1 index instead of a
# column of repeated zeros in the exported file.
df_grouped = pd.concat(df_list, ignore_index=True)
# Written as a ';'-separated UTF-8 CSV.
df_grouped.to_csv('/home/nli/final_test_results.csv', encoding='utf-8', sep=';')