In [None]:
!pip install transformers==4.1.1
import torch
from tqdm import tqdm
import random
import csv
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification, RobertaTokenizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.model_selection import train_test_split
import os
from os import listdir
from os.path import isfile, join
import shutil
from pathlib import Path
import json
import pandas as pd
import shutil

# Mount Google Drive at /gdrive and make it the working directory.
# Later cells use paths relative to it (e.g. "MyDrive/feverous/...").
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

In [None]:
# Load the RoBERTa tokenizer published with the named NLI checkpoint.
tokenizer = RobertaTokenizer.from_pretrained('ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli')

In [None]:
class FEVEROUSDataset(torch.utils.data.Dataset):
    """Torch dataset pairing tokenizer encodings with optional integer labels.

    Args:
        encodings: mapping from feature name to a per-example sequence. It must
            provide ``.items()`` and every value must be indexable by example
            position.
        labels: per-example labels, indexable by example position. May be
            ``None`` when ``use_labels`` is False.
        use_labels: when True, every item also carries a ``'labels'`` tensor.
    """

    def __init__(self, encodings, labels, use_labels = True):
        self.encodings = encodings
        self.labels = labels
        self.use_labels = use_labels

    def __getitem__(self, idx):
        """Return the ``idx``-th example as a dict of tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.use_labels:
            item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples.

        The label list is authoritative when present. A label-less dataset
        (``labels=None``) falls back to the length of the first encoding
        field instead of failing on ``len(None)``.
        """
        if self.labels is not None:
            return len(self.labels)
        for _, val in self.encodings.items():
            return len(val)
        return 0

In [None]:
def compute_metrics(pred):
    """Compute accuracy, micro-averaged P/R/F1 and a per-class report.

    Args:
        pred: prediction object exposing ``label_ids`` (gold labels) and
            ``predictions`` (per-class scores; the arg-max over the last axis
            is taken as the predicted class).

    Returns:
        dict with keys ``accuracy``, ``f1``, ``precision``, ``recall`` and
        ``class_rep`` (the classification report as a dict).
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='micro')
    acc = accuracy_score(labels, preds)
    # Pin the label ids explicitly: without `labels`, classification_report
    # raises when fewer than three distinct classes occur in labels/preds,
    # because the number of observed classes no longer matches target_names.
    class_rep = classification_report(
        labels,
        preds,
        labels=[0, 1, 2],
        target_names=['NOT ENOUGH INFO', 'SUPPORTS', 'REFUTES'],
        output_dict=True,
    )
    print(class_rep)
    print("Acc: {}, Recall: {}, Precision: {}, F1: {}".format(acc, recall, precision, f1))
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'class_rep': class_rep
    }

In [None]:
def datasetStats(sentences, labels, useAmbLabel=False):
  """Count how many (sentence, label) pairs carry each label id.

  Label ids 0, 1 and 2 are always reported; id 3 is added when
  ``useAmbLabel`` is True. Pairing stops at the shorter of the two inputs,
  and a label outside the reported ids raises ``KeyError``.
  """
  label_ids = (0, 1, 2, 3) if useAmbLabel else (0, 1, 2)
  stats = dict.fromkeys(label_ids, 0)
  for _, label in zip(sentences, labels):
    stats[label] += 1
  return stats

In [None]:
def readTSVFile(file):
  """Read the first column of a tab-separated file and label every row.

  The label is derived from the file path: 1 (SUPPORTS) when it contains
  'uniform_true', 2 (REFUTES) when it contains 'uniform_false' (which wins
  if both match), otherwise 0 (CONTRADICTING / NEI).

  Args:
    file: path of the TSV file.

  Returns:
    (texts, labels): first-column strings and one label per string.
    Blank lines are skipped.
  """
  labelToUse = 0 ## CONTRADICTING / NEI
  if 'uniform_true' in file:
    labelToUse = 1 ## SUPPORTS
  if 'uniform_false' in file:
    labelToUse = 2 ## REFUTES
  texts = []
  # newline='' is what the csv module expects, so that line breaks inside
  # quoted fields are handled by the reader and not by the file object.
  with open(file, newline='', encoding='utf-8') as fd:
    rd = csv.reader(fd, delimiter="\t", quotechar='"')
    for row in rd:
      if not row:
        # csv.reader yields [] for a blank line; there is no first column.
        continue
      texts.append(row[0])
  labels = [labelToUse] * len(texts)
  return texts, labels

In [None]:
def printPredictions(text_test, labels_test, predictions, labelToFilter=None):
  """Print one "text label prediction" line per example.

  When ``labelToFilter`` is given, only the examples whose gold label equals
  it are printed; otherwise every example is printed.
  """
  for row in zip(text_test, labels_test, predictions):
    if labelToFilter is None or row[1] == labelToFilter:
      print(*row)

# FEVEROUS DATASET

In [None]:
def to_sentence(selectedData):
  """Render a sequence of values as "a ; b ; c." (empty input gives "").

  Each element is converted with ``str`` and stripped, elements are joined
  with " ; ", a final "." is appended and the whole result is stripped.
  """
  size = len(selectedData)
  if size == 0:
    return ""
  pieces = [str(selectedData[pos]).strip() for pos in range(size)]
  return (" ; ".join(pieces) + ".").strip()

def to_feverous_input(claim, evidence, evidenceContext, title):
    """Build the model input: claim, title, evidence and evidence context,
    in that order, joined with ' </s> '. The evidence and its context are
    first flattened with to_sentence."""
    parts = (
        claim,
        title,
        to_sentence(evidence),
        to_sentence(evidenceContext),
    )
    return ' </s> '.join(parts)

def toFeverousLabel(label):
  """Map a textual verdict to its class id: 'SUPPORTS' -> 1,
  'REFUTES' -> 2, anything else -> 0 (CONTRADICTING / NEI)."""
  if label == 'REFUTES':
    return 2
  if label == 'SUPPORTS':
    return 1
  return 0


BASE_PATH = "MyDrive/feverous/feverousTest/" ## path to the feverous folder. Available at: https://drive.google.com/drive/folders/1GCemGB3mADdHHQli41B9e0d3Brwn0lUx?usp=sharing
fileTestFeverous = BASE_PATH+"train_with_values.jsonl"

# The file is read as JSON lines: every line is parsed on its own.
with open(fileTestFeverous, 'r') as json_file:
    json_list = list(json_file)

count = 0         # lines that parsed to a JSON object
countOK = 0       # objects with some evidence that became a model input
countSkipped = 0  # lines dropped because they were malformed or incomplete
sentences = []
labels = []
sentencesSet = set()  # same texts as `sentences`, kept for fast membership tests
for json_str in json_list:
    try:
        data = json.loads(json_str)
        if not isinstance(data, dict):
            continue
        count += 1
        claim = data['claim']
        label = data['label']
        evidence = data['evidence']
        evidenceContext = data['evidence_ctxt']
        title = data['title']
        # Examples with neither evidence nor evidence context are ignored.
        if len(evidence) > 0 or len(evidenceContext) > 0:
            text = to_feverous_input(claim, evidence, evidenceContext, title)
            labelData = toFeverousLabel(label)
            countOK += 1
            sentences.append(text)
            sentencesSet.add(text)
            labels.append(labelData)
    except (json.JSONDecodeError, KeyError, TypeError):
        # Only the errors a bad record can cause are tolerated (invalid JSON,
        # missing field, field of the wrong type). Anything else, such as a
        # NameError or KeyboardInterrupt, now propagates instead of being
        # silently swallowed.
        countSkipped += 1

print("Count: ", count)
print("CountOK: ", countOK)
print("Skipped: ", countSkipped)

stats = datasetStats(sentences, labels)
print(stats)

## EXTRA NEIs from FEVEROUS

In [None]:
addExtraNEIsFromFEVEROUS = True ##load more NEI from FEVEROUS
extraNEI = 100 ## number of extra NEI to add to the current dataset

BASE_PATH = "MyDrive/feverous/feverousTest/" ## path to the feverous folder
fileTestFeverous = BASE_PATH+"train_with_values_NEI.jsonl"

# The file is read as JSON lines: every line is parsed on its own.
with open(fileTestFeverous, 'r') as json_file:
    json_list = list(json_file)

count = 0         # lines that parsed to a JSON object
countOK = 0       # objects with some evidence that became a model input
countSkipped = 0  # lines dropped because they were malformed or incomplete
sentencesNEI = []
labelsNEI = []
for json_str in json_list:
    try:
        data = json.loads(json_str)
        if not isinstance(data, dict):
            continue
        count += 1
        claim = data['claim']
        label = data['label']
        evidence = data['evidence']
        evidenceContext = data['evidence_ctxt']
        title = data['title']
        # Examples with neither evidence nor evidence context are ignored.
        if len(evidence) > 0 or len(evidenceContext) > 0:
            text = to_feverous_input(claim, evidence, evidenceContext, title)
            labelData = toFeverousLabel(label)
            countOK += 1
            sentencesNEI.append(text)
            labelsNEI.append(labelData)
    except (json.JSONDecodeError, KeyError, TypeError):
        # Only the errors a bad record can cause are tolerated; anything else
        # propagates instead of being silently swallowed.
        countSkipped += 1

print("Count: ", count)
print("CountOK: ", countOK)
print("Skipped: ", countSkipped)

stats = datasetStats(sentencesNEI, labelsNEI)
print(stats)

if addExtraNEIsFromFEVEROUS:
    counterExtra = 0
    for newSentence, newLabel in zip(sentencesNEI, labelsNEI):
        if counterExtra >= extraNEI:
            # Quota reached: nothing further can be added.
            break
        if newSentence not in sentencesSet:
            sentences.append(newSentence)
            labels.append(newLabel)
            counterExtra += 1
    # Shuffle texts and labels together so that they stay aligned.
    pairs = list(zip(sentences, labels))
    random.Random(42).shuffle(pairs)
    # zip(*pairs) yields tuples; convert back to lists so that `sentences`
    # and `labels` still support append().
    sentences, labels = (list(column) for column in zip(*pairs))

stats = datasetStats(sentences, labels)
print(stats)

In [None]:
# Hold out 20% of the examples as the test split (fixed seed, shuffled).
text_train_feverous, text_test_feverous, labels_train_feverous, labels_test_feverous = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)
# Per-label counts: train split first, then test split.
for split_text, split_labels in (
    (text_train_feverous, labels_train_feverous),
    (text_test_feverous, labels_test_feverous),
):
    print(datasetStats(split_text, split_labels))

In [None]:
allNeiInTest = False ## set to true if you want to put all current NEI in the test set 
numberOfNeisToAddInTest = 30 ## maximum number of NEI examples kept in the test set (None keeps them all)
text_train_feverous_filtered = []
labels_train_feverous_filtered = []
if allNeiInTest:
  # Move every NEI (label 0) training example into the test split.
  for text, label in zip(text_train_feverous, labels_train_feverous):
    if label != 0:
      text_train_feverous_filtered.append(text)
      labels_train_feverous_filtered.append(label)
      continue
    text_test_feverous.append(text)
    labels_test_feverous.append(label)
  text_train_feverous, labels_train_feverous = text_train_feverous_filtered, labels_train_feverous_filtered

# Sizes and per-label counts before the NEI cap is applied.
print(len(text_train_feverous), len(labels_train_feverous))
print(len(text_test_feverous), len(labels_test_feverous))

print(datasetStats(text_train_feverous, labels_train_feverous))
print(datasetStats(text_test_feverous, labels_test_feverous))

if numberOfNeisToAddInTest is not None:
  # Keep every non-NEI test example, but only the first N NEI ones.
  tmp_text = []
  tmp_labels = []
  counter = 0
  for text, label in zip(text_test_feverous, labels_test_feverous):
    if label == 0: ## NEI
      if counter >= numberOfNeisToAddInTest:
        continue
      counter += 1
    tmp_text.append(text)
    tmp_labels.append(label)
  text_test_feverous, labels_test_feverous = tmp_text, tmp_labels

# Per-label counts after the cap.
print(datasetStats(text_train_feverous, labels_train_feverous))
print(datasetStats(text_test_feverous, labels_test_feverous))

# BASELINE RESULTS

In [None]:
import zipfile
# Unpack the verdict-predictor archive from Drive into /content/.
# The context manager closes the archive even if extraction fails.
with zipfile.ZipFile("/gdrive/MyDrive/feverous/feverous_verdict_predictor.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/")

In [None]:
def model_trainer(model_path, test_dataset):
    """Build an evaluation-only Trainer around a 3-label RoBERTa classifier.

    Args:
        model_path: location passed to ``from_pretrained``.
        test_dataset: dataset registered as the trainer's evaluation set.

    Returns:
        (trainer, model) tuple.
    """
    classifier = RobertaForSequenceClassification.from_pretrained(
        model_path, num_labels=3, return_dict=True
    )

    # Only the output directory and the evaluation batch size are set here.
    eval_args = TrainingArguments(
        output_dir='/content/results',
        per_device_eval_batch_size=32,
    )

    evaluator = Trainer(
        model=classifier,
        args=eval_args,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )
    return evaluator, classifier

def claim_predictor(model_path, text_test_tokenized, labels_test):
    """Predict a class id for every already-tokenized test example.

    Args:
        model_path: location of the classifier to load.
        text_test_tokenized: tokenizer output for the test texts.
        labels_test: gold labels aligned with the tokenized texts.

    Returns:
        The arg-max class ids of the model's predictions.
    """
    test_dataset = FEVEROUSDataset(text_test_tokenized, labels_test)
    trainer, _ = model_trainer(model_path, test_dataset)
    output = trainer.predict(test_dataset)
    return output.predictions.argmax(-1)

In [None]:
# Local model directory; presumably the folder produced by extracting
# feverous_verdict_predictor.zip into /content/ (confirm against the archive layout).
MODEL_PATH_LOCAL = '/content/feverous_verdict_predictor/'
tokenizer = RobertaTokenizer.from_pretrained('ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli')
# Tokenize the test texts as one batch, padded and truncated.
text_test_tokenized = tokenizer(text_test_feverous, padding=True, truncation=True)

In [None]:
# Baseline: predict the test split with the model loaded from MODEL_PATH_LOCAL, without fine-tuning.
predictions = claim_predictor(MODEL_PATH_LOCAL, text_test_tokenized, labels_test_feverous)

## Fine Tuning

In [None]:
## PARAMS
shuffleSentences = True
nei_finetuning_size = 40  ## number of NEI examples added to the training split
addExtra = True  ## set to True to extend the training split with the NEI examples selected below
addTemplates = False  ## if true extends data with Pythia Templates examples

BASE_PATH = "MyDrive/feverous/feverousTest/" ## path to the feverous folder
FOLDER_PATH = BASE_PATH + "fine-tuning/"
TEMPLATE_PATH = BASE_PATH + "feverousTemplates/"
# NOTE(review): loadData is not defined in the visible part of this notebook;
# it is assumed to return (sentences, labels) for the folder - confirm.
sentences, labels = loadData(FOLDER_PATH)

# Keep only the NEI (label 0) examples from the loaded data.
nei_sentences = []
nei_labels = []
for sentence, label in zip(sentences, labels):
  if label == 0:
    nei_sentences.append(sentence)
    nei_labels.append(label)

nei_templates_sentences = []
nei_templates_labels = []
if addTemplates:
  files = [f for f in listdir(TEMPLATE_PATH) if isfile(join(TEMPLATE_PATH, f))]
  for file in files:
    if "contradicting" in file:
      fileToLoad = TEMPLATE_PATH + file
      # Dedicated names: unpacking into `labels` would overwrite the
      # module-level `labels` loaded above.
      template_text, template_labels = readTSVFile(fileToLoad)
      nei_templates_sentences += template_text
      nei_templates_labels += template_labels
  # Shuffle texts together with their labels so both lists stay aligned.
  template_pairs = list(zip(nei_templates_sentences, nei_templates_labels))
  random.Random(42).shuffle(template_pairs)
  template_pairs = template_pairs[0:nei_finetuning_size]
  nei_templates_sentences = [pair[0] for pair in template_pairs]
  nei_templates_labels = [pair[1] for pair in template_pairs]
  nei_sentences = nei_sentences + nei_templates_sentences
  nei_labels = nei_labels + nei_templates_labels

if shuffleSentences:
  # Same seed as before, but applied to (text, label) pairs so that the
  # label of every sentence moves with it.
  nei_pairs = list(zip(nei_sentences, nei_labels))
  random.Random(42).shuffle(nei_pairs)
  nei_sentences = [pair[0] for pair in nei_pairs]
  nei_labels = [pair[1] for pair in nei_pairs]

sentencesAdd = nei_sentences[0:nei_finetuning_size]
labelAdd = nei_labels[0:nei_finetuning_size]

extended_train_text = text_train_feverous
extendend_train_labels = labels_train_feverous

if addExtra:
  extended_train_text = list(text_train_feverous) + sentencesAdd
  extendend_train_labels = list(labels_train_feverous) + labelAdd
  pairs = list(zip(extended_train_text, extendend_train_labels))
  random.Random(42).shuffle(pairs)
  extended_train_text, extendend_train_labels = zip(*pairs)

print(len(extended_train_text))
print(len(extendend_train_labels))

print(datasetStats(extended_train_text, extendend_train_labels))

In [None]:
def finetune(model_path, train_dataset, test_dataset, epochs = 1):
    """Build a Trainer configured to fine-tune a 3-label RoBERTa classifier.

    Args:
        model_path: location passed to ``from_pretrained``; the logging
            directory is ``<model_path>/logs``.
        train_dataset: dataset used for training.
        test_dataset: dataset used for evaluation.
        epochs: number of training epochs.

    Returns:
        (trainer, model) tuple; training itself is not started here.
    """
    classifier = RobertaForSequenceClassification.from_pretrained(
        model_path, num_labels=3, return_dict=True
    )

    hyperparams = dict(
        output_dir='/content/resultsFineTuning',
        num_train_epochs=epochs,
        per_device_train_batch_size=2,   # kept at 2 to avoid memory errors
        per_device_eval_batch_size=1,
        warmup_steps=0,
        weight_decay=0.01,
        logging_dir=os.path.join(model_path, 'logs'),
        logging_steps=500,
        save_steps=5900,
        learning_rate=1e-05,
    )
    training_args = TrainingArguments(**hyperparams)

    tuner = Trainer(
        model=classifier,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )
    return tuner, classifier

def finetune_claim_predictor(model_path, train_dataset, test_dataset, epochs=1):
    """Fine-tune the classifier on ``train_dataset`` and return the arg-max
    class ids it predicts for ``test_dataset``."""
    trainer, _ = finetune(model_path, train_dataset, test_dataset, epochs)
    trainer.train()
    output = trainer.predict(test_dataset)
    return output.predictions.argmax(-1)

In [None]:
MODEL_PATH_LOCAL = '/content/feverous_verdict_predictor/'

# NOTE(review): the extended_*_test_* names were referenced here (and by the
# printing cell below) but are never assigned anywhere in the notebook, so a
# fresh "Restart & Run All" stopped with NameError. They are bound to the test
# split used for the baseline, which also keeps both runs comparable. Confirm
# that no separately extended test set was intended.
extended_text_test_feverous = text_test_feverous
extended_labels_test_feverous = labels_test_feverous
extended_text_test_tokenized = tokenizer(extended_text_test_feverous, padding=True, truncation=True)

# Tokenize the (optionally extended) training texts and wrap both splits.
text_train_tokenized = tokenizer(extended_train_text, padding=True, truncation=True)
train_dataset = FEVEROUSDataset(text_train_tokenized, extendend_train_labels)
test_dataset = FEVEROUSDataset(extended_text_test_tokenized, extended_labels_test_feverous)
epochs = 3
predictions = finetune_claim_predictor(MODEL_PATH_LOCAL, train_dataset, test_dataset, epochs=epochs)

In [None]:
# Print text, gold label and prediction for the test examples whose gold label is 0 (NEI).
printPredictions(extended_text_test_feverous, extended_labels_test_feverous, predictions, labelToFilter=0)