In [1]:
import pandas as pd
import time, datetime, numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from collections import Counter

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, get_linear_schedule_with_warmup

## Mounting Google Drive to Colab

In [2]:
from google.colab import drive

# Mount Google Drive so the project files are reachable under /content/drive.
drive.mount('/content/drive')

# Single place to edit if the project folder moves; the same directory was
# previously repeated inside every read_csv call.
DATA_DIR = '/content/drive/MyDrive/[CS4248] Project Folder/data'

# Train / validation / test splits (file names suggest the e-SNLI dataset).
df = pd.read_csv(f'{DATA_DIR}/esnli_train.csv')
val = pd.read_csv(f'{DATA_DIR}/esnli_val.csv')
test = pd.read_csv(f'{DATA_DIR}/esnli_test.csv')

Mounted at /content/drive


In [4]:
# Preview the first rows of the raw training split to see which columns are available.
df.head()

Unnamed: 0,pairID,gold_label,Sentence1,Sentence2,Explanation_1,WorkerId,Sentence1_marked_1,Sentence2_marked_1,Sentence1_Highlighted_1,Sentence2_Highlighted_1
0,3416050480.jpg#4r1n,neutral,A person on a horse jumps over a broken down a...,A person is training his horse for a competition.,the person is not necessarily training his horse,AF0PI3RISB5Q7,A person on a horse jumps over a broken down a...,A person is *training* *his* *horse* for a co...,{},345
1,3416050480.jpg#4r1c,contradiction,A person on a horse jumps over a broken down a...,"A person is at a diner, ordering an omelette.",One cannot be on a jumping horse cannot be a d...,A36ZT2WFIA2HMF,A person *on* *a* *horse* *jumps* over a brok...,"A person *is* *at* *a* *diner,* *ordering* an...",4235,25436
2,3416050480.jpg#4r1e,entailment,A person on a horse jumps over a broken down a...,"A person is outdoors, on a horse.",a broken down airplane is outdoors,A2GK75ZQTX2RDZ,A person on a horse jumps over *a* *broken* *...,"A person is *outdoors,* on a horse.",89107,3
3,2267923837.jpg#2r1n,neutral,Children smiling and waving at camera,They are smiling at their parents,Just because they are smiling and waving at a ...,A18TOIDG32QICP,Children smiling and waving at camera,They are smiling *at* *their* *parents*,{},534
4,2267923837.jpg#2r1e,entailment,Children smiling and waving at camera,There are children present,The children must be present to see them smili...,AEX0YE6TUZRHT,*Children* *smiling* *and* *waving* at camera,There are children *present*,0132,3


## Utility Functions

In [9]:
def format_time(elapsed):
    '''
    Render a duration given in seconds as a human-readable string.

    The value is rounded to the nearest whole second and formatted with
    ``str(datetime.timedelta)``, i.e. ``h:mm:ss`` (for example ``'1:01:01'``),
    with a ``'N day(s), '`` prefix once the duration reaches 24 hours.
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

def select_cols(df, col_list):
    '''
    Return the part of ``df`` addressed by ``col_list``.

    This is plain ``df[col_list]`` indexing: a list of column names gives a
    DataFrame with those columns in the listed order, and a name that is not
    present raises ``KeyError``.
    '''
    subset = df[col_list]
    return subset

## Data Pre-processing Utility Functions

In [10]:
def combine_sentences(df, col_list):
    '''
    Join several text columns into a single marker-delimited string per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is copied, never modified.
    col_list : list
        Column names to join, in the order they should appear in the text.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an extra ``combined_text`` column of the form
        ``[CLS]<col 1>[SEP]<col 2>[SEP]...``.

    Missing values (NaN/None) contribute an empty string. A plain
    ``astype(str)`` would instead inject the literal word 'nan' into the text.

    NOTE(review): the [CLS]/[SEP] markers are inserted here as plain text. If
    the result is later fed to a tokenizer that adds its own special tokens,
    they may be duplicated - confirm against the tokenization cell.
    '''
    results_df = df.copy()
    fields = results_df[col_list]
    # Stringify every cell, then blank the ones that were missing so that a
    # fake 'nan' token does not leak into the combined text.
    fields = fields.astype(str).mask(fields.isna(), '')
    results_df['combined_text'] = '[CLS]' + fields.agg('[SEP]'.join, axis=1)
    return results_df

## Train/Validation/Test Input Data Handling

In [11]:
# Text inputs in the order they will be joined (premise, hypothesis, explanation),
# followed by the label column. The next cell drops the last entry to get the text columns.
# NOTE(review): the saved outputs in this notebook show Explanation_1 *before* Sentence2,
# so they appear to come from an earlier column order - re-run the notebook to confirm.
target_cols = ['Sentence1', 'Sentence2', 'Explanation_1', 'gold_label']

# Keep only the needed columns in each split (train, validation, test).
df, val, test_df = (
    select_cols(split, target_cols) for split in (df, val, test)
)

Unnamed: 0,Sentence1,Explanation_1,Sentence2,gold_label
0,A person on a horse jumps over a broken down a...,the person is not necessarily training his horse,A person is training his horse for a competition.,neutral
1,A person on a horse jumps over a broken down a...,One cannot be on a jumping horse cannot be a d...,"A person is at a diner, ordering an omelette.",contradiction
2,A person on a horse jumps over a broken down a...,a broken down airplane is outdoors,"A person is outdoors, on a horse.",entailment
3,Children smiling and waving at camera,Just because they are smiling and waving at a ...,They are smiling at their parents,neutral
4,Children smiling and waving at camera,The children must be present to see them smili...,There are children present,entailment


In [12]:
# Every entry of target_cols except the trailing label column is a text input.
text_cols = target_cols[:-1]

# Add the combined_text column to each split (combine_sentences returns a new frame).
df, val, test_df = (
    combine_sentences(split, text_cols) for split in (df, val, test_df)
)

# Integer class id for each gold label.
lables = {'entailment': 0, 'neutral': 1, 'contradiction': 2}

# NOTE(review): Series.map leaves NaN for any gold_label value that is not a key of
# the dict - worth checking that no split contains unexpected labels.
for split_df in (df, val, test_df):
    split_df['labels'] = split_df['gold_label'].map(lables)

In [13]:
"""DF now contains combined_text and labels columns"""

# Preview the training split after the combined_text and labels columns were added.
df.head()

Unnamed: 0,Sentence1,Explanation_1,Sentence2,gold_label,combined_text,labels
0,A person on a horse jumps over a broken down a...,the person is not necessarily training his horse,A person is training his horse for a competition.,neutral,[CLS]A person on a horse jumps over a broken d...,1
1,A person on a horse jumps over a broken down a...,One cannot be on a jumping horse cannot be a d...,"A person is at a diner, ordering an omelette.",contradiction,[CLS]A person on a horse jumps over a broken d...,2
2,A person on a horse jumps over a broken down a...,a broken down airplane is outdoors,"A person is outdoors, on a horse.",entailment,[CLS]A person on a horse jumps over a broken d...,0
3,Children smiling and waving at camera,Just because they are smiling and waving at a ...,They are smiling at their parents,neutral,[CLS]Children smiling and waving at camera[SEP...,1
4,Children smiling and waving at camera,The children must be present to see them smili...,There are children present,entailment,[CLS]Children smiling and waving at camera[SEP...,0


In [14]:
"""Example of a combined_text"""

# Show the full combined string for the row labelled 0.
# NOTE(review): the saved output places the explanation before the hypothesis, which does
# not match the current target_cols order - re-run the notebook to refresh it.
df['combined_text'][0]

'[CLS]A person on a horse jumps over a broken down airplane.[SEP]the person is not necessarily training his horse[SEP]A person is training his horse for a competition.'