In [78]:
from models import NeuralNetwork, LogisticRegression
import pandas as pd
import torch
import load_data
import pickle as pkl
import torch.nn as nn

In [127]:
# Show every row when a DataFrame is displayed (None disables row truncation).
# This option used to be assigned twice in a row; a single assignment is enough.
pd.options.display.max_rows = None

## Load Best Model

In [129]:
# Checkpoint to analyse and the hyper-parameters needed to rebuild its network.
BEST_MODEL = 'models/20000_50_SUB_neural-net.pt'
VOCAB_SIZE = 20000   # also selects which vocabulary pickle files are loaded
BATCH_SIZE = 100     # number of validation rows kept and the loader batch size
EMBED_DIM = 50
HIDDEN_DIM = 100
# Output width of the network's final layer (linear2 in the model repr shows
# out_features=20); note that only three labels are mapped below.
NUM_CLASS = 20
CAT_MODE = "SUB"

# Label <-> class-index lookups; the reverse map is derived so the two
# dictionaries cannot drift apart.
label2idx = dict(contradiction=0, neutral=1, entailment=2)
idx2label = {index: name for name, index in label2idx.items()}

In [136]:
# Keep only the first BATCH_SIZE validation rows; with batch_size=BATCH_SIZE
# and shuffle=False the loader below therefore yields at most one batch, in
# the same row order as snli_val.
snli_val = pd.read_csv("data/snli_val.tsv", sep='\t')[:BATCH_SIZE]
val_data = load_data.prepare_data(snli_val)

# Vocabulary lookups saved for this VOCAB_SIZE. Context managers make sure the
# file handles are closed (they were previously left open).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('pickle/' + str(VOCAB_SIZE) + '_id2token.pkl', 'rb') as id2token_file:
    id2token = pkl.load(id2token_file)
with open('pickle/' + str(VOCAB_SIZE) + '_token2id.pkl', 'rb') as token2id_file:
    token2id = pkl.load(token2id_file)

# NOTE(review): the literal 30 is presumably a maximum sentence length used by
# token2index_dataset -- confirm against load_data and consider naming it.
indiced_val_data, val_target = load_data.token2index_dataset(val_data, token2id, 30)
val_dataset = load_data.SNLIDataset(indiced_val_data, val_target)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=BATCH_SIZE,
                                         collate_fn=load_data.SNLI_collate_func,
                                         shuffle=False)

In [137]:
# Size the embedding table from the number of distinct entries in id2token.
num_embed = len(set(id2token))
emb_layer = nn.Embedding(num_embed, EMBED_DIM)
model = NeuralNetwork(emb_layer, EMBED_DIM, NUM_CLASS, HIDDEN_DIM, CAT_MODE)

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the model built above is never moved off the CPU here.
model.load_state_dict(torch.load(BEST_MODEL, map_location="cpu"))

# Switch to inference mode. eval() returns the module, so as the last
# expression of the cell it also displays the architecture.
model.eval()

NeuralNetwork(
  (embed): Embedding(18764, 50)
  (linear1): Linear(in_features=50, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=20, bias=True)
)

## Helper functions 

In [167]:
def pred_proba(prem, hyp, len_prem, len_hyp):
    """Return class probabilities for a batch of premise/hypothesis pairs.

    The four arguments are forwarded unchanged to the module-level ``model``.
    Its raw output is assumed to be 2-D, ``(batch, num_classes)`` (``pred``
    reduces over dim 1), and is normalised with a softmax over the class
    dimension so that every row sums to 1.
    """
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        raw = model(prem, hyp, len_prem, len_hyp)
    # An explicit dim avoids the "Implicit dimension choice for softmax has
    # been deprecated" warning raised by a bare nn.Softmax(); for 2-D input
    # the implicit choice was dim 1 as well, so results are unchanged.
    return nn.Softmax(dim=1)(raw)

def pred(prem, hyp, len_prem, len_hyp):
    """Return the most likely class and its probability for each example.

    Delegates to ``pred_proba`` and reduces over dim 1. The result is what
    ``Tensor.max(dim)`` returns: ``.values`` holds the highest probability
    per row and ``.indices`` the corresponding class index.
    """
    class_probs = pred_proba(prem, hyp, len_prem, len_hyp)
    top_class = class_probs.max(dim=1)
    return top_class

def idx_to_string(idx_arr, idx2token, sep=' '):
    """Turn a sequence of token indices back into a readable string.

    Args:
        idx_arr: iterable of token indices (plain ints or anything ``int()``
            accepts, e.g. 0-d integer tensors).
        idx2token: list or dict mapping an integer index to its token string.
        sep: separator placed between tokens. Defaults to a single space so
            that word-level tokens are not glued together; pass ``''`` to
            concatenate tokens without a separator.

    Returns:
        The tokens joined by ``sep``; an empty string for empty input.
    """
    # int() normalises tensor/numpy scalars so dict lookups hash correctly.
    return sep.join(idx2token[int(idx)] for idx in idx_arr)

def print_row(row):
    """Print the premise, hypothesis, gold label and prediction of one row.

    ``row`` must expose ``sentence1``, ``sentence2``, ``label`` and ``pred``
    attributes (for example a row of the results DataFrame).
    """
    layout = (
        ('Premise: {}', 'sentence1'),
        ('Hypothesis: {}', 'sentence2'),
        ('Label: {}', 'label'),
        ('Pred: {}', 'pred'),
    )
    # Attributes are looked up one at a time, immediately before printing.
    for template, field in layout:
        print(template.format(getattr(row, field)))

## Load batch of data

In [139]:
# Pull the first batch from the loader. Only BATCH_SIZE rows were loaded and
# the loader uses batch_size=BATCH_SIZE with shuffle=False, so this single
# batch covers the whole loaded validation slice in its original order.
prem, len_prem, hyp, len_hyp, labels = next(iter(val_loader))

## Make Predictions

In [140]:
# preds holds, per example, the confidence of the predicted class
# (preds.values) and the predicted class index (preds.indices).
preds = pred(prem, hyp, len_prem, len_hyp)

  after removing the cwd from sys.path.


In [156]:
# Work on a copy: snli_val is itself a slice of the loaded frame, and adding
# columns to an alias would mutate the input data and make this cell depend on
# earlier runs.
results = snli_val.copy()
# NOTE: idx2label only maps classes 0-2 while the model has NUM_CLASS outputs;
# a predicted index outside that range would raise KeyError here.
results['pred'] = [idx2label[class_idx] for class_idx in preds.indices.tolist()]
results['confidence'] = preds.values
results['correct'] = results.label == results.pred
results.head()

Unnamed: 0,sentence1,sentence2,label,pred,confidence,correct
0,"Three women on a stage , one wearing red shoes...",There are two women standing on the stage,contradiction,entailment,0.982565,False
1,"Four people sit on a subway two read books , o...","Multiple people are on a subway together , wit...",entailment,entailment,0.999467,True
2,bicycles stationed while a group of people soc...,People get together near a stand of bicycles .,entailment,entailment,0.734481,True
3,Man in overalls with two horses .,a man in overalls with two horses,entailment,entailment,0.984802,True
4,Man observes a wavelength given off by an elec...,The man is examining what wavelength is given ...,entailment,entailment,0.808949,True


## Incorrect Predictions

In [159]:
# Rows the model got wrong. `correct` is a boolean column, so negate the mask
# directly instead of comparing it with False.
incorrect = results[~results.correct]
incorrect.head()

Unnamed: 0,sentence1,sentence2,label,pred,confidence,correct
0,"Three women on a stage , one wearing red shoes...",There are two women standing on the stage,contradiction,entailment,0.982565,False
5,Two people are in a green forest .,The forest is not dead .,entailment,contradiction,0.999981,False
8,A group of numbered participants walk down the...,Participants wait for the beginning of the wal...,neutral,contradiction,0.917985,False
13,Man in white shirt and blue jeans looking to t...,Man has a blue shirt on .,contradiction,entailment,0.992036,False
18,A red jeep hangs from the edge of a rocky clif...,The vehicle is red .,entailment,neutral,0.781819,False


### Incorrect #1

In [168]:
# Second misclassified row (original row index 5 in the table above).
incorrect_1 = incorrect.iloc[1]
print_row(incorrect_1)

Premise: Two people are in a green forest .
Hypothesis: The forest is not dead .
Label: entailment
Pred: contradiction


The model is unable to handle the negation in "not dead". It probably treats the hypothesis as saying that the forest is dead, and therefore predicts a contradiction.

### Incorrect #2

In [169]:
# First misclassified row (original row index 0 in the table above).
incorrect_2 = incorrect.iloc[0]
print_row(incorrect_2)

Premise: Three women on a stage , one wearing red shoes , black pants , and a gray shirt is sitting on a prop , another is sitting on the floor , and the third wearing a black shirt and pants is standing , as a gentleman in the back tunes an instrument .
Hypothesis: There are two women standing on the stage
Label: contradiction
Pred: entailment


The premise is very long, and the model may not capture the difference between "two" and "three", especially given all the other information in the premise. The model may also be unable to connect the number three with the women.

### Incorrect #3

In [170]:
# Fourth misclassified row (original row index 13 in the table above).
incorrect_3 = incorrect.iloc[3]
print_row(incorrect_3)

Premise: Man in white shirt and blue jeans looking to the side while walking down a busy sidewalk .
Hypothesis: Man has a blue shirt on .
Label: contradiction
Pred: entailment


The model is unable to associate adjectives with the nouns they modify: it sees that something is blue in both sentences and so predicts entailment.

## Correct Predictions

In [171]:
# Rows the model got right. `correct` is already a boolean column, so it can
# be used as the mask directly instead of comparing it with True.
correct = results[results.correct]
correct.head()

Unnamed: 0,sentence1,sentence2,label,pred,confidence,correct
1,"Four people sit on a subway two read books , o...","Multiple people are on a subway together , wit...",entailment,entailment,0.999467,True
2,bicycles stationed while a group of people soc...,People get together near a stand of bicycles .,entailment,entailment,0.734481,True
3,Man in overalls with two horses .,a man in overalls with two horses,entailment,entailment,0.984802,True
4,Man observes a wavelength given off by an elec...,The man is examining what wavelength is given ...,entailment,entailment,0.808949,True
6,Two men are listening to music through headpho...,Two men listen to music .,entailment,entailment,0.727257,True


### Correct #1

In [172]:
# Third correctly classified row (original row index 3 in the table above).
correct_1 = correct.iloc[2]
print_row(correct_1)

Premise: Man in overalls with two horses .
Hypothesis: a man in overalls with two horses
Label: entailment
Pred: entailment


Basically the same sentence

### Correct #2

In [173]:
# Fourth correctly classified row (original row index 4 in the table above).
correct_2 = correct.iloc[3]
print_row(correct_2)

Premise: Man observes a wavelength given off by an electronic device .
Hypothesis: The man is examining what wavelength is given off by the device .
Label: entailment
Pred: entailment


Again, large overlap in the words used

### Correct #3

In [177]:
# The first correct rows are all 'entailment'; keep only correct predictions
# with another gold label to find a more informative example.
correct_not_entailment = correct[correct.label != 'entailment']
correct_not_entailment.head()

Unnamed: 0,sentence1,sentence2,label,pred,confidence,correct
7,"Two women , one walking her dog the other push...",There is a snowstorm .,contradiction,contradiction,0.886462,True
9,Three people and a white dog are sitting in th...,Three dogs and a person are sitting in the snow .,contradiction,contradiction,0.91218,True
12,A little boy watches a Ferris Wheel in motion .,A boy is waiting in line for the Ferris Wheel .,neutral,neutral,0.987572,True
16,A woman in a light blue jacket is riding a bike .,A woman in a jacket riding a bike to work .,neutral,neutral,0.993825,True
17,A group of people dressed in Santa Claus suits...,A band plays at a beach party .,neutral,neutral,0.994159,True


In [178]:
# Second non-entailment correct row (original row index 9 in the table above).
correct_3 = correct_not_entailment.iloc[1]
print_row(correct_3)

Premise: Three people and a white dog are sitting in the sand on a beach .
Hypothesis: Three dogs and a person are sitting in the snow .
Label: contradiction
Pred: contradiction


The model probably got this one right because of the beach vs. snow mismatch; it is doubtful that it understood the contradiction in the number of people and dogs.

Overall it seems the model performs well when the hypothesis does not include specific details, especially those related to a specific noun in the premise

The model performs well when there is large overlap between the two sentences in terms of words used. The model appears to be unable to understand more complex relationships, such as negation, counts, and which adjective modifies which noun.