In [8]:
import logging
import json
import random
import h5py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import time
import numpy as np
import sys
from wbw.models.baseline_snli import encoder as wbw_encoder
from wbw.models.baseline_snli import LSTMTagger as wbw_gru
from da_gpu.models.baseline_snli import encoder as da_encoder
from da_gpu.models.baseline_snli import atten as da_atten
import argparse
from wbw.models.snli_data import snli_data
from wbw.models.snli_data import w2v
from torch import autograd
from IPython.display import Markdown, display

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [9]:
def get_errors(dev_lbl_batch,pred):
    """Describe, for every example in a batch, whether it was classified correctly.

    Reads the module-level ``labels_mapping`` (class id -> label name).

    Args:
        dev_lbl_batch: gold labels for the batch; must expose ``.data.numpy()``.
        pred: predicted class ids, indexable by position within the batch.

    Returns:
        A list of ``(position, description)`` tuples, one per example. The
        description is ``'T,O:<label>'`` when prediction and gold label agree,
        and ``'T: <gold>, O: <predicted>'`` otherwise.
    """
    error_list = []
    for i, v in enumerate(dev_lbl_batch.data.numpy()):
        # Coerce both ids to plain ints before using them as dict keys:
        # indexing a tensor can yield a 0-dim tensor, which does not hash
        # like the integer keys of labels_mapping.
        gold = int(v)
        guess = int(pred[i])
        if gold != guess:
            description = 'T: '+labels_mapping[gold]+', O: '+labels_mapping[guess]
        else:
            description = 'T,O:'+labels_mapping[gold]
        error_list.append((i, description))
    return error_list


def update_error_analysis_dict(actual_labels, pred_labels):
    """Add one batch of (gold, predicted) label pairs to the confusion counts.

    Mutates the module-level ``error_analysis`` table in place, using
    ``labels_mapping`` to turn class ids into label names. The outer key is
    the gold label name and the inner key is the predicted label name.

    Args:
        actual_labels: indexable sequence of gold class ids.
        pred_labels: indexable sequence of predicted class ids; it is read at
            every position of ``actual_labels``.
    """
    position = 0
    count = len(actual_labels)
    while position < count:
        gold_row = error_analysis[labels_mapping[actual_labels[position]]]
        gold_row[labels_mapping[pred_labels[position]]] += 1
        position += 1

def print_sentence(batch,idx,return_result=True):
    """Format the premise and hypothesis of one dev example as text.

    Reads the module-level ``dev_batches`` and ``idx_to_word`` and, when
    ``return_result`` is ``True``, ``outcomes``.

    Args:
        batch: index into ``dev_batches``.
        idx: position of the example within that batch.
        return_result: when exactly ``True``, the outcome string stored at
            ``outcomes[batch][1][idx][1]`` is appended on its own line.

    Returns:
        A string with a ``Premise:`` line and a ``Hypothesis:`` line,
        optionally followed by the outcome line.

    Note:
        ``return_result=True`` needs ``outcomes`` to hold an entry for
        ``batch``. The evaluation cell in this notebook initialises
        ``outcomes`` to an empty list and its ``outcomes.append`` line is
        commented out, so pass ``return_result=False`` unless ``outcomes``
        has been filled some other way.
    """
    s = dev_batches[batch]
    # int(...) because iterating a tensor can yield 0-dim tensors, which do
    # not hash like the integer keys of idx_to_word.
    premise = ' '.join(idx_to_word[int(pword)] for pword in s[0][idx])
    hypothesis = ' '.join(idx_to_word[int(hword)] for hword in s[1][idx])
    text = 'Premise: '+premise+'\nHypothesis: '+hypothesis
    if return_result is True:
        return text+'\n'+outcomes[batch][1][idx][1]
    return text

def heatmap(batch,idx):
    """Draw an annotated heatmap of attention weights for one dev example.

    Reads the module-level ``dev_batches``, ``idx_to_word`` and ``alphas``,
    prints the sentence pair via ``print_sentence`` and shows the figure.

    Args:
        batch: index into ``dev_batches`` and ``alphas``.
        idx: position of the example within that batch.

    Note:
        ``alphas`` is not created by the cells visible in this notebook (its
        initialisation in the evaluation cell is commented out), so it must
        be populated before calling this function.
        ``print_sentence`` is called with its default ``return_result=True``
        and therefore also needs ``outcomes`` to be populated.
    """
    # int(...) because iterating a tensor can yield 0-dim tensors, which do
    # not hash like the integer keys of idx_to_word.
    premise_words = [idx_to_word[int(i)] for i in dev_batches[batch][0][idx]]
    hypothesis_words = [idx_to_word[int(i)] for i in dev_batches[batch][1][idx]]
    # assumes alphas[batch][:, idx] transposes to (premise_len, hypothesis_len)
    # so that rows are premise words and columns hypothesis words — TODO confirm
    weights = alphas[batch][:,idx].data.numpy().T
    df = pd.DataFrame(weights, columns=hypothesis_words, index=premise_words)
    sns.heatmap(df, cmap="YlGnBu", annot=True)
    print(print_sentence(batch,idx))
    plt.show()

    
def printmd(string):
    """Render ``string`` as Markdown in the notebook output area."""
    rendered = Markdown(string)
    display(rendered)


# Error Analysis
#
# Bookkeeping for a confusion table: error_analysis[gold_name][pred_name]
# holds a count, and every cell starts at zero.
from collections import defaultdict

# Class id -> label name.
labels_mapping = {1:'neutral', 0:'entailment', 2:'contradiction'}
labels = ['neutral', 'entailment', 'contradiction']

error_analysis = defaultdict(dict)
for gold_name in labels_mapping.values():
    # Accessing the key creates the row; fill it with a zero per predicted label.
    error_analysis[gold_name].update({pred_name: 0 for pred_name in labels})

# Load Files

In [10]:
#mypath='/Users/danielamaranto/Desktop/nlppaper/process/'
# Root data directory. Set the SNLI_DATA_DIR environment variable to point at
# your own copy instead of editing this cell; the original path stays as the
# default. os.path.join(..., '') guarantees a trailing separator because the
# cells below build file names by plain string concatenation.
import os
mypath = os.path.join(
    os.environ.get('SNLI_DATA_DIR', '/Users/Lisa/Documents/Grad School/DS-GA 1101/data/'),
    '')

In [11]:
# Load the word vectors and the validation split from the preprocessed HDF5 files.
w2v_file = mypath+"snli_preprocess/glove.hdf5"
dev_file = mypath+"snli_preprocess/val.hdf5"

# Only the word_vecs attribute of the loader is kept.
word_vecs = w2v(w2v_file).word_vecs

# NOTE(review): the meaning of the second argument (-1) is defined by snli_data — confirm.
dev_data = snli_data(dev_file, -1)
# Each element unpacks into (source, target, label) in the evaluation cell.
dev_batches = dev_data.batches

In [12]:
# Build the index -> word lookup from the preprocessed vocabulary file.
word_dict_path = mypath+'snli_preprocess/word.dict'
with open(word_dict_path,'r') as inf:
    # Each line is split on whitespace: field 0 is the word, field 1 its index.
    # The index is shifted down by one so the keys use 0-based indexing.
    idx_to_word = {
        int(fields[1]) - 1: fields[0]
        for fields in (row.split() for row in inf)
    }

In [13]:
# --- WBW model: input encoder + LSTMTagger (imported as wbw_gru) ---
# Earlier checkpoint, kept for reference:
#encode_model_fp = '../03_epoch-54_dev-acc-0.768_input-encoder.pt'
#atten_model_fp = '../03_epoch-54_dev-acc-0.768_lstm.pt'

# Constructor arguments for the two networks.
# NOTE(review): presumably these must match the values used when the
# checkpoints were trained — confirm.
embedding_size = 300
hidden_size = 103
train_lbl_size = 3
para_init = 0.01

# Saved state dicts for the encoder and the tagger.
encode_model_fp = '/Users/Lisa/Documents/Grad School/DS-GA 1101/nlp_project/wbw/output/04_epoch-82_dev-acc-0.780_input-encoder.pt'
atten_model_fp = '/Users/Lisa/Documents/Grad School/DS-GA 1101/nlp_project/wbw/output/04_epoch-82_dev-acc-0.780_lstm.pt'

# Build each network, then restore its weights.
# map_location converts the saved model from gpu to cpu.
wbw_input_encoder = wbw_encoder(word_vecs.size(0), embedding_size, hidden_size, para_init)
wbw_input_encoder.load_state_dict(torch.load(encode_model_fp, map_location=lambda storage, loc: storage))

gru = wbw_gru(hidden_size, train_lbl_size)
gru.load_state_dict(torch.load(atten_model_fp, map_location=lambda storage, loc: storage))

In [16]:
# --- DA model: input encoder + attention module ---
# This cell rebinds the names used by the WBW cell (paths, hidden_size, ...),
# so the WBW values are no longer available once it has run.

# Constructor arguments for the two networks.
# NOTE(review): presumably these must match the values used when the
# checkpoints were trained — confirm.
embedding_size = 300
hidden_size = 283
train_lbl_size = 3
para_init = 0.01

# Saved state dicts for the encoder and the attention module.
encode_model_fp = '/Users/Lisa/Documents/Grad School/DS-GA 1101/nlp_project/da_gpu/output/ada01_epoch-109_dev-acc-0.839_input-encoder.pt'
atten_model_fp = '/Users/Lisa/Documents/Grad School/DS-GA 1101/nlp_project/da_gpu/output/ada01_epoch-109_dev-acc-0.839_inter-atten.pt'

# Build each network, then restore its weights.
# map_location converts the saved model from gpu to cpu.
da_input_encoder = da_encoder(word_vecs.size(0), embedding_size, hidden_size, para_init)
da_input_encoder.load_state_dict(torch.load(encode_model_fp, map_location=lambda storage, loc: storage))

atten = da_atten(hidden_size, train_lbl_size, para_init,dropout=.2)
atten.load_state_dict(torch.load(atten_model_fp, map_location=lambda storage, loc: storage))

# Eval Dev Set

In [61]:
# Run both models (DA and WBW) over the dev set and record, per example, the
# gold label, each model's prediction, the sentence lengths and the example's
# position (batch number and index within the batch).
# This may take a few minutes to run.

# Put all four modules into eval mode before scoring.
for _module in (da_input_encoder, atten, wbw_input_encoder, gru):
    _module.eval()

# Initialised here but not updated anywhere in this cell.
correct = 0.
total = 0.
outcomes = []
#alphas = []

# Per-example accumulators, later used as DataFrame columns.
true_y, pred_y_da, pred_y_gru = [], [], []
len_p, len_h = [], []
batch, batch_idx = [], []

for i in range(len(dev_batches)):
#for i in range(0,20):
    # Each batch is (source, target, label); wrap all three as Variables.
    dev_src_batch, dev_tgt_batch, dev_lbl_batch = (Variable(t) for t in dev_batches[i])

    # DA model: encode both sentences, then take the highest-scoring class.
    dev_src_linear, dev_tgt_linear = da_input_encoder(dev_src_batch, dev_tgt_batch)
    log_prob, _, _ = atten(dev_src_linear, dev_tgt_linear, return_attn=True)
    predict = log_prob.data.max(dim=1)[1]
    pred_y_da += list(predict.numpy())

    # WBW model: same procedure with its own encoder and tagger.
    dev_src_linear, dev_tgt_linear = wbw_input_encoder(dev_src_batch, dev_tgt_batch)
    log_prob, _ = gru(dev_src_linear, dev_tgt_linear, return_attn=True)
    predict = log_prob.data.max(dim=1)[1]
    pred_y_gru += list(predict.numpy())

    true_y += list(dev_lbl_batch.data.numpy())

    # Lengths are read from dimension 1 of the source/target tensors and
    # repeated once for every example in the batch.
    batch_size = len(dev_lbl_batch)
    len_p += [dev_src_batch.data.shape[1]] * batch_size
    len_h += [dev_tgt_batch.data.shape[1]] * batch_size
    batch += [i] * batch_size
    batch_idx += list(range(batch_size))

    # Disabled: filling `outcomes` with per-example error descriptions.
    #results = get_errors(dev_lbl_batch,predict)
    #outcomes.append((i,results))

In [62]:
# Collect the per-example evaluation records into one frame (one row per example).
df = pd.DataFrame({
    'batch': batch,
    'batch_idx': batch_idx,
    'true_y': true_y,
    'pred_y_da': pred_y_da,
    'pred_y_gru': pred_y_gru,
    'len_h': len_h,
    'len_p': len_p,
})

In [63]:
def get_sentence(data, batch, batch_idx, sent='premise'):
    """Return one sentence of a batched example as a space-joined string.

    Reads the module-level ``idx_to_word`` (word index -> token).

    Args:
        data: sequence of batches; element 0 of a batch holds the premises
            and element 1 the hypotheses.
        batch: index of the batch inside ``data``.
        batch_idx: position of the example inside that batch.
        sent: ``'premise'`` selects element 0; any other value selects
            element 1 (the hypothesis).

    Returns:
        The tokens of the selected sentence joined by single spaces.
    """
    s = 0 if sent == 'premise' else 1
    # int(...) because iterating a tensor can yield 0-dim tensors, which do
    # not hash like the integer keys of idx_to_word.
    return ' '.join(idx_to_word[int(word)] for word in data[batch][s][batch_idx])

In [64]:
# Attach the decoded premise and hypothesis text to every row, looked up by
# the row's (batch, batch_idx) position in dev_batches.
df['premise'] = [
    get_sentence(dev_batches, b, j, sent='premise')
    for b, j in zip(df['batch'], df['batch_idx'])
]
df['hypothesis'] = [
    get_sentence(dev_batches, b, j, sent='hypothesis')
    for b, j in zip(df['batch'], df['batch_idx'])
]

In [65]:
# Preview the first rows only; the frame holds one row per dev example.
df.head(10)

Unnamed: 0,batch,batch_idx,len_h,len_p,pred_y_da,pred_y_gru,true_y,premise,hypothesis
0,0,0,4,3,0,0,0,<s> trucks racing,<s> there are vehicles
1,1,0,5,3,0,0,0,<s> javelin competition,<s> there is a competition
2,2,0,7,3,1,1,1,<s> javelin competition,<s> the woman one the javelin competition
3,3,0,9,3,2,2,2,<s> trucks racing,<s> the trucks are stationary in the parking lot
4,3,1,9,3,1,1,1,<s> javelin competition,<s> the javelin competition was cancelled due ...
5,4,0,11,3,1,1,0,<s> trucks racing,<s> four trucks are racing against each other ...
6,5,0,3,4,0,0,0,<s> a hockey fight,<s> players fighting
7,6,0,4,4,2,2,2,<s> men playing football,<s> some men sleep
8,7,0,5,4,2,2,2,<s> two dogs play,<s> two cats are playing
9,7,1,5,4,0,0,0,<s> two dogs swimming,<s> two dogs in water


In [67]:
# Size of the assembled frame as (number of rows, number of columns).
df.shape

(9841, 9)