# Natural Laguage Inference (NLI)

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from collections import Counter
import pickle as pkl
import random
import pdb
import io
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import pandas as pd
import pdb

## Stanford NLI

In [2]:
VOCAB_SIZE = 50000
BATCH_SIZE = 32

## Load Fasttext vectors

In [3]:
def load_vectors(f):
    fin = io.open(f, 'r', encoding='utf-8', newline='\n', errors='ignore')
    data={}
    n, d = map(int, fin.readline().split())
    for i,line in enumerate(fin):
        if i == VOCAB_SIZE:
            break
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = np.array(list(map(float, tokens[1:])))
        i += 1
    return data

word_vectors = load_vectors("/Users/zihaoguo/Documents/nyu/ds1011/hw2/wiki-news-300d-1M.vec")

with open('fasttext_word_vectors.p', 'wb') as f:
    pkl.dump(word_vectors, f)

# Create dictionary, id2token, token2id

In [4]:
from collections import Counter
PAD_IDX = 0
UNK_IDX = 1

def build_vocab():
    # Returns:
    # id2token: list of tokens, where id2token[i] returns token that corresponds to token i
    # token2id: dictionary where keys represent tokens and corresponding values represent indices
    word_vectors = pkl.load(open("fasttext_word_vectors.p", "rb"))
    id2token = list(word_vectors.keys())
    token2id = dict(zip(word_vectors, range(2,2+len(word_vectors)))) 
    id2token = ['<pad>', '<unk>'] + id2token
    token2id['<pad>'] = PAD_IDX 
    token2id['<unk>'] = UNK_IDX
    return word_vectors, token2id, id2token

word_vectors, token2id, id2token = build_vocab()

## Train Data

In [5]:
snli_train = pd.read_csv('/Users/zihaoguo/Documents/nyu/ds1011/hw2/snli_train.tsv', sep='\t')
sent1_train = list(snli_train["sentence1"])
sent2_train = list(snli_train["sentence2"])
train_label = list(snli_train["label"])
print(len(train_label))

100000


In [6]:
# Convert labels to integers
def labels_to_integers(data_label):
    for i in range(len(data_label)):
        if data_label[i] == "contradiction":
            data_label[i] = 0
        elif data_label[i] == "entailment":
            data_label[i] = 1
        elif data_label[i] == "neutral":
            data_label[i] = 2
    return data_label

train_label = labels_to_integers(train_label)

In [7]:
# Randomly shuffle the data so that we'll split them in the next step
SEED = 123
random.Random(SEED).shuffle(sent1_train)
random.Random(SEED).shuffle(sent2_train)
random.Random(SEED).shuffle(train_label)

#  Validation Data

In [8]:
snli_val = pd.read_csv('snli_val.tsv', sep='\t')
sent1_val = list(snli_val["sentence1"])
sent2_val = list(snli_val["sentence2"])
val_label = list(snli_val["label"])
val_label = labels_to_integers(val_label)
SEED = 123
random.Random(SEED).shuffle(sent1_val)
random.Random(SEED).shuffle(sent2_val)
random.Random(SEED).shuffle(val_label)

## Tokenize our data

In [9]:
def tokenize(sentence_list):
    return [word_tokenize(sentence_list[i]) for i in range(len(sentence_list))]

In [10]:
# train
sent1_train_tokenized = tokenize(sent1_train)
sent2_train_tokenized = tokenize(sent2_train)

# val
sent1_val_tokenized = tokenize(sent1_val)
sent2_val_tokenized = tokenize(sent2_val)

## One-hot code the tokens

In [11]:
def token2index_dataset(tokens_data):
    indices_data = []
    for tokens in tokens_data:
        index_list = [token2id[token] if token in token2id else UNK_IDX for token in tokens]
        indices_data.append(index_list)
    return indices_data

In [12]:
# train
sent1_train_indices = token2index_dataset(sent1_train_tokenized)
sent2_train_indices = token2index_dataset(sent2_train_tokenized)

# val
sent1_val_indices = token2index_dataset(sent1_val_tokenized)
sent2_val_indices = token2index_dataset(sent2_val_tokenized)

## Create the Pytorch Data Loader

In [13]:
MAX_SENTENCE_LENGTH = 80

import numpy as np
import torch
from torch.utils.data import Dataset

class NLIDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    """
    
    def __init__(self, sent1_data_list, sent2_data_list, target_list):
        self.sent1_data_list = sent1_data_list
        self.sent2_data_list = sent2_data_list
        self.target_list = target_list
        assert (len(self.sent1_data_list) == len(self.target_list) and len(self.sent2_data_list) == len(self.target_list))

    def __len__(self):
        return len(self.sent1_data_list)
        
    def __getitem__(self, key):
        ###
        ### Returns [[sentence, 1, tokens], [sentence, 2, tokens]]
        ###
        """
        Triggered when you call dataset[i]
        """
        sent1_tokens_idx = self.sent1_data_list[key][:MAX_SENTENCE_LENGTH]
        sent2_tokens_idx = self.sent2_data_list[key][:MAX_SENTENCE_LENGTH]
        combined_tokens_idx = [sent1_tokens_idx, sent2_tokens_idx]
        label = self.target_list[key]
        return [combined_tokens_idx, len(sent1_tokens_idx), len(sent2_tokens_idx), label]

def NLI_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    sent1_data_list = []
    sent2_data_list = []
    sent1_length_list = []
    sent2_length_list = []
    label_list = []
    combined_data_list = []
    #print("collate batch: ", batch[0][0])
    #batch[0][0] = batch[0][0][:MAX_SENTENCE_LENGTH]
    for datum in batch:
        label_list.append(datum[3])
        sent1_length_list.append(datum[1])
        sent2_length_list.append(datum[2])
    # padding
    for datum in batch:
        padded_vec_1 = np.pad(np.array(datum[0][0]), pad_width=((0,MAX_SENTENCE_LENGTH-datum[1])), 
                                mode="constant", constant_values=0)
        padded_vec_2 = np.pad(np.array(datum[0][1]), pad_width=((0,MAX_SENTENCE_LENGTH-datum[2])), 
                                mode="constant", constant_values=0)
        combined_data_list.append([padded_vec_1, padded_vec_2])
    return [torch.from_numpy(np.array(combined_data_list)), 
            torch.LongTensor(sent1_length_list), torch.LongTensor(sent2_length_list), torch.LongTensor(label_list)]

In [14]:
train_dataset = NLIDataset(sent1_train_indices, sent2_train_indices, train_label)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=BATCH_SIZE, 
                                           collate_fn=NLI_collate_func,
                                           shuffle=True
                                          )

val_dataset = NLIDataset(sent1_val_indices, sent2_val_indices, val_label)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                           batch_size=BATCH_SIZE, 
                                           collate_fn=NLI_collate_func,
                                           shuffle=True
                                          )

## RNN model

In [15]:
weights = np.array(list(word_vectors.values()))
pad_vec = np.zeros((1, 300))
unk_vec = np.random.randn(1, 300) * 0.01
pad_unk_vecs = np.vstack((pad_vec, unk_vec))
WEIGHTS = np.vstack((pad_unk_vecs, weights))

In [368]:
weights.shape

(50000, 300)

In [301]:
class rnn(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes, emb_size= 300):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(rnn, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        weight = torch.FloatTensor(WEIGHTS)
        self.embedding = nn.Embedding.from_pretrained(weight)
        self.rnn = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.linear1 = nn.Linear(2*hidden_size, 500)
        self.linear2 = nn.Linear(500, num_classes)
        self.dropout = nn.Dropout(0.2)

    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        return torch.randn(2, batch_size*2, self.hidden_size)

    def forward(self, x, sent1_lengths, sent2_lengths):
        # reset hidden state
        batch_size = x.size()[0]
                
        s1lengths = list(sent1_lengths)
        s2lengths = list(sent2_lengths)
        ordered_slengths = s1lengths + s2lengths

        reverse_sorted_indices = [x for _, x in sorted(zip(ordered_slengths, range(len(ordered_slengths))), reverse=True)]
        reverse_sorted_lengths = [x for x, _ in sorted(zip(ordered_slengths, range(len(ordered_slengths))), reverse=True)]
        reverse_sorted_lengths = np.array(reverse_sorted_lengths)
        
        sent1s = x[:, 0, :]
        sent2s = x[:, 1, :]
        ordered_sents = torch.cat([sent1s, sent2s], dim=0)
        reverse_sorted_data = torch.index_select(ordered_sents, 0, torch.tensor(reverse_sorted_indices))
        
        # get embedding
        embed = self.embedding(reverse_sorted_data) 
        self.hidden = self.init_hidden(batch_size)
        # pack padded sequence
        embed = torch.nn.utils.rnn.pack_padded_sequence(embed, reverse_sorted_lengths, batch_first=True)       
        # fprop though RNN
        rnn_out, self.hidden = self.rnn(embed, self.hidden)        
        ### MATCHING BACK
        change_it_back = [x for _, x in sorted(zip(reverse_sorted_indices, range(len(reverse_sorted_indices))))]
        self.hidden = torch.index_select(self.hidden, 1, torch.LongTensor(change_it_back)) 
        ### GRU
        hidden_sent1s = torch.cat([self.hidden[0, 0:batch_size, :], self.hidden[1, 0:batch_size, :]], dim=1)
        hidden_sent2s = torch.cat([self.hidden[0, batch_size:, :], self.hidden[1, batch_size:, :]], dim=1)
        
#         concatenation of encoded sentences
#        linear1 = self.linear1(torch.cat([hidden_sent1s, hidden_sent2s], dim=1))
#         addition of encoded sentences
#         linear1 = self.linear1(torch.tensor(hidden_sent1s) + torch.tensor(hidden_sent2s))
#         element-wise multiplication of encoded sentences
        linear1 = self.linear1(torch.tensor(hidden_sent1s)*torch.tensor(hidden_sent2s))
        linear1 = F.relu(linear1.contiguous().view(-1, linear1.size(-1))).view(linear1.shape)   
#         linear1 = self.dropout(linear1)
        logits = self.linear2(linear1)
        return logits

In [302]:
def test_model(loader, model):
    """
    Helper function that tests the model's performance on a dataset
    """
    correct = 0
    total = 0
    model.eval()
    for (data, sent1_lengths, sent2_lengths, labels) in loader:
        data_batch, sent1_length_batch, sent2_length_batch, label_batch = data, sent1_lengths, sent2_lengths, labels
        outputs = F.softmax(model(data_batch, sent1_length_batch, sent2_length_batch), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]
        labels = labels
        total += labels.size(0)
        correct += predicted.eq(labels.view_as(predicted)).sum().item()
    return (100 * correct / total)

def train_model(model, lr = 0.001, num_epochs = 5, criterion = nn.CrossEntropyLoss()):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr) 
    max_val_acc = 0
    for epoch in range(num_epochs):
        for i, (data, sent1_lengths, sent2_lengths, labels) in enumerate(train_loader):
            model.train()
            data_batch, sent1_length_batch, sent2_length_batch, label_batch = data, sent1_lengths, sent2_lengths, labels
            optimizer.zero_grad()
            outputs = model(data_batch, sent1_length_batch, sent2_length_batch)
            loss = criterion(outputs, label_batch)
            
            loss.backward()
            optimizer.step()
            # validate every 100 iterations
            if i > 0 and i % 100 == 0:
                # validate
                val_acc = test_model(val_loader, model)
                if val_acc > max_val_acc:
                    max_val_acc = val_acc
                print('Epoch: [{}/{}], Step: [{}/{}], Validation Acc: {}'.format( 
                           epoch+1, num_epochs, i+1, len(train_loader), val_acc))
                print('Epoch: [{}/{}], Step: [{}/{}], Training Loss: {}'.format( 
                           epoch+1, num_epochs, i+1, len(train_loader), loss))
                
    print("Max Validation Accuracy: {}".format(max_val_acc))
    return max_val_acc

In [303]:
model = rnn(emb_size = 300, hidden_size=100, num_layers=1, num_classes=3) 
train_model(model)


Epoch: [1/5], Step: [101/3125], Validation Acc: 36.5
Epoch: [1/5], Step: [101/3125], Training Loss: 1.1044721603393555
Epoch: [1/5], Step: [201/3125], Validation Acc: 40.9
Epoch: [1/5], Step: [201/3125], Training Loss: 1.0390400886535645
Epoch: [1/5], Step: [301/3125], Validation Acc: 46.5
Epoch: [1/5], Step: [301/3125], Training Loss: 0.9962356686592102
Epoch: [1/5], Step: [401/3125], Validation Acc: 46.3
Epoch: [1/5], Step: [401/3125], Training Loss: 1.012796401977539
Epoch: [1/5], Step: [501/3125], Validation Acc: 52.8
Epoch: [1/5], Step: [501/3125], Training Loss: 0.9181193113327026
Epoch: [1/5], Step: [601/3125], Validation Acc: 55.1
Epoch: [1/5], Step: [601/3125], Training Loss: 0.8689990639686584
Epoch: [1/5], Step: [701/3125], Validation Acc: 54.6
Epoch: [1/5], Step: [701/3125], Training Loss: 0.8240164518356323
Epoch: [1/5], Step: [801/3125], Validation Acc: 56.3
Epoch: [1/5], Step: [801/3125], Training Loss: 0.8364970088005066
Epoch: [1/5], Step: [901/3125], Validation Acc: 5

Epoch: [3/5], Step: [801/3125], Validation Acc: 66.9
Epoch: [3/5], Step: [801/3125], Training Loss: 0.650118887424469
Epoch: [3/5], Step: [901/3125], Validation Acc: 69.2
Epoch: [3/5], Step: [901/3125], Training Loss: 0.612956166267395
Epoch: [3/5], Step: [1001/3125], Validation Acc: 69.1
Epoch: [3/5], Step: [1001/3125], Training Loss: 0.7368797659873962
Epoch: [3/5], Step: [1101/3125], Validation Acc: 69.6
Epoch: [3/5], Step: [1101/3125], Training Loss: 0.5010706186294556
Epoch: [3/5], Step: [1201/3125], Validation Acc: 71.2
Epoch: [3/5], Step: [1201/3125], Training Loss: 0.6585914492607117
Epoch: [3/5], Step: [1301/3125], Validation Acc: 70.4
Epoch: [3/5], Step: [1301/3125], Training Loss: 0.6809333562850952
Epoch: [3/5], Step: [1401/3125], Validation Acc: 70.6
Epoch: [3/5], Step: [1401/3125], Training Loss: 0.5426694750785828
Epoch: [3/5], Step: [1501/3125], Validation Acc: 69.2
Epoch: [3/5], Step: [1501/3125], Training Loss: 0.6541162729263306
Epoch: [3/5], Step: [1601/3125], Valid

Epoch: [5/5], Step: [1501/3125], Validation Acc: 71.8
Epoch: [5/5], Step: [1501/3125], Training Loss: 0.6359270215034485
Epoch: [5/5], Step: [1601/3125], Validation Acc: 72.0
Epoch: [5/5], Step: [1601/3125], Training Loss: 0.5624775290489197
Epoch: [5/5], Step: [1701/3125], Validation Acc: 72.1
Epoch: [5/5], Step: [1701/3125], Training Loss: 0.3539153039455414
Epoch: [5/5], Step: [1801/3125], Validation Acc: 71.7
Epoch: [5/5], Step: [1801/3125], Training Loss: 0.4758181571960449
Epoch: [5/5], Step: [1901/3125], Validation Acc: 71.2
Epoch: [5/5], Step: [1901/3125], Training Loss: 0.45438140630722046
Epoch: [5/5], Step: [2001/3125], Validation Acc: 71.6
Epoch: [5/5], Step: [2001/3125], Training Loss: 0.46809372305870056
Epoch: [5/5], Step: [2101/3125], Validation Acc: 71.7
Epoch: [5/5], Step: [2101/3125], Training Loss: 0.5030506253242493
Epoch: [5/5], Step: [2201/3125], Validation Acc: 72.5
Epoch: [5/5], Step: [2201/3125], Training Loss: 0.7975351810455322
Epoch: [5/5], Step: [2301/3125

73.3

## CNN model

In [348]:
class cnn(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes):

        super(cnn, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(WEIGHTS), freeze=True)
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.maxpool = nn.MaxPool1d(60)
        self.linear1 = nn.Linear(2*hidden_size, 500)
        self.linear2 = nn.Linear(500, num_classes)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x, sent1_lengths, sent2_lengths):
        
        batch_size = x.size()[0]
        seq_len = x.size()[2]
        
        sent1s = torch.tensor(x[:, 0, :])
        sent2s = torch.tensor(x[:, 1, :])
        ordered_sents = torch.cat([sent1s, sent2s], dim=0)

        embed = self.embedding(ordered_sents)
        hidden = self.conv1(embed.transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(2*batch_size, seq_len, hidden.size(-1))

        hidden = self.conv2(hidden.transpose(1,2)).transpose(1,2)
        hidden = F.relu(hidden.contiguous().view(-1, hidden.size(-1))).view(2*batch_size, seq_len, hidden.size(-1))
        hidden = self.maxpool(hidden.transpose(1, 2)).transpose(1, 2).squeeze(dim=1)
        
        hidden_sent1s = hidden[0:batch_size, :]
        hidden_sent2s = hidden[batch_size:, :]     
        
        linear1 = self.linear1(torch.cat([hidden_sent1s, hidden_sent2s], dim=1))
        #linear1 = self.linear1(torch.tensor(hidden_sent1s) + torch.tensor(hidden_sent2s))
        #linear1 = self.linear1(torch.tensor(hidden_sent1s)*torch.tensor(hidden_sent2s))
        linear1 = F.relu(linear1.contiguous().view(-1, linear1.size(-1))).view(linear1.shape)
        linear1 = self.dropout(linear1)
        logits = self.linear2(linear1)
        
        return logits

In [340]:
model = cnn(emb_size = 300, hidden_size=50, num_layers=1, num_classes=3)
train_model(model)


Epoch: [1/5], Step: [101/3125], Validation Acc: 43.8
Epoch: [1/5], Step: [101/3125], Training Loss: 1.1325687170028687
Epoch: [1/5], Step: [201/3125], Validation Acc: 51.5
Epoch: [1/5], Step: [201/3125], Training Loss: 0.9913889765739441
Epoch: [1/5], Step: [301/3125], Validation Acc: 57.6
Epoch: [1/5], Step: [301/3125], Training Loss: 0.9026820659637451
Epoch: [1/5], Step: [401/3125], Validation Acc: 58.3
Epoch: [1/5], Step: [401/3125], Training Loss: 0.9187735319137573
Epoch: [1/5], Step: [501/3125], Validation Acc: 58.0
Epoch: [1/5], Step: [501/3125], Training Loss: 0.9364725351333618
Epoch: [1/5], Step: [601/3125], Validation Acc: 59.6
Epoch: [1/5], Step: [601/3125], Training Loss: 0.9069799184799194
Epoch: [1/5], Step: [701/3125], Validation Acc: 59.7
Epoch: [1/5], Step: [701/3125], Training Loss: 0.7501728534698486
Epoch: [1/5], Step: [801/3125], Validation Acc: 60.6
Epoch: [1/5], Step: [801/3125], Training Loss: 0.8851989507675171
Epoch: [1/5], Step: [901/3125], Validation Acc: 

Epoch: [3/5], Step: [801/3125], Validation Acc: 66.7
Epoch: [3/5], Step: [801/3125], Training Loss: 0.763251006603241
Epoch: [3/5], Step: [901/3125], Validation Acc: 67.6
Epoch: [3/5], Step: [901/3125], Training Loss: 0.7733426690101624
Epoch: [3/5], Step: [1001/3125], Validation Acc: 67.9
Epoch: [3/5], Step: [1001/3125], Training Loss: 0.7706139087677002
Epoch: [3/5], Step: [1101/3125], Validation Acc: 67.0
Epoch: [3/5], Step: [1101/3125], Training Loss: 0.925399661064148
Epoch: [3/5], Step: [1201/3125], Validation Acc: 67.8
Epoch: [3/5], Step: [1201/3125], Training Loss: 0.7828293442726135
Epoch: [3/5], Step: [1301/3125], Validation Acc: 68.0
Epoch: [3/5], Step: [1301/3125], Training Loss: 0.7847010493278503
Epoch: [3/5], Step: [1401/3125], Validation Acc: 67.8
Epoch: [3/5], Step: [1401/3125], Training Loss: 0.6668834686279297
Epoch: [3/5], Step: [1501/3125], Validation Acc: 66.8
Epoch: [3/5], Step: [1501/3125], Training Loss: 0.6030179262161255
Epoch: [3/5], Step: [1601/3125], Valid

Epoch: [5/5], Step: [1501/3125], Validation Acc: 71.0
Epoch: [5/5], Step: [1501/3125], Training Loss: 0.6698805689811707
Epoch: [5/5], Step: [1601/3125], Validation Acc: 69.6
Epoch: [5/5], Step: [1601/3125], Training Loss: 0.6566417217254639
Epoch: [5/5], Step: [1701/3125], Validation Acc: 70.0
Epoch: [5/5], Step: [1701/3125], Training Loss: 0.45077043771743774
Epoch: [5/5], Step: [1801/3125], Validation Acc: 70.4
Epoch: [5/5], Step: [1801/3125], Training Loss: 0.6122647523880005
Epoch: [5/5], Step: [1901/3125], Validation Acc: 70.0
Epoch: [5/5], Step: [1901/3125], Training Loss: 0.5797339081764221
Epoch: [5/5], Step: [2001/3125], Validation Acc: 70.0
Epoch: [5/5], Step: [2001/3125], Training Loss: 0.5525529980659485
Epoch: [5/5], Step: [2101/3125], Validation Acc: 68.4
Epoch: [5/5], Step: [2101/3125], Training Loss: 0.46992507576942444
Epoch: [5/5], Step: [2201/3125], Validation Acc: 69.2
Epoch: [5/5], Step: [2201/3125], Training Loss: 0.6059962511062622
Epoch: [5/5], Step: [2301/3125

71.0

## Multi-Genre NLI

In [341]:
mnli_val = pd.read_csv('mnli_val.tsv', sep='\t')

In [342]:
mnli_val['genre'].unique()

array(['fiction', 'telephone', 'slate', 'government', 'travel'],
      dtype=object)

In [343]:
mnli_val_dict = {}
for x in mnli_val['genre'].unique():
    filtered = mnli_val[mnli_val['genre'] == x]
    mnli_val_dict[x] = {}
    mnli_val_dict[x]["sentence1"] = list(filtered["sentence1"])
    mnli_val_dict[x]["sentence2"] = list(filtered["sentence2"])
    mnli_val_dict[x]["label"] = labels_to_integers(list(filtered["label"]))

### RNN model

In [353]:
rnn_results = {}
model = rnn(emb_size = 300, hidden_size=100, num_layers=1, num_classes=3)
for genre in mnli_val_dict.keys():
    
    SEED =234
    random.Random(SEED).shuffle(mnli_val_dict[genre]['sentence1'])
    random.Random(SEED).shuffle(mnli_val_dict[genre]['sentence2'])
    random.Random(SEED).shuffle(mnli_val_dict[genre]['label'])
    sentence1_tokenized = tokenize(mnli_val_dict[genre]['sentence1'])
    sentence2_tokenized = tokenize(mnli_val_dict[genre]['sentence2'])
    sentence1_indices = token2index_dataset(sentence1_tokenized)
    sentence2_indices = token2index_dataset(sentence2_tokenized)
    
    dataset = NLIDataset(sentence1_indices, sentence2_indices, mnli_val_dict[genre]['label'])
    dataloader = torch.utils.data.DataLoader(dataset=dataset, 
                                             batch_size=BATCH_SIZE, 
                                             collate_fn=NLI_collate_func,
                                             shuffle=True)
    
    rnn_results[genre] = test_model(dataloader, model)
    print("Genre {} has validation accuracy: {}".format(genre, rnn_results[genre]))

Genre fiction has validation accuracy: 34.77386934673367
Genre telephone has validation accuracy: 36.517412935323385
Genre slate has validation accuracy: 34.830339321357286
Genre government has validation accuracy: 36.71259842519685
Genre travel has validation accuracy: 35.53971486761711


### CNN model

In [357]:
cnn_results = {}
model = cnn(emb_size = 300, hidden_size=50, num_layers=1, num_classes=3)
for genre in mnli_val_dict.keys():
    
    SEED =234
    random.Random(SEED).shuffle(mnli_val_dict[genre]['sentence1'])
    random.Random(SEED).shuffle(mnli_val_dict[genre]['sentence2'])
    random.Random(SEED).shuffle(mnli_val_dict[genre]['label'])
    sentence1_tokenized = tokenize(mnli_val_dict[genre]['sentence1'])
    sentence2_tokenized = tokenize(mnli_val_dict[genre]['sentence2'])
    sentence1_indices = token2index_dataset(sentence1_tokenized)
    sentence2_indices = token2index_dataset(sentence2_tokenized)
    
    dataset = NLIDataset(sentence1_indices, sentence2_indices, mnli_val_dict[genre]['label'])
    dataloader = torch.utils.data.DataLoader(dataset=dataset, 
                                             batch_size=BATCH_SIZE, 
                                             collate_fn=NLI_collate_func,
                                             shuffle=True)
    
    cnn_results[genre] = test_model(dataloader, model)
    print("Genre {} has validation accuracy: {}".format(genre, cnn_results[genre]))

Genre fiction has validation accuracy: 34.87437185929648
Genre telephone has validation accuracy: 36.517412935323385
Genre slate has validation accuracy: 34.830339321357286
Genre government has validation accuracy: 36.71259842519685
Genre travel has validation accuracy: 35.437881873727086
