In [1]:
import numpy as np
import torch
import csv
import pandas as pd
import io
from numpy import random
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pickle
from torch.autograd import Variable

torch.backends.cudnn.enabled=False
BATCH_SIZE = 32

# Check GPU availability

In [2]:
if torch.cuda.is_available and torch.has_cudnn:
    device = torch.device('cuda')
else:
    device = torch.device("cpu")

# load data

In [3]:
def load_tsv(filename):
    train_premise = []
    train_hypo = []
    train_label = []
    with open(filename) as tsvfile:
      fd = csv.reader(tsvfile, delimiter='\t')
      for row in fd:
            
        train_premise.append(row[0].split())
        train_hypo.append(row[1].split())
        
        if row[2] == 'neutral': 
            train_label.append(1)
        elif row[2] == 'entailment':
            train_label.append(2)
        else:
            train_label.append(0)
            
    train_premise = train_premise[1:]
    train_hypo = train_hypo[1:]
    train_label = train_label[1:]
    
    return train_premise, train_hypo, train_label

In [5]:
train_premise,train_hypo,train_label = load_tsv('snli_train.tsv')
val_premise,val_hypo,val_label = load_tsv('snli_val.tsv')

In [7]:
words = 40000

import numpy as np

with open('wiki-news-300d-1M.vec') as f:
    loaded_embeddings_ft = np.zeros((words + 2, 300))
    words_ft = {}
    idx2words_ft = {}
    ordered_words_ft = []
    
    for i, line in enumerate(f):
        if i >= words: 
            break
        s = line.split()
        loaded_embeddings_ft[i, :] = np.asarray(s[1:])
        words_ft[s[0]] = i
        idx2words_ft[i] = s[0]
        ordered_words_ft.append(s[0])

In [None]:
idx_pad = len(words_ft)
idx_unk = len(words_ft) + 1
words_ft['<pad>'] = idx_pad
words_ft['<unk>'] = idx_unk
idx2words_ft[idx_pad] = '<pad>'
idx2words_ft[idx_unk] = '<unk>'
ordered_words_ft.append('<pad>')
ordered_words_ft.append('<unk>')
loaded_embeddings_ft[idx_pad] = np.zeros((300,))
loaded_embeddings_ft[idx_unk] = random.normal(loc=0.0, scale=1.0, size=300)

In [9]:
# convert token to id in the dataset
def token2index_dataset(tokens_data):
    indices_data = []
    for tokens in tokens_data:
        index_list = []
        for token in tokens:
            try:
                index_list.append(words_ft[token])
            except KeyError:
                index_list.append(idx_unk)
        indices_data.append(index_list)
    return indices_data

train_premise_indices = token2index_dataset(train_premise)
train_hypo_indices = token2index_dataset(train_hypo)
val_premise_indices = token2index_dataset(val_premise)
val_hypo_indices = token2index_dataset(val_hypo)

# Dataloader Class

Reference - Modified from Lab4 in DS 1011 Fall 2018

In [10]:
import torch
from torch.utils.data import Dataset

class load_dataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """
    
    def __init__(self, data_list_premise,data_list_hypo, target_list):

        self.data_list_premise = data_list_premise
        self.data_list_hypo = data_list_hypo
        self.target_list = target_list
        assert (len(self.data_list_premise) == len(self.target_list))
        assert (len(self.data_list_hypo) == len(self.target_list))

    def __len__(self):
        return len(self.data_list_premise)
        
    def __getitem__(self, key):
        
        token_idx_premise = self.data_list_premise[key]
        token_idx_hypo = self.data_list_hypo[key]
        label = self.target_list[key]
        return [token_idx_premise, token_idx_hypo, len(token_idx_premise), len(token_idx_hypo),label]

def collate_func(batch):

    data_list_premise = []
    data_list_hypo = []
    label_list = []
    length_list_premise = []
    length_list_hypo = []

    for datum in batch:
        label_list.append(datum[4])
        length_list_premise.append(datum[2])
        length_list_hypo.append(datum[3])
    
    length_list_premise_sorted = sorted(length_list_premise)
    premise_ceiling = length_list_premise_sorted[int(round(len(batch)*0.99))-1]
    length_list_hypo_sorted = sorted(length_list_hypo)
    hypo_ceiling = length_list_hypo_sorted[int(round(len(batch)*0.99))-1]
    
    max_premise = 0
    max_hypo = 0
    for datum in batch:
        if datum[2] > max_premise:
            max_premise = datum[2]
        
        if datum[3] > max_hypo:
            max_hypo = datum[3]

    for datum in batch:

        padded_vec_premise = np.pad(np.array(datum[0]), 
                                pad_width=((0,max_premise - datum[2])), 
                                mode="constant", constant_values=0)
        padded_vec_hypo = np.pad(np.array(datum[1]), 
                                pad_width=((0,max_hypo - datum[3])), 
                                mode="constant", constant_values=0)
        data_list_premise.append(padded_vec_premise)
        data_list_hypo.append(padded_vec_hypo)
        
    return [torch.from_numpy(np.array(data_list_premise)), 
            torch.from_numpy(np.array(data_list_hypo)),
            torch.LongTensor(length_list_premise), 
            torch.LongTensor(length_list_hypo),
            torch.LongTensor(label_list)]

# Dataloaders

In [11]:
train_loader = load_dataset(train_premise_indices,train_hypo_indices, train_label)
val_loader = load_dataset(val_premise_indices, val_hypo_indices, val_label)

train_dataset = load_dataset(train_premise_indices,train_hypo_indices, train_label)
train_loader = torch.utils.data.DataLoader(dataset=(train_dataset), 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=True)

val_dataset = load_dataset(val_premise_indices,val_hypo_indices, val_label)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=True)

# RNN

Reference - Modified from Lab4 in Dpremise011 Fall 2018

In [13]:
import torch.nn as nn
class RNN(nn.Module):
    def __init__(self, weight, emb_size, hidden_size, num_layers, num_classes):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(RNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
                
        self.embedding = nn.Embedding.from_pretrained(torch.Tensor(weight), freeze = True).float()
        self.rnn = nn.GRU(emb_size, hidden_size,num_layers,batch_first = True, bidirectional = True)
        
        self.fc = nn.Sequential(
            nn.Linear(2*hidden_size, hidden_size), 
            nn.ReLU(inplace=True), 
            nn.Linear(hidden_size, num_classes))

        self.fc1 = nn.Linear(2*hidden_size, hidden_size)
        self.relu = nn.LeakyReLU(inplace=True)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        

    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden = torch.randn(2*self.num_layers, batch_size, self.hidden_size)

        return hidden
    
    
    def forward(self, premise, hypo, lengths_premise, lengths_hypo):
        # reset hidden state

        batch_size_premise, seq_len_premise = premise.size()
        batch_size_hypo, seq_len_hypo = hypo.size()

        self.hidden_premise = self.init_hidden(batch_size_premise).to(device)
        self.hidden_hypo = self.init_hidden(batch_size_hypo).to(device)
        
        # Compute sorted sequence lengths
        _, idx_sort_premise = torch.sort(lengths_premise, dim=0, descending=True)
        _, idx_unsort_premise = torch.sort(idx_sort_premise, dim=0)
        _, idx_sort_hypo = torch.sort(lengths_hypo, dim=0, descending=True)
        _, idx_unsort_hypo = torch.sort(idx_sort_hypo, dim=0)
        
        lengths_premise = list(lengths_premise[idx_sort_premise])
        lengths_hypo = list(lengths_hypo[idx_sort_hypo])

        rnn_input_premise = premise.index_select(0, idx_sort_premise)
        rnn_input_hypo = hypo.index_select(0, idx_sort_hypo)
        
        # get embedding of characters
        embed_premise = self.embedding(rnn_input_premise)
        embed_hypo = self.embedding(rnn_input_hypo)
        
        embed_premise = torch.nn.utils.rnn.pack_padded_sequence(embed_premise, lengths_premise, batch_first=True)
        embed_hypo = torch.nn.utils.rnn.pack_padded_sequence(embed_hypo, lengths_hypo, batch_first=True)
        
        # fprop though RNN
        rnn_out_premise, self.hidden_premise = self.rnn(embed_premise, self.hidden_premise)
        rnn_out_hypo, self.hidden_hypo = self.rnn(embed_hypo, self.hidden_hypo)
        
        hn_premise = torch.sum(self.hidden_premise, dim=0)
        hn_hypo = torch.sum(self.hidden_hypo, dim=0)
                
        output_premise = torch.index_select(hn_premise,0, idx_unsort_premise)
        output_hypo = torch.index_select(hn_hypo,0, idx_unsort_hypo)
        
        rep = output_premise*output_hypo
        res = self.fc(rep)
        
        return res

# CNN

Reference - Modified from Lab4 in Dpremise011 Fall 2018

In [14]:
class CNN(nn.Module):
    def __init__(self, weight,emb_size, hidden_size, kernel_size, num_classes):

        super(CNN, self).__init__()

        self.kernel_size, self.hidden_size = kernel_size, hidden_size
        self.embedding = nn.Embedding.from_pretrained(torch.Tensor(weight), freeze = True).float()
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=self.kernel_size, padding=1)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=self.kernel_size, padding=1)

        self.fc = nn.Sequential(
            nn.Linear(2*hidden_size, hidden_size), 
            nn.ReLU(inplace=True), 
            nn.Linear(hidden_size, num_classes))

    def forward(self, premise, hypo, lengths_premise, lengths_hypo):
        batch_size_premise, seq_len_premise = premise.size()
        batch_size_hypo, seq_len_hypo = hypo.size()
        
        embed_premise = self.embedding(premise).to(device)
        embed_hypo = self.embedding(hypo).to(device)
       
        hidden_premise = self.conv1(embed_premise.transpose(1,2)).transpose(1,2)
        hidden_hypo = self.conv1(embed_hypo.transpose(1,2)).transpose(1,2)
       
        hidden_premise = F.relu(hidden_premise.contiguous().view(-1, hidden_premise.size(-1))).view(batch_size_premise, hidden_premise.size(1), hidden_premise.size(-1))
        hidden_hypo = F.relu(hidden_hypo.contiguous().view(-1, hidden_hypo.size(-1))).view(batch_size_hypo, hidden_hypo.size(1), hidden_hypo.size(-1))
       
        hidden_premise = self.conv2(hidden_premise.transpose(1,2)).transpose(1,2)
        hidden_hypo = self.conv2(hidden_hypo.transpose(1,2)).transpose(1,2)
        
        hidden_premise = F.relu(hidden_premise.contiguous().view(-1, hidden_premise.size(-1))).view(batch_size_premise, hidden_premise.size(1), hidden_premise.size(-1))
        hidden_hypo = F.relu(hidden_hypo.contiguous().view(-1, hidden_hypo.size(-1))).view(batch_size_hypo, hidden_hypo.size(1), hidden_hypo.size(-1))
       
        hidden_premise,hidden_premise_idx = torch.max(hidden_premise, dim=1)
        
        hidden_hypo,hidden_hypo_idx = torch.max(hidden_hypo, dim=1)
        
        rep = torch.cat([hidden_premise, hidden_hypo], 1)
        output = self.fc(rep)
        
        return output

In [15]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for data_premise, data_hypo, lengths_premise,lengths_hypo, labels in loader:
        data_batch_premise, data_batch_hypo, lengths_batch_premise,lengths_batch_hypo, label_batch = data_premise.to(device), data_hypo.to(device), lengths_premise.to(device),lengths_hypo.to(device), labels.to(device)
        outputs = F.softmax(model(data_batch_premise, data_batch_hypo, lengths_batch_premise,lengths_batch_hypo), dim=1)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.size(0)
        correct += predicted.eq(label_batch.view_as(predicted)).sum().item()
    return (100 * correct / total)

# Run RNN

Reference - Modified from Lab4 in Dpremise011 Fall 2018

In [17]:
def train_rnn(hs,model_name):
    best_acc = 0
    model = RNN(weight = loaded_embeddings_ft,
                emb_size= 300, 
                hidden_size=hs, 
                num_layers=1, 
                num_classes =3).to(device)

    learning_rate = 5e-4
    num_epochs = 7

    # Criterion and Optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model for premise
    total_step = len(train_loader)

    val_acc_list_rnn0 = []
    train_loss_rnn0 = []
    train_acc_list_rnn0 = []

    for epoch in range(num_epochs):
        for i, (data_premise, data_hypo,lengths_premise, lengths_hypo, labels) in enumerate(train_loader):
            data_premise = data_premise.to(device)
            data_hypo = data_hypo.to(device)
            lengths_premise = lengths_premise.to(device)
            lengths_hypo = lengths_hypo.to(device)
            labels = labels.to(device)
            model.train()
            optimizer.zero_grad()

            # Forward pass
            outputs = model(data_premise, data_hypo, lengths_premise,lengths_hypo)
            loss = criterion(outputs, labels)
            train_loss_rnn0.append(loss)

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # validate every 100 iterations
            if i > 0 and i % 500 == 0:
                # validate
                val_acc = test_model(val_loader, model)
                train_acc_list_rnn0.append(test_model(train_loader, model))
                val_acc_list_rnn0.append(val_acc)
                if val_acc > best_acc:
                    best_acc = val_acc
                    torch.save(model.state_dict(), model_name+'.pth')
                print('Epoch: [{}/{}], Step: [{}/{}], Train Loss: {}, Validation Acc: {}'.format(
                           epoch+1, num_epochs, i+1, len(train_loader), loss, val_acc))
    return train_acc_list_rnn0,val_acc_list_rnn0,train_loss_rnn0
        

In [18]:
train_acc_list_rnn1,val_acc_list_rnn1,train_loss_rnn1 = train_rnn(300,'rnn_hs_300')
with open('val_acc_list_rnn1', 'wb') as f:
     pickle.dump( val_acc_list_rnn1, f)
with open('train_acc_list_rnn1', 'wb') as f:
     pickle.dump(train_acc_list_rnn1, f)
with open('train_loss_rnn1', 'wb') as f:
     pickle.dump(train_loss_rnn1, f)

Epoch: [1/10], Step: [501/3125], Validation Acc: 56.2
Epoch: [1/10], Step: [1001/3125], Validation Acc: 58.5
Epoch: [1/10], Step: [1501/3125], Validation Acc: 57.9
Epoch: [1/10], Step: [2001/3125], Validation Acc: 60.0
Epoch: [1/10], Step: [2501/3125], Validation Acc: 61.6
Epoch: [1/10], Step: [3001/3125], Validation Acc: 62.3
Epoch: [2/10], Step: [501/3125], Validation Acc: 64.5
Epoch: [2/10], Step: [1001/3125], Validation Acc: 64.0
Epoch: [2/10], Step: [1501/3125], Validation Acc: 64.8
Epoch: [2/10], Step: [2001/3125], Validation Acc: 64.9


KeyboardInterrupt: 

In [42]:
train_acc_list_rnn3,val_acc_list_rnn3,train_loss_rnn3 = train_rnn(100,'rnn_hs_100')
with open('val_acc_list_rnn2', 'wb') as f:
     pickle.dump( val_acc_list_rnn3, f)
with open('train_acc_list_rnn2', 'wb') as f:
     pickle.dump(train_acc_list_rnn3, f)
with open('train_loss_rnn2', 'wb') as f:
     pickle.dump(train_loss_rnn3, f)

Epoch: [1/10], Step: [501/3125], Validation Acc: 56.8
Epoch: [1/10], Step: [1001/3125], Validation Acc: 58.1
Epoch: [1/10], Step: [1501/3125], Validation Acc: 61.1
Epoch: [1/10], Step: [2001/3125], Validation Acc: 59.2
Epoch: [1/10], Step: [2501/3125], Validation Acc: 60.4
Epoch: [1/10], Step: [3001/3125], Validation Acc: 60.2
Epoch: [2/10], Step: [501/3125], Validation Acc: 61.7
Epoch: [2/10], Step: [1001/3125], Validation Acc: 62.9
Epoch: [2/10], Step: [1501/3125], Validation Acc: 62.8
Epoch: [2/10], Step: [2001/3125], Validation Acc: 64.4
Epoch: [2/10], Step: [2501/3125], Validation Acc: 65.0
Epoch: [2/10], Step: [3001/3125], Validation Acc: 64.8
Epoch: [3/10], Step: [501/3125], Validation Acc: 66.8
Epoch: [3/10], Step: [1001/3125], Validation Acc: 66.5
Epoch: [3/10], Step: [1501/3125], Validation Acc: 64.3
Epoch: [3/10], Step: [2001/3125], Validation Acc: 66.5
Epoch: [3/10], Step: [2501/3125], Validation Acc: 67.7
Epoch: [3/10], Step: [3001/3125], Validation Acc: 65.7
Epoch: [4/10]

# Run CNN

Reference - Modified from Lab4 in Dpremise011 Fall 2018

In [36]:
def train_cnn(hs,ks,model_name):    
    best_acc = 0

    model = CNN(weight = loaded_embeddings_ft,
                emb_size = 300, 
                hidden_size=hs, 
                kernel_size=ks, 
                num_classes =3).to(device)

    learning_rate = 5e-4
    num_epochs = 7

    # Criterion and Optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model for premise
    total_step = len(train_loader)

    val_acc_list_cnn0 = []
    train_loss_cnn0 = []
    train_acc_list_cnn0 = []

    for epoch in range(num_epochs):
        for i, (data_premise, data_hypo,lengths_premise, lengths_hypo, labels) in enumerate(train_loader):
            data_premise = data_premise.to(device)
            data_hypo = data_hypo.to(device)
            lengths_premise = lengths_premise.to(device)
            lengths_hypo = lengths_hypo.to(device)
            labels = labels.to(device)
            model.train()
            optimizer.zero_grad()

            # Forward pass
            outputs = model(data_premise, data_hypo, lengths_premise,lengths_hypo)
            loss = criterion(outputs, labels)
            train_loss_cnn0.append(loss)

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # validate every 100 iterations
            if i > 0 and i % 500 == 0:
                # validate
                val_acc = test_model(val_loader, model)
                train_acc = test_model(train_loader, model)
                train_acc_list_cnn0.append(train_acc)
                val_acc_list_cnn0.append(val_acc)
                if val_acc > best_acc:
                    best_acc = val_acc
                    torch.save(model.state_dict(), model_name+'.pth''.pth')
                print('Epoch: [{}/{}], Step: [{}/{}], Train Loss: {}, Validation Acc: {}'.format(
                           epoch+1, num_epochs, i+1, len(train_loader), loss, val_acc))
    return train_acc_list_cnn0,val_acc_list_cnn0,train_loss_cnn0


In [17]:
train_acc_list_cnn2,val_acc_list_cnn2,train_loss_cnn2 = train_cnn(400,3,'cnn_hs_400_ks_3')
with open('val_acc_list_cnn1', 'wb') as f:
     pickle.dump( val_acc_list_cnn2, f)
with open('train_acc_list_cnn1', 'wb') as f:
     pickle.dump(train_acc_list_cnn2, f)
with open('train_loss_cnn1', 'wb') as f:
     pickle.dump(train_loss_cnn2, f)

Epoch: [1/10], Step: [501/3125], Train Loss: 0.9574393033981323, Validation Acc: 42.1
Epoch: [1/10], Step: [1001/3125], Train Loss: 0.9434483051300049, Validation Acc: 58.0
Epoch: [1/10], Step: [1501/3125], Train Loss: 0.8900450468063354, Validation Acc: 58.2
Epoch: [1/10], Step: [2001/3125], Train Loss: 0.8556941747665405, Validation Acc: 59.2
Epoch: [1/10], Step: [2501/3125], Train Loss: 0.7658823728561401, Validation Acc: 60.7
Epoch: [1/10], Step: [3001/3125], Train Loss: 0.7631287574768066, Validation Acc: 60.9
Epoch: [2/10], Step: [501/3125], Train Loss: 0.7859179973602295, Validation Acc: 61.4
Epoch: [2/10], Step: [1001/3125], Train Loss: 0.9447571635246277, Validation Acc: 61.2
Epoch: [2/10], Step: [1501/3125], Train Loss: 0.9533923268318176, Validation Acc: 61.5
Epoch: [2/10], Step: [2001/3125], Train Loss: 0.8006707429885864, Validation Acc: 64.0
Epoch: [2/10], Step: [2501/3125], Train Loss: 0.6546100974082947, Validation Acc: 63.1
Epoch: [2/10], Step: [3001/3125], Train Loss:

In [18]:
train_acc_list_cnn3,val_acc_list_cnn3,train_loss_cnn3 = train_cnn(100,2,'cnn_hs_100_ks_2')
with open('val_acc_list_cnn2', 'wb') as f:
     pickle.dump( val_acc_list_cnn3, f)
with open('train_acc_list_cnn2', 'wb') as f:
     pickle.dump(train_acc_list_cnn3, f)
with open('train_loss_cnn2', 'wb') as f:
     pickle.dump(train_loss_cnn3, f)

Epoch: [1/10], Step: [501/3125], Train Loss: 1.0572879314422607, Validation Acc: 42.2
Epoch: [1/10], Step: [1001/3125], Train Loss: 0.9764376878738403, Validation Acc: 55.4
Epoch: [1/10], Step: [1501/3125], Train Loss: 0.8318452835083008, Validation Acc: 56.9
Epoch: [1/10], Step: [2001/3125], Train Loss: 0.7551078200340271, Validation Acc: 58.2
Epoch: [1/10], Step: [2501/3125], Train Loss: 0.8722537755966187, Validation Acc: 58.9
Epoch: [1/10], Step: [3001/3125], Train Loss: 1.0347135066986084, Validation Acc: 60.2
Epoch: [2/10], Step: [501/3125], Train Loss: 0.834957480430603, Validation Acc: 60.4
Epoch: [2/10], Step: [1001/3125], Train Loss: 0.9010812044143677, Validation Acc: 60.0
Epoch: [2/10], Step: [1501/3125], Train Loss: 0.8308014869689941, Validation Acc: 61.1
Epoch: [2/10], Step: [2001/3125], Train Loss: 0.7764003276824951, Validation Acc: 59.9
Epoch: [2/10], Step: [2501/3125], Train Loss: 0.8452682495117188, Validation Acc: 60.5
Epoch: [2/10], Step: [3001/3125], Train Loss: 

# Plots

In [128]:
import matplotlib.pyplot as plt

In [97]:
data1 = pickle.load(open( str("val_acc_list_cnn1"), "rb" ))
data2 = pickle.load(open( str("train_acc_list_cnn1"), "rb" ))
data3 = pickle.load(open( str("train_loss_cnn1"), "rb" ))
data4 = pickle.load(open( str("val_acc_list_cnn2"), "rb" ))
data5 = pickle.load(open( str("train_acc_list_cnn2"), "rb" ))
data6 = pickle.load(open( str("train_loss_cnn2"), "rb" ))

data7 = pickle.load(open( str("val_acc_list_rnn1"), "rb" ))
data8 = pickle.load(open( str("train_acc_list_rnn1"), "rb" ))
data9 = pickle.load(open( str("train_loss_rnn1"), "rb" ))
data10 = pickle.load(open( str("val_acc_list_rnn2"), "rb" ))
data11 = pickle.load(open( str("train_acc_list_rnn2"), "rb" ))
data12 = pickle.load(open( str("train_loss_rnn2"), "rb" ))

In [None]:
plt.plot(range(42), data1, label = 'val_acc_list_cnn1')
plt.plot(range(42), data2, label = 'train_acc_list_cnn1')
plt.plot(range(42), data3, label = 'train_loss_cnn1')
plt.plot(range(42), data4, label = 'val_acc_list_cnn2')
plt.plot(range(42), data5, label = 'train_acc_list_cnn2')
plt.plot(range(42), data6, label = 'train_loss_cnn2')
plt.xlabel('Step*100')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [None]:
plt.plot(range(42), data1, label = 'val_acc_list_rnn1')
plt.plot(range(42), data2, label = 'train_acc_list_rnn1')
plt.plot(range(42), data3, label = 'train_loss_rnn1')
plt.plot(range(42), data4, label = 'val_acc_list_rnn2')
plt.plot(range(42), data5, label = 'train_acc_list_rnn2')
plt.plot(range(42), data6, label = 'train_loss_rnn2')
plt.xlabel('Step*100')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Load MultiNLI

In [80]:
def load_mnli(filename):
    train_premise = []
    train_hypo = []
    train_genre = []
    train_label = []
    with open(filename) as tsvfile:
      fd = csv.reader(tsvfile, delimiter='\t')
      for row in fd:
            
        train_premise.append(row[0].split())
        train_hypo.append(row[1].split())
        
        if row[2] == 'neutral': 
            train_label.append(1)
        elif row[2] == 'entailment':
            train_label.append(2)
        else:
            train_label.append(0)
        
        train_genre.append(row[3])
        
    train_premise = train_premise[1:]
    train_hypo = train_hypo[1:]
    
    train_label = train_label[1:]
    train_genre = train_genre[1:]
    
    return train_premise,train_hypo, train_label, train_genre

In [81]:
test_premise,test_hypo,test_label,test_genre = load_mnli('mnli_val.tsv')

In [None]:
genre_list = list(pd.Series(test_genre).unique())

In [83]:
def load_mnli_by_genre(filename,genre):
    
    premise = []
    hypo = []
    label = []
    with open(filename) as tsvfile:
      fd = csv.reader(tsvfile, delimiter='\t')
      for row in fd:
        
        if row[3] == genre:
            if len(row[0].split())<1000 and len(row[1].split())<1000:
                premise.append(row[0].split())
                hypo.append(row[1].split())

                if row[2] == 'neutral': 
                    label.append(1)
                elif row[2] == 'entailment':
                    label.append(2)
                else:
                    label.append(0)

    return premise,hypo,label

In [None]:
device = torch.device("cpu")
RNN_acc = []
CNN_acc = []
for genre in genre_list:
    test_premise,test_hypo,test_label = load_mnli_by_genre('mnli_val.tsv',genre)
    test_premise_indices = token2index_dataset(test_premise)
    test_hypo_indices = token2index_dataset(test_hypo)
    test_dataset = load_dataset(test_premise_indices,test_hypo_indices, test_label)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                               batch_size=32,
                                               collate_fn=collate_func,
                                               shuffle=True)  

    model_rnn = RNN(weight = loaded_embeddings_ft,
                    emb_size= 300, 
                    hidden_size=400, 
                    num_layers=1, 
                    num_classes =3).to(device)
    model_rnn.load_state_dict(torch.load("rnn_hs_300.pth",map_location = "cpu"))
    model_rnn.eval()

    model_cnn = CNN(weight = loaded_embeddings_ft,
                    emb_size= 300, 
                    hidden_size=400, 
                    kernel_size=2, 
                    num_classes =3
                    ).to(device)
    model_cnn.load_state_dict(torch.load("cnn_hs_400_ks_3.pth", map_location = "cpu"))
    model_cnn.eval()
    
    rnn_test = test_model(test_loader, model_rnn)
    cnn_test = test_model(test_loader, model_cnn)
    RNN_acc.append(rnn_test)
    CNN_acc.append(cnn_test)
    print('RNN with',genre,': ', round(rnn_test, 2))
    print('CNN with',genre,': ', round(cnn_test, 2))

