In [1]:
import csv
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import Counter
import io
import numpy as np
import torch
from torch.utils.data import Dataset
import pickle as pkl
import matplotlib.pyplot as plt
%matplotlib inline

BATCH_SIZE = 32

In [2]:
torch.cuda.is_available()

True

In [3]:
if torch.cuda.is_available():
    cuda = torch.device('cuda')
else:
    cuda = torch.device('cpu')

### Process data

In [4]:
mtrain = pd.read_csv('hw2_data/mnli_train.tsv', delimiter='\t')
mval = pd.read_csv('hw2_data/mnli_val.tsv', delimiter='\t')

In [5]:
mtrain.head()

Unnamed: 0,sentence1,sentence2,label,genre
0,and now that was in fifty one that 's forty ye...,It was already a problem forty years ago but n...,neutral,telephone
1,Jon could smell baked bread on the air and his...,Jon smelt food in the air and was hungry .,neutral,fiction
2,it will be like Italian basketball with the uh...,This type of Italian basketball is nothing lik...,contradiction,telephone
3,well i think that 's about uh that 's about co...,Sorry but we are not done just yet .,contradiction,telephone
4,"Good job tenure , that is -- because in yet an...","Dr. Quinn , Medicine Woman , was worked on by ...",entailment,slate


In [6]:
np.unique(mtrain['genre'])

array(['fiction', 'government', 'slate', 'telephone', 'travel'],
      dtype=object)

In [7]:
mval.groupby(['genre']).count()

Unnamed: 0_level_0,sentence1,sentence2,label
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fiction,995,995,995
government,1016,1016,1016
slate,1002,1002,1002
telephone,1005,1005,1005
travel,982,982,982


In [8]:
train_s1, train_s2, train_label, train_genre = np.array(mtrain['sentence1']), np.array(mtrain['sentence2']), np.array(mtrain['label']), np.array(mtrain['genre'])
val_s1, val_s2, val_label, val_genre = np.array(mval['sentence1']), np.array(mval['sentence2']), np.array(mval['label']), np.array(mval['genre'])


In [9]:
label_map = {"neutral":0, "entailment":1, 'contradiction':2}
train_nlabel = []
val_nlabel = []

for l in train_label:
    train_nlabel.append(label_map[l])
    
for v in val_label:
    val_nlabel.append(label_map[v])

In [10]:
def tokenize_ngram_dataset(dataset):
    token_dataset = []
    # we are keeping track of all tokens in dataset 
    # in order to create vocabulary later
    all_tokens = []
    
    for sample in dataset:
        ngram = sample.split()
        tokens = []
        for gram in ngram:
            tokens.append(gram)
        
        token_dataset.append(tokens)
        all_tokens += tokens

    return token_dataset, all_tokens

train_data_s1_token, all_train_tokens_s1 = tokenize_ngram_dataset(train_s1)
train_data_s2_token, all_train_tokens_s2 = tokenize_ngram_dataset(train_s2)
all_train_tokens = all_train_tokens_s1 + all_train_tokens_s2
val_data_s1_token, _ = tokenize_ngram_dataset(val_s1)
val_data_s2_token, _ = tokenize_ngram_dataset(val_s2)

In [11]:
max_vocab_size = 10000
# save index 0 for unk and 1 for pad
PAD_IDX = 0
UNK_IDX = 1

def build_vocab(all_tokens):
    # Returns:
    # id2token: list of tokens, where id2token[i] returns token that corresponds to token i
    # token2id: dictionary where keys represent tokens and corresponding values represent indices
    token_counter = Counter(all_tokens)
    vocab, count = zip(*token_counter.most_common(max_vocab_size))
    id2token = list(vocab)
    token2id = dict(zip(vocab, range(2,2+len(vocab)))) 
    id2token = ['<pad>', '<unk>'] + id2token
    token2id['<pad>'] = PAD_IDX 
    token2id['<unk>'] = UNK_IDX
    return token2id, id2token

token2id, id2token = build_vocab(all_train_tokens)
token_id = {}
for i, word in enumerate(id2token):
    token_id[word] = i

In [12]:
len(token_id)

10002

In [13]:
def token2index_dataset(tokens_data):
    indices_data = []
    for tokens in tokens_data:
        index_list = [token2id[token] if token in token2id else UNK_IDX for token in tokens]
        indices_data.append(index_list)
    return indices_data

train_s1_indices = token2index_dataset(train_data_s1_token)
train_s2_indices = token2index_dataset(train_data_s2_token)
val_s1_indices = token2index_dataset(val_data_s1_token)
val_s2_indices = token2index_dataset(val_data_s2_token)

#### Data Loader

In [14]:
MAX_SENTENCE_LENGTH = 200

class SenDataLoader(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """
    
    def __init__(self, data1, data2, target_list):
        """
        @param data_list: list of newsgroup tokens 
        @param target_list: list of newsgroup targets 

        """
        self.data1 = data1
        self.data2 = data2
        self.target_list = target_list
        assert (len(self.data1) == len(self.target_list))
        assert (len(self.data2) == len(self.target_list))

    def __len__(self):
        return len(self.data1)
        
    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        
        s1_idx = self.data1[key][:MAX_SENTENCE_LENGTH]
        s2_idx = self.data2[key][:MAX_SENTENCE_LENGTH]
        
        label = self.target_list[key]
        
        return [s1_idx, s2_idx, len(s1_idx), len(s2_idx), label]

def sen_collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    data1 = []
    data2 = []
    s1_length = []
    s2_length = []
    label_list = []
    
    for datum in batch:
        s1_length.append(datum[2])
        s2_length.append(datum[3])
        label_list.append(datum[4])
    # padding
    for datum in batch:
        padded_vec_s1 = np.pad(np.array(datum[0]), 
                                pad_width=((0,MAX_SENTENCE_LENGTH-datum[2])), 
                                mode="constant", constant_values=0)
        padded_vec_s2 = np.pad(np.array(datum[1]), 
                                pad_width=((0,MAX_SENTENCE_LENGTH-datum[3])), 
                                mode="constant", constant_values=0)
        data1.append(padded_vec_s1)
        data2.append(padded_vec_s2)
    
    return [torch.from_numpy(np.array(data1)), torch.from_numpy(np.array(data2)), torch.LongTensor(s1_length), torch.LongTensor(s2_length), torch.LongTensor(label_list)]


In [15]:
train_fiction_index = [i for i, x in enumerate(list(train_genre)) if x == "fiction"]
train_government_index = [i for i, x in enumerate(list(train_genre)) if x == "government"]
train_slate_index = [i for i, x in enumerate(list(train_genre)) if x == "slate"]
train_telephone_index = [i for i, x in enumerate(list(train_genre)) if x == "telephone"]
train_travel_index = [i for i, x in enumerate(list(train_genre)) if x == "travel"]
val_fiction_index = [i for i, x in enumerate(list(val_genre)) if x == "fiction"]
val_government_index = [i for i, x in enumerate(list(val_genre)) if x == "government"]
val_slate_index = [i for i, x in enumerate(list(val_genre)) if x == "slate"]
val_telephone_index = [i for i, x in enumerate(list(val_genre)) if x == "telephone"]
val_travel_index = [i for i, x in enumerate(list(val_genre)) if x == "travel"]

In [16]:
def indices_index(indices1, indices2, index):
    new_indices1 = []
    new_indices2 = []
    for i in index:
        new_indices1.append(indices1[i])
        new_indices2.append(indices2[i])
    return new_indices1, new_indices2

In [17]:
train_fiction_s1_indices, train_fiction_s2_indices = indices_index(train_s1_indices, train_s2_indices, train_fiction_index)
train_gov_s1_indices, train_gov_s2_indices = indices_index(train_s1_indices, train_s2_indices, train_government_index)
train_slate_s1_indices, train_slate_s2_indices = indices_index(train_s1_indices, train_s2_indices, train_slate_index)
train_tel_s1_indices, train_tel_s2_indices = indices_index(train_s1_indices, train_s2_indices, train_telephone_index)
train_travel_s1_indices, train_travel_s2_indices = indices_index(train_s1_indices, train_s2_indices, train_travel_index)


In [18]:
val_fiction_s1_indices, val_fiction_s2_indices = indices_index(val_s1_indices, val_s2_indices, val_fiction_index)
val_gov_s1_indices, val_gov_s2_indices = indices_index(val_s1_indices, val_s2_indices, val_government_index)
val_slate_s1_indices, val_slate_s2_indices = indices_index(val_s1_indices, val_s2_indices, val_slate_index)
val_tel_s1_indices, val_tel_s2_indices = indices_index(val_s1_indices, val_s2_indices, val_telephone_index)
val_travel_s1_indices, val_travel_s2_indices = indices_index(val_s1_indices, val_s2_indices, val_travel_index)


In [19]:
train_fiction_label = [train_nlabel[i] for i in train_fiction_index]
train_gov_label = [train_nlabel[i] for i in train_government_index]
train_slate_label = [train_nlabel[i] for i in train_slate_index]
train_tel_label = [train_nlabel[i] for i in train_telephone_index]
train_travel_label = [train_nlabel[i] for i in train_travel_index]

In [20]:
val_fiction_label = [val_nlabel[i] for i in val_fiction_index]
val_gov_label = [val_nlabel[i] for i in val_government_index]
val_slate_label = [val_nlabel[i] for i in val_slate_index]
val_tel_label = [val_nlabel[i] for i in val_telephone_index]
val_travel_label = [val_nlabel[i] for i in val_travel_index]

In [21]:
train_fiction = SenDataLoader(train_fiction_s1_indices, train_fiction_s2_indices, train_fiction_label)
train_loader_fiction = torch.utils.data.DataLoader(dataset=train_fiction, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

train_gov = SenDataLoader(train_gov_s1_indices, train_gov_s2_indices, train_gov_label)
train_loader_gov = torch.utils.data.DataLoader(dataset=train_gov, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

train_slate = SenDataLoader(train_slate_s1_indices, train_slate_s2_indices, train_slate_label)
train_loader_slate = torch.utils.data.DataLoader(dataset=train_slate, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

train_tel = SenDataLoader(train_tel_s1_indices, train_tel_s2_indices, train_tel_label)
train_loader_tel = torch.utils.data.DataLoader(dataset=train_tel, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

train_travel = SenDataLoader(train_travel_s1_indices, train_travel_s2_indices, train_travel_label)
train_loader_travel = torch.utils.data.DataLoader(dataset=train_travel, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

val_fiction = SenDataLoader(val_fiction_s1_indices, val_fiction_s2_indices, val_fiction_label)
val_loader_fiction = torch.utils.data.DataLoader(dataset=val_fiction, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

val_gov = SenDataLoader(val_gov_s1_indices, val_gov_s2_indices, val_gov_label)
val_loader_gov = torch.utils.data.DataLoader(dataset=val_gov, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

val_slate = SenDataLoader(val_slate_s1_indices, val_slate_s2_indices, val_slate_label)
val_loader_slate = torch.utils.data.DataLoader(dataset=val_slate, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

val_tel = SenDataLoader(val_tel_s1_indices, val_tel_s2_indices, val_tel_label)
val_loader_tel = torch.utils.data.DataLoader(dataset=val_tel, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

val_travel = SenDataLoader(val_travel_s1_indices, val_travel_s2_indices, val_travel_label)
val_loader_travel = torch.utils.data.DataLoader(dataset=val_travel, 
                                           batch_size=BATCH_SIZE,
                                           collate_fn=sen_collate_func,
                                           shuffle=False)

In [22]:
for i, (s1, s2, length1, length2, label) in enumerate(val_loader_fiction):
    print(s1.shape)
    print(length1)
    print(length2)
    print(label)
    break

torch.Size([32, 200])
tensor([11, 14,  5,  5, 20, 23, 18,  9,  6, 10,  4, 12,  9,  4, 20,  4, 11, 18,
         9,  6, 15, 28, 19, 10,  7,  3,  9, 10,  6, 21, 18, 11])
tensor([ 9, 13,  8,  5, 12,  5, 17, 14,  9, 10,  3, 10,  8,  6, 10, 13,  9, 13,
        10,  7,  7, 12, 12,  8,  6,  4,  7,  9,  9, 10, 14, 16])
tensor([1, 2, 0, 2, 0, 0, 2, 2, 1, 1, 1, 1, 0, 2, 0, 0, 2, 0, 2, 1, 0, 0, 0, 2,
        1, 2, 2, 0, 1, 1, 2, 1])


#### Embedding

In [23]:
def load_vectors(fname):
    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    data = {}
    for line in fin:
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = np.asarray(tokens[1:]).astype(float)
    return data

def load_weight_matrix(vocab, pretrained_emb):
    matrix_len = len(vocab)
    weights_matrix = np.zeros((matrix_len, 300))
    words_found = 0
    for i, word in enumerate(vocab):
        try: 
            weights_matrix[i] = pretrained_emb[word]
            words_found += 1
            
        except KeyError:
            weights_matrix[i] = np.random.rand(300, ) - 0.5
    
    return weights_matrix

pretrained_emb = load_vectors('wiki-news-300d-1M.vec')
weights_matrix = load_weight_matrix(token_id, pretrained_emb)
weightsmatrix = Variable(torch.Tensor(weights_matrix), requires_grad=False)

### Accuracy on validation set

#### Best CNN

In [24]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset
    @param: loader - data loader for the dataset to test against
    """
    correct = 0
    total = 0
    model.eval()
    for s1, s2, length1, length2, labels in loader:
        outputs = model(s1.to(device=cuda).long(), s2.to(device=cuda).long(), length1.to(device=cuda).long())
        outputs = F.softmax(outputs, dim=1)
        predicted = outputs.max(1, keepdim=True)[1]

        total += labels.to(device=cuda).size(0)
        correct += predicted.eq(labels.to(device=cuda).view_as(predicted)).sum().item()
    return (100 * correct / total)

In [25]:
class CNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, fc_classes, num_classes, vocab_size, weights):

        super(CNN, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.weights = Variable(torch.Tensor(weights), requires_grad=False)
        self.fc_classes = fc_classes
        self.num_classes = num_classes
        
        # s1
        self.embedding1 = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX, _weight=weights)
    
        self.conv1 = nn.Conv1d(emb_size, hidden_size, kernel_size=5, padding=2)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        
        # s2
        self.embedding2 = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX, _weight=weights)
    
        self.conv3 = nn.Conv1d(emb_size, hidden_size, kernel_size=5, padding=2)
        self.conv4 = nn.Conv1d(hidden_size, hidden_size, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

        self.linear1 = nn.Linear(hidden_size, fc_classes)
        self.linear2 = nn.Linear(fc_classes, num_classes)

    def forward(self, s1, s2, length1):
        batch_size, seq_len = s1.size()

        # CNN for s1
        embed1 = self.embedding1(s1)
        hidden1 = self.conv1(embed1.transpose(1,2)).transpose(1,2)
        hidden1 = F.relu(hidden1.contiguous().view(-1, hidden1.size(-1))).view(batch_size, seq_len, hidden1.size(-1))
        hidden1 = self.conv2(hidden1.transpose(1,2)).transpose(1,2)
        hidden1 = F.relu(hidden1.contiguous().view(-1, hidden1.size(-1))).view(batch_size, seq_len, hidden1.size(-1))
        hidden1 = self.pool1(hidden1.transpose(1,2)).transpose(1,2)
        
        # CNN for s2
        embed2 = self.embedding2(s2)
        hidden2 = self.conv3(embed2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, seq_len, hidden2.size(-1))
        hidden2 = self.conv4(hidden2.transpose(1,2)).transpose(1,2)
        hidden2 = F.relu(hidden2.contiguous().view(-1, hidden2.size(-1))).view(batch_size, seq_len, hidden2.size(-1))
        hidden2 = self.pool2(hidden2.transpose(1,2)).transpose(1,2)
        
        # concat
        hidden = torch.cat((hidden1, hidden2), 1)
        
        # fully connected layer
        hidden = torch.sum(hidden, dim=1)
        
        fc1 = F.leaky_relu(self.linear1(hidden))
        fc2 = self.linear2(fc1)
        
        return fc2

In [26]:
cnn_model = pkl.load(open("cnn_kernel5.p", "rb"))
torch.save(cnn_model.state_dict(),"cnnmodel")
best_cnn = CNN(emb_size=300, hidden_size=128, num_layers=2, fc_classes=48, num_classes=3, vocab_size=len(id2token), weights=weightsmatrix)
best_cnn.load_state_dict(torch.load('cnnmodel'))

In [27]:
best_cnn.eval()

CNN(
  (embedding1): Embedding(10002, 300, padding_idx=0)
  (conv1): Conv1d(300, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv2): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (embedding2): Embedding(10002, 300, padding_idx=0)
  (conv3): Conv1d(300, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv4): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=128, out_features=48, bias=True)
  (linear2): Linear(in_features=48, out_features=3, bias=True)
)

In [35]:
best_cnn.to(device=cuda)

CNN(
  (embedding1): Embedding(10002, 300, padding_idx=0)
  (conv1): Conv1d(300, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv2): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (embedding2): Embedding(10002, 300, padding_idx=0)
  (conv3): Conv1d(300, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv4): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=128, out_features=48, bias=True)
  (linear2): Linear(in_features=48, out_features=3, bias=True)
)

In [28]:
genre = ['fiction', 'government', 'slate', 'telephone', 'travel']

In [39]:
correct = 0
total = 0
for i, (s1, s2, length1, length2, label) in enumerate(val_loader_fiction):
    outputs = best_cnn(s1.to(device=cuda).long(), s2.to(device=cuda).long(), length1.to(device=cuda).long())
    outputs = F.softmax(outputs, dim=1)
    predicted = outputs.max(1, keepdim=True)[1]
    correct = predicted.eq(label.to(device=cuda).view_as(predicted))
    total += label.to(device=cuda).size(0)
    correct += predicted.eq(label.to(device=cuda).view_as(predicted)).sum().item()

In [40]:
cnn_acc1 = test_model(val_loader_fiction, best_cnn)
cnn_acc2 = test_model(val_loader_gov, best_cnn)
cnn_acc3 = test_model(val_loader_slate, best_cnn)
cnn_acc4 = test_model(val_loader_tel, best_cnn)
cnn_acc5 = test_model(val_loader_travel, best_cnn)

In [41]:
cnn_acc = [cnn_acc1, cnn_acc2, cnn_acc3, cnn_acc4, cnn_acc5]

In [42]:
cnn_mnli = pd.DataFrame(list(zip(genre, cnn_acc)), columns=['genre','CNN Acc'])
cnn_mnli

Unnamed: 0,genre,CNN Acc
0,fiction,35.879397
1,government,31.496063
2,slate,32.335329
3,telephone,35.024876
4,travel,34.01222


#### Best RNN

In [43]:
class GRU(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, fc_classes, num_classes, vocab_size, weights, dropout):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(GRU, self).__init__()

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.weights = Variable(torch.Tensor(weights), requires_grad=False)
        self.fc_classes = fc_classes
        self.num_classes = num_classes
        self.dropout = dropout
        # s1
        self.embedding_s1 = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX, _weight=weights)
        self.rnn_s1 = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        #s2
        self.embedding_s2 = nn.Embedding(vocab_size, emb_size, padding_idx=PAD_IDX, _weight=weights)
        self.rnn_s2 = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # fully connected
        self.linear1 = nn.Linear(hidden_size * 2, fc_classes)
        self.linear2 = nn.Linear(fc_classes, num_classes)

    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden_s1 = torch.randn(self.num_layers * 2, batch_size, self.hidden_size)
        hidden_s2 = torch.randn(self.num_layers * 2, batch_size, self.hidden_size)

        return hidden_s1.to(device=cuda), hidden_s2.to(device=cuda)

    def forward(self, s1, s2, length1):
        batch_size, seq_len = s1.size()

        self.hidden_s1, self.hidden_s2 = self.init_hidden(batch_size)

        # get embedding of characters
        embed1 = self.embedding_s1(s1)
        embed2 = self.embedding_s2(s2)
        embed1.to(device=cuda)
        embed2.to(device=cuda)
        # s1
        rnn_out_s1, hidden1 = self.rnn_s1(embed1, self.hidden_s1)
        # s2
        rnn_out_s2, hidden2 = self.rnn_s2(embed2, self.hidden_s2)
        
        
        # concat
        hidden = torch.cat((hidden1[1,:,:], hidden2[1,:,:]), 1)

        fc1 = F.leaky_relu(self.linear1(hidden))
        fc2 = self.linear2(fc1)
        
        return fc2


In [44]:
weightsmatrix = Variable(torch.Tensor(weights_matrix), requires_grad=False)
best_rnn = GRU(emb_size=300, hidden_size=128, num_layers=1, fc_classes=48, num_classes=3, vocab_size=len(id2token), weights=weightsmatrix, dropout=0.1)
best_rnn.load_state_dict(torch.load('rnn_dropout'))
best_rnn.eval()
best_rnn.to(device=cuda)

GRU(
  (embedding_s1): Embedding(10002, 300, padding_idx=0)
  (rnn_s1): GRU(300, 128, batch_first=True, bidirectional=True)
  (embedding_s2): Embedding(10002, 300, padding_idx=0)
  (rnn_s2): GRU(300, 128, batch_first=True, bidirectional=True)
  (linear1): Linear(in_features=256, out_features=48, bias=True)
  (linear2): Linear(in_features=48, out_features=3, bias=True)
)

In [45]:
rnn_acc1 = test_model(val_loader_fiction, best_rnn)
rnn_acc2 = test_model(val_loader_gov, best_rnn)
rnn_acc3 = test_model(val_loader_slate, best_rnn)
rnn_acc4 = test_model(val_loader_tel, best_rnn)
rnn_acc5 = test_model(val_loader_travel, best_rnn)
rnn_acc = [rnn_acc1, rnn_acc2, rnn_acc3, rnn_acc4, rnn_acc5]
rnn_mnli = pd.DataFrame(list(zip(genre, rnn_acc)), columns=['genre','RNN Acc'])
rnn_mnli

Unnamed: 0,genre,RNN Acc
0,fiction,37.386935
1,government,33.858268
2,slate,34.231537
3,telephone,37.114428
4,travel,36.150713
