In [1]:
import pandas as pd
import pickle as pk
import torch

In [24]:
snli_train = pd.read_csv('hw2_data/snli_train.tsv', sep = '\t')
snli_val = pd.read_csv('hw2_data/snli_val.tsv', sep = '\t')

In [25]:
label_dct = {'neutral':0, 'entailment':1, 'contradiction':2}

In [136]:
#load fast-text embedding
ft_home = './'
words_to_load = 50000

import numpy as np

with open(ft_home + 'wiki-news-300d-1M.vec') as f:
    loaded_embeddings_ft = np.zeros((words_to_load+2, 300))
    words_ft = {}
    idx2words_ft = {}
    ordered_words_ft = []
    for i, line in enumerate(f):
        if i >= words_to_load: 
            break
        s = line.split()
        loaded_embeddings_ft[i+2, :] = np.asarray(s[1:])
        words_ft[s[0]] = i+2
        idx2words_ft[i+2] = s[0]
        ordered_words_ft.append(s[0])

In [137]:
#add padding and unknow token
words_ft['<PAD>'] = 0
words_ft['<UNK>'] = 1
idx2words_ft[0] = '<PAD>'
idx2words_ft[1] = '<UNK>'
#init padding and unknow embedding to gaussian random numbers
loaded_embeddings_ft[0,:] = np.zeros(loaded_embeddings_ft.shape[1])
loaded_embeddings_ft[1,:] = np.random.normal(size = (loaded_embeddings_ft.shape[1],))

In [6]:
def preprocess(dataset, label_dct, word2id, id2word):
    dataset['sentence1'] = dataset['sentence1'].apply(lambda x: x.split()) 
    dataset['sentence2'] = dataset['sentence2'].apply(lambda x: x.split()) 
    dataset['label'] = dataset['label'].apply(lambda x: label_dct[x])
    
    def token2vocab(row):
        for i in range(len(row)):
            if row[i] in word2id.keys(): 
                row[i] = word2id[row[i]]
            else: row[i] = word2id['<UNK>']
        return row

    dataset['sentence1'] = dataset['sentence1'].apply(lambda x: token2vocab(x))
    dataset['sentence2'] = dataset['sentence2'].apply(lambda x: token2vocab(x))
    return dataset

In [4]:
snli_train_id = preprocess(snli_train, label_dct, words_ft, idx2words_ft)
snli_val_id = preprocess(snli_val, label_dct, words_ft, idx2words_ft)

In [77]:
# pk.dump(snli_train_id, open("./hw2_data/snli_train_id.pk", "wb"))
# pk.dump(snli_val_id, open("./hw2_data/snli_val_id.pk", "wb"))

In [2]:
snli_train_id = pk.load(open("./hw2_data/snli_train_id.pk", "rb"))
snli_val_id = pk.load(open("./hw2_data/snli_val_id.pk", "rb"))

In [3]:
from SNLI_DataLoader import SNLIDataset, snli_collate_func

In [4]:
train_dataset = SNLIDataset(snli_train_id)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=32,
                                           collate_fn=snli_collate_func,
                                           shuffle=True)
val_dataset = SNLIDataset(snli_val_id)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=32,
                                           collate_fn=snli_collate_func,
                                           shuffle=True)


In [8]:
import torch
import numpy as np

In [18]:
x = x.to(device)
y = y.to(device)

In [None]:
import torch
class RNN(nn.Module):
    def __init__(self, emb_size, hidden_size, num_layers, num_classes, vocab_size):
        # RNN Accepts the following hyperparams:
        # emb_size: Embedding Size
        # hidden_size: Hidden Size of layer in RNN
        # num_layers: number of layers in RNN
        # num_classes: number of output classes
        # vocab_size: vocabulary size
        super(RNN, self).__init__()
        
        if torch.cuda.is_available and torch.has_cudnn:
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        self.num_layers, self.hidden_size = num_layers, hidden_size
        self.embedding = torch.nn.Embedding.from_pretrained(pre_trained_emb, freeze = False).to(self.device)
        self.rnn = torch.nn.RNN(emb_size, hidden_size, num_layers, batch_first=True).to(self.device)
        self.linear = torch.nn.Linear(hidden_size, num_classes).to(self.device)

    def init_hidden(self, batch_size):
        # Function initializes the activation of recurrent neural net at timestep 0
        # Needs to be in format (num_layers, batch_size, hidden_size)
        hidden = torch.randn(self.num_layers, batch_size, self.hidden_size).to(self.device)

        return hidden

    def forward(self, prem, prem_len, hyp, hyp_len):
        
        
        batch_size, prem_seq_len = prem.size()
        _, hyp_seq_len = hyp.size()
        self.hidden = self.init_hidden(batch_size)
        
        prem_len_new, prem_perm_index = prem_len.sort(0, descending = True)
        hyp_len_new, hyp_perm_index = hyp_len.sort(0, descending = True)
        prem = prem[prem_perm_index]; hyp = hyp[hyp_perm_index]
        
        # get embedding of characters
        prem_embed = self.embedding(prem)
        hyp_embed = self.embedding(hyp)
        
        # pack padded sequence (pad shorter sequences, and pytorch want the sequence length in descending order. 
        # First element in batch should be the longest seqeunce.)
        
        packed_prem = torch.nn.utils.rnn.pack_padded_sequence(prem_embed, list(prem_len_new.data), batch_first=True)
        packed_hyp = torch.nn.utils.rnn.pack_padded_sequence(hyp_embed, list(hyp_len_new.data), batch_first=True)
        
        prem_out, self.hidden = self.rnn(packed_prem, self.hidden)
        hyp_out,  = self.rnn(packed_hyp, self.hidden)
        
        #undo packing
        prem_out = torch.nn.utils.rnn.pad_packed_sequence(prem_out, batch_first=True)
        hyp_out = torch.nn.utils.rnn.pad_packed_sequence(hyp_out, batch_first=True)
        
        #resort
        _, prem_restore_index = prem_perm_index.sort(0)
        _, hyp_restore_index = hyp_perm_index.sort(0)
        prem_out = prem_out[prem_restore_index]
        hyp_out = hyp_out[hyp_restore_index]
        
        #concat prem and hyp result
        
        
        # sum hidden activations of RNN across time
        rnn_out = torch.sum(rnn_out, dim=1)

        logits = self.linear(rnn_out)
        return logits

In [108]:
from torch import nn

In [138]:
loaded_embeddings_ft = torch.from_numpy(loaded_embeddings_ft)
loaded_embeddings_ft = loaded_embeddings_ft.to(device)

In [167]:
emb_layer = nn.Embedding.from_pretrained(loaded_embeddings_ft, freeze = False).to(device)

In [168]:
rnn_layer = nn.RNN(300, 100, 2, batch_first=True).to(device)

In [197]:
for _,j in enumerate(train_loader):
    prem = j[0].to(device)
    prem_len = j[1].to(device)
    hyp = j[2].to(device)
    hyp_len = j[3].to(device)
    label = j[4].to(device)
    break

In [198]:
batch_size, prem_seq_len = prem.size()
_, hyp_seq_len = hyp.size()

In [199]:
prem_len_new, prem_perm_index = prem_len.sort(0, descending = True)
hyp_len_new, hyp_perm_index = hyp_len.sort(0, descending = True)
prem = prem[prem_perm_index]; hyp = hyp[hyp_perm_index]

In [200]:
prem_embed = emb_layer(prem)
hyp_embed = emb_layer(hyp)

In [201]:
packed_prem = torch.nn.utils.rnn.pack_padded_sequence(prem_embed, list(prem_len_new.data), batch_first=True)
packed_hyp = torch.nn.utils.rnn.pack_padded_sequence(hyp_embed, list(hyp_len_new.data), batch_first=True)
        

In [202]:
hidden = torch.randn(2, batch_size, 100).to(device)

In [203]:
prem_out, hidden = rnn_layer(packed_prem, hidden)

RuntimeError: CuDNN error: CUDNN_STATUS_EXECUTION_FAILED