In [107]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pickle
import os


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code while deserializing;
    # only point this at files produced by a trusted preprocessing step.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Embedding matrix stored under the "glove_word_embeddings" file name.
emb_matrix = _load_pickle(r"E:\\Internships_19\\Internship(Summer_19)\\Q&A_Toolkit\\Dataset_analysis\\SQuAD\\glove_word_embeddings.pkl")

# One "<name>_word_index_padded.pkl" file is expected per entry, in this order.
names = ["validation_context","train_context","validation_question","train_question"]
data_dir = "E:\\Internships_19\\Internship(Summer_19)\\Q&A_Toolkit\\Dataset_analysis\\SQuAD\\"

word_index_padded = [
    os.path.join(data_dir + split + "_word_index_padded.pkl")
    for split in names
]

# Loaded one by one, in the same order as ``names``.
validation_context_word_index_padded = _load_pickle(word_index_padded[0])
train_context_word_index_padded = _load_pickle(word_index_padded[1])
validation_question_word_index_padded = _load_pickle(word_index_padded[2])
train_question_word_index_padded = _load_pickle(word_index_padded[3])

# int32 masks: 1 wherever the stored index is non-zero, 0 elsewhere
# (index 0 is treated as padding by this comparison).
validation_context_word_mask = validation_context_word_index_padded.ne(0).int()
train_context_word_mask = train_context_word_index_padded.ne(0).int()
validation_question_word_mask = validation_question_word_index_padded.ne(0).int()
train_question_word_mask = train_question_word_index_padded.ne(0).int()


def get_pretrained_embedding(embedding_matrix):
    """Build a frozen ``nn.Embedding`` initialised from ``embedding_matrix``.

    The layer is created directly from the given weights, so no throw-away
    random initialisation is allocated and the global RNG state is left
    untouched.

    Args:
        embedding_matrix: 2-D array-like of shape (num_embeddings,
            embedding_dim), e.g. a NumPy array or a tensor. Values are
            converted to float32.

    Returns:
        nn.Embedding: layer whose ``weight`` holds the given values and has
        ``requires_grad == False``.

    Raises:
        ValueError: if ``embedding_matrix`` is not 2-dimensional.
    """
    weights = torch.as_tensor(embedding_matrix, dtype=torch.float)
    if weights.dim() != 2:
        raise ValueError(
            "embedding_matrix must be 2-D (num_embeddings, embedding_dim), "
            "got shape {}".format(tuple(weights.shape))
        )
    # freeze=True marks the weight as non-trainable (requires_grad=False).
    return nn.Embedding.from_pretrained(weights, freeze=True)


def init_lstm_forget_bias(lstm):
    """Reset every bias vector of ``lstm`` to zero with ones in its second quarter.

    For each parameter whose name starts with ``bias_``, the whole vector is
    zeroed and the slice ``[n // 4, n // 2)`` is set to 1, where ``n`` is the
    vector length. In PyTorch's LSTM bias layout that slice is the forget gate.
    The parameters are modified in place; nothing is returned.

    NOTE(review): both ``bias_ih`` and ``bias_hh`` receive the ones, and
    PyTorch adds the two biases, so the effective forget bias is 2 — confirm
    this is intended.

    Args:
        lstm: recurrent module exposing ``_all_weights`` (e.g. ``nn.LSTM``).
    """
    bias_names = [
        param_name
        for group in lstm._all_weights
        for param_name in group
        if param_name.startswith('bias_')
    ]
    for param_name in bias_names:
        values = getattr(lstm, param_name).data
        length = values.size(0)
        values.zero_()
        # second quarter of the vector -> forget gate
        values[length // 4:length // 2] = 1.

class Word_Level_Encoder(nn.Module):
    """Encode padded word-index sequences with a one-layer unidirectional LSTM.

    Tokens are looked up in a pretrained embedding table, run through the LSTM
    as a packed sequence, passed through dropout, and finally a learned
    sentinel vector is written into the time step immediately after each
    sequence's last real token.
    """

    def __init__(self, hidden_dim, embedding_matrix, dropout_ratio):
        """Create the embedding, LSTM, output dropout and sentinel parameter.

        Args:
            hidden_dim: size of the LSTM hidden state and of the sentinel.
            embedding_matrix: pretrained weights, forwarded to
                ``get_pretrained_embedding``.
            dropout_ratio: dropout probability applied to the LSTM outputs.
        """
        super(Word_Level_Encoder, self).__init__()
        self.hidden_dim = hidden_dim

        self.embedding = get_pretrained_embedding(embedding_matrix)
        self.emb_dim = self.embedding.embedding_dim

        # nn.LSTM's own ``dropout`` only acts between stacked layers; with a
        # single layer it does nothing except emit a UserWarning, so it is
        # not passed. Output dropout is handled by ``dropout_emb`` below.
        self.encoder = nn.LSTM(self.emb_dim, hidden_dim, 1, batch_first=True,
                               bidirectional=False)
        init_lstm_forget_bias(self.encoder)
        self.dropout_emb = nn.Dropout(p=dropout_ratio)
        self.sentinel = nn.Parameter(torch.rand(hidden_dim,))

    def forward(self, seq, mask):
        """Encode a batch and append the sentinel after each sequence.

        Args:
            seq: (B, m) integer tensor of word indices.
            mask: (B, m) tensor that is non-zero at real tokens and zero at
                padding. Any integer, bool or float dtype is accepted. The
                per-row sum is used as the sequence length, so real tokens
                are assumed to occupy the leading positions of each row.

        Returns:
            Tensor of shape (B, max_len + 1, hidden_dim), where ``max_len`` is
            the longest sequence length in the batch. For row ``i`` of length
            ``n``, positions ``[0, n)`` hold the LSTM outputs, position ``n``
            holds the sentinel and later positions are zero.

        Note:
            ``pack_padded_sequence`` rejects zero-length sequences, so every
            row of ``mask`` needs at least one non-zero entry.
        """
        # Force int64 so the lengths are valid both for packing and as a
        # scatter index, regardless of the mask's dtype.
        lens = mask.long().sum(dim=1)
        lens_sorted, lens_argsort = torch.sort(lens, 0, True)
        # Inverse permutation, used to restore the caller's batch order.
        _, lens_argsort_argsort = torch.sort(lens_argsort, 0)
        seq_ = torch.index_select(seq, 0, lens_argsort)

        seq_embd = self.embedding(seq_)
        # Packing expects the lengths on the CPU.
        packed = pack_padded_sequence(seq_embd, lens_sorted.cpu(), batch_first=True)
        output, _ = self.encoder(packed)
        e, _ = pad_packed_sequence(output, batch_first=True)
        e = torch.index_select(e, 0, lens_argsort_argsort)  # B x max_len x l
        e = self.dropout_emb(e)

        b = mask.size(0)
        # Extra zero time step, created on the same device/dtype as ``e``.
        sentinel_zero = e.new_zeros(b, 1, self.hidden_dim)
        e = torch.cat([e, sentinel_zero], 1)  # B x max_len + 1 x l

        # Write the sentinel at time step ``lens[i]`` of every row.
        sentinel_exp = self.sentinel.view(1, 1, -1).expand(b, 1, self.hidden_dim).contiguous()  # B x 1 x l
        index = lens.view(b, 1, 1).expand(b, 1, self.hidden_dim)
        e = e.scatter_(1, index, sentinel_exp)

        return e
    
    
# Build the encoder and run it once on the first 10 validation contexts.
hidden_dim, dropout_ratio = 300, 0.2
encoder = Word_Level_Encoder(
    hidden_dim=hidden_dim,
    embedding_matrix=emb_matrix,
    dropout_ratio=dropout_ratio,
)

# The indices are cast to int64 for the embedding lookup.
e = encoder(
    validation_context_word_index_padded[:10].long(),
    validation_context_word_mask[:10],
)

In [120]:
# Inspect the shape of a single example's mask row (recorded output: torch.Size([400])).
validation_context_word_mask[20].size()

torch.Size([400])

In [141]:
# Scratch check: indexing the column axis with an index tensor keeps those
# columns (here 0 and 1) for every row of the 3 x 3 view.
a  = torch.arange(1,10)
b= a.view((3,3))
b[:,torch.arange(0,2)]

tensor([[1, 2],
        [4, 5],
        [7, 8]])

In [124]:
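# Disabled scratch expression: yields -1e30 where the mask is 0 and 0 where it is 1
# (presumably an additive mask for logits before a softmax — TODO confirm or remove).
# (1.0 - validation_context_word_mask[0].float()) * (-1e30)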

In [24]:
# Scratch check of NumPy shapes: expand_dims(a, 1) adds a trailing axis, and
# arange(0, 5) is 1-D (recorded output: (5,)).
# NOTE(review): this import lives in a scratch cell; the top imports cell does not import numpy.
import numpy as np
a = np.zeros(5)
b = np.expand_dims(a,1).astype(int)
np.shape(np.arange(0,5))

(5,)

In [125]:
# Scratch check: a length-10 zero vector converted to an integer (int64) tensor.
torch.zeros(10, ).long()

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [27]:
# Scratch check: build a length mask by broadcasting. ``indices`` has shape (5,)
# and ``seq_lens`` is expanded to (5, 1), so the comparison yields a (5, 5)
# matrix whose row i is 1 wherever indices <= seq_lens[i].
indices = np.arange(1,6)
seq_lens = np.arange(1,6)
(indices <= np.expand_dims(seq_lens, 1)).astype(int)

array([[1, 0, 0, 0, 0],
       [1, 1, 0, 0, 0],
       [1, 1, 1, 0, 0],
       [1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1]])

In [29]:
# NOTE(review): this call fails (see the TypeError recorded below) because
# torch.sum requires a Tensor, not a NumPy array; convert first, e.g. with
# torch.from_numpy(...). np.ones(3) is also 1-D, so dim=1 would still be out
# of range after conversion.
torch.sum(np.ones(3),1)

TypeError: sum() received an invalid combination of arguments - got (numpy.ndarray, int), but expected one of:
 * (Tensor input)
 * (Tensor input, torch.dtype dtype)
      didn't match because some of the arguments have invalid types: ([31;1mnumpy.ndarray[0m, [31;1mint[0m)
 * (Tensor input, tuple of ints dim, torch.dtype dtype, Tensor out)
 * (Tensor input, tuple of ints dim, bool keepdim, torch.dtype dtype, Tensor out)
 * (Tensor input, tuple of ints dim, bool keepdim, Tensor out)


In [31]:
# Scratch check of the mask construction on a stand-in id array.
qn_ids = np.arange(1,6)
qn_ids = np.array(qn_ids) # here a 1-D stand-in of shape (5,); presumably (batch_size, question_len) for real data
qn_mask = (qn_ids != 0).astype(np.int32) # 1 where the id is non-zero, 0 otherwise

In [32]:
# Display the mask: every id in the stand-in array is non-zero, so every entry is 1.
qn_mask

array([1, 1, 1, 1, 1])