In [1]:
import random

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn

from load_utils import prepare_data

In [2]:
def load_glove(file_path, small=True, max_words=5001):
    """Read a GloVe-style text file into lookup dictionaries.

    Each non-empty line is expected to be a token followed by its
    whitespace-separated vector components. Tokens are lower-cased before
    being used as keys, so two tokens that differ only by case share one
    entry and the later line wins.

    Args:
        file_path: Path of the UTF-8 encoded embeddings file.
        small: When true, stop after ``max_words`` usable lines instead of
            reading the whole file.
        max_words: Number of lines kept when ``small`` is true. The default
            (5001) matches the previous hard-coded cut-off.

    Returns:
        A ``(vectors, word2idx)`` tuple: ``vectors`` maps a lower-cased token
        to a float numpy array, ``word2idx`` maps it to the position of the
        line it was read from (blank lines are not counted).
    """
    vectors = {}
    word2idx = {}
    idx = 0
    with open(file_path, encoding='utf8') as lines:
        for line in lines:
            if small and idx >= max_words:
                break
            fields = line.split()
            # A blank line (or a token with no components) used to raise
            # IndexError / produce an empty vector; skip it instead.
            if len(fields) < 2:
                continue
            word = fields[0].lower()
            word2idx[word] = idx
            vectors[word] = np.array(fields[1:], dtype='float')
            idx += 1
    return vectors, word2idx

In [3]:
# Load GloVe vectors with the default small=True, so load_glove stops
# after the first 5001 lines of the file rather than reading all of it.
vec, word2idx = load_glove('glove.42B.300d/glove.42B.300d.txt')

In [4]:
word2idx.keys()



In [5]:
# prepare_data is imported from load_utils; the log printed below reports the
# number of sentence pairs and the question/answer vocabulary sizes.
# `q` is used further down for its n_words / word2index attributes and
# `vector` as a word -> embedding lookup.
# NOTE(review): `a` is rebound to a scratch tensor in a later cell, so the
# value returned here is lost once that cell runs.
q, a, pairs, vector = prepare_data('test', 'glove.42B.300d/glove.42B.300d.txt')

Reading test -------
Read 4041 sentence pairs
Counting words
Counted words:
In questions: 6723 words
In answers: 4670 words


In [6]:
# Build the pretrained-embedding matrix for the question vocabulary.
# Rows for words that have no pretrained vector stay all-zero.
matrix_len = q.n_words
weights_matrix = np.zeros((matrix_len, 300))
word_found = 0  # number of vocabulary words that had a pretrained vector
# NOTE(review): rows are filled by enumeration order of q.word2index, not by
# the index stored for each word. If word2index values do not start at 0
# (e.g. reserved SOS/EOS ids), rows will be misaligned with token ids --
# confirm against load_utils.
for i, word in enumerate(q.word2index):
    try:
        weights_matrix[i] = vector[word]
    except KeyError:
        # Only a missing word is expected here; any other error (shape
        # mismatch, row out of range) should surface instead of being hidden.
        continue
    word_found += 1

In [7]:
matrix_len

6723

## Encoder

In [8]:
class Encoder(nn.Module):
    """GRU encoder over token ids with pretrained embedding weights.

    Tensors are batch-first: ``forward`` takes token ids of shape
    ``(batch, seq_len)`` and returns outputs of shape
    ``(batch, seq_len, hidden_size)``. The hidden state follows the
    ``nn.GRU`` layout ``(1, batch, hidden_size)``.

    Args:
        batch_size: Batch size used by ``initHidden``.
        vocabulary_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_size: Width of the GRU hidden state.
        pretrained_weights: Optional array/tensor of shape
            ``(vocabulary_size, embedding_dim)`` copied into the embedding
            table. When omitted, the notebook-level ``weights_matrix`` is
            used, as before.

    Raises:
        ValueError: If the pretrained weights do not have shape
            ``(vocabulary_size, embedding_dim)``.
    """

    def __init__(self, batch_size, vocabulary_size, embedding_dim, hidden_size,
                 pretrained_weights=None):

        super(Encoder, self).__init__()

        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(vocabulary_size, embedding_dim)

        if pretrained_weights is None:
            # Fall back to the matrix built earlier in the notebook.
            pretrained_weights = weights_matrix
        pretrained = torch.as_tensor(pretrained_weights)
        if tuple(pretrained.shape) != (vocabulary_size, embedding_dim):
            raise ValueError(
                "pretrained weights have shape {}, expected {}".format(
                    tuple(pretrained.shape), (vocabulary_size, embedding_dim)))
        # copy_ is the in-place tensor copy (plain `.copy` does not exist);
        # it also casts the float64 numpy data to the parameter dtype.
        with torch.no_grad():
            self.embedding.weight.copy_(pretrained)

        # The GRU consumes embeddings, so its input width is embedding_dim.
        # No `dropout` argument: on a single-layer GRU it has no effect and
        # only triggers a warning.
        self.gru = nn.GRU(embedding_dim, hidden_size, batch_first=True)

    def forward(self, input, hidden=None):
        """Encode a batch of token ids.

        Args:
            input: Integer tensor of shape ``(batch, seq_len)``.
            hidden: Optional initial hidden state ``(1, batch, hidden_size)``;
                ``None`` lets the GRU start from zeros.

        Returns:
            ``(output, hidden)`` as produced by the GRU.
        """
        embedded = self.embedding(input)
        output, hidden = self.gru(embedded, hidden)

        return output, hidden

    def initHidden(self):
        """Return a zero hidden state on the same device as the parameters."""
        return torch.zeros((1, self.batch_size, self.hidden_size),
                           device=self.embedding.weight.device)
        
        

In [9]:
# Scratch objects for prototyping attention outside of a module:
# a 100 -> 100 linear projection and a random parameter vector of length 100.
# In the visible cells `v` is only inspected for its size.
attn = nn.Linear(100, 100)
v = nn.Parameter(torch.rand(100))

In [10]:
v.size(0)

100

In [11]:
encoder_outputs = torch.rand((32, 20, 100))

In [12]:
hidden = torch.rand((32, 1, 100))

In [13]:
def score(hidden, encoder_outputs):
    """Batched dot product between `hidden` and every encoder output.

    `encoder_outputs` has its last two dimensions swapped so that the batched
    matrix product with `hidden` yields one score per encoder position;
    dimension 1 of the product is then squeezed away.
    """
    keys = encoder_outputs.transpose(1, 2)
    return torch.bmm(hidden, keys).squeeze(1)

In [14]:
score(hidden, encoder_outputs).shape

torch.Size([32, 20])

In [15]:
class Attention(nn.Module):
    """Attention weights from a linear projection of the encoder outputs.

    The encoder outputs are passed through a square linear layer, multiplied
    (batched) with the hidden state, and the resulting scores are normalised
    with a softmax over dimension 1 of the squeezed score matrix.
    """

    def __init__(self, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden, encoder_outputs):
        """Return the softmax-normalised weights with dimension 1 restored."""
        projected = self.attn(encoder_outputs)
        scores = torch.bmm(hidden, projected.transpose(1, 2)).squeeze(1)
        weights = F.softmax(scores, dim=1)

        return weights.unsqueeze(1)

In [16]:
encoder_outputs.transpose(1, 2).size()

torch.Size([32, 100, 20])

In [17]:
# Manual walk-through of the energy computation used in Attention.forward:
# project the encoder outputs, swap the last two dims, then batch-multiply
# with the hidden state.
# Note: this rebinds `encoder_outputs` to the projected, transposed tensor;
# a later cell recreates the random (32, 20, 100) tensor.
encoder_outputs = attn(encoder_outputs)
encoder_outputs = encoder_outputs.transpose(1, 2)
energy = torch.bmm(hidden, encoder_outputs)

In [18]:
energy.size()

torch.Size([32, 1, 20])

In [19]:
dropout = nn.Dropout(0.2, inplace=True)

In [20]:
dec_inp = torch.randint(500, (32, 1))

In [21]:
dec_inp.dtype

torch.int64

In [22]:
encoder_outputs = torch.rand((32, 20, 100))

In [23]:
# Prototype of the first half of a decoder step on random data.
embed = nn.Embedding(500, 100)

# Embed the random token ids, then apply the in-place dropout defined above.
embedded = embed(dec_inp)
embedded = dropout(embedded)
# `score` returns raw dot products (no softmax), so these "weights" are not
# normalised; unsqueeze restores the middle dimension for the bmm below.
attn_weight = score(hidden, encoder_outputs).unsqueeze(1)

In [24]:
attn_weight.size()

torch.Size([32, 1, 20])

In [25]:
embedded.size()

torch.Size([32, 1, 100])

In [26]:
# Weighted sum of the encoder outputs: (32, 1, 20) x (32, 20, 100) -> (32, 1, 100).
context = attn_weight.bmm(encoder_outputs)

In [27]:
context.size()

torch.Size([32, 1, 100])

In [28]:
# Concatenate embedding and context along the feature dimension (100 + 100 = 200).
rnn_input= torch.cat([embedded, context], 2)

In [29]:
rnn_input.size()

torch.Size([32, 1, 200])

In [30]:
class Decoder(nn.Module):
    """Single-step GRU decoder with attention over the encoder outputs.

    Tensors are batch-first. The hidden state uses the ``nn.GRU`` layout
    ``(1, batch, hidden_size)``; it is transposed internally to
    ``(batch, 1, hidden_size)`` for the attention module.

    Args:
        embed_size: Width of the target-token embeddings.
        hidden_size: Width of the GRU hidden state (and of the encoder
            outputs that are attended over).
        output_size: Size of the target vocabulary.
        dropout: Dropout probability applied to the embedded input token.
    """

    def __init__(self, embed_size, hidden_size, output_size, dropout):

        # Must run before any submodule is assigned, otherwise nn.Module
        # raises on the first `self.<module> = ...`.
        super(Decoder, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Named `embed` so that it matches the attribute used in forward().
        self.embed = nn.Embedding(output_size, self.embed_size)
        self.dropout = nn.Dropout(dropout, inplace=True)
        self.attention = Attention(hidden_size)
        # No `dropout` argument on the GRU: with a single layer it has no
        # effect and only triggers a warning.
        self.gru = nn.GRU(hidden_size + embed_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input, last_hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: Token ids for the current step; any shape holding one id
                per batch element (``(batch,)``, ``(batch, 1)`` or
                ``(1, batch)``).
            last_hidden: Previous hidden state ``(1, batch, hidden_size)``.
            encoder_outputs: ``(batch, src_len, hidden_size)``.

        Returns:
            ``(output, hidden, attn_weights)`` where ``output`` holds
            log-probabilities of shape ``(batch, output_size)``, ``hidden``
            is the new GRU state and ``attn_weights`` has shape
            ``(batch, 1, src_len)``.
        """
        token_ids = input.reshape(-1, 1)                      # (batch, 1)
        embedded = self.embed(token_ids)                      # (batch, 1, embed)
        embedded = self.dropout(embedded)
        # Attention multiplies with bmm, so it needs the batch dimension first.
        query = last_hidden.transpose(0, 1)                   # (batch, 1, hidden)
        attn_weights = self.attention(query, encoder_outputs)
        context = attn_weights.bmm(encoder_outputs)           # (batch, 1, hidden)
        rnn_input = torch.cat([embedded, context], 2)
        output, hidden = self.gru(rnn_input, last_hidden)
        output = output.squeeze(1)
        context = context.squeeze(1)
        output = self.out(torch.cat([output, context], 1))
        output = F.log_softmax(output, dim=1)

        return output, hidden, attn_weights

In [31]:
class Seq2Seq(nn.Module):
    """Encoder/decoder wrapper that decodes step by step and sums the loss.

    Args:
        encoder: Module exposing ``batch_size``, ``initHidden()`` and a
            ``forward(src, hidden)`` returning ``(outputs, hidden)``.
        decoder: Module exposing ``output_size`` and a
            ``forward(input, hidden, encoder_outputs)`` returning
            ``(output, hidden, attention)``.
        max_out_length: Number of decoding steps.
        device: Device on which the output buffer and start tokens are created.
        criterion: Loss applied to each step's decoder output.
    """

    def __init__(self, encoder, decoder, max_out_length, device, criterion):

        super(Seq2Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.batch_size = encoder.batch_size
        self.output_size = decoder.output_size
        self.max_length = max_out_length
        self.device = device
        self.criterion = criterion
        self.SOS_token = 0

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``max_length`` steps and accumulate the per-step loss.

        Args:
            src: Source batch handed to the encoder unchanged.
            trg: Target token ids, indexed by decoding step on dimension 0.
                NOTE(review): this implies a ``(max_length, batch)`` layout
                -- confirm against the data loader.
            teacher_forcing_ratio: Probability of feeding the ground-truth
                token (instead of the model's own prediction) for the whole
                sequence.

        Returns:
            ``(decoder_outputs, loss)`` where ``decoder_outputs`` has shape
            ``(batch_size, max_length, output_size)`` and ``loss`` is the sum
            of the criterion over all steps.
        """
        loss = 0
        decoder_outputs = torch.zeros(
            (self.batch_size, self.max_length, self.output_size), device=self.device)

        # The encoder's forward takes an explicit initial hidden state.
        encoder_outputs, encoder_hidden = self.encoder(src, self.encoder.initHidden())

        # One start token per batch element, shaped (batch, 1) like every
        # later decoder input.
        decoder_input = torch.full(
            (self.batch_size, 1), self.SOS_token, dtype=torch.long, device=self.device)
        decoder_hidden = encoder_hidden

        # Decided once per call, so a sequence is either fully teacher-forced
        # or fully free-running.
        use_teacher_forcing = random.random() < teacher_forcing_ratio

        for step in range(self.max_length):
            decoder_output, decoder_hidden, decoder_attention = self.decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_outputs[:, step, :] = decoder_output
            loss += self.criterion(decoder_output, trg[step])

            if use_teacher_forcing:
                decoder_input = trg[step].unsqueeze(1)
            else:
                # topk(1) already yields (batch, 1); squeeze() would drop the
                # batch dimension when batch_size == 1.
                _, topi = decoder_output.topk(1)
                decoder_input = topi.detach()

        return decoder_outputs, loss

In [32]:
decoderout = torch.rand(32, 500)

In [33]:
decoderout.size()

torch.Size([32, 500])

In [34]:
topv, topi = decoderout.topk(1)

In [35]:
topi.squeeze().detach()

tensor([268, 435, 445, 110, 491, 232, 197, 184, 392, 157, 202, 201, 230, 187,
        167, 355, 195,  94, 361, 479, 251, 338, 468,  52, 334,  17, 146, 109,
        113, 155,  35,  12])

In [36]:
topv

tensor([[0.9995],
        [1.0000],
        [0.9946],
        [0.9993],
        [0.9987],
        [0.9993],
        [0.9929],
        [0.9998],
        [0.9972],
        [0.9999],
        [0.9978],
        [0.9991],
        [0.9994],
        [0.9982],
        [0.9991],
        [0.9966],
        [1.0000],
        [0.9984],
        [0.9990],
        [0.9984],
        [0.9962],
        [0.9999],
        [0.9959],
        [0.9979],
        [0.9982],
        [0.9989],
        [0.9997],
        [0.9989],
        [0.9980],
        [0.9982],
        [0.9991],
        [0.9982]])

In [37]:
torch.tensor([32*[0]])

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0]])

In [45]:
# Two stand-ins for the predictions of consecutive decoding steps; both are
# detached views of the same `topi`, hence the identical columns below.
loop_1 = topi.detach()
loop_2 = topi.detach()

In [55]:
torch.cat([loop_1, loop_2], dim=-1).numpy().tolist()

[[268, 268],
 [435, 435],
 [445, 445],
 [110, 110],
 [491, 491],
 [232, 232],
 [197, 197],
 [184, 184],
 [392, 392],
 [157, 157],
 [202, 202],
 [201, 201],
 [230, 230],
 [187, 187],
 [167, 167],
 [355, 355],
 [195, 195],
 [94, 94],
 [361, 361],
 [479, 479],
 [251, 251],
 [338, 338],
 [468, 468],
 [52, 52],
 [334, 334],
 [17, 17],
 [146, 146],
 [109, 109],
 [113, 113],
 [155, 155],
 [35, 35],
 [12, 12]]

In [67]:
# Placeholder first column to which per-step predictions are appended.
# torch.zeros instead of torch.empty: empty() returns uninitialised memory,
# so the zeros shown in the output below were not guaranteed.
a = torch.zeros((32, 1), dtype=torch.int32, device='cpu')

In [72]:
# Append one column of predictions to the running tensor.
# NOTE(review): this cell is not idempotent -- every execution adds another
# column. The output below shows three appended columns, so it was presumably
# run three times; a fresh Restart & Run All would produce only one.
a = torch.cat([a, loop_1], dim=-1)

In [73]:
a

tensor([[  0, 268, 268, 268],
        [  0, 435, 435, 435],
        [  0, 445, 445, 445],
        [  0, 110, 110, 110],
        [  0, 491, 491, 491],
        [  0, 232, 232, 232],
        [  0, 197, 197, 197],
        [  0, 184, 184, 184],
        [  0, 392, 392, 392],
        [  0, 157, 157, 157],
        [  0, 202, 202, 202],
        [  0, 201, 201, 201],
        [  0, 230, 230, 230],
        [  0, 187, 187, 187],
        [  0, 167, 167, 167],
        [  0, 355, 355, 355],
        [  0, 195, 195, 195],
        [  0,  94,  94,  94],
        [  0, 361, 361, 361],
        [  0, 479, 479, 479],
        [  0, 251, 251, 251],
        [  0, 338, 338, 338],
        [  0, 468, 468, 468],
        [  0,  52,  52,  52],
        [  0, 334, 334, 334],
        [  0,  17,  17,  17],
        [  0, 146, 146, 146],
        [  0, 109, 109, 109],
        [  0, 113, 113, 113],
        [  0, 155, 155, 155],
        [  0,  35,  35,  35],
        [  0,  12,  12,  12]])

In [78]:
# Drop the placeholder first column and convert the predictions to nested
# Python lists (one list of token ids per batch element).
l = a[:, 1:].numpy().tolist()
l

[[268, 268, 268],
 [435, 435, 435],
 [445, 445, 445],
 [110, 110, 110],
 [491, 491, 491],
 [232, 232, 232],
 [197, 197, 197],
 [184, 184, 184],
 [392, 392, 392],
 [157, 157, 157],
 [202, 202, 202],
 [201, 201, 201],
 [230, 230, 230],
 [187, 187, 187],
 [167, 167, 167],
 [355, 355, 355],
 [195, 195, 195],
 [94, 94, 94],
 [361, 361, 361],
 [479, 479, 479],
 [251, 251, 251],
 [338, 338, 338],
 [468, 468, 468],
 [52, 52, 52],
 [334, 334, 334],
 [17, 17, 17],
 [146, 146, 146],
 [109, 109, 109],
 [113, 113, 113],
 [155, 155, 155],
 [35, 35, 35],
 [12, 12, 12]]

In [81]:
# Random per-sequence values in {0, 1, 2}, used below as the last index to keep.
# NOTE(review): no seed is set, so the values shown change on every run.
seq_length = np.random.randint(3, size=32)

In [82]:
seq_length

array([0, 0, 1, 2, 2, 1, 2, 0, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 1, 1, 2, 0,
       0, 2, 1, 2, 0, 0, 1, 2, 0, 1])

In [94]:
l[2][:seq_length[2]+1]

[445, 445]

In [97]:
# Truncate every predicted sequence to its first `length + 1` tokens.
[word[:length+1] for word, length in zip(l, seq_length)]

[[268],
 [435],
 [445, 445],
 [110, 110, 110],
 [491, 491, 491],
 [232, 232],
 [197, 197, 197],
 [184],
 [392],
 [157, 157, 157],
 [202, 202],
 [201],
 [230, 230, 230],
 [187],
 [167, 167, 167],
 [355, 355],
 [195, 195],
 [94, 94],
 [361, 361],
 [479, 479],
 [251, 251, 251],
 [338],
 [468],
 [52, 52, 52],
 [334, 334],
 [17, 17, 17],
 [146],
 [109],
 [113, 113],
 [155, 155, 155],
 [35],
 [12, 12]]