In [1]:
from utils import prepare_data
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
def load_glove(file_path, small=True, max_words=5001):
    """Read GloVe vectors from a whitespace-separated text file.

    Every non-blank line is split on whitespace; the first field is the token
    and the remaining fields are parsed as floats. Tokens are lower-cased
    before being stored, so if two tokens collide after lower-casing the later
    line overwrites the earlier one.

    Args:
        file_path: Path of the GloVe text file (opened as UTF-8).
        small: When true, stop after ``max_words`` entries instead of reading
            the whole file.
        max_words: Number of entries kept when ``small`` is true. The default
            of 5001 reproduces the original ``idx > 5000`` cut-off.

    Returns:
        A ``(vectors, word2idx)`` tuple: ``vectors`` maps each token to a
        float numpy array, ``word2idx`` maps each token to its 0-based line
        position among the entries that were read.
    """
    vectors = {}
    word2idx = {}
    idx = 0
    with open(file_path, encoding='utf8') as lines:
        for line in lines:
            if small and idx >= max_words:
                break
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline): nothing to index.
                continue
            word = fields[0].lower()
            word2idx[word] = idx
            vectors[word] = np.array(fields[1:], dtype='float')
            idx += 1
    return vectors, word2idx

In [3]:
# Load the truncated GloVe table; small=True is load_glove's default, spelled out here.
vec, word2idx = load_glove(file_path='glove.42B.300d/glove.42B.300d.txt', small=True)

In [4]:
# Preview the first few tokens instead of dumping every key of the vocabulary.
list(word2idx)[:10]



In [5]:
# Build the 'test' split through utils.prepare_data; the call prints the
# reading/counting log shown under this cell. Later cells read q.n_words and
# q.word2index and look words up in `vector`.
# NOTE(review): presumably q / a are the question / answer vocabularies and
# `vector` maps words to GloVe vectors — confirm against utils.prepare_data.
q, a, pairs, vector = prepare_data('test', 'glove.42B.300d/glove.42B.300d.txt')

Reading test -------
Read 4041 sentence pairs
Counting words
Counted words:
In questions: 5087 words
In answers: 394 words


In [6]:
# Build the embedding initialisation matrix: one 300-column row per entry of
# q's vocabulary, filled from `vector` where a pretrained vector exists.
matrix_len = q.n_words
weights_matrix = np.zeros((matrix_len, 300))
word_found = 0  # number of vocabulary words that had a pretrained vector
# NOTE(review): rows are assigned by iteration order of q.word2index, not by
# the values it stores — confirm that order matches the ids fed to the
# embedding layer (e.g. no reserved ids before the first word).
for i, word in enumerate(q.word2index):
    try:
        weights_matrix[i] = vector[word]
    except KeyError:
        # No pretrained vector for this word: its row stays all zeros.
        continue
    word_found += 1

In [7]:
# Display the number of rows allocated for the embedding weight matrix.
matrix_len

5087

## Encoder

In [8]:
class Encoder(nn.Module):
    """GRU encoder over token ids, with embeddings initialised from pretrained vectors.

    Tensors are batch-first, matching the ``(batch, seq, hidden)`` layout used
    by the attention cells in this notebook.

    Args:
        batch_size: Batch size used by ``initHidden``.
        vocabulary_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the GRU input size).
        hidden_size: GRU hidden size.
        pretrained_weights: Optional array/tensor of shape
            ``(vocabulary_size, embedding_dim)`` copied into the embedding
            table. When omitted, the notebook-level ``weights_matrix`` is used,
            as in the original code.

    Raises:
        ValueError: If the pretrained weights do not have shape
            ``(vocabulary_size, embedding_dim)``.
    """

    def __init__(self, batch_size, vocabulary_size, embedding_dim, hidden_size,
                 pretrained_weights=None):
        super(Encoder, self).__init__()

        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(vocabulary_size, embedding_dim)

        if pretrained_weights is None:
            # Backward-compatible default: the matrix built in an earlier cell.
            pretrained_weights = weights_matrix
        pretrained = torch.as_tensor(pretrained_weights)
        expected_shape = (vocabulary_size, embedding_dim)
        if tuple(pretrained.shape) != expected_shape:
            # copy_ would silently broadcast some mismatched shapes, so check first.
            raise ValueError(
                "pretrained weights have shape %s, expected %s"
                % (tuple(pretrained.shape), expected_shape)
            )
        with torch.no_grad():
            # In-place copy_ also casts float64 numpy data to the parameter dtype.
            self.embedding.weight.copy_(pretrained)

        # The GRU consumes embeddings, so its input size is embedding_dim.
        # No `dropout` argument: on a single-layer GRU it has no effect and
        # only triggers a warning.
        self.gru = nn.GRU(embedding_dim, hidden_size, batch_first=True)

    def forward(self, input, hidden):
        """Encode a batch of token ids.

        Args:
            input: Integer tensor of token ids, shape ``(batch, seq)``.
            hidden: Initial GRU state, shape ``(1, batch, hidden_size)``.

        Returns:
            ``(output, hidden)`` with shapes ``(batch, seq, hidden_size)`` and
            ``(1, batch, hidden_size)``.
        """
        embedded = self.embedding(input)
        output, hidden = self.gru(embedded, hidden)

        return output, hidden

    def initHidden(self):
        """Return an all-zero initial GRU state on the same device as the module."""
        # nn.GRU expects h_0 as (num_layers, batch, hidden) even when batch_first=True.
        return torch.zeros(
            (1, self.batch_size, self.hidden_size),
            device=self.embedding.weight.device,
        )
        
        

In [9]:
# Scratch layer and parameter vector (both width 100) for the attention experiments below.
attn = nn.Linear(in_features=100, out_features=100)
v = nn.Parameter(data=torch.rand(100))

In [10]:
# Length of the parameter vector along its only dimension.
v.shape[0]

100

In [11]:
# Random stand-in for encoder outputs, shape (32, 20, 100).
encoder_outputs = torch.rand(32, 20, 100)

In [12]:
# Random stand-in for a hidden state, shape (32, 1, 100).
hidden = torch.rand(32, 1, 100)

In [13]:
def score(hidden, encoder_outputs):
    """Batched dot product of `hidden` against every row of `encoder_outputs`.

    With the shapes used in this notebook — `hidden` (32, 1, 100) and
    `encoder_outputs` (32, 20, 100) — the result has shape (32, 20).
    """
    keys = encoder_outputs.transpose(1, 2)
    return hidden.bmm(keys).squeeze(1)

In [14]:
# Shape check for the dot-product scores.
score(hidden, encoder_outputs).size()

torch.Size([32, 20])

In [15]:
class Attention(nn.Module):
    """Attention weights from a linear projection of the encoder outputs.

    The encoder outputs pass through a learned `hidden_size -> hidden_size`
    linear layer, are scored against `hidden` with a batched matrix product,
    and the scores are soft-maxed.
    """

    def __init__(self, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden, encoder_outputs):
        """Return normalised attention weights.

        With `hidden` of shape (B, 1, H) and `encoder_outputs` of shape
        (B, T, H) — the shapes exercised elsewhere in this notebook — the
        result has shape (B, 1, T) and sums to 1 over the last axis.
        """
        projected_keys = self.attn(encoder_outputs).transpose(1, 2)
        scores = torch.bmm(hidden, projected_keys).squeeze(1)
        weights = F.softmax(scores, dim=1)

        return weights.unsqueeze(1)

In [16]:
# Shape after swapping the last two axes.
encoder_outputs.transpose(1, 2).shape

torch.Size([32, 100, 20])

In [17]:
# Project the encoder outputs through `attn`, then score them against `hidden`.
# The intermediate result gets its own name so `encoder_outputs` is left
# untouched and this cell can be re-run without a shape error.
projected_outputs = attn(encoder_outputs)
energy = torch.bmm(hidden, projected_outputs.transpose(1, 2))

In [18]:
# Shape check for the projected attention energies.
energy.shape

torch.Size([32, 1, 20])

In [19]:
# Scratch dropout layer (20% drop probability, applied in place).
dropout = nn.Dropout(p=0.2, inplace=True)

In [20]:
# Random integer ids in [0, 500), shape (32, 1), standing in for decoder input tokens.
dec_inp = torch.randint(0, 500, (32, 1))

In [21]:
# Display the dtype of the ids produced by torch.randint.
dec_inp.dtype

torch.int64

In [22]:
# Fresh random (32, 20, 100) tensor used as encoder outputs in the decoder-step cells below.
encoder_outputs = torch.rand(32, 20, 100)

In [23]:
embed = nn.Embedding(500, 100)

# Embed the decoder input ids, then apply the (in-place) dropout layer.
embedded = embed(dec_inp)
embedded = dropout(embedded)
# Soft-max the raw dot-product scores so they are real attention weights
# (each row sums to 1), consistent with Attention.forward; shape is unchanged.
attn_weight = F.softmax(score(hidden, encoder_outputs), dim=1).unsqueeze(1)

In [24]:
# Shape check for the attention weights.
attn_weight.shape

torch.Size([32, 1, 20])

In [25]:
# Shape check for the embedded decoder input.
embedded.shape

torch.Size([32, 1, 100])

In [26]:
# Weighted combination of the encoder outputs under the attention weights.
context = torch.bmm(attn_weight, encoder_outputs)

In [27]:
# Shape check for the context tensor.
context.shape

torch.Size([32, 1, 100])

In [28]:
# Concatenate embedding and context along the last (feature) axis.
rnn_input = torch.cat((embedded, context), dim=2)

In [29]:
# Shape check for the concatenated GRU input.
rnn_input.shape

torch.Size([32, 1, 200])

In [30]:
class Decoder(nn.Module):
    """Single-step GRU decoder with attention over the encoder outputs.

    Tensors are batch-first; the recurrent state uses nn.GRU's own
    ``(num_layers, batch, hidden)`` layout.

    Args:
        embed_size: Width of the output-token embeddings.
        hidden_size: GRU hidden size; must equal the encoder output width.
        output_size: Output vocabulary size (embedding rows and logits).
        dropout: Drop probability applied to the embedded input token.
        attention: Optional module called as ``attention(query, encoder_outputs)``
            with ``query`` of shape ``(batch, 1, hidden_size)`` and returning
            weights of shape ``(batch, 1, seq)``. Defaults to
            ``Attention(hidden_size)``.
    """

    def __init__(self, embed_size, hidden_size, output_size, dropout, attention=None):
        # nn.Module must be initialised before any sub-module is assigned.
        super(Decoder, self).__init__()

        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Named `embed` because that is the attribute forward() reads.
        self.embed = nn.Embedding(output_size, self.embed_size)
        self.dropout = nn.Dropout(dropout)
        self.attention = Attention(hidden_size) if attention is None else attention
        # batch_first matches the (batch, 1, features) step input built in
        # forward(). No GRU `dropout`: on a single layer it has no effect and
        # only triggers a warning.
        self.gru = nn.GRU(hidden_size + embed_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input, last_hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: Integer token ids, shape ``(batch, 1)``.
            last_hidden: Previous GRU state, shape ``(1, batch, hidden_size)``.
            encoder_outputs: Encoder outputs, shape ``(batch, seq, hidden_size)``.

        Returns:
            ``(output, hidden, attn_weights)``: log-probabilities of shape
            ``(batch, output_size)``, the new GRU state of shape
            ``(1, batch, hidden_size)``, and attention weights of shape
            ``(batch, 1, seq)``.
        """
        embedded = self.embed(input)
        embedded = self.dropout(embedded)
        # The attention's bmm needs the state as (batch, 1, hidden); the GRU
        # keeps it as (1, batch, hidden), so swap the first two axes here.
        query = last_hidden.transpose(0, 1)
        attn_weights = self.attention(query, encoder_outputs)
        context = attn_weights.bmm(encoder_outputs)
        rnn_input = torch.cat([embedded, context], 2)
        output, hidden = self.gru(rnn_input, last_hidden)
        output = output.squeeze(1)
        context = context.squeeze(1)
        output = self.out(torch.cat([output, context], 1))
        output = F.log_softmax(output, dim=1)

        return output, hidden, attn_weights