In [2]:
# Required imports
import torch
import numpy as np
import pandas as pd
import pickle
from torch.nn import Linear, Embedding, RNN, GRU, LSTM
from torch.nn import Sigmoid, LogSoftmax
from torch.optim import SGD
from torch.nn import BCELoss, NLLLoss, CrossEntropyLoss
from string import punctuation
import itertools
from tqdm import tqdm

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer, LabelEncoder

In [4]:
class encoder(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, batch_size):
        super(encoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = Embedding(num_embeddings=vocab_size, 
                                   embedding_dim=embedding_dim)
        self.rnn = LSTM(input_size=embedding_dim, 
                       hidden_size=hidden_dim)
        self.batch_size = batch_size
        self.softmax = LogSoftmax()
        self.hidden = self.init_hidden()
                
    def forward(self, x):
        e = self.embedding(x)
        e = e.view(len(x), self.batch_size, -1)
        out, self.hidden = self.rnn(e, self.hidden)
        return out, self.hidden
                  
    def init_hidden(self):
        h0 = torch.autograd.Variable(torch.zeros(1, self.batch_size, self.hidden_dim))
        c0 = torch.autograd.Variable(torch.zeros(1, self.batch_size, self.hidden_dim))
        return (h0, c0)
    
class decoder(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, batch_size):
        super(decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = Embedding(num_embeddings=vocab_size, 
                                   embedding_dim=embedding_dim)
        self.rnn = LSTM(input_size=embedding_dim, 
                       hidden_size=hidden_dim)
        self.linear = Linear(hidden_dim, output_dim)
        self.batch_size = batch_size
        self.softmax = LogSoftmax()
        self.hidden = self.init_hidden()
                
    def forward(self, input, hidden):
        self.hidden = hidden
        e = self.embedding(input)
        e = e.view(len(input), self.batch_size, -1)
        out, self.hidden = self.rnn(e, self.hidden)
        output = self.linear(out[0])
        so = self.softmax(output)
        return so, self.hidden
                  
    def init_hidden(self):
        h0 = torch.autograd.Variable(torch.zeros(1, self.batch_size, self.hidden_dim))
        c0 = torch.autograd.Variable(torch.zeros(1, self.batch_size, self.hidden_dim))
        return (h0, c0)
    
class seq2seq(torch.nn.Module):
    def __init__(self, encoder, decoder):
        super(seq2seq, self).__init__()
        self.enc = encoder
        self.dec = decoder
                
    def forward(self, input_seq, output_seq, p_tf=0):
        outputs = []
        
        enc.hidden = self.enc.init_hidden()
        dec.hidden = self.dec.init_hidden()        
        
        enc_output, enc_hidden = enc(torch.LongTensor(input_seq))
        context = enc_output[-1].unsqueeze(0)
        
        dec_output, hidden = dec(torch.LongTensor([output_seq[0]]), (context, context))
        outputs.append(dec_output)
        for i in range(1,output_seq.shape[0]):
            dec_input = torch.LongTensor([output_seq[i]])
            dec_output, hidden = dec(dec_input, hidden) 
            outputs.append(dec_output)
        return torch.stack(outputs).squeeze(1)
        
        

In [3]:
translation_indices = pickle.load(open('../data/translation_indices.pkl', 'rb'))

In [24]:
input_size = len(translation_indices['input2idx'])
output_size = len(translation_indices['output2idx'])

enc_vocab_size = input_size
enc_embedding_dim = 100
enc_hidden_dim = 50

dec_vocab_size = output_size
dec_embedding_dim = 50
dec_hidden_dim = 50
dec_output_dim = output_size

enc = encoder(enc_vocab_size, enc_embedding_dim, enc_hidden_dim, batch_size=1)
dec = decoder(dec_vocab_size, dec_embedding_dim, dec_hidden_dim, dec_output_dim, batch_size=1)
s2s = seq2seq(enc, dec)

state_dict = torch.load('../data/s2s.pt')
s2s.load_state_dict(state_dict)

RuntimeError: Error(s) in loading state_dict for seq2seq:
	size mismatch for enc.embedding.weight: copying a param with shape torch.Size([17267, 100]) from checkpoint, the shape in current model is torch.Size([17148, 100]).
	size mismatch for dec.embedding.weight: copying a param with shape torch.Size([21854, 50]) from checkpoint, the shape in current model is torch.Size([21842, 50]).
	size mismatch for dec.linear.weight: copying a param with shape torch.Size([21854, 50]) from checkpoint, the shape in current model is torch.Size([21842, 50]).
	size mismatch for dec.linear.bias: copying a param with shape torch.Size([21854]) from checkpoint, the shape in current model is torch.Size([21842]).

In [21]:
state_dict = torch.load('../data/s2s.pt')

In [25]:
input_size

17148

In [26]:
output_size

21842