In [39]:
import numpy as np
import random
import os

import torch
torch.use_deterministic_algorithms(True)  # to help make code deterministic
torch.backends.cudnn.benchmark = False  # to help make code deterministic
import torch.nn as nn
from torchinfo import summary

np.random.seed(0)  # to help make code deterministic
torch.manual_seed(0)  # to help make code deterministic
random.seed(0)  # to help make code deterministic

import pickle

import torchtext
from torchtext.data import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

from UDA_pytorch_utils import UDA_pytorch_classifier_fit, \
    UDA_plot_train_val_accuracy_vs_epoch, UDA_pytorch_classifier_predict, \
    UDA_compute_accuracy, UDA_get_rnn_last_time_step_outputs

In [27]:
# Load the pickled vocabulary object from disk.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'vocab.pkl' if it comes from a trusted source.
# The handle is named `vocab_file` (not `input`) so that this cell does not
# rebind the builtin input() in the kernel namespace.
with open('vocab.pkl', 'rb') as vocab_file:
    vocab = pickle.load(vocab_file)

In [29]:
# Load the pickled pretrained embedding matrix from disk.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'embedding_matrix' if it comes from a trusted source.
# The handle is named `embedding_file` (not `input`) so that this cell does not
# rebind the builtin input() in the kernel namespace.
# NOTE(review): presumably a 2-D float tensor of shape (vocab size, embedding
# dim), since it is later passed to nn.Embedding.from_pretrained — confirm.
with open('embedding_matrix', 'rb') as embedding_file:
    embedding_matrix = pickle.load(embedding_file)

In [43]:
# Build the tokenizer through torchtext's get_tokenizer, requesting the spaCy
# backend with the 'en_core_web_sm' language model.
# NOTE(review): presumably requires spaCy and that model to be installed in the
# kernel's environment — confirm before a fresh run.
tokenizer = get_tokenizer('spacy', language='en_core_web_sm')

In [47]:
def tokenizer_lowercase(text):
    """Tokenize `text` with the notebook-level `tokenizer`, lowercasing each token.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the result of calling `.lower()` on every token that
        `tokenizer(text)` yields, in the same order.
    """
    lowered_tokens = []
    for raw_token in tokenizer(text):
        lowered_tokens.append(raw_token.lower())
    return lowered_tokens

In [33]:
class EmbeddingLSTMLinearModel(nn.Module):
    """Text classifier: pretrained embedding -> LSTM -> linear output layer.

    Args:
        embedding_matrix: 2-D tensor of pretrained embedding weights; its
            second dimension is used as the LSTM input size.
        num_lstm_output_nodes: Hidden size of the LSTM, which is also the
            input size of the final linear layer.
        num_final_output_nodes: Number of outputs of the final linear layer.
    """

    def __init__(self, embedding_matrix, num_lstm_output_nodes, num_final_output_nodes):
        super().__init__()
        embedding_dim = embedding_matrix.shape[1]

        # from_pretrained uses the supplied matrix as the embedding weights
        # (frozen by default, per the PyTorch documentation).
        self.embedding_layer = nn.Embedding.from_pretrained(embedding_matrix)
        self.lstm_layer = nn.LSTM(input_size=embedding_dim,
                                  hidden_size=num_lstm_output_nodes)
        self.linear_layer = nn.Linear(in_features=num_lstm_output_nodes,
                                      out_features=num_final_output_nodes)

    def forward(self, text_encodings, lengths):
        """Embed the encoded text, run the LSTM, and apply the linear layer.

        Args:
            text_encodings: Integer tensor of token indices fed to the
                embedding layer.
            lengths: Per-sequence lengths, forwarded unchanged to
                UDA_get_rnn_last_time_step_outputs.
                NOTE(review): presumably text_encodings is laid out as
                (time steps, batch) with one length per batch item — confirm
                against the helper.

        Returns:
            The linear layer applied to whatever
            UDA_get_rnn_last_time_step_outputs returns for the embedded input.
        """
        last_step_outputs = UDA_get_rnn_last_time_step_outputs(
            self.embedding_layer(text_encodings), lengths, self.lstm_layer)
        return self.linear_layer(last_step_outputs)

In [35]:
# Instantiate the classifier with 32 LSTM output nodes and 2 final output
# nodes, using the embedding matrix loaded from disk.
simple_lstm_model = EmbeddingLSTMLinearModel(embedding_matrix, 32, 2)

In [41]:
# Print a layer-by-layer torchinfo summary by running the model on dummy
# inputs: a (7, 5) tensor of zero token indices and five sequence lengths.
# NOTE(review): the lengths max out at 7 and there are 5 of them, so the input
# is presumably laid out as (time steps, batch) — confirm against the helper.
summary(simple_lstm_model,
        input_data=[torch.zeros((7, 5), dtype=torch.long),
                    torch.tensor([3, 2, 5, 1, 7], dtype=torch.long)])

Layer (type:depth-idx)                   Output Shape              Param #
EmbeddingLSTMLinearModel                 [5, 2]                    --
├─Embedding: 1-1                         [7, 5, 100]               (9,066,700)
├─LSTM: 1-2                              [18, 32]                  17,152
├─Linear: 1-3                            [5, 2]                    66
Total params: 9,083,918
Trainable params: 17,218
Non-trainable params: 9,066,700
Total mult-adds (Units.MEGABYTES): 73.35
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 36.34
Estimated Total Size (MB): 36.37

In [45]:
# Restore the trained weights from the saved checkpoint into the model.
# map_location='cpu' makes the load work even when the checkpoint was saved
# from CUDA tensors and no GPU is available here; the model in this notebook
# stays on the CPU, so the loaded parameter values are unchanged.
# NOTE(security): torch.load unpickles the file; only load checkpoints from a
# trusted source.
simple_lstm_model.load_state_dict(
    torch.load('imdb_lstm_epoch20.pt', map_location='cpu'))

<All keys matched successfully>

In [119]:
# Classify one example sentence: lowercase-tokenize it, pass the tokens through
# `vocab` (presumably mapping them to integer indices — confirm), and hand a
# single-element list to the prediction helper with rnn=True.
# .numpy()[0] extracts the first (only) entry of the helper's result.
# NOTE(review): presumably label 1 means positive sentiment — confirm against
# the labels used in training.
sent_val = UDA_pytorch_classifier_predict(simple_lstm_model,
                               [vocab(tokenizer_lowercase('I loved this movie'))],
                               rnn=True).numpy()[0]

sent_val

1