In [None]:
import os
import numpy as np
import time
import math
from data_utils import * 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


In [None]:
## A simple LSTM language model (the class below builds a unidirectional LSTM; no BiLSTM option is wired in)
class RNNModel(nn.Module):
    """LSTM language model with tied input/output embeddings.

    Pipeline: token ids -> embedding -> dropout -> multi-layer LSTM -> dropout
    -> bias-free linear projection onto the vocabulary. The projection shares
    its weight matrix with the embedding (weight tying).

    Args:
        ntoken: Vocabulary size (rows of the embedding / output classes).
        ninp: Embedding dimension fed to the LSTM.
        nhid: LSTM hidden size. Must equal ``ninp`` because the decoder weight
            is tied to the ``(ntoken, ninp)`` embedding matrix.
        nlayers: Number of stacked LSTM layers.
        dropout: Dropout probability used on the embeddings, between LSTM
            layers, and on the LSTM output.

    Raises:
        ValueError: If ``nhid != ninp``.
    """

    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.2):
        super(RNNModel, self).__init__()
        # Fail fast: with tied weights the decoder consumes ninp-sized vectors,
        # and init_hidden() builds nhid-sized states, so the two must agree.
        if nhid != ninp:
            raise ValueError(
                'With tied weights, nhid ({}) must be equal to ninp ({})'.format(nhid, ninp)
            )

        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)  # Token2Embeddings

        # Hidden size is nhid (the original passed ninp here, ignoring nhid).
        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)

        self.decoder = nn.Linear(nhid, ntoken, bias=False)

        # Tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        self.decoder.weight = self.encoder.weight

        self.nhid = nhid
        self.nlayers = nlayers

        self.init_weights()

    def init_weights(self):
        """Initialise the embedding/decoder weights uniformly in [-0.1, 0.1)."""
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        # The decoder normally shares the encoder's Parameter, so it is already
        # initialised; only touch it if something has untied the two.
        if self.decoder.weight is not self.encoder.weight:
            nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, input, hidden):
        """Run one chunk of tokens through the model.

        Args:
            input: Integer token ids. nn.LSTM is built with its default
                ``batch_first=False``, so the layout is (seq_len, batch).
            hidden: ``(h_0, c_0)`` LSTM state, e.g. from ``init_hidden``.

        Returns:
            ``(decoded, hidden)`` where ``decoded`` has shape
            ``(seq_len * batch, ntoken)`` (the first two LSTM output
            dimensions are flattened together) and ``hidden`` is the updated
            LSTM state.
        """
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        # Flatten (seq_len, batch, nhid) -> (seq_len * batch, nhid) for the projection.
        flat = output.view(output.size(0) * output.size(1), output.size(2))
        decoded = self.decoder(flat)
        return decoded, hidden

    def init_hidden(self, bsz):
        """Return a zero ``(h_0, c_0)`` state for a batch of size ``bsz``.

        Each tensor has shape ``(nlayers, bsz, nhid)`` and is created with the
        dtype and device of the model's first parameter.
        """
        weight = next(self.parameters())
        # new_zeros replaces the deprecated Variable(weight.data.new(...).zero_()).
        shape = (self.nlayers, bsz, self.nhid)
        return (weight.new_zeros(shape), weight.new_zeros(shape))
        

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Batch sizes: one for the training split, one for evaluation (validation/test).
train_batch_size, eval_batch_size = 32, 10

In [None]:
# Download the PTB data, then either set the PTB_DATA_DIR environment variable
# to its location or edit the fallback path below.
# Corpus is presumably provided by data_utils (star import) — verify there.
ptb_data_dir = os.environ.get('PTB_DATA_DIR', '/Users/arijitsehanobish/simple-examples/ptb_data')
corpus_raw = Corpus(ptb_data_dir)

In [None]:
# Batchify every split: training uses train_batch_size, validation and test
# use eval_batch_size. Per the original note each result has size
# (total_len // bsz, bsz) — TODO confirm against batchify in data_utils.
train_data, val_data, test_data = (
    batchify(split, bsz)
    for split, bsz in (
        (corpus_raw.train, train_batch_size),
        (corpus_raw.valid, eval_batch_size),
        (corpus_raw.test, eval_batch_size),
    )
)

In [None]:
# Reporting interval (the unit is not visible in this cell — presumably batches).
interval = 200
# Vocabulary size, taken from the corpus dictionary.
ntokens = len(corpus_raw.dictionary)

# Model hyperparameters.
# directions: bidirectional vs unidirectional switch (1 here); it is not passed
# to RNNModel in this cell.
directions = 1
hidden_size = 200
n_layers = 2
bptt = 64

# Embedding size and hidden size are both hidden_size.
net = RNNModel(
    ntoken=ntokens,
    ninp=hidden_size,
    nhid=hidden_size,
    nlayers=n_layers,
    dropout=0.2,
)

In [None]:
# Download the checkpoint from Drive, then either set LSTM_CHECKPOINT_PATH to
# its location or edit the fallback path below.
checkpoint_path = os.environ.get(
    'LSTM_CHECKPOINT_PATH',
    '/Users/arijitsehanobish/simple-examples/models/lstm_tied_warmstart_try4.pkl',
)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU machine still loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
net.load_state_dict(torch.load(checkpoint_path, map_location=device))
net.to(device)

### We need three things: 1) the true softmax scores, via `get_true_softmax_scores(data_source, net, bptt)`; 2) the class embeddings, of shape `(ntokens, hidden_size)`, via `get_class_embeddings(net)`; 3) the model outputs for the tokens, via `get_model_embeddings(data_source, net, bptt)`.

Here `net` is the model, `data_source` is `test_data`, and `bptt = 64`.