# Loading an LSTM NLP model for CPU inference

## Getting Started

I'll need fastai and torchtext. Installing Conda and fastai can take a while, but they set up your environment nicely for you.

See installation instructions here: [https://github.com/fastai/fastai](https://github.com/fastai/fastai)

In [3]:
from fastai.nlp import *
from fastai.lm_rnn import *
from fastai import sgdr
from torchtext import vocab, data

Next I create the class for our LSTM model. 

You can see the explanation for this in lessons 6 & 7 of fast.ai, where you get to create various types of RNNs from scratch. LSTM is the final type in the lesson.

In [4]:
class CharSeqStatefulLSTM(nn.Module):
    """Character-level LSTM language model arranged for CPU inference.

    Token indices are embedded, passed through a multi-layer LSTM and
    projected back onto the vocabulary; the output is log-probabilities.

    Args:
        vocab_size: number of distinct tokens (embedding rows and output classes).
        n_fac: size of each embedding vector.
        bs: batch size used to allocate the initial hidden state.
        nl: number of stacked LSTM layers.
        hidden_size: LSTM hidden size. When omitted, the notebook-level
            ``n_hidden`` global is read once, at construction time.
    """
    def __init__(self, vocab_size, n_fac, bs, nl, hidden_size=None):
        super().__init__()
        self.vocab_size,self.nl = vocab_size,nl
        # Resolve the hidden size once; afterwards it is always taken from the
        # LSTM layer itself, so a later reassignment of the global cannot
        # produce a hidden state that mismatches the trained layers.
        if hidden_size is None: hidden_size = n_hidden
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.LSTM(n_fac, hidden_size, nl, dropout=0.5)
        self.l_out = nn.Linear(hidden_size, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs, **kwargs):
        """Return log-probabilities over the vocabulary for every input position.

        Args:
            cs: tensor of token indices; ``cs[0].size(0)`` is treated as the
                batch size.
            **kwargs: accepted and ignored.

        Returns:
            Log-softmax output reshaped to ``(-1, vocab_size)``.
        """
        bs = cs[0].size(0)
        # Re-allocate the hidden state when the incoming batch size changes.
        if self.h[0].size(1) != bs: self.init_hidden(bs)
        self.rnn.flatten_parameters()
        # Keep the hidden state on the CPU, where inference runs.
        self.h = (self.h[0].cpu(), self.h[1].cpu())
        # NOTE(review): the updated hidden state returned by the LSTM is
        # discarded, so every call starts from the stored zero state.
        # Confirm this is intended for inference.
        outp,_ = self.rnn(self.e(cs), self.h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset the hidden and cell states to zeros for batch size ``bs``."""
        hidden_size = self.rnn.hidden_size
        self.h = (V(torch.zeros(self.nl, bs, hidden_size)),
                  V(torch.zeros(self.nl, bs, hidden_size)))

Paths to the training and validation data need to be set before the NLP model data is defined below.

In [9]:
# Root folder of the proverbs dataset and the sub-folder of each split.
PATH = 'data/proverbs/'
TRN_PATH, VAL_PATH = 'train/', 'valid/'
# Full paths to the training and validation splits.
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

In [10]:
# Display the dataset root and the resolved train/validation paths.
PATH, TRN, VAL

('data/proverbs/', 'data/proverbs/train/', 'data/proverbs/valid/')

In [11]:
# Character-level field: `list` splits a string into single characters,
# and the text is lower-cased.
TEXT = data.Field(tokenize=list, lower=True)

# Hyperparameters: batch size, back-prop-through-time length,
# embedding size and LSTM hidden size.
bs, bptt, n_fac, n_hidden = 64, 8, 42, 128

TEXT

<torchtext.data.field.Field at 0x20486f86dd8>

In [12]:
# Sub-folders for each split; the validation folder doubles as the test set.
FILES = {'train': TRN_PATH, 'validation': VAL_PATH, 'test': VAL_PATH}
# Build the language-model data object from the text files under PATH.
md = LanguageModelData.from_text_files(
    PATH, TEXT,
    **FILES,
    bs=bs, bptt=bptt, min_freq=3,
)

In [13]:
# Display the language-model data object to confirm it was created.
md

<fastai.nlp.LanguageModelData at 0x20486f86da0>

In [14]:
# Instantiate the model: vocabulary size md.nt, embedding size n_fac,
# an initial hidden-state batch size of 256, and 2 LSTM layers.
m = CharSeqStatefulLSTM(md.nt, n_fac, 256, 2)


In [15]:
# Load the saved weights from '<PATH>models/gen_0_dict'. The map_location
# callback returns each storage unchanged, which keeps the tensors on the CPU
# even if they were saved from a GPU. The file must exist at that path.
m.load_state_dict(torch.load(f'{PATH}models/gen_0_dict', map_location=lambda storage, loc: storage))


FileNotFoundError: [Errno 2] No such file or directory: 'data/proverbs/models/gen_0_dict'

In [40]:
# Make sure all model parameters live on the CPU.
m = m.cpu()


In [41]:
# Switch to evaluation mode so the LSTM's dropout is disabled during inference.
m.eval()

CharSeqStatefulLSTM(
  (e): Embedding(37, 42)
  (rnn): LSTM(42, 128, num_layers=2, dropout=0.5)
  (l_out): Linear(in_features=128, out_features=37, bias=True)
)

In [44]:
def get_next(inp):
    """Sample the next character to follow ``inp``.

    Args:
        inp: the text prefix (a string of characters) fed to the model.

    Returns:
        A single vocabulary token, looked up in ``TEXT.vocab.itos``, drawn at
        random from the model's predicted distribution for the last output row.
    """
    # Convert the characters to vocabulary indices on the CPU (device=-1).
    idxs = TEXT.numericalize(inp, device=-1)
    # Transpose, wrap for inference, and keep everything on the CPU.
    vpid = VV(idxs.transpose(0, 1).cpu()).cpu()
    # Use the loaded model `m`; the original referenced `sel_m`, which is not
    # defined anywhere in this notebook and raised a NameError.
    p = m(vpid)
    # The model outputs log-probabilities; exponentiate before sampling.
    r = torch.multinomial(p[-1].exp(), 1)
    return TEXT.vocab.itos[to_np(r)[0]]

In [45]:
def get_next_n(inp, n):
    """Generate up to ``n`` characters continuing ``inp``.

    Each sampled character is appended to the result and pushed into a
    sliding context window of constant length. Generation stops early once a
    '.' has been produced.

    Args:
        inp: seed text.
        n: maximum number of characters to generate.

    Returns:
        The seed text followed by the generated characters.
    """
    generated = inp
    window = inp
    for _ in range(n):
        nxt = get_next(window)
        generated = generated + nxt
        # Drop the oldest character and append the new one.
        window = window[1:] + nxt
        if nxt == '.':
            break
    return generated

In [51]:
# Generate up to 1000 characters from the seed text. get_next_n takes only
# (inp, n); the extra third positional argument raised a TypeError.
get_next_n('People ', 1000)

'People only the consists.'