<a href="https://colab.research.google.com/github/paruliansaragi/cnn-fastai/blob/master/fastairnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install pinned versions of fastai and torchtext (one package per command).
!pip install fastai==0.7.0
!pip install torchtext==0.2.3

In [0]:
!mkdir data

In [0]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.io import *
from fastai.conv_learner import *
from fastai.column_data import *

In [0]:
PATH='data/nietzsche/'

In [0]:
# Download the corpus to PATH, then load it fully into memory.
get_data("https://s3.amazonaws.com/text-datasets/nietzsche.txt", f'{PATH}nietzsche.txt')
# Read through a context manager so the file handle is closed right away, and
# decode explicitly as UTF-8 rather than relying on the platform default.
with open(f'{PATH}nietzsche.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

nietzsche.txt: 606kB [00:01, 458kB/s]                            

corpus length: 600893





In [0]:
text[:400]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself '

In [0]:

# Character vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# One slot more than the number of distinct characters.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 85


In [0]:

# Put a NUL character at index 0 of the vocabulary.  The guard makes the cell
# idempotent: re-running it must not insert a second "\0", which would shift
# every character index and leave vocab_size one too small.
if not chars or chars[0] != "\0":
    chars.insert(0, "\0")

''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxy'

In [0]:
# Two lookup tables over the vocabulary: index -> character and character -> index.
indices_char = dict(enumerate(chars))
char_indices = {ch: pos for pos, ch in indices_char.items()}

In [0]:
idx = [char_indices[c] for c in text]

idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [0]:
''.join(indices_char[i] for i in idx[:70])


'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

In [0]:

cs = 3
# Walk the corpus in non-overlapping steps of cs characters.  For every start
# position collect the characters at offsets 0, 1, 2 (the inputs) and 3 (the target).
c1_dat, c2_dat, c3_dat, c4_dat = (
    [idx[start + offset] for start in range(0, len(idx) - cs, cs)]
    for offset in range(4)
)

In [0]:
x1 = np.stack(c1_dat)
x2 = np.stack(c2_dat)
x3 = np.stack(c3_dat)

In [0]:
y = np.stack(c4_dat)


In [0]:
x1[:4], x2[:4], x3[:4]


(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [0]:
y[:4]


array([30, 29,  1, 40])

In [0]:
x1.shape, y.shape


((200297,), (200297,))

In [0]:
n_hidden = 256


In [0]:
n_fac = 42


In [0]:
class Char3Model(nn.Module):
    """Predict the 4th character from three input characters with an unrolled RNN.

    The same three layers are reused at every step: embedding -> `l_in`
    (input to hidden), `l_hidden` (hidden to hidden) and `l_out` (hidden to
    output).  The hidden width comes from the notebook-level `n_hidden`.

    Args:
        vocab_size: number of rows in the embedding and of output classes.
        n_fac: embedding width.
    """

    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)

        # The 'green arrow' from our diagram - the layer operation from input to hidden
        self.l_in = nn.Linear(n_fac, n_hidden)

        # The 'orange arrow' from our diagram - the layer operation from hidden to hidden
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        
        # The 'blue arrow' from our diagram - the layer operation from hidden to output
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, c1, c2, c3):
        """Return log-probabilities over the vocabulary for the next character.

        c1, c2, c3 are the index tensors of the three input characters.
        """
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        
        # Start from an all-zero hidden state shaped like one projected input.
        h = V(torch.zeros(in1.size()).cuda())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        
        # Normalise over the class axis explicitly, like the other models in this
        # notebook; the implicit-dim form of log_softmax is deprecated.
        return F.log_softmax(self.l_out(h), dim=-1)

In [0]:
# The second argument is the list of validation indices (later cells in this
# notebook pass `val_idx` in that position).  [-1] holds out only the last
# sample, so the val_loss printed for this model is measured on one example.
# NOTE(review): presumably deliberate for this toy model — confirm.
md = ColumnarModelData.from_arrays('.', [-1], np.stack([x1,x2,x3], axis=1), y, bs=512)


In [0]:
m = Char3Model(vocab_size, n_fac).cuda()


In [0]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [0]:
opt = optim.Adam(m.parameters(), 1e-2)


In [0]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.127548   1.30557   



[array([1.30557])]

In [0]:
set_lrs(opt, 0.001)


In [0]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.870887   0.438312  



[array([0.43831])]

In [0]:
def get_next(inp):
    """Return the character the current model `m` ranks highest after `inp`.

    Every character of `inp` is mapped to its vocabulary index and passed to
    the model as a separate positional argument.
    """
    char_ids = np.array([char_indices[ch] for ch in inp])
    log_probs = m(*VV(T(char_ids)))
    best = np.argmax(to_np(log_probs))
    return chars[best]

In [0]:
get_next('y. ')


'T'

In [0]:
get_next('cun')

'd'

In [0]:
get_next('twa')

't'

In [0]:
get_next('dic')

'a'

In [0]:
cs=8

In [0]:
# One sliding window of cs consecutive character indices per start position.
c_in_dat = [idx[start:start + cs] for start in range(len(idx) - cs)]


In [0]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]


In [0]:
xs = np.stack(c_in_dat, axis=0)


In [0]:
xs.shape


(600885, 8)

In [0]:
y = np.stack(c_out_dat)


In [0]:
xs[:cs,:cs]


array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

In [0]:
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

In [0]:
val_idx = get_cv_idxs(len(idx)-cs-1)


In [0]:
md = ColumnarModelData.from_arrays('.', val_idx, xs, y, bs=512)


In [0]:
class CharLoopModel(nn.Module):
    """Hand-written RNN: one loop applies the same layers to each input character.

    The projected input is *added* to the hidden state at every step.
    """

    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Take one index tensor per character; return log-probabilities for the next one."""
        batch_size = cs[0].size(0)
        hidden = V(torch.zeros(batch_size, n_hidden).cuda())
        for char_ids in cs:
            projected = F.relu(self.l_in(self.e(char_ids)))
            hidden = F.tanh(self.l_hidden(hidden + projected))

        scores = self.l_out(hidden)
        return F.log_softmax(scores, dim=-1)

In [0]:
m = CharLoopModel(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-2)

In [0]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.006234   1.984492  



[array([1.98449])]

In [0]:
set_lrs(opt, 0.001)


In [0]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.695356   1.69987   



[array([1.69987])]

In [0]:
class CharLoopConcatModel(nn.Module):
    """Loop RNN that concatenates the hidden state with the embedding at each step.

    Because of the concatenation, `l_in` takes n_fac + n_hidden input features.
    """

    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac+n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Take one index tensor per character; return log-probabilities for the next one."""
        batch_size = cs[0].size(0)
        hidden = V(torch.zeros(batch_size, n_hidden).cuda())
        for char_ids in cs:
            # Join [hidden, embedding] along the feature axis before projecting.
            joined = torch.cat((hidden, self.e(char_ids)), 1)
            hidden = F.tanh(self.l_hidden(F.relu(self.l_in(joined))))

        scores = self.l_out(hidden)
        return F.log_softmax(scores, dim=-1)

In [0]:

m = CharLoopConcatModel(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [0]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.799617   1.79407   



[array([1.79407])]

In [0]:
set_lrs(opt, 1e-4)


In [0]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.705426   1.703607  



[array([1.70361])]

In [0]:

def get_next(inp):
    """Return the most likely next character after `inp` under the current model `m`.

    Same body as the earlier definition of this name in the notebook; it is
    re-declared here next to the model it is used with.
    """
    encoded = T(np.array([char_indices[ch] for ch in inp]))
    prediction = to_np(m(*VV(encoded)))
    return chars[np.argmax(prediction)]

In [0]:
get_next('for thos')

'e'

In [0]:
get_next('part of ')


't'

In [0]:
get_next('your a cun')

't'

In [0]:
class CharRnn(nn.Module):
    """Same next-character task, with the hand-written loop replaced by nn.RNN."""

    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Take one index tensor per character; return log-probabilities for the next one."""
        batch_size = cs[0].size(0)
        initial_hidden = V(torch.zeros(1, batch_size, n_hidden))
        # Stack the per-character tensors along a new leading axis before embedding.
        embedded = self.e(torch.stack(cs))
        activations, _ = self.rnn(embedded, initial_hidden)

        # Only the activation of the final step feeds the output layer.
        last_step = activations[-1]
        return F.log_softmax(self.l_out(last_step), dim=-1)

In [0]:
m = CharRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
it = iter(md.trn_dl)
*xs,yt = next(it)

In [0]:
t = m.e(V(torch.stack(xs)))
t.size()

torch.Size([8, 512, 42])

In [0]:
ht = V(torch.zeros(1, 512,n_hidden))
outp, hn = m.rnn(t, ht)
outp.size(), hn.size()

(torch.Size([8, 512, 256]), torch.Size([1, 512, 256]))

In [0]:
t = m(*V(xs)); t.size()


torch.Size([512, 85])

In [0]:
fit(m, md, 4, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.872218   1.833931  
    1      1.667441   1.670125  
    2      1.591687   1.598041  
    3      1.521804   1.547058  



[array([1.54706])]

In [0]:
set_lrs(opt, 1e-4)


In [0]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.469901   1.508844  
    1      1.460274   1.503637  



[array([1.50364])]

In [0]:
def get_next(inp):
    """Return the single most probable next character for the string `inp`.

    Uses the notebook-level model `m` and the `char_indices` / `chars` tables.
    """
    positions = [char_indices[ch] for ch in inp]
    output = m(*VV(T(np.array(positions))))
    winner = np.argmax(to_np(output))
    return chars[winner]

In [0]:
get_next('for thos')


'e'

In [0]:
def get_next_n(inp, n):
    """Extend `inp` by `n` characters, predicting one character at a time.

    After each prediction the context window drops its first character and
    gains the predicted one, so its length stays constant.
    """
    generated, window = inp, inp
    for _ in range(n):
        nxt = get_next(window)
        generated = generated + nxt
        window = window[1:] + nxt
    return generated

In [0]:
get_next_n('for thos', 40)

'for those of the same the same the same the same'

In [0]:
# Non-overlapping input windows of cs characters, starting at 0, cs, 2*cs, ...
c_in_dat = [idx[start:start + cs] for start in range(0, len(idx) - cs - 1, cs)]

In [0]:
# Target windows: the same windows shifted one character to the right (start at 1, 1+cs, ...).
c_out_dat = [idx[start:start + cs] for start in range(1, len(idx) - cs, cs)]

In [0]:
xs = np.stack(c_in_dat)
xs.shape

(75111, 8)

In [0]:
ys = np.stack(c_out_dat)
ys.shape

(75111, 8)

In [0]:
xs[:cs,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [ 1,  1, 43, 45, 40, 40, 39, 43],
       [33, 38, 31,  2, 73, 61, 54, 73],
       [ 2, 44, 71, 74, 73, 61,  2, 62],
       [72,  2, 54,  2, 76, 68, 66, 54],
       [67,  9,  9, 76, 61, 54, 73,  2],
       [73, 61, 58, 67, 24,  2, 33, 72],
       [ 2, 73, 61, 58, 71, 58,  2, 67]])

In [0]:
ys[:cs,:cs]

array([[42, 29, 30, 25, 27, 29,  1,  1],
       [ 1, 43, 45, 40, 40, 39, 43, 33],
       [38, 31,  2, 73, 61, 54, 73,  2],
       [44, 71, 74, 73, 61,  2, 62, 72],
       [ 2, 54,  2, 76, 68, 66, 54, 67],
       [ 9,  9, 76, 61, 54, 73,  2, 73],
       [61, 58, 67, 24,  2, 33, 72,  2],
       [73, 61, 58, 71, 58,  2, 67, 68]])

In [0]:
val_idx = get_cv_idxs(len(xs)-cs-1)
md = ColumnarModelData.from_arrays('.', val_idx, xs, ys, bs=512)

In [0]:
class CharSeqRnn(nn.Module):
    """nn.RNN model that emits a prediction after every input character, not just the last."""

    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Take one index tensor per character; return log-probabilities for every step."""
        batch_size = cs[0].size(0)
        initial_hidden = V(torch.zeros(1, batch_size, n_hidden))
        embedded = self.e(torch.stack(cs))
        activations, _ = self.rnn(embedded, initial_hidden)
        # All steps go through the output layer (no [-1] selection here).
        return F.log_softmax(self.l_out(activations), dim=-1)

In [0]:
m = CharSeqRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
it = iter(md.trn_dl)
*xst,yt = next(it)

In [0]:
def nll_loss_seq(inp, targ):
    """Negative log-likelihood for per-step predictions.

    `inp` must be 3-D; its last axis holds the per-class log-probabilities.
    `targ` has its first two axes swapped relative to `inp`, so it is
    transposed before both are flattened and handed to F.nll_loss.
    """
    _, _, n_classes = inp.size()
    flat_targets = targ.transpose(0, 1).contiguous().view(-1)
    flat_log_probs = inp.view(-1, n_classes)
    return F.nll_loss(flat_log_probs, flat_targets)

In [0]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.589396   2.409524  
    1      2.285837   2.194839  
    2      2.133869   2.083976  
    3      2.04421    2.009334  



[array([2.00933])]

In [0]:
set_lrs(opt, 1e-4)

In [0]:
fit(m, md, 1, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.99138    1.995213  



[array([1.99521])]

In [0]:
m = CharSeqRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-2)

In [0]:
m.rnn.weight_hh_l0.data.copy_(torch.eye(n_hidden))


    1     0     0  ...      0     0     0
    0     1     0  ...      0     0     0
    0     0     1  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      1     0     0
    0     0     0  ...      0     1     0
    0     0     0  ...      0     0     1
[torch.cuda.FloatTensor of size 256x256 (GPU 0)]

In [0]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.299325   2.1303    
    1      2.035185   1.971138  
    2      1.933534   1.92611   
    3      1.880462   1.88562   



[array([1.88562])]

In [0]:
set_lrs(opt, 1e-3)

In [0]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.784481   1.809031  
    1      1.770662   1.80185   
    2      1.762331   1.796393  
    3      1.757577   1.792122  



[array([1.79212])]

In [0]:
from torchtext import vocab, data

from fastai.nlp import *
from fastai.lm_rnn import *

PATH='data/nietzsche/'

TRN_PATH = 'trn/'
VAL_PATH = 'val/'
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

# Note: The student needs to practice her shell skills and prepare her own dataset before proceeding:
# - trn/train.txt (first 80% of the lines of nietzsche.txt)
# - val/val.txt (last 20% of the lines of nietzsche.txt)

%ls {PATH}

nietzsche.txt


In [0]:
!mkdir data/nietzsche/trn/

In [0]:
!mkdir data/nietzsche/val/

In [0]:
!wc -l train.txt

9934 train.txt


In [0]:
9934 * 0.8

7947.200000000001

In [0]:
train_len = len(text) * 0.8
train_len

480714.4

In [0]:
! head -n 7947 data/nietzsche/nietzsche.txt > train.txt
! tail -n +7947 data/nietzsche/nietzsche.txt > val.txt

In [0]:
!wc val.txt

  1988  18595 110113 val.txt


In [0]:
!wc train.txt

  7947  80527 490861 train.txt


In [0]:
!mv train.txt data/nietzsche/trn/
!mv val.txt data/nietzsche/val/

In [0]:
%ls {PATH}trn

train.txt


In [0]:
# Load the training split; the context manager closes the handle, and the
# encoding is stated explicitly instead of depending on the platform default.
with open('data/nietzsche/trn/train.txt', encoding='utf-8') as trn_file:
    text_trn = trn_file.read()
print('corpus length:', len(text_trn))

corpus length: 490854


In [0]:
# torchtext field configured with tokenize=list, i.e. each string is split into
# its individual characters; lower=True is passed as well.
TEXT = data.Field(lower=True, tokenize=list)
# bs and bptt are forwarded to the data loader below.  n_fac is the embedding
# width and n_hidden the hidden width read by the model classes that follow
# (n_hidden is looked up as a notebook-level global).
bs=64; bptt=8; n_fac=42; n_hidden=256

# The validation folder is also passed as the test set.
FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)
# NOTE(review): min_freq=3 presumably drops characters seen fewer than 3 times
# from the vocabulary — confirm against torchtext's build_vocab.
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=3)

# md.nt is what the cells below pass to the models as vocab_size.
len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

(942, 55, 1, 482908)

In [0]:
class CharSeqStatefulRnn(nn.Module):
    """nn.RNN character model that keeps its hidden state between forward calls.

    Args:
        vocab_size: number of embedding rows and of output classes.
        n_fac: embedding width.
        bs: batch size used to shape the initial hidden state.
    """

    def __init__(self, vocab_size, n_fac, bs):
        # Initialise nn.Module before assigning any attribute, as the sibling
        # stateful models in this notebook do.
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        """Return log-probabilities flattened to (-1, vocab_size)."""
        bs = cs[0].size(0)
        # Rebuild the stored state when the incoming batch size differs from it.
        if self.h.size(1) != bs: self.init_hidden(bs)
        outp,h = self.rnn(self.e(cs), self.h)
        # NOTE(review): repackage_var presumably detaches the state from the
        # previous batch's graph — confirm against fastai.
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset the stored hidden state to zeros of shape (1, bs, n_hidden)."""
        self.h = V(torch.zeros(1, bs, n_hidden))

In [0]:
# Size the initial hidden state with the loader's batch size `bs`; with a
# different value forward() discards it and rebuilds it on the first batch.
m = CharSeqStatefulRnn(md.nt, n_fac, bs).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.890235   1.856715  
    1      1.712282   1.698824  
    2      1.636661   1.645656  
    3      1.583917   1.602187  



[array([1.60219])]

In [0]:
set_lrs(opt, 1e-4)
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.498253   1.558622  
    1      1.503319   1.552588  
    2      1.503427   1.550262  
    3      1.501137   1.545472  



[array([1.54547])]

In [0]:
def RNNCell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """One tanh RNN step: tanh(linear(input) + linear(hidden)).

    w_ih / b_ih are the weight and bias applied to `input`; w_hh / b_hh those
    applied to `hidden`.
    """
    from_input = F.linear(input, w_ih, b_ih)
    from_hidden = F.linear(hidden, w_hh, b_hh)
    return F.tanh(from_input + from_hidden)

In [0]:
class CharSeqStatefulRnn2(nn.Module):
    """Stateful character model that steps an nn.RNNCell manually over the sequence.

    Args:
        vocab_size: number of embedding rows and of output classes.
        n_fac: embedding width.
        bs: batch size used to shape the initial hidden state.
    """

    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNNCell(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        """Return log-probabilities flattened to (-1, vocab_size)."""
        bs = cs[0].size(0)
        # The cell's hidden state is 2-D (batch, hidden), so batch is axis 0.
        if self.h.size(0) != bs: self.init_hidden(bs)
        outp = []
        o = self.h
        for c in cs: 
            o = self.rnn(self.e(c), o)
            outp.append(o)
        outp = self.l_out(torch.stack(outp))
        # Carry the last step's state over to the next call.
        self.h = repackage_var(o)
        return F.log_softmax(outp, dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset the stored hidden state to zeros of shape (bs, n_hidden).

        nn.RNNCell takes a (batch, hidden) state; the extra leading layer axis
        that nn.RNN uses only worked here through implicit broadcasting.
        """
        self.h = V(torch.zeros(bs, n_hidden))

In [0]:
# Size the initial hidden state with the loader's batch size `bs`; with a
# different value forward() discards it and rebuilds it on the first batch.
m = CharSeqStatefulRnn2(md.nt, n_fac, bs).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.896788   1.870515  
    1      1.718992   1.716694  
    2      1.631642   1.646437  
    3      1.575524   1.602368  



[array([1.60237])]

In [0]:
class CharSeqStatefulGRU(nn.Module):
    """Stateful character model built on nn.GRU; the hidden state survives between calls."""

    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.GRU(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)

    def forward(self, cs):
        """Return log-probabilities flattened to (-1, vocab_size)."""
        batch_size = cs[0].size(0)
        # A batch of a different size cannot reuse the stored state.
        if self.h.size(1) != batch_size:
            self.init_hidden(batch_size)
        activations, last_hidden = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(last_hidden)
        log_probs = F.log_softmax(self.l_out(activations), dim=-1)
        return log_probs.view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Reset the stored hidden state to zeros of shape (1, bs, n_hidden)."""
        self.h = V(torch.zeros(1, bs, n_hidden))

In [0]:
# From the pytorch source code - for reference
def GRUCell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """One GRU step written out with functional ops.

    Both linear projections are split into three equal chunks along axis 1:
    reset gate, input gate and candidate ("new") state, in that order.
    """
    in_reset, in_gate, in_cand = F.linear(input, w_ih, b_ih).chunk(3, 1)
    hid_reset, hid_gate, hid_cand = F.linear(hidden, w_hh, b_hh).chunk(3, 1)

    reset = F.sigmoid(in_reset + hid_reset)
    gate = F.sigmoid(in_gate + hid_gate)
    # The reset gate scales only the hidden contribution to the candidate.
    candidate = F.tanh(in_cand + reset * hid_cand)
    return candidate + gate * (hidden - candidate)

In [0]:
# Size the initial hidden state with the loader's batch size `bs`; with a
# different value forward() discards it and rebuilds it on the first batch.
m = CharSeqStatefulGRU(md.nt, n_fac, bs).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [0]:
fit(m, md, 6, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.784428   1.751474  
    1      1.584774   1.593751  
    2      1.506882   1.526864  
    3      1.46045    1.501538  
    4      1.408805   1.472825  
    5      1.382335   1.462792  



[array([1.46279])]

In [0]:
set_lrs(opt, 1e-4)

In [0]:
fit(m, md, 3, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=3), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.295215   1.43029   
    1      1.300132   1.426604  
    2      1.296326   1.424891  



[array([1.42489])]

In [0]:
from fastai import sgdr

n_hidden=512

In [0]:
class CharSeqStatefulLSTM(nn.Module):
    """Stateful character model on a multi-layer nn.LSTM (dropout 0.5 is passed to it).

    The stored state `self.h` is a pair of tensors, each of shape
    (nl, bs, n_hidden), where `nl` is the number of LSTM layers.
    """

    def __init__(self, vocab_size, n_fac, bs, nl):
        super().__init__()
        self.vocab_size = vocab_size
        self.nl = nl
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.LSTM(n_fac, n_hidden, nl, dropout=0.5)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)

    def forward(self, cs):
        """Return log-probabilities flattened to (-1, vocab_size)."""
        batch_size = cs[0].size(0)
        # Compare against the first tensor of the state pair.
        if self.h[0].size(1) != batch_size:
            self.init_hidden(batch_size)
        activations, new_state = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(new_state)
        log_probs = F.log_softmax(self.l_out(activations), dim=-1)
        return log_probs.view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Reset the stored state to a pair of zero tensors."""
        first = V(torch.zeros(self.nl, bs, n_hidden))
        second = V(torch.zeros(self.nl, bs, n_hidden))
        self.h = (first, second)

In [0]:
# Two LSTM layers.  Size the initial state with the loader's batch size `bs`;
# with a different value forward() discards it and rebuilds it on the first batch.
m = CharSeqStatefulLSTM(md.nt, n_fac, bs, 2).cuda()
lo = LayerOptimizer(optim.Adam, m, 1e-2, 1e-5)

In [0]:
os.makedirs(f'{PATH}models', exist_ok=True)

In [0]:
fit(m, md, 2, lo.opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.887493   1.798985  
    1      1.749605   1.671839  



[array([1.67184])]

In [0]:
# Hook passed as on_cycle_end: saves the model under models/cyc_<cycle>.
on_end = lambda sched, cycle: save_model(m, f'{PATH}models/cyc_{cycle}')
# NOTE(review): CosAnneal appears to take the layer optimizer, the number of
# batches per epoch and a cycle-length multiplier — confirm against fastai.sgdr.
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2, on_cycle_end=on_end)]
# 2**4-1 = 15 epochs.
fit(m, md, 2**4-1, lo.opt, F.nll_loss, callbacks=cb)

HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.586663   1.526909  
    1      1.625139   1.549739  
    2      1.51248    1.465625  
    3      1.628218   1.560346  
    4      1.564948   1.503343  
    5      1.491297   1.448197  
    6      1.438112   1.414791  
    7      1.601538   1.539628  
    8      1.577365   1.509879  
    9      1.550291   1.499286  
    10     1.514749   1.470163  
    11     1.47566    1.436283  
    12     1.425098   1.409175  
    13     1.395522   1.386713  
    14     1.372973   1.372384  



[array([1.37238])]

In [0]:
# Same hook and callback set-up as the previous training cell, rebuilt so the
# schedule starts afresh for this longer run.
on_end = lambda sched, cycle: save_model(m, f'{PATH}models/cyc_{cycle}')
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2, on_cycle_end=on_end)]
# 2**6-1 = 63 epochs.
fit(m, md, 2**6-1, lo.opt, F.nll_loss, callbacks=cb)

HBox(children=(IntProgress(value=0, description='Epoch', max=63), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.360732   1.370317  
    1      1.366003   1.368599  
    2      1.358362   1.367812  
    3      1.363797   1.366469  
    4      1.351339   1.365582  
    5      1.353955   1.362454  
    6      1.346787   1.362125  
    7      1.351259   1.362157  
    8      1.348548   1.359618  
    9      1.33526    1.355719  
    10     1.33044    1.354405  
    11     1.328665   1.351176  
    12     1.315284   1.349137  
    13     1.319213   1.348558  
    14     1.317282   1.348405  
    15     1.320339   1.351857  
    16     1.316967   1.349683  
    17     1.312635   1.349423  
    18     1.306254   1.347713  
    19     1.297986   1.344211  
    20     1.29025    1.342989  
    21     1.285528   1.340836  
    22     1.281253   1.341076  
    23     1.274683   1.340627  
    24     1.270366   1.33946   
    25     1.26462    1.33913   
    26     1.261117   1.339098  
    27     1.257569   1.338953  
    28     1.257778   1.339335  
    29   

[array([1.35987])]

In [0]:
def get_next(inp):
    """Sample the next character after `inp` from the model's predicted distribution.

    Unlike the earlier argmax versions of this function, the character is
    drawn with torch.multinomial from the exponentiated last output row, so
    repeated calls can return different characters.
    """
    token_ids = TEXT.numericalize(inp)
    log_probs = m(VV(token_ids.transpose(0,1)))
    # Turn the last row of log-probabilities into probabilities and draw one index.
    drawn = torch.multinomial(log_probs[-1].exp(), 1)
    return TEXT.vocab.itos[to_np(drawn)[0]]

In [0]:
def get_next_n(inp, n):
    """Generate `n` more characters after `inp` and return the whole string.

    The context handed to get_next slides forward by one character per step.
    """
    text_so_far = inp
    context = inp
    step = 0
    while step < n:
        new_char = get_next(context)
        text_so_far = text_so_far + new_char
        context = context[1:] + new_char
        step += 1
    return text_so_far

In [0]:
print(get_next_n('for thos', 400))

for those more value to the criers in. some times to honouring innocent ow the "for the "most inuljust, in a far as the strengthlyaids, easily--what. duces hut newssmagnerar. in all than instance--in follow-"was former diskindri. laid, clumsy: ye had distortenni, and expressing in mitto's short!_ we crusled hitherto, is a view, of though had so christians of an attimes when a music have not, appears as we


In [0]:
print(get_next('jack is a a stupi'))

d
