In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

# Create RNN "from scratch"

# Data
Reuse the Nietzsche text corpus from the lecture example.

In [2]:
# Directory that holds the corpus. File paths are built by plain string
# concatenation (f'{PATH}nietzsche.txt'), so the trailing slash is required.
PATH='data/nietzsche/'

In [3]:
# Fetch the corpus into PATH, then read the whole file into memory as one string.
corpus_path = f'{PATH}nietzsche.txt'
get_data("https://s3.amazonaws.com/text-datasets/nietzsche.txt", corpus_path)
# The context manager closes the file handle deterministically; a bare
# open(...).read() leaves that to the garbage collector.
with open(corpus_path) as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

corpus length: 600901


In [4]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(text))
# One extra slot is counted for the "\0" entry that is inserted into `chars`
# right after this cell.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 86


In [5]:
# Reserve index 0 for the "\0" character. The guard keeps this cell idempotent:
# without it, re-running the cell would insert a second "\0" and shift every
# character index by one, silently invalidating the lookup tables built later.
if chars[:1] != ["\0"]:
    chars.insert(0, "\0")

# Preview the vocabulary, skipping the "\0" entry and the last six characters.
''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz'

In [6]:
# Two lookup tables over the vocabulary: position -> character, and its inverse.
indices_char = dict(enumerate(chars))
char_indices = {ch: pos for pos, ch in indices_char.items()}

In [7]:
# Encode the whole corpus as a list of vocabulary indices, one per character.
idx = list(map(char_indices.__getitem__, text))

# Peek at the first ten encoded characters.
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [8]:
# Round-trip check: decode the first 70 indices back into text.
''.join(map(indices_char.__getitem__, idx[:70]))

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

In [9]:
# Context size: number of consecutive characters in each input window, and
# therefore the number of times the RNN loop runs for one training sample.
cs=8

In [10]:
# Sliding windows over the corpus: row j holds idx[j] .. idx[j+cs-1].
c_in_dat = [idx[j:j+cs] for j in range(len(idx)-cs)]

In [11]:
# Target for window j is the character right after it, idx[j+cs]; taken over
# all windows that is simply the corpus with its first cs entries dropped.
c_out_dat = idx[cs:]

In [12]:
# Stack the windows into a 2-D array: one row per window, one column per
# character position inside the window.
xs = np.stack(c_in_dat, axis=0)

In [13]:
# Expected shape: (len(idx) - cs, cs).
xs.shape

(600893, 8)

In [14]:
# Targets as a 1-D array aligned row-for-row with xs.
y = np.stack(c_out_dat)

In [15]:
# First cs windows: each row is the previous row shifted left by one character.
xs[:cs,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

In [16]:
# Matching targets: y[j] is the character that follows window xs[j].
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

In [17]:
# Row indices held out for validation.
# NOTE(review): xs has len(idx)-cs rows; assuming get_cv_idxs(n) samples from
# range(n), the extra -1 only means the final row can never be picked for
# validation. Harmless, and left as-is so the split does not change; confirm.
val_idx = get_cv_idxs(len(idx)-cs-1)

In [18]:
# Wrap the arrays in a fastai model-data object with batch size 512; val_idx
# selects the validation rows. Presumably each of the cs columns of xs reaches
# the model as a separate positional argument (forward takes *chars); verify
# against ColumnarModelData.
md = ColumnarModelData.from_arrays('.', val_idx, xs, y, bs=512)

# Model

In [21]:
class RNN1(nn.Module):
    """
    Character-level RNN with 1 hidden layer, unrolled in a Python loop.

    Data flow for each input character (read bottom to top):

    output (next char) w/ log-softmax   <- computed once, after the last char
    ^
    |
    hidden (linear + tanh)
    ^
    |
    inputs (concat + linear + ReLU) <-  prev hidden
    ^
    |
    embedding
    ^
    |
    Char

    Args:
        vocab_size: number of distinct character indices; sizes both the
            embedding table and the output layer.
        n_hidden: width of the hidden state.
        n_embedding: size of each character embedding vector.
    """
    def __init__(self, vocab_size, n_hidden, n_embedding):
        super().__init__()
        self.n_hidden = n_hidden
        self.embed = nn.Embedding(vocab_size, n_embedding)
        # Consumes the embedding concatenated with the previous hidden state.
        self.l_input = nn.Linear(n_embedding + n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_output = nn.Linear(n_hidden, vocab_size)

    def forward(self, *chars):
        """
        Run the recurrence over `chars` and score the next character.

        Args:
            *chars: one tensor of character indices per time step; the first
                dimension of chars[0] is taken as the batch size.

        Returns:
            Log-probabilities over the vocabulary, one row per batch element.
        """
        batch_size = chars[0].size(0)
        # Start from an all-zero hidden state placed on the same kind of device
        # as the input. The original unconditional .cuda() call made the module
        # unusable with CPU inputs; behaviour with GPU inputs is unchanged.
        h = torch.zeros(batch_size, self.n_hidden)
        if chars[0].is_cuda:
            h = h.cuda()
        h = V(h)

        for c in chars:
            embedded = self.embed(c)
            # Feed the current character together with the running hidden state.
            inputs = torch.cat((embedded, h), 1)
            h = F.relu(self.l_input(inputs))
            h = F.tanh(self.l_hidden(h))

        # Only the hidden state after the final character is used for the output.
        return F.log_softmax(self.l_output(h), dim=-1)

## Test that it works

In [22]:
# Build the model on the GPU: 256 hidden units, 42-dimensional embeddings.
m = RNN1(vocab_size, n_hidden=256, n_embedding=42).cuda()
# Adam over all model parameters, starting at a learning rate of 1e-3.
opt = optim.Adam(m.parameters(), lr=1e-3)

In [23]:
# Pull one training batch and push it through the untrained model.
# The inputs are unpacked into `xb` rather than `xs`, so the NumPy array `xs`
# that feeds ColumnarModelData.from_arrays is not overwritten and the earlier
# cells stay re-runnable.
it = iter(md.trn_dl)
*xb, yt = next(it)
t = m(*V(xb))

In [24]:
# Log-probabilities from the forward pass: one row per sample in the batch,
# one column per vocabulary entry.
t

Variable containing:
-4.4762 -4.3615 -4.4217  ...  -4.4022 -4.4969 -4.4334
-4.4953 -4.4275 -4.4524  ...  -4.4312 -4.5098 -4.4076
-4.4809 -4.4701 -4.4119  ...  -4.3772 -4.4848 -4.4577
          ...             ⋱             ...          
-4.4849 -4.4832 -4.4230  ...  -4.4017 -4.4837 -4.4599
-4.4942 -4.3981 -4.5547  ...  -4.4781 -4.4696 -4.5314
-4.4281 -4.3671 -4.4615  ...  -4.4724 -4.4427 -4.5077
[torch.cuda.FloatTensor of size 512x86 (GPU 0)]

# Train

In [25]:
# Train for 1 epoch. The loss is negative log-likelihood because the model
# already emits log-probabilities (log_softmax).
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                                                                                         
    0      1.874591   1.855908  



[array([1.85591])]

In [26]:
# Lower the optimizer's learning rate from 1e-3 to 1e-4 before training further.
set_lrs(opt, 1e-4)

In [27]:
# One more epoch with the same model and optimizer, continuing from the
# weights learned so far.
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                                                                                         
    0      1.776207   1.770816  



[array([1.77082])]

# Qualitative evaluation

In [28]:
def get_next(inp):
    """Predict the character most likely to follow the string `inp`.

    Each character of `inp` is mapped to its vocabulary index and passed to the
    global model `m` as a separate argument. The vocabulary entry with the
    highest score is returned (greedy choice, no sampling).
    """
    encoded = np.array([char_indices[ch] for ch in inp])
    log_probs = m(*VV(T(encoded)))
    best = np.argmax(to_np(log_probs))
    return chars[best]

In [29]:
# Sanity check with an 8-character prompt, the same length as the training
# windows (cs=8).
get_next('for thos')

'e'

In [30]:
def get_next_n(inp, n):
    """Extend `inp` by `n` predicted characters and return the full string.

    A window of constant length slides over the text: after each prediction
    the oldest character is dropped and the new one appended, so the model
    always sees as many characters as the original prompt had.
    """
    window = inp
    generated = []
    for _ in range(n):
        nxt = get_next(window)
        generated.append(nxt)
        window = window[1:] + nxt
    return inp + ''.join(generated)

In [31]:
# Generate 400 characters from an 8-character seed, always taking the single
# most likely next character.
print(get_next_n('for thos', 400))

for those the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting th


In [32]:
# Same with a 10-character seed. forward() loops over however many characters
# it receives, so the prompt does not have to be exactly cs characters long.
print(get_next_n('I am a fis', 400))

I am a fister and the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the sent


In [33]:
# A 9-character seed. Like the other prompts it falls into the same repeating
# phrase: get_next always returns the argmax, so once the sliding window
# repeats, the output cycles.
print(get_next_n('who is my', 400))

who is my the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the senting the
