https://github.com/fastai/courses/blob/master/deeplearning1/nbs/char-rnn.ipynb

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
%matplotlib inline
import importlib
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
import random
import time
import os
import pandas as pd
import csv
import math
import bcolz
import pickle
import re
import pathlib
import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

SOS_TOKEN = '<SOS>' # Start Of Word

# Data setup

In [3]:
# importing keras might cause problems with cudann version etc
# import keras # some good utils in here
# path = keras.utils.data_utils.get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")

In [4]:
path = '/home/ohu/.keras/datasets/nietzsche.txt'
text = open(path).read().lower()
path
print('corpus length:', len(text))

'/home/ohu/.keras/datasets/nietzsche.txt'

corpus length: 600893


In [5]:
!tail {path} -n5

not to be regarded as a pure type. Knowledge and science--as far as they
existed--and superiority to the rest of mankind by logical discipline
and training of the intellectual powers were insisted upon by the
Buddhists as essential to sanctity, just as they were denounced by the
christian world as the indications of sinfulness.

In [6]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
''.join(chars)

total chars: 57


'\n !"\'(),-.0123456789:;=?[]_abcdefghijklmnopqrstuvwxyzäæéë'

In [7]:
char_indices = dict((c, i) for i, c in enumerate(chars))
char_indices['\n']

0

# Utils

In [8]:
def string_to_tensor(in_str, chars_index=char_indices, as_variable=True):
    """Onehot encoded tensor of string"""
    tensor_length = len(in_str)
    tensor = torch.zeros(1, tensor_length, len(chars_index))
    for li, letter in enumerate(in_str):
        tensor[0, li, chars_index[letter]] = 1
    if as_variable:
        tensor = Variable(tensor).cuda()
    return tensor

string_to_tensor('hello', as_variable=False).size()

torch.Size([1, 5, 57])

In [9]:
def time_since(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

# Char Rnn

In [255]:
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(CharRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.layers = 1
        
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.lin_output = nn.Linear(hidden_size, input_size)
        
        # self.log_softmax = nn.LogSoftmax(dim=1) # current releas doesn't yet support dimensions
        
    def forward(self, input_char_vs, hidden = None):
        batch_size = input_char_vs.size()[0]
        if hidden is None:
            hidden = self.init_hidden(batch_size)
        
        rnn_outputs, hidden = self.rnn(input_char_vs, hidden)
        
        outputs = self.lin_output(rnn_outputs)
        #outputs = self.log_softmax(outputs)
        
        #outputs = F.log_softmax(outputs[0])

        return outputs, hidden

    def init_hidden(self, batch_size=1):
        return Variable(torch.zeros(self.layers, batch_size, self.hidden_size)).cuda()
    
model = CharRNN(len(chars), 512).cuda()
model

CharRNN (
  (rnn): GRU(57, 512, batch_first=True)
  (lin_output): Linear (512 -> 57)
)

In [100]:
#tmp = Variable(torch.FloatTensor([2]).view(1,1,-1)).cuda()
#tmp = Variable(string_to_tensor('hello')).cuda()
tmp = model(string_to_tensor('hello'))
len(tmp[0][0])
tmp[0].size()
tmp[1].size()
#chars[tmp[0].topk(1)[1].data[0][0]]

5

torch.Size([1, 5, 57])

torch.Size([1, 1, 512])

In [102]:
tmp = torch.cat((string_to_tensor('hello'), string_to_tensor('hello')))
#tmp.size()

tmp = model(tmp)
tmp[0].size()
tmp[1].size()

torch.Size([2, 5, 57])

torch.Size([1, 2, 512])

# Eval

In [310]:
def print_example(iters=320, choice=True):
    seed_string="ethics is a basic foundation of all that"
    for i in range(iters):
        #x=np.array([char_indices[c] for c in seed_string[-40:]])[np.newaxis,:]
        x = string_to_tensor(seed_string)
        output, hidden = model(x)
        output = output[0, -1]
        if choice:
            #next_char = np.random.choice(chars, p=F.softmax(output)[0].data.cpu().numpy())
            next_char = np.random.choice(chars, p=F.softmax(output).data.cpu().numpy())
        else:
            next_char_idx = output.topk(1)[1].data[0] # [0]
            next_char = chars[next_char_idx]
        # return next_char
        seed_string = seed_string + next_char
    print(seed_string)
#tmp = print_example(choice=False)
tmp = 200
print_example(iters=tmp, choice=True)
print('------------------')
print_example(iters=tmp, choice=False)

ethics is a basic foundation of all that is onct them among expedied of morality and saces y un in tho
go above all eart, even in which us
for asmost fordentance wey
for young not or as a philosophers wi hellogs become pleasure! but littlas
------------------
ethics is a basic foundation of all that is the most definite the sense of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most su


In [315]:
# This is faster that uses the existing state untill the end
def print_example(iters=320, choice=True):
    seed_string="ethics is a basic foundation of all that"
    hidden = model.init_hidden()
    output, hidden = model(string_to_tensor(seed_string), hidden)
    
    for i in range(iters):
        output = output[0, -1]
        if choice:
            next_char = np.random.choice(chars, p=F.softmax(output).data.cpu().numpy())
        else:
            next_char_idx = output.topk(1)[1].data[0] 
            next_char = chars[next_char_idx]
        
        seed_string = seed_string + next_char
        output, hidden = model(string_to_tensor(next_char), hidden)
    print(seed_string)
tmp = 200
print_example(iters=tmp, choice=True)
print('------------------')
print_example(iters=tmp, choice=False)

ethics is a basic foundation of all that_ for servon--and wither one may make
fal courage, the "bellow and therevore in the exceptifeal unfordunantal courses

f or that even such focciet is weloble evad. "132. why nayely in
iclart inta disf
------------------
ethics is a basic foundation of all that is the most definite the sense of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most suffering of the most su


# Samples

In [106]:
def get_random_sample_string(length=41):
    sample_place = random.randint(0, len(text)-length-1)
    sample = text[sample_place:sample_place+length]
    return sample
get_random_sample_string()

'ng on this possibility, i\nhappen to becom'

In [107]:
sample_size = 50000

sample_sentence_size = 40
sample_data = torch.zeros(sample_size, sample_sentence_size, len(chars)) # .cuda()
#sample_target = torch.zeros((sample_size, sample_sentence_size, 1), torch.LongTensor)
sample_target = torch.LongTensor(sample_size, sample_sentence_size).zero_() # .cuda()
for i in range(sample_size):
    sample = get_random_sample_string(sample_sentence_size+1)
    sample = [char_indices[c] for c in sample]
    for j in range(sample_sentence_size):
        sample_data[i][j][sample[j]] = 1
        sample_target[i][j] = sample[j+1]

In [108]:
i = 5
j = 10
sample_data[i].topk(1)[1][j][0] == sample_target[i][j-1]
''.join([chars[c[0]] for c in sample_data[i].topk(1)[1]])

True

'esentful disdain is compatible with this'

In [372]:
clip_parameters_value = 0.25

def train_single(optimizer, loss_function, batch_size=64):
    model.zero_grad()
    loss = 0
    output_lines = []
    
    sample_i = random.randint(0, len(sample_data)-1-batch_size)
    #x = sample_data[sample_i].view(1,40,len(chars))
    #y = sample_target[sample_i]
    
    x = sample_data[sample_i:sample_i+batch_size].view(batch_size,40,len(chars))
    y = sample_target[sample_i:sample_i+batch_size]
    
    target = Variable(y).cuda()

    hidden = model.init_hidden(batch_size)
    outputs, hidden = model(Variable(x).cuda(), hidden)

    for i in range(len(outputs)):
        loss += loss_function(outputs[i], target[i])
        
        output_lines.append(''.join([chars[c.data[0]] for c in outputs[i].topk(1)[1]]))
    
    if clip_parameters_value:
        torch.nn.utils.clip_grad_norm(model.parameters(), clip_parameters_value)
        
    loss.backward()
    optimizer.step()

    return output_lines, loss.data[0]/batch_size

tmp = train_single(torch.optim.Adam(model.parameters(), lr=0.0001), torch.nn.CrossEntropyLoss())
tmp[0][0:2]
tmp[1]

['-et tn is the peank wpor on timan lature',
 'eni y the siegting stomowisrctures of sa']

1.145879864692688

In [370]:
%%timeit
train_single(torch.optim.Adam(model.parameters(), lr=0.001), torch.nn.CrossEntropyLoss(), batch_size=64)

62.3 ms ± 497 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [293]:
def train_iterations(n_iters=100000, lr=0.001, batch_size=64,
                     print_every=10000, plot_every=1000):
    
    def print_infos():
        print("{:>6d} {:>4.0%} ({:>8}) {:>7.3f}   | {:>6.2f}: {}".format(
          model_training.iterations, iteration/n_iters, time_since(start),
          current_loss/current_loss_iter, loss, result))
    
    
    global optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    start = time.time()
    model.train()

    current_loss = 0
    current_loss_iter = 1
    
    for iteration in range(1, n_iters + 1):
        model_training.iterations += 1
        
        #use_teacher_forcing = random.random() < teacher_forcing_ratio
        use_teacher_forcing = False
        
        #loss_function=nn.NLLLoss()
        loss_function = torch.nn.CrossEntropyLoss()
        result, loss = train_single(optimizer=optimizer, loss_function=loss_function, batch_size=batch_size)
        result = result[0]
        
        current_loss += loss
        current_loss_iter += 1

        # Print iter number, loss, name and guess
        if iteration % print_every == 0:
            print_infos()

        # Add current loss avg to list of losses
        if iteration % plot_every == 0:
            model_training.losses.append(current_loss / plot_every)
            model_training.learning_rates.append(lr)
            current_loss = 0
            current_loss_iter = 1

    print_infos()


In [294]:
from pytorch_utils_oh_3 import ModelTraining
MODEL_SAVE_PATH = 'char_rnn_fast_ai_testing_2'

In [295]:
model = CharRNN(len(chars), 512).cuda()
model
model_training = ModelTraining(MODEL_SAVE_PATH, [model])

CharRNN (
  (rnn): GRU(57, 512, batch_first=True)
  (lin_output): Linear (512 -> 57)
)

Save path: data/models/char_rnn_fast_ai_testing_2


In [296]:
train_iterations(n_iters=10, print_every=5)

     5  50% (   0m 0s)   3.271   |   3.78:                                         
    10 100% (   0m 0s)   3.307   |   3.25: oo      eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee
    10 100% (   0m 0s)   3.307   |   3.25: oo      eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee


In [297]:
train_iterations(n_iters=10000//64, print_every=1000//64)

    25  10% (   0m 0s)   2.907   |   3.06: o                                       
    40  19% (   0m 1s)   2.959   |   2.97: e                                       
    55  29% (   0m 2s)   2.946   |   2.88: ne  e     e                     e  e    
    70  38% (   0m 3s)   2.911   |   2.73:     thtn  tn  te  th   te t  an ton te  
    85  48% (   0m 4s)   2.861   |   2.57: en  n tn    ao      aoeente    an the  t
   100  58% (   0m 5s)   2.814   |   2.56:   ethe tnd es  e tn to  en t  aan  tane 
   115  67% (   0m 6s)   2.768   |   2.45: hrn tor  onn e tnd ahr    theneetne e to
   130  77% (   0m 7s)   2.726   |   2.44: d n e  tore       ao  tnte t eng ah   -o
   145  87% (   0m 8s)   2.689   |   2.38: etereeth   totte n thet onteet to heng  
   160  96% (   0m 9s)   2.656   |   2.35: n terlltn ohete ahe senn  af trtere td a
   166 100% (   0m 9s)   2.644   |   2.29: y   tn on anlen  ahe serherstf thn  re a


In [298]:
print_example()

ethics is a basic foundation of all thatuad: of al, whall hursirligalio
 hit o ss,uce they venxorerinny
-fam ndevysathian nfermiblit somlg co

kar dily us, eppuili?s in thel, anl couk hite anst--in sur curonelila09 
he erethe her ersgile attofor soneis
;hof fhashe phitho tyo
ngomin cndld -frened
stidad thesend. hremall--fad no the kimang laent igrtgurothest 


In [299]:
train_iterations(n_iters=10000//64, print_every=1000//64)

   181  10% (   0m 0s)   2.216   |   2.34: n ao     i an anle s  aoin    ahe setnae
   196  19% (   0m 1s)   2.265   |   2.30:  e   an the shree th thee etnd af tn on 
   211  29% (   0m 2s)   2.276   |   2.30: hhetg  reeean  rtertth   nn aoreue  et t
   226  38% (   0m 3s)   2.271   |   2.23:  e rn ohree  the shree tnlrli   y ton  o
   241  48% (   0m 4s)   2.266   |   2.24: hhrl  ahe  tete toaerl n toueniin ohe so
   256  58% (   0m 5s)   2.260   |   2.23: nd aenh yng    an tn nt  ahir  er aete t
   271  67% (   0m 6s)   2.251   |   2.13: y tn enes sn  tnd ahet theat oor     the
   286  77% (   0m 7s)   2.243   |   2.15:  thtn sg ond aocheng  ahe sertiretg ond 
   301  87% (   0m 8s)   2.234   |   2.15: hor nysn oioh  trperriohet tr  r af e  a
   316  96% (   0m 9s)   2.225   |   2.14: e aiil ohth n     and tass e ng   an anp
   322 100% (   0m 9s)   2.222   |   2.11: e n    the  tase  ahriase  an thee  af t


In [300]:
print_example()

ethics is a basic foundation of all that iver bee there to prifing,r inforela indd jurebly cosy thit hat, neprequens bainy ong thy whens ho ppan whymenst fucl ying rescati, so--s!; tlitss ant pricarass of thin thes; one mond
bee enond sichem: stalld itsirt for ene belabica by somacing aveul of were-thee mentape kever, ia cormel.--the beend

ov toall! and liv


In [301]:
train_iterations(n_iters=100000//64, print_every=10000//64)

   478  10% (   0m 9s)   2.047   |   1.97: s aeaoict oor tntana   -an is aesheahee 
   634  20% (  0m 18s)   1.974   |   1.87: n erent irrhon  af thme tnd tn eleeeon  
   790  30% (  0m 27s)   1.909   |   1.69: tf the somtratt oi certere  ah trecent -
   946  40% (  0m 37s)   1.853   |   1.62: thrlh nn  fn evidual  ond arriod   

0..
  1102  50% (  0m 46s)   1.806   |   1.55:  e tne soetifttnd siarht  ohine arsi on 
  1258  60% (  0m 55s)   1.764   |   1.55: tor tsar tudgtess aurange tuietr ond prr
  1414  70% (   1m 5s)   1.484   |   1.51:  theue wn elpouetid tersods af teapeayne
  1570  80% (  1m 14s)   1.471   |   1.50:  rle  tut in tact thuan in tvtential y a
  1726  90% (  1m 24s)   1.456   |   1.40: s ess  iuch aener t on ntpossible  ahi v
  1882 100% (  1m 33s)   1.441   |   1.40:   oorlidity of the mortsrated tonsertion
  1884 100% (  1m 34s)   1.441   |   1.33: h lf iash aare in thit ihe  aeaard ts a 


In [302]:
print_example()

ethics is a basic foundation of all that we remonse to the crually through id a out does at a sumptits to which he have--it promate, a more rightatoo or the reflect of the wirld: the
weazer, of the tendence of delicate, and that it is an ond an always represent
theme readily conecting. a
pessific tain of much and
waineratty!" if unfance of tifterediesticl. o


In [303]:
print_example()

ethics is a basic foundation of all that we gave our"dinacble nature and handly only who was
problem: alone, without firmly un. ouh
are and as above all, happoned from guist or geamss, how veould that even more
thought,--it is no trroughout pribe wottly existences. his famoruminy of
the feli my this. "hen, will advess is the severed, cholariss, tos ubous


1


In [304]:
print_example()

ethics is a basic foundation of all that overchumo away is utmone! mean borass.
the work (in the has lorg, wes, primidedy of instance he think, to the thing of life. im the goescribly,---have love been pridily. which homes is has never century soil, now
those for also salt the absors
every he you bad geast the same, wired undrasm how dothised,
the firmly one


In [374]:
# training time seconds per one sample
(60+34)/(1884*64)

0.0007795912951167729

In [317]:
train_iterations(n_iters=100000//64, print_every=10000//64)

  2040  10% (   0m 9s)   1.369   |   1.25: --t  i pagtory of the semelopment of the
  2196  20% (  0m 18s)   1.362   |   1.40: ... tveny sispetoengerson tore tnteid af
  2352  30% (  0m 28s)   1.351   |   1.34:   tf thich h wonsiience,toould be torpne
  2508  40% (  0m 37s)   1.342   |   1.31:  ci  aesself aor the same of tis sood na
  2664  50% (  0m 47s)   1.332   |   1.17:  oas tts wnlhough  tn sact, tt is ahe sa
  2820  60% (  0m 56s)   1.321   |   1.24: nh ihe  aive   tnroost of consrmplibor t
  2976  70% (   1m 7s)   1.239   |   1.20:  an tuience  tn thet the sew phychologis
  3132  80% (  1m 17s)   1.237   |   1.31:   in tll  ihinher tehind tvery oose is t
  3288  90% (  1m 27s)   1.230   |   1.24: iost eiaicrous axfectsof t lon wf tnowle
  3444 100% (  1m 37s)   1.221   |   1.20: etity th brt an  the r sorl andoar,of th
  3446 100% (  1m 37s)   1.221   |   1.22: euiesseon, (] that tt tlso aolled tion" 


In [318]:
print_example()

ethics is a basic foundation of all that of their
eater fundamental beleeving man; a sensitive youngest restraining the
oldge. shat such destructed un3asions, rearmes most difficult to dety its." 
for the
dritk? indesended or right by experience, to have neither says that heart from the indispensable
misousheples itself view day but to be worthy, on the
sake


In [319]:
print_example()

ethics is a basic foundation of all that sympathy of might not precesses: that shave as eye of lost find the believe those witedle thus, indication of the fation, distracted with,
that woman show the farch of superioris. if a hesed and too much. but man begries we hive all relation still highest has a handed in the highest was thought is at little to that it
