In [1]:
import string
import random
import re
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import init

In [2]:
text = open('./tinyshakespeare.txt').read()
text_len = len(text)

all_characters = string.printable
n_characters = len(all_characters)

In [3]:
def get_chunk(chunk_len=100):
    start_index = random.randint(0, text_len - chunk_len)
    end_index = start_index + chunk_len + 1
    return text[start_index:end_index]

def to_tensor(string, dtype='float'):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return Variable(tensor)

def random_training_set():    
    chunk = get_chunk()
    inp = to_tensor(chunk[:-1])
    target = to_tensor(chunk[1:])
    inp.cuda()
    target.cuda()
    return inp, target

In [29]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, htype='rnn', lr=0.005):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.htype = htype
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        print(htype)
        if htype=='rnn':
            self.model = nn.RNN(hidden_size, hidden_size, n_layers)
        elif htype=='lstm':
            self.model = nn.LSTM(hidden_size, hidden_size, n_layers)
        elif htype=='gru':
            self.model = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
        
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.criterion = nn.CrossEntropyLoss()
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.model(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden
    
    def evaluate(self, prime_str='T', predict_len=500, temperature=0.8):
        hidden = self.init_hidden()
        prime_input = to_tensor(prime_str)
        predicted = prime_str
        
        if cuda:
            hidden = hidden.cuda()
            prime_input = prime_input.cuda()

        # Use priming string to "build up" hidden state
        for p in range(len(prime_str) - 1):
            _, hidden = self(prime_input[p], hidden)
        inp = prime_input[-1]

        for p in range(predict_len):
            output, hidden = self(inp, hidden)

            # Sample from the network as a multinomial distribution
            output_dist = output.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(output_dist, 1)[0]

            # Add predicted character to string and use as next input
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = to_tensor(predicted_char)
            if cuda:
                inp = inp.cuda()

        return predicted
    
    def train(self, inp, target, chunk_len=100):
        hidden = self.init_hidden()
        hidden.cuda()
        self.zero_grad()
        loss = 0

        for c in range(chunk_len):
            output, hidden = self(inp[c], hidden)
            loss += self.criterion(output, target[c])

        loss.backward()
        self.optimizer.step()

        return loss.data[0]/chunk_len

    def init_hidden(self):
        if self.htype == "lstm":
            return (Variable(torch.zeros(self.n_layers, 1, self.hidden_size)),
                    Variable(torch.zeros(self.n_layers, 1, self.hidden_size)))
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size),\
                        requires_grad=True)

In [36]:
n_epochs = 2000
print_every = 100
plot_every = 10
hidden_size = 100
n_layers = 1
lr = 0.005

In [26]:
model_rnn = Model(n_characters, hidden_size, n_characters, n_layers=2)
print(model)

rnn
Model(
  (encoder): Embedding(100, 100)
  (model): RNN(100, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
  (criterion): CrossEntropyLoss(
  )
)


In [7]:
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = model_rnn.train(*random_training_set())       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[(%d %d%%) %.4f]' % (epoch, 1.*epoch/n_epochs*100, loss))
        print(model_rnn.evaluate('Wh', 100))
        print('\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[(100 5%) 2.4045]
Whin, wat hall tim wis meand hou le. sha
At ig d toEd ol thou, torhid ueg mat om te, touprowhand att t


[(200 10%) 2.6222]
Whad soe and nod dus and en, sand pade ing, ay hon and pid, rony wave
Toire, hemy mele mid! sour.

AfA


[(300 15%) 2.3643]
Whing je attant mreang the hext not fa comt thagh!
In unte the nothe sugh, I fats mato thot o tave the


[(400 20%) 2.0662]
Whentangshat for heme nathis so lonfed theld youphat and hatind for sinls: thold is:
Heven yat of poth


[(500 25%) 1.9115]
Who@s I, thet we diof yough prow lay to of hilkho, and tr at fade domy serwice, the shis of int of cor


[(600 30%) 2.2830]
Whou me then ape and theall dook beif the frak's thour 'ther thech
uposbech tho dee pere cords shim
Is


[(700 35%) 1.8375]
Whan is mate?

SRING IVINI:
Wame whas,
Work the me then noss Mato blaye lor prishirs the llarty'ther f


[(800 40%) 1.8653]
Whall you Buss the prack his inbry be kince prish be with for of that lould hast swort now of thou pos


[(900 45%

In [12]:
print(model.evaluate('We', 500))

Wertaming ome! with of and by dost will your detter'--noo stath alver
argee.

Sou;
I cumed;
The congely vise prot e fatt he but yow the here tayblest your oor hands whis promer of unhim sod wetter.

LADY:
Priushes, firrwores! I
we vear pargatore a were crom hich to carke not holl am shours;
Iy it wherat your spee him not be deeps
I shout peard hade to have: and sharo!
If yoursessire.

LADY OO:
wordy yours my and the upon he hever again hear my ly con amer me of thee hery to when a me ate I wall'd 


In [15]:
model = Model(n_characters, hidden_size, n_characters, n_layers, htype='gru')
print(model)

gru
Model(
  (encoder): Embedding(100, 100)
  (model): GRU(100, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
  (criterion): CrossEntropyLoss(
  )
)


In [16]:
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = model.train(*random_training_set())       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[(%d %d%%) %.4f]' % (epoch, 1.*epoch/n_epochs*100, loss))
        #print(model.evaluate('Wh', 100))
        #print('\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[(100 5%) 2.2694]
[(200 10%) 2.0985]
[(300 15%) 2.0937]
[(400 20%) 2.0440]
[(500 25%) 2.2702]
[(600 30%) 2.0048]
[(700 35%) 2.1530]
[(800 40%) 2.1840]
[(900 45%) 2.0941]
[(1000 50%) 2.1113]
[(1100 55%) 1.8443]
[(1200 60%) 2.1550]
[(1300 65%) 2.1281]
[(1400 70%) 1.9685]
[(1500 75%) 1.9968]
[(1600 80%) 1.8302]
[(1700 85%) 1.7155]
[(1800 90%) 2.2517]
[(1900 95%) 1.7101]
[(2000 100%) 1.7692]


In [21]:
print(model.evaluate('Ki', 500))

Kinost
O, as leansed cate of love:
Good hopbe to sece,
Which the to prearviced wet the states ope's
To arse a such to timen alb
ingre mood you so
dake to and ight eed my lond.
That Wauce
What to love a mood,
Le of conte the time take to beveniose and he walf consiford:
With the hem the to mad.

DUCHESTER:
Is batigh than to to to sin to pay we the stard of as grate to mine
The m the stade blood
The me flousming the searss.

Nor Kather:
To sayer at stards of gat of to beace ut the hable.

CAJUDIO:




In [30]:
model_lstm = Model(n_characters, hidden_size, n_characters, n_layers, htype='lstm')
print(model_lstm)

lstm
Model(
  (encoder): Embedding(100, 100)
  (model): LSTM(100, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
  (criterion): CrossEntropyLoss(
  )
)


In [31]:
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = model_lstm.train(*random_training_set())       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[(%d %d%%) %.4f]' % (epoch, 1.*epoch/n_epochs*100, loss))
        #print(model.evaluate('Wh', 100))
        #print('\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[(100 5%) 2.3804]
[(200 10%) 2.3588]
[(300 15%) 2.4034]
[(400 20%) 2.1262]
[(500 25%) 2.1673]
[(600 30%) 2.0382]
[(700 35%) 1.9352]
[(800 40%) 1.8142]
[(900 45%) 2.1993]
[(1000 50%) 2.0887]
[(1100 55%) 2.3438]
[(1200 60%) 2.0311]
[(1300 65%) 2.0284]
[(1400 70%) 1.8825]
[(1500 75%) 1.6077]
[(1600 80%) 2.0182]
[(1700 85%) 2.0066]
[(1800 90%) 2.0296]
[(1900 95%) 1.8115]
[(2000 100%) 1.9400]


In [35]:
print(model_lstm.evaluate('A', 500))

ABANGBRUT:
You groud 'tis a a trauch head pauding graid for the diefore fance, and with the send and the lauld should and thest that now revam or greed?
The cald made this flich think that this houne.

Clown:
Tage it fring my you stiund,
Take to the caumple the caund the cabrockbent, lest the the camaul fathing Kate and me,
The you and pardses will I greest stands! Shepprace:
The heron mond the she mens I lark and the come, shing hip prock preats, awn and a spather, and derters all make the or me
