## Char-rnn Text Generator - Training on Pride and Prejudice

In [82]:
import unidecode
import string
import random
import re

### Check whether a GPU is available

In [81]:
# torch is imported in this cell so it also runs on a freshly restarted
# kernel; the notebook's other torch imports live in a later cell.
import torch

# True when a CUDA device can be used for training and sampling.
use_gpu = torch.cuda.is_available()

## Load in text file

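- `all_characters` is the vocabulary: every printable ASCII character (digits, letters, punctuation and whitespace)
- `file_len` is the number of characters in the book after it has been transliterated to ASCII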

In [83]:
# Vocabulary: every printable ASCII character; a character's position in this
# string is used as its integer id.
all_characters = string.printable
n_characters = len(all_characters)

# Read the whole book inside a context manager so the file handle is closed
# as soon as the text is in memory, then transliterate it to ASCII.
with open('data/pride_and_prejudice.txt') as book:
    file = unidecode.unidecode(book.read())
file_len = len(file)
print('file_len =', file_len)

file_len = 703564


Get a random chunk of text from the book with specified chunk length.
This will be used to generate training samples.

In [84]:
# Number of characters per training chunk (assigned again in the training
# configuration cell).
chunk_len = 300

def random_chunk(chunk_len = 100):
    """Return a random run of ``chunk_len + 1`` consecutive characters of ``file``.

    The extra character lets a caller derive an input sequence (everything but
    the last character) and a target sequence (everything but the first), each
    ``chunk_len`` characters long.

    Args:
        chunk_len: length of the input/target sequences to be derived.

    Returns:
        A string of exactly ``chunk_len + 1`` characters.

    Raises:
        ValueError: if the text is too short to hold ``chunk_len + 1`` characters.
    """
    if file_len < chunk_len + 1:
        raise ValueError('text is shorter than chunk_len + 1 characters')
    # random.randint includes both endpoints, so the last valid start is
    # file_len - chunk_len - 1; a larger start would truncate the slice.
    start_index = random.randint(0, file_len - chunk_len - 1)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

# Sanity check: print one random sample from the book.
print(random_chunk(300))

an angel more beautiful. Darcy, on the contrary, had seen a
collection of people in whom there was little beauty and no fashion, for
none of whom he had felt the smallest interest, and from none received
either attention or pleasure. Miss Bennet he acknowledged to be pretty,
but she smiled too much.



## Start model building

### Define RNN: use LSTM or GRU

In [96]:
import torch
import torch.nn as nn
from torch.autograd import Variable

## With option of GRU or LSTM, can specify dropout rate
class RNN(nn.Module):
    """Recurrent network: embedding -> GRU or LSTM -> linear decoder.

    Args:
        input_size: number of distinct input symbols (rows of the embedding).
        hidden_size: width of the embedding and of the recurrent hidden state.
        output_size: number of scores produced per step by the decoder.
        dropout: dropout probability handed to the recurrent layer.
        model: ``"gru"`` or ``"lstm"``.
        n_layers: number of stacked recurrent layers.

    Raises:
        ValueError: if ``model`` is neither ``"gru"`` nor ``"lstm"``.
    """
    def __init__(self, input_size, hidden_size, output_size, 
                 dropout=0, model="gru",n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.model = model
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        if self.model == "gru":
            self.rnn = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
        elif self.model == "lstm":
            self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, dropout=dropout)
        else:
            # Fail here rather than with an AttributeError on the first forward pass.
            raise ValueError('model must be "gru" or "lstm", got %r' % (model,))
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        """Advance the network by one time step.

        Args:
            input: tensor of symbol indices for a single time step; its first
                dimension is taken as the batch size.
            hidden: recurrent state as returned by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(output, hidden)`` where ``output`` has ``batch_size`` rows of
            decoder scores and ``hidden`` is the updated recurrent state.
        """
        batch_size = input.size(0)
        encoded = self.encoder(input)
        # The recurrent layer expects (seq_len, batch, features); seq_len is 1 here.
        output, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
        output = self.decoder(output.view(batch_size, -1))
        return output, hidden

    def init_hidden(self,batch_size):
        """Return an all-zero initial state for ``batch_size`` sequences.

        The state is allocated with the device and dtype of the model's own
        parameters, so it is usable without a further transfer. For an LSTM
        the result is a ``(h, c)`` tuple; for a GRU it is a single tensor.
        """
        reference = next(self.parameters())
        shape = (self.n_layers, batch_size, self.hidden_size)
        if self.model == "lstm":
            return (reference.new_zeros(shape), reference.new_zeros(shape))
        return reference.new_zeros(shape)


### Encode a string as a tensor of character indices

In [1]:
# Return a long tensor of character indices
def char2tensor(string):
    """Map each character of ``string`` to its position in ``all_characters``.

    Returns a 1-D long tensor with one index per character.
    """
    indices = torch.zeros(len(string)).long()
    for position, char in enumerate(string):
        indices[position] = all_characters.index(char)
    return indices

### Generate a random training set with specified chunk length and batch size

In [87]:
def random_training_set(chunk_len,batch_size):    
    """Build one random batch of (input, target) index tensors.

    Each row comes from one ``random_chunk``: the input is the chunk without
    its last character, the target is the chunk without its first character.
    Both tensors are ``batch_size`` x ``chunk_len`` and are moved to the GPU
    when ``use_gpu`` is set.
    """
    shape = (batch_size, chunk_len)
    inp, target = torch.LongTensor(*shape), torch.LongTensor(*shape)
    for row in range(batch_size):
        text = random_chunk(chunk_len)
        inp[row] = char2tensor(text[:-1])
        target[row] = char2tensor(text[1:])
    if not use_gpu:
        return Variable(inp), Variable(target)
    return inp.cuda(), target.cuda()

### Define Evaluation Function: 

In [101]:
#temperature will control how conservative/diverse the softmax output will be
def evaluate(decoder, prime_str='A', predict_len=100, temperature=0.8, use_gpu = False):
    """Sample text from ``decoder``, starting from ``prime_str``.

    Args:
        decoder: model exposing ``init_hidden(batch_size)`` and a one-step
            ``decoder(input, hidden) -> (output, hidden)`` call.
        prime_str: non-empty seed text used to warm up the hidden state.
        predict_len: number of characters to generate after the seed.
        temperature: divisor applied to the scores before sampling; lower
            values give more conservative output, higher values more diverse.
        use_gpu: move the state and inputs to the GPU before running.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty.
    """
    if not prime_str:
        raise ValueError('prime_str must contain at least one character')

    hidden = decoder.init_hidden(1)
    prime_input = char2tensor(prime_str).unsqueeze(0)
    
    if use_gpu:
        if isinstance(hidden, tuple):
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        else:
            hidden = hidden.cuda()
        prime_input = prime_input.cuda()
    
    predicted = prime_str

    # Sample in eval mode (dropout off) and without autograd bookkeeping, then
    # put the model back in whatever mode the caller had it in.
    was_training = decoder.training
    decoder.eval()
    try:
        with torch.no_grad():
            # Use priming string to "build up" hidden state
            for p in range(len(prime_str) - 1):
                _, hidden = decoder(prime_input[:,p], hidden)

            inp = prime_input[:,-1]

            for p in range(predict_len):
                output, hidden = decoder(inp, hidden)

                # Sample from the network as a multinomial distribution.
                # Subtracting the maximum leaves the distribution unchanged
                # but keeps exp() from overflowing.
                logits = output.view(-1).div(temperature)
                output_dist = (logits - logits.max()).exp()
                top_i = torch.multinomial(output_dist, 1)[0].item()

                # Add predicted character to string and use as next input
                predicted_char = all_characters[top_i]
                predicted += predicted_char
                inp = char2tensor(predicted_char).unsqueeze(0)
                if use_gpu:
                    inp = inp.cuda()
    finally:
        decoder.train(was_training)

    return predicted

### Model training and record loss

In [90]:
def train(inp,target,batch_size):
    """Run one optimisation step on a batch and return its mean per-step loss.

    Relies on the notebook-level ``decoder``, ``criterion``,
    ``decoder_optimizer`` and ``use_gpu``.

    Args:
        inp: index tensor read one column (time step) at a time as ``inp[:, c]``.
        target: index tensor of the same layout holding the expected next symbols.
        batch_size: number of rows in ``inp`` / ``target``.

    Returns:
        The summed loss divided by the number of time steps, as a Python float.
    """
    hidden = decoder.init_hidden(batch_size)
    if use_gpu:
        # An LSTM state is an (h, c) tuple; a GRU state is a single tensor.
        if isinstance(hidden, tuple):
            hidden = tuple(state.cuda() for state in hidden)
        else:
            hidden = hidden.cuda()
    decoder.zero_grad()
    # Take the sequence length from the batch itself so the loop always
    # matches the data that was passed in.
    seq_len = inp.size(1)
    loss = 0
    for c in range(seq_len):
        output, hidden = decoder(inp[:,c], hidden)
        loss += criterion(output.view(batch_size, -1), target[:,c])

    loss.backward()
    decoder_optimizer.step()

    # .item() extracts the Python number from the 0-dim loss tensor;
    # indexing it with [0] is rejected by current PyTorch.
    return loss.item() / seq_len

### Start Model Training

In [105]:
# Training configuration.
n_epochs = 5000      # number of training steps; each step uses one random batch
print_every = 200    # print the loss and a text sample every this many steps
plot_every = 10      # record an averaged loss every this many steps
hidden_size = 512
n_layers = 2
lr = 0.005
batch_size = 100
chunk_len = 300
dropout = 0.2

# Pass the optional settings by keyword: RNN's signature is
# (input_size, hidden_size, output_size, dropout, model, n_layers), so passing
# "gru" and n_layers positionally would bind them to the wrong parameters.
decoder = RNN(n_characters, hidden_size, n_characters,
              dropout=dropout, model="gru", n_layers=n_layers)
if use_gpu:
    decoder.cuda()

decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

all_losses = []   # one averaged loss per plot_every steps
loss_avg = 0      # running sum of losses since the last recorded point

for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set(chunk_len,batch_size),batch_size)       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[(%d %d%%) %.4f]' % ( epoch, epoch / n_epochs * 100, loss))
        print(evaluate(decoder,'Wh', 100, use_gpu=use_gpu), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0



[(100 3%) 2.5491]
Whet heprens opalin, fod ad, a misher thgheiland te and we
he hosut the th sepsos Ches hercor is otist 

[(200 6%) 2.1592]
Whtis, bebad ther, hezs and the ad of hereke doull thend of she to Elisto sady non at the, in to ther  

[(300 10%) 2.0794]
Wht anoth, whibll I rerbe anitathe cirtend and
of the harn. Dyoir wissipeeth atdare she it his be than 

[(400 13%) 1.9223]
Whe whis med to forchid, at hat, and of faviess had I
sur, but of allaclines that she apce. Hit of for 

[(500 16%) 2.1074]
Whince of has she for parthing such home thighter.

And in the he at suth for spene his at pale on rec 

[(600 20%) 1.7216]
Whelly And all here of chat intain whit everest homenced thell not they her wam's sall indedfattming t 



KeyboardInterrupt: 