<a href="https://colab.research.google.com/github/ishandahal/stats453-deep_learning_torch/blob/main/Rnn/character_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Character RNN

Simple character RNN that generates bits of text based on a novel

In [16]:
pip install unidecode



In [17]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torchtext import data
from torchtext import datasets
import time
import random 
import unidecode
import string
import random 
import re

torch.backends.cudnn.deterministic = True

In [18]:
RANDOM_SEED = 123
torch.manual_seed(RANDOM_SEED)

DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

TEXT_PORTION_SIZE = 200

NUM_ITER = 20000
LEARNING_RATE = 0.005
EMBEDDING_DIM = 100
HIDDEN_DIM = 100
NUM_HIDDEN = 1

## Dataset

In [19]:
!wget http://www.gutenberg.org/files/98/98-0.txt

--2020-12-29 04:31:09--  http://www.gutenberg.org/files/98/98-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 807615 (789K) [text/plain]
Saving to: ‘98-0.txt.1’


2020-12-29 04:31:09 (3.23 MB/s) - ‘98-0.txt.1’ saved [807615/807615]



Convert all characters to ASCii characters provided by ```string.printable```

In [20]:
string.printable

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [21]:
with open('./98-0.txt', 'r') as f:
    textfile = f.read()

# # convert special characters 
textfile = unidecode.unidecode(textfile)

# strip extra whitespaces 
textfile = re.sub(' +', ' ', textfile)

TEXT_LENGTH = len(textfile)

print(f"Number of characters in text: {TEXT_LENGTH}")

Number of characters in text: 776911


Divide the text into smaller portions:

In [22]:
random.seed(RANDOM_SEED)

def random_portion(textfile):
    start_index = random.randint(0, TEXT_LENGTH - TEXT_PORTION_SIZE)
    end_index = start_index + TEXT_PORTION_SIZE + 1
    return textfile[start_index:end_index]

print(random_portion(textfile))

and dancing, a dozen
together. When the wine was gone, and the places where it had been
most abundant were raked into a gridiron-pattern by fingers, these
demonstrations ceased, as suddenly as they had


Define a function that converts characters into tensors of integers (type long)

In [23]:
def char_to_tensor(text):
    lst = [string.printable.index(c) for c in text]
    tensor = torch.tensor(lst).long()
    return tensor

print(char_to_tensor('abcDEF'))

tensor([10, 11, 12, 39, 40, 41])


Putting it together to make a function that draws random batches for training:

In [24]:
def draw_random_sample(textfile):
    text_long = char_to_tensor(random_portion(textfile))
    inputs = text_long[:-1]
    targets = text_long[1:]
    return inputs, targets

In [25]:
draw_random_sample(textfile)

(tensor([17, 14, 94, 11, 10, 23, 20, 73, 94, 10, 23, 13, 94, 27, 14, 31, 14, 10,
         21, 94, 29, 24, 94, 48, 27, 75, 94, 47, 24, 27, 27, 34, 94, 29, 17, 14,
         94, 11, 27, 18, 16, 17, 29, 23, 14, 28, 28, 96, 24, 15, 94, 29, 17, 14,
         94, 54, 24, 17, 24, 94, 17, 24, 27, 18, 35, 24, 23, 75, 94, 54, 24, 73,
         94, 17, 14, 94, 25, 30, 28, 17, 14, 13, 94, 24, 25, 14, 23, 94, 29, 17,
         14, 94, 13, 24, 24, 27, 94, 32, 18, 29, 17, 94, 29, 17, 14, 94, 32, 14,
         10, 20, 94, 27, 10, 29, 29, 21, 14, 96, 18, 23, 94, 18, 29, 28, 94, 29,
         17, 27, 24, 10, 29, 73, 94, 28, 29, 30, 22, 11, 21, 14, 13, 94, 13, 24,
         32, 23, 94, 29, 17, 14, 94, 29, 32, 24, 94, 28, 29, 14, 25, 28, 73, 94,
         16, 24, 29, 94, 25, 10, 28, 29, 94, 29, 17, 14, 94, 29, 32, 24, 94, 10,
         23, 12, 18, 14, 23, 29, 96, 12, 10, 28, 17, 18, 14, 27, 28, 73, 94, 10,
         23, 13]),
 tensor([14, 94, 11, 10, 23, 20, 73, 94, 10, 23, 13, 94, 27, 14, 31, 14, 10, 21,
         

## Model

In [26]:
class RNN(nn.Module):
    def __init__(self, input_dim, emb_dim, 
                 hid_dim, out_dim, num_layers):
        super(RNN, self).__init__()

        self.hid_dim = hid_dim
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_dim, emb_dim)
        self.gru = nn.GRU(input_size=emb_dim,
                          hidden_size=hid_dim,
                          num_layers=num_layers)
        self.fc = nn.Linear(hid_dim, out_dim)
        self.init_hidden = nn.Parameter(torch.zeros(
                                            num_layers, 1, hid_dim))
        
    def forward(self, features, hidden):
        embedded = self.embed(features.view(1, -1))
        output, hidden = self.gru(embedded.view(1, 1, -1), hidden)
        output = self.fc(output.view(1, -1))
        return output, hidden

    def init_zero_state(self):
        init_hidden = torch.zeros(self.num_layers, 1, self.hid_dim).to(DEVICE)
        return init_hidden        

In [27]:
torch.manual_seed(RANDOM_SEED)
model = RNN(len(string.printable), EMBEDDING_DIM, HIDDEN_DIM,
            len(string.printable), NUM_HIDDEN)
model = model.to(DEVICE)
optimizer =  torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training

In [28]:
def evaluate(model, prime_str='A', predict_len=100, temperature=0.8):

    hidden = model.init_zero_state()
    prime_input = char_to_tensor(prime_str)
    predicted = prime_str

    ## Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[p].to(DEVICE), hidden.to(DEVICE))
    inp = prime_input[-1]

    for p in range(predict_len):
        output, hidden = model(inp.to(DEVICE), hidden.to(DEVICE))

        # Sample for the network as a multinomial distribution 
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = string.printable[top_i]
        predicted += predicted_char
        inp = char_to_tensor(predicted_char)
    
    return predicted

In [29]:
start_time = time.time()
for iteration in range(NUM_ITER):

    ## Forward and Back Prop

    hidden = model.init_zero_state()
    optimizer.zero_grad()

    loss = 0.
    inputs, targets = draw_random_sample(textfile)
    inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
    for c in range(TEXT_PORTION_SIZE):
        outputs, hidden = model(inputs[c], hidden)
        loss += F.cross_entropy(outputs, targets[c].view(1))

    loss /= TEXT_PORTION_SIZE
    loss.backward()

    ## Update parameters
    optimizer.step()

    ## logging 
    with torch.set_grad_enabled(False):
        if not iteration % 1000:
            print(f"Time elapsed: {(time.time() - start_time)/60:.2f} min")
            print(f"Iteration {iteration} | Loss {loss.item():.2f}\n\n")
            print(evaluate(model, 'Th', 200), '\n')
            print(50*'=')

Time elapsed: 0.00 min
Iteration 0 | Loss 4.60


Th>\T-\`#Xx&fW6B^Lm1^uAqrU?w8=kH;,XATEO
+pgDgbu=U}kY3ysMJ\2<@Gh:`|ch1@ tA^tj|A0"Y
)T,I?G%CyJQ^$G)Oqw(37-Yq9lFo2{6!*9tO5?E[#M4)!s@2)iRxvC8Wx:"$6zf?G98c8HKP79/TWIYv,d1_L)G]RxymA^8^Fbr"a^nDu_aGc( 

Time elapsed: 3.04 min
Iteration 1000 | Loss 1.94


The that low was fatter out good reppentare stook bether) halys ortion, but Foun in not monser the was a carges on in but
refured acponster of the one fow of her ot that the sconters to him bundry doon  

Time elapsed: 6.13 min
Iteration 2000 | Loss 1.64


There, from in sain
why. The rose. Then courting of a must and steable of presibmor. I
shook it in the distared the startely the contory, worn, who not of they to be pain the drew not
that while others  

Time elapsed: 9.20 min
Iteration 3000 | Loss 1.73


Throw intial bus, and frongly and were his bow, its before bedowed
him asked that a very the long at the frogisch wife of him, and the
is me of armined; and and of fae preseed it w