In [1]:
import torch
from torch import nn
import torch.nn.functional as F 
import numpy as np
import random
import matplotlib.pyplot as plt

# One-hot encoding matrix with dim (seq_length, num_classes)
# (one one-hot encoded row for each character index in a sequence)
def one_hot(text_seq, num_classes=None):
    """Build a one-hot matrix with one row per class index in `text_seq`.

    Args:
        text_seq: 1-D sequence (list or array) of integer class indices.
        num_classes: Width of every one-hot row. When omitted, the
            notebook-level alphabet size `K` is used, which is the
            original behaviour of this function.

    Returns:
        A float32 array of shape (len(text_seq), num_classes) in which
        row i holds a single 1 at column text_seq[i].

    Raises:
        IndexError: if an index does not fit into `num_classes` columns.
    """
    if num_classes is None:
        # Fall back to the global alphabet size defined in the setup cell.
        num_classes = K
    # An explicit integer dtype keeps the fancy indexing valid for empty input.
    indices = np.asarray(text_seq, dtype=np.intp)
    data = np.zeros((len(indices), num_classes), dtype=np.float32)
    # Set every (row, class) pair in one vectorised assignment.
    data[np.arange(len(indices)), indices] = 1
    return data

def get_all_sequences(book_text):
    """Cut `book_text` into consecutive one-hot encoded sequences.

    Every character is mapped to its integer id through the global `c2i`,
    then the ids are split into non-overlapping chunks of the global
    `seq_length`. Trailing characters that do not fill a whole chunk are
    dropped. Each chunk is encoded with `one_hot`.

    Returns:
        A float32 array holding one one-hot matrix per full chunk.
    """
    encoded = np.array([c2i[ch] for ch in book_text])
    # Number of characters that fit into an integer count of sequences.
    usable_chars = (len(encoded) // seq_length) * seq_length
    chunks = [
        one_hot(encoded[start:start + seq_length])
        for start in range(0, usable_chars, seq_length)
    ]
    return np.array(chunks, dtype=np.float32)

# one_hots = (seq_length, K)
def one_hot_to_txt(one_hots):
    """Decode rows of one-hot vectors back into a string.

    For every row, the position of the first entry equal to 1 is looked up
    in the global `i2c` mapping, and the resulting characters are joined.
    A row without any entry equal to 1 raises IndexError.
    """
    return ''.join(
        i2c[np.where(row == 1)[0][0]]
        for row in one_hots
    )

def one_hot_to_ind(one_hots):
    """Convert rows of one-hot vectors into an array of class indices.

    For every row, the position of the first entry equal to 1 is taken.
    A row without any entry equal to 1 raises IndexError.
    """
    positions = [np.where(row == 1)[0][0] for row in one_hots]
    return np.array(positions)

def get_next_char(s):
    """Sample the next character from unnormalised model scores.

    The scores are turned into probabilities with a softmax over dim 1 and
    a class is drawn by inverse-CDF sampling: the first class whose
    cumulative probability exceeds a uniform random number is chosen.

    Args:
        s: 2-D score tensor; only its first row is sampled from.
            Assumed to be of shape (1, K), i.e. the output of the model
            for a single time step — TODO confirm for other callers.

    Returns:
        A tuple (idx, char, onehot): the sampled class index as a Python
        int, the matching character from the global `i2c`, and a float
        one-hot tensor of shape (1, 1, K) on the global `device`, ready to
        be fed back into the model.
    """
    probs = nn.functional.softmax(s, dim=1)
    cdf = torch.cumsum(probs, dim=1)
    a = random.uniform(0, 1)
    shifted = (cdf - a)[0].cpu().detach().numpy()
    hits = np.where(shifted > 0)[0]
    # In float32 the last cumulative value can end up marginally below 1,
    # so a draw of `a` very close to 1 may exceed every entry. Fall back to
    # the last class instead of failing on an empty match.
    idx = int(hits[0]) if hits.size > 0 else K - 1
    onehot = np.zeros(K, dtype=np.float32)
    onehot[idx] = 1
    onehot = torch.from_numpy(onehot).view(1, 1, K).float().to(device)
    return idx, i2c[idx], onehot

def synthesize_text(model, char, length):
    """Generate text by repeatedly sampling from `model`, seeded with `char`.

    The seed character is one-hot encoded (using the globals `K`, `c2i`
    and `device`) and fed to the model with a fresh hidden state. Every
    sampled character is then fed back in as the next input.

    Args:
        model: network exposing `init_hidden(batch_size)` and returning
            `(scores, hidden)` when called with `(input, hidden)`.
        char: seed character; must be a key of `c2i`.
        length: number of characters to sample after the seed.

    Returns:
        The seed character followed by `length` sampled characters.
    """
    # Remember the mode so generation does not silently flip an
    # evaluation-mode model back into training mode.
    was_training = model.training
    model.eval()
    try:
        generated_seq = [char]
        x0 = np.zeros(K, dtype=np.float32)
        x0[c2i[char]] = 1
        x0 = torch.from_numpy(x0).view(1, 1, K).to(device)
        hidden = model.init_hidden(1)
        # No gradients are needed for sampling. Without no_grad the hidden
        # state would keep the autograd graph of every step alive.
        with torch.no_grad():
            for _ in range(length):
                s, hidden = model(x0, hidden)
                _, next_char, x0 = get_next_char(s)
                generated_seq.append(next_char)
    finally:
        model.train(was_training)
    return ''.join(generated_seq)

# Vanilla RNN for character-level generation
class VanillaRNN(nn.Module):
    """An `nn.RNN` followed by a linear layer producing per-step class scores.

    Args:
        input_size: number of features of each input step.
        output_size: number of scores produced for each step.
        hidden_size: number of features in the recurrent hidden state.
        n_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, output_size, hidden_size, n_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.rnn = nn.RNN(input_size, hidden_size, n_layers, batch_first=True)
        # Size the output layer from the constructor argument rather than
        # from a notebook global, so the class works on its own.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Run the RNN over `x` and map every step to class scores.

        Args:
            x: input of shape (batch, seq, input_size), since the RNN is
                built with batch_first=True.
            hidden: initial hidden state, e.g. from `init_hidden`.

        Returns:
            (out, hidden): `out` has shape (batch * seq, output_size) because
            the RNN outputs are flattened before the linear layer; `hidden`
            is the final hidden state returned by the RNN.
        """
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        # Flatten (batch, seq, hidden) to (batch * seq, hidden) for the linear layer
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero hidden state of shape (n_layers, batch_size, hidden_size).

        The tensor is created with the dtype and device of the model's own
        parameters, so it is usable wherever the model currently lives.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, batch_size, self.hidden_size)

# Loss values and generated text samples recorded during training
lossarr = []
strarr = []
def train(model, n_epochs, clip, debug = 0, steps_during_debug=2):
    """Train `model` one sequence at a time with truncated BPTT.

    Relies on the notebook globals `x`, `y` (one-hot inputs and targets),
    `opt`, `criterion`, `seq_length`, `K` and `device`. Every 500 update
    steps the smoothed loss is appended to `lossarr` and a short sample is
    printed; every 20000 steps a sample is appended to `strarr`. The model
    is left in evaluation mode when training finishes.

    Args:
        model: network exposing `init_hidden(batch_size)` and returning
            `(scores, hidden)` when called with `(input, hidden)`.
        n_epochs: number of passes over all sequences in `x`.
        clip: maximum gradient norm passed to `clip_grad_norm_`.
        debug: when truthy, every epoch stops early (see next argument).
        steps_during_debug: last sequence index processed per epoch in
            debug mode.
    """
    model.train()
    update_step = 0
    smooth_loss = 0
    for epoch in range(1, n_epochs + 1):
        # NOTE: the batch size is hard-coded to 1 here and in the reshape below.
        hidden = model.init_hidden(1)
        for i in range(x.shape[0]):
            if debug and i > steps_during_debug:
                break
            # Detach the hidden state from the computational graph
            # (truncated back-propagation through time) so the graph does
            # not grow across sequences. The hidden state VALUE is still
            # carried over; only its dependence on earlier steps is cut.
            hidden = hidden.detach().to(device)

            # Gradients accumulate across backward passes unless cleared.
            opt.zero_grad()

            # Input must have dimensions (batch_size, seq_length, number_of_classes)
            seq = torch.from_numpy(x[i]).view(1, seq_length, K).to(device)

            # Targets are class indices, one per step, not one-hot rows.
            # The explicit .long() guarantees int64 regardless of the
            # platform's default numpy integer type.
            target = torch.from_numpy(one_hot_to_ind(y[i])).long().to(device)

            output, hidden = model(seq, hidden)
            loss = criterion(output, target)

            # Track the running average on a detached value. Keeping the
            # graph-attached loss would chain autograd nodes across steps.
            loss_value = loss.detach()
            if update_step == 0:
                smooth_loss = loss_value
            else:
                smooth_loss = smooth_loss*0.999 + loss_value*0.001
            if update_step%500 == 0:
                lossarr.append(smooth_loss.item())
                print(len(lossarr))
                print("\n\n===========================================================================================")
                # The percentage is relative to ONE epoch, so it exceeds 100% in later epochs.
                print("Update Step: " + str(update_step) +"/" + str(x.shape[0]) + ", " + str((update_step/x.shape[0])*100) +"%")
                print(str(smooth_loss))
                print("========================================= TEXT ============================================")
                print(synthesize_text(model, 'H', 50))
                print("===========================================================================================")
            if update_step%20000 == 0:
                strarr.append(synthesize_text(model, 'H', 50))

            # BPTT
            loss.backward()
            # Clip gradients such that they don't explode
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            # Apply the parameter update
            opt.step()
            update_step += 1
    model.eval()


In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")

print("Running on: " + str(device))

# Read the whole corpus; the context manager closes the file even if reading fails
with open("Clean_All_Books.txt", "r", encoding="utf8") as file:
    book = file.read()

# Tokenize chars in text: sorted unique characters and both lookup directions
chars = sorted(set(book))
i2c = dict(enumerate(chars))
c2i = {char: index for index, char in i2c.items()}

# Hyper-parameters
# Num hidden nodes
m = 100
# Sequence length (number of characters per training sequence)
seq_length = 25
# Number of chars
K = len(chars)
# Learning rate
eta = 0.1

model = VanillaRNN(input_size = K, output_size = K, hidden_size = m, n_layers=1)
model.to(device)
criterion = nn.CrossEntropyLoss()
# model.parameters() yields all trainable tensors of the RNN and the linear layer
opt = torch.optim.Adagrad(model.parameters(), lr=eta)
x = get_all_sequences(book)
# Targets are the inputs shifted by one character; the first character is
# appended at the end so that inputs and targets have the same length.
y = get_all_sequences(book[1:] + book[0])

Running on: cuda


In [None]:
# Two passes over the corpus, clipping the gradient norm at 5
train(model, n_epochs=2, clip=5)

Numbers of update steps per epoch: 256485
1


Update Step: 0/256485, 0.0%
tensor(4.5091, device='cuda:0', grad_fn=<NllLossBackward>)
Hk8XC2,LBu—~QVq:Qg4txj\C%0j!Ov*welwR’
R1:&?NG4R"E,d
2


Update Step: 500/256485, 0.1949431740647601%
tensor(3.9956, device='cuda:0', grad_fn=<AddBackward0>)
H, . d a’o, ee toe opabe ad n fly 
uie cey, ageh n 
3


Update Step: 1000/256485, 0.3898863481295202%
tensor(3.4975, device='cuda:0', grad_fn=<AddBackward0>)
Hw hotipt’ore’selad fethes in Cro, Doothonithetunt 
4


Update Step: 1500/256485, 0.5848295221942803%
tensor(3.1111, device='cuda:0', grad_fn=<AddBackward0>)
Hen., ommufveod thamm sacon’d capumin’conb t Tord ’
5


Update Step: 2000/256485, 0.7797726962590404%
tensor(2.8525, device='cuda:0', grad_fn=<AddBackward0>)
Harlked,,””” 
rediy goabll tars, is bis Dus 

fivde
6


Update Step: 2500/256485, 0.9747158703238007%
tensor(2.6648, device='cuda:0', grad_fn=<AddBackward0>)
HoTGawt atoutey licge arf bedpurlit 
ouccus starnaO
7


Update Step: 3000/2564

Ha. 
doung congdo’s on they yee Potixgh ounn — 
Iak
39


Update Step: 19000/256485, 7.407840614460885%
tensor(2.0206, device='cuda:0', grad_fn=<AddBackward0>)
Harrye goterirg revim ofly 
seining Herry domb, the
40


Update Step: 19500/256485, 7.602783788525645%
tensor(1.9923, device='cuda:0', grad_fn=<AddBackward0>)
HE. 
““Not you of houts, she Dontuslnoming dreych 

41


Update Step: 20000/256485, 7.7977269625904055%
tensor(2.0060, device='cuda:0', grad_fn=<AddBackward0>)
Harrying had the kbom comlyoowtanengle nothing magl
42


Update Step: 20500/256485, 7.992670136655165%
tensor(2.0141, device='cuda:0', grad_fn=<AddBackward0>)
Harres mewfile. 
“They’rd 
of statar onet dins byop
43


Update Step: 21000/256485, 8.187613310719925%
tensor(2.0024, device='cuda:0', grad_fn=<AddBackward0>)
Harry sho geee sigch, ghougking wald lash 
jutt 
op
44


Update Step: 21500/256485, 8.382556484784685%
tensor(2.0084, device='cuda:0', grad_fn=<AddBackward0>)
H. Wyin tro got park op Md farriedly, to to b

Update Step: 37500/256485, 14.62073805485701%
tensor(1.8828, device='cuda:0', grad_fn=<AddBackward0>)
He 
ret corf theided it vereme 
ald allye, they for
77


Update Step: 38000/256485, 14.81568122892177%
tensor(1.8875, device='cuda:0', grad_fn=<AddBackward0>)
Harry FAnden, and Pippeen moved they off 
Dumbly. T
78


Update Step: 38500/256485, 15.010624402986531%
tensor(1.9322, device='cuda:0', grad_fn=<AddBackward0>)
Harry bambouce in forenage the wind the Drizing lis
79


Update Step: 39000/256485, 15.20556757705129%
tensor(1.9427, device='cuda:0', grad_fn=<AddBackward0>)
Harryesturd Unnh to a 
at that e suve honly the rea
80


Update Step: 39500/256485, 15.40051075111605%
tensor(1.9356, device='cuda:0', grad_fn=<AddBackward0>)
HE Prot. I mago inge sot of hold har glapay. sarm.”
81


Update Step: 40000/256485, 15.595453925180811%
tensor(1.9347, device='cuda:0', grad_fn=<AddBackward0>)
Hary but 
tame erne — it a 
to crade 
he had deam h
82


Update Step: 40500/256485, 15.79039709924557

HA. Iy Qurringove, padististing had hermed intind h
114


Update Step: 56500/256485, 22.028578669317895%
tensor(1.8988, device='cuda:0', grad_fn=<AddBackward0>)
HE. 
“Pryfeathiambood At ext to foully of histaincr
115


Update Step: 57000/256485, 22.223521843382656%
tensor(1.8676, device='cuda:0', grad_fn=<AddBackward0>)
H rras for belfint was seabpleted. AN!” he 
and sai
116


Update Step: 57500/256485, 22.418465017447414%
tensor(1.8332, device='cuda:0', grad_fn=<AddBackward0>)
He cat. ... 
Harry was shing 
the man pay.” 
Ighty 
117


Update Step: 58000/256485, 22.613408191512175%
tensor(1.8080, device='cuda:0', grad_fn=<AddBackward0>)
HE.... 
But — ” seashes mif his stoom. Harry looke 
118


Update Step: 58500/256485, 22.808351365576936%
tensor(1.8066, device='cuda:0', grad_fn=<AddBackward0>)
Haro in the the some.. 
“Lupinis a doing with himmo
119


Update Step: 59000/256485, 23.003294539641693%
tensor(1.8289, device='cuda:0', grad_fn=<AddBackward0>)
He irenshishes wanged araw acroye 

“Ohe seaff bon, frobble!”
151


Update Step: 75000/256485, 29.24147610971402%
tensor(1.8676, device='cuda:0', grad_fn=<AddBackward0>)
Har e. rrapbew ... 
“Proness.” 
the pixsow thourd D
152


Update Step: 75500/256485, 29.436419283778783%
tensor(1.8804, device='cuda:0', grad_fn=<AddBackward0>)
Hard histled Plele of the 
hisping, 
wa mort warpin
153


Update Step: 76000/256485, 29.63136245784354%
tensor(1.8759, device='cuda:0', grad_fn=<AddBackward0>)
Harm!” 
youre shey, perping seeed wis willed Magily
154


Update Step: 76500/256485, 29.8263056319083%
tensor(1.8483, device='cuda:0', grad_fn=<AddBackward0>)
HE Rindhrough to could. 
“Aillely Mabby pout 
oned 
155


Update Step: 77000/256485, 30.021248805973062%
tensor(1.8368, device='cuda:0', grad_fn=<AddBackward0>)
Harry. ... fid thend they 
edsull 
saffy narnile to
156


Update Step: 77500/256485, 30.21619198003782%
tensor(1.8296, device='cuda:0', grad_fn=<AddBackward0>)
HLRIT, 
9heme 
backed reald of freemuted beaping a 
157


Update 

Update Step: 93500/256485, 36.45437355011014%
tensor(1.7839, device='cuda:0', grad_fn=<AddBackward0>)
Har ’N. 
He and stupning — a Greant, ham to sems th
189


Update Step: 94000/256485, 36.6493167241749%
tensor(1.7928, device='cuda:0', grad_fn=<AddBackward0>)
Harry sarye!” Harry car they he snurnen 
his your,”
190


Update Step: 94500/256485, 36.84425989823966%
tensor(1.7981, device='cuda:0', grad_fn=<AddBackward0>)
Hary whotersand tood and watkeling charcroye up on 
191


Update Step: 95000/256485, 37.039203072304424%
tensor(1.7794, device='cuda:0', grad_fn=<AddBackward0>)
He buthing 
and the At wearmown.” 
Harry out was — 
192


Update Step: 95500/256485, 37.23414624636918%
tensor(1.8037, device='cuda:0', grad_fn=<AddBackward0>)
Harry Sarry head, brimit-not you litter gow aloul P
193


Update Step: 96000/256485, 37.42908942043394%
tensor(1.7960, device='cuda:0', grad_fn=<AddBackward0>)
Har warmever, and to Harry Harry hore pro was yel, 
194


Update Step: 96500/256485, 37.6240325944

In [None]:
# Sample 1000 characters from the trained model, seeded with 'H'
generated_seq = synthesize_text(model, char='H', length=1000)
print(generated_seq)

In [2]:
# Prime the network with a context before generation,
# i.e. while priming only the hidden state is passed forward,
# not the predicted character.
def synthesize_text2(model, prime, length):
    """Generate text after warming up the hidden state on `prime`.

    Each character of `prime` is fed to the model in turn. Its predictions
    are ignored except for the one after the last prime character, which
    is sampled to start the generation. From then on every sampled
    character is fed back in as the next input.

    Args:
        model: network exposing `init_hidden(batch_size)` and returning
            `(scores, hidden)` when called with `(input, hidden)`.
        prime: non-empty string of context characters, all keys of the
            global `c2i`.
        length: number of feedback steps. Note that `length + 1`
            characters are generated in total: one from the priming
            output plus one per feedback step.

    Returns:
        `prime` followed by the `length + 1` generated characters.

    Raises:
        ValueError: if `prime` is empty, as there is then no model output
            to start sampling from.
    """
    if not prime:
        raise ValueError("prime must contain at least one character")

    # Remember the mode so generation does not silently flip an
    # evaluation-mode model back into training mode.
    was_training = model.training
    model.eval()
    try:
        generated_seq = list(prime)
        hidden = model.init_hidden(1)
        # No gradients are needed for sampling. Without no_grad the hidden
        # state would keep the autograd graph of every step alive.
        with torch.no_grad():
            # Prime the network: only the hidden state is carried forward.
            for char in prime:
                x0 = np.zeros(K, dtype=np.float32)
                x0[c2i[char]] = 1
                x0 = torch.from_numpy(x0).view(1, 1, K).to(device)
                s, hidden = model(x0, hidden)

            # First generated character comes from the last priming output.
            _, next_char, x0 = get_next_char(s)
            generated_seq.append(next_char)

            for _ in range(length):
                s, hidden = model(x0, hidden)
                _, next_char, x0 = get_next_char(s)
                generated_seq.append(next_char)
    finally:
        model.train(was_training)

    return ''.join(generated_seq)

In [None]:
# Sample text from the trained model after priming it with 'Harry Potter'
generated_seq = synthesize_text2(model, prime='Harry Potter', length=1000)
print(generated_seq)

In [None]:
import matplotlib.pyplot as plt
# lossarr receives one smoothed-loss value per 500 update steps during training
xaxis = range(len(lossarr))
print(lossarr)
plt.plot(xaxis,lossarr)
# Label the figure so it can be read without the surrounding code
plt.xlabel("Logged point (one per 500 update steps)")
plt.ylabel("Smoothed training loss")
plt.title("Vanilla RNN training loss")
plt.show()

In [None]:
# Print the text samples collected during training, separated by blank lines.
# The loop variable must not be called `str`: that would rebind the builtin
# for the whole notebook and break every later str(...) call.
for sample_text in strarr:
    print(sample_text)
    print()

In [None]:
# Name of the checkpoint file; change it to keep several saved models
model_name = 'vanilla_rnn_all_books.net'

# Everything stored for the model: its sizes, its weights and the character list
checkpoint = dict(
    hidden_size=model.hidden_size,
    n_layers=model.n_layers,
    state_dict=model.state_dict(),
    tokens=chars,
)

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [None]:
# Custom accuracy measure: the fraction of generated tokens that occur as
# whole words in a reference book, averaged over several generated samples.
import re
trials = 100
mean_actual_word_count = 0
# utf8 matches how the training corpus is opened in the setup cell.
# NOTE(review): confirm that this file uses the same encoding.
with open('goblet_book.txt', encoding="utf8") as book_file:
    f = book_file.read()

# Tokenise the reference text exactly like the generated text and keep the
# distinct words. Testing `word in f` on the raw string would be a substring
# search, so any fragment of a real word would count as correct.
known_words = set(re.findall(r"[\w']+", f))

for i in range(trials):
    generated_words = re.findall(r"[\w']+", synthesize_text2(model, 'Harry Potter', 1000))
    if not generated_words:
        # Nothing was tokenised; this trial contributes a ratio of 0.
        continue
    actual_words_count = sum(1 for word in generated_words if word in known_words)
    actual_words_ratio = actual_words_count/len(generated_words)
    mean_actual_word_count += actual_words_ratio

print("Mean of correct words generated: ")
print(mean_actual_word_count/trials)
