In [1]:
import torch
from torch import nn
import torch.nn.functional as F 
import numpy as np 
import random

# One-hot encoding matrix with dim (seq_length,K)
# (creating one hot encoded rows for each character in a sequence )
def one_hot(text_seq):
    data = np.zeros((len(text_seq), K), dtype=np.float32)
    for i in range(len(text_seq)):
        char_as_int = text_seq[i]
        data[i][char_as_int] = 1
    return data

def get_all_sequences(book_text):
    # Translate text to integers
    book_as_ints = np.array([c2i[c] for c in book_text])
    n_seq = len(book_as_ints)//seq_length
    book_text = book_text[:n_seq*seq_length]
    batches = []
    for i in range(0, len(book_text), seq_length):
        batches.append(one_hot(book_as_ints[i:i+seq_length]))
    return np.array(batches, dtype=np.float32)

# one_hots = (seq_length, K)
def one_hot_to_txt(one_hots):
    chars = []
    for arr in one_hots:
        idx = np.where(arr == 1)[0][0]
        chars.append(i2c[idx])
    return ''.join(chars)

def one_hot_to_ind(one_hots):
    indices = []
    for arr in one_hots:
        indices.append(np.where(arr==1)[0][0])
    return np.array(indices)

def get_next_char(s):
    s = nn.functional.softmax(s, dim=1)
    l = torch.cumsum(s, dim=1)
    a = random.uniform(0,1)
    ls = (l - a).cpu().detach().numpy()
    #print(ls)
    idx = np.where(ls > 0)
    idx = idx[1][0]
    onehot = np.zeros(K, dtype=np.float32)
    onehot[idx] = 1
    onehot = torch.from_numpy(onehot).view(1,1,K).float().to(device)
    return idx, i2c[idx], onehot

def synthesize_text(model, char, length):
    model.eval()
    generated_seq = [char]
    x0 = np.zeros(K, dtype=np.float32)
    x0[c2i[char]] = 1
    x0 = torch.from_numpy(x0)
    x0 = x0.view(1,1,K).to(device)
    hidden = model.init_hidden(1)
    for i in range(length):
        #print("x0 shape: " + str(x0.shape))
        #print("hidden shape: " + str(hidden.shape))
        s, hidden = model(x0, hidden)
        i, c, x0 = get_next_char(s)
        generated_seq.append(c)
    model.train()
    return ''.join(generated_seq)

# Layered LSTM for character-level generation
class MultiLayerLSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, n_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.lstm = nn.LSTM(input_size, hidden_size, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, len(chars))

    def forward(self, x, hidden):
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.lstm(x, hidden)
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        if cuda_available:
            hidden = (torch.zeros(self.n_layers, batch_size, self.hidden_size).cuda(),
                      torch.zeros(self.n_layers, batch_size, self.hidden_size).cuda())
        else:
            hidden = (torch.zeros(self.n_layers, batch_size, self.hidden_size),
                     torch.zeros(self.n_layers, batch_size, self.hidden_size))
        return hidden

# Saves loss and generted text during training    
lossarr = []
strarr = []
def train(model, n_epochs, clip, debug = 0, steps_during_debug=2):
    model.train()
    update_step = 0
    smooth_loss = 0
    debug = 0
    #print("Numbers of update steps per epoch: " + str(x.shape[0]))
    for epoch in range(1, n_epochs + 1):
        #OBSSS!!! HARDCODED BATCH MIGHT CHANGE LATER
        hidden = model.init_hidden(1)
        for i in range(x.shape[0]):
            if debug: 
                if(i > steps_during_debug): 
                    break    
            # Detatching hidden state from the computational graph 
            # (see truncated back propagation through time) such that
            # the graph doesn't become super big, slowing down computations
            # and exploding the gradient. 
            
            # Note however that the hidden state VALUE will still be inherited
            # We just say that it doesn't depend on previous hidden states
            hidden = tuple([each.data for each in hidden])
            #hidden[0].detach_()
            #hidden[1].detach_()
            #hidden[0] = hidden[0].to(device)
            #hidden[1] = hidden[1].to(device)
            
            # Clear gradients between update steps, won't be cleared automatically. 
            opt.zero_grad() 
            
            #Remember that input should have dimensions (batch_size, seq_size, number_of_classes)
            seq = x[i]
            seq = torch.from_numpy(seq).view(1,seq_length,K)
            #if cuda_available: 
            seq = seq.to(device)
            #print("seq shape: " + str(seq.shape))        

            #Should have shape (25) (not (25,80))
            target = y[i]
            target = one_hot_to_ind(target)
            target = torch.from_numpy(target)
            target = target.to(device)
            #print("target shape: " + str(target.shape))

            output, hidden = model(seq, hidden)
            loss = criterion(output, target)
            
            if update_step == 0:
                smooth_loss = loss
            else:
                smooth_loss = smooth_loss*0.999 + loss*0.001
            if update_step%500 == 0:
                lossarr.append(smooth_loss.item())
                print("\n\n===========================================================================================")
                print("Update Step: " + str(update_step) +"/" + str(x.shape[0]) + ", " + str((update_step/x.shape[0])*100) +"%")
                print(str(smooth_loss))
                print("========================================= TEXT ============================================")
                print(synthesize_text(model, 'H', 50))
                print("===========================================================================================")
            if update_step%5000 == 0:
                strarr.append(synthesize_text(model, 'H', 50))
            
            # BPTT 
            loss.backward() 
            # Clip gradients such that they don't explode
            nn.utils.clip_grad_norm_(model.parameters(), clip) 
            # Update the gradients
            opt.step() 
            update_step += 1
    model.eval()


In [2]:
cuda_available = torch.cuda.is_available()
if cuda_available:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Running on: " + str(device))

# Open file and get text
file = open("Clean_All_Books.txt", "r", encoding="utf8")
book = file.read()
file.close()

# Tokenize chars in text
chars = list(set(book))
chars.sort()
i2c = dict(enumerate(chars))
c2i = {i2c[i]: i for i in i2c.keys()}

# Hyper-parameters
# Num hidden nodes
m = 100
# Batch Size (batch meaning same as lab)
seq_length = 25
# Number of chars
K = len(chars)
# Learning rate
eta = 0.1
      
model = MultiLayerLSTM(input_size = K, output_size = K, hidden_size = m, n_layers=2)
model.to(device)
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adagrad(model.parameters(), lr=eta)
# model.parameters are all params e.g U, V, b, c etc, see print below
#for param in list(model.parameters()):
#    print(param.shape)
x = get_all_sequences(book)
#print(np.shape(x))
#print(one_hot_to_txt(x[0]))
# If even number of chars (in relation to seq_length), first char will be last target
y = get_all_sequences(book[1:] + book[0])

Running on: cuda


In [None]:
train(model, 2, 5)



Update Step: 0/256485, 0.0%
tensor(4.5543, device='cuda:0', grad_fn=<NllLossBackward>)
H:tC7DyFII5“gu”l-lI!I65f—0.M□>“; p
*~X>q!’aVkJYqX?*


Update Step: 500/256485, 0.1949431740647601%
tensor(3.9042, device='cuda:0', grad_fn=<AddBackward0>)
Hun., Mte, luojmbatodlebdy sht, 
imm’w  ghs ir
uns,


Update Step: 1000/256485, 0.3898863481295202%
tensor(3.3052, device='cuda:0', grad_fn=<AddBackward0>)
HwB. ot ther weoon . 
thoye 
sror Phobo oe 
Oe’tm 



Update Step: 1500/256485, 0.5848295221942803%
tensor(2.8571, device='cuda:0', grad_fn=<AddBackward0>)
Haho doo
ritkonpwteed his hithy Mn a, be Afeary Ful


Update Step: 2000/256485, 0.7797726962590404%
tensor(2.5356, device='cuda:0', grad_fn=<AddBackward0>)
HHa,, “Woncon mair then stert. “Yesade teed apworeg


Update Step: 2500/256485, 0.9747158703238007%
tensor(2.2898, device='cuda:0', grad_fn=<AddBackward0>)
Ha. Oars Patt,, a theed w- pandnars pettir Harry ha


Update Step: 3000/256485, 1.1696590443885606%
tensor(2.1514, device='cuda:0', 



Update Step: 19000/256485, 7.407840614460885%
tensor(1.5701, device='cuda:0', grad_fn=<AddBackward0>)
Hsn 
his gleat not was fatemelfighing his beasn’s h


Update Step: 19500/256485, 7.602783788525645%
tensor(1.5414, device='cuda:0', grad_fn=<AddBackward0>)
HWr you feen on the window yes prein, it off the gr


Update Step: 20000/256485, 7.7977269625904055%
tensor(1.5747, device='cuda:0', grad_fn=<AddBackward0>)
Harry while been things cight, yin. As out of Mr. E


Update Step: 20500/256485, 7.992670136655165%
tensor(1.5882, device='cuda:0', grad_fn=<AddBackward0>)
Hvelly way to cime it left to to him a out. Then  t


Update Step: 21000/256485, 8.187613310719925%
tensor(1.5852, device='cuda:0', grad_fn=<AddBackward0>)
H” Ron 
withten han, into it and stullion whichered


Update Step: 21500/256485, 8.382556484784685%
tensor(1.5821, device='cuda:0', grad_fn=<AddBackward0>)
Har hemelly!” 
Uncle Vernon whisper at Harry of his


Update Step: 22000/256485, 8.577499658849446%
tensor(1.5728, 

HArm Drayegain,” said Ron hagiculfolied at the dryi


Update Step: 38000/256485, 14.81568122892177%
tensor(1.4497, device='cuda:0', grad_fn=<AddBackward0>)
Harry,” 
Harry odd and 
Riddle time. they under mea


Update Step: 38500/256485, 15.010624402986531%
tensor(1.4953, device='cuda:0', grad_fn=<AddBackward0>)
HVGEY OCO AT HqLever. We’lv gone the and hurry 
on 


Update Step: 39000/256485, 15.20556757705129%
tensor(1.5016, device='cuda:0', grad_fn=<AddBackward0>)
Harry,” 
Harry were canding a wall-stanciny, but th


Update Step: 39500/256485, 15.40051075111605%
tensor(1.4948, device='cuda:0', grad_fn=<AddBackward0>)
Hh kitter, the other Petunized rarted his farched y


Update Step: 40000/256485, 15.595453925180811%
tensor(1.5079, device='cuda:0', grad_fn=<AddBackward0>)
Hmiously as Mr. Get 
I’m purple, and Weasley penple


Update Step: 40500/256485, 15.79039709924557%
tensor(1.5355, device='cuda:0', grad_fn=<AddBackward0>)
Harry for then out 
fan futime up to sout — ’ed — b


Update S

Update Step: 56500/256485, 22.028578669317895%
tensor(1.4495, device='cuda:0', grad_fn=<AddBackward0>)
Harry 
again, they looked 
be the team to for abaci


Update Step: 57000/256485, 22.223521843382656%
tensor(1.4208, device='cuda:0', grad_fn=<AddBackward0>)
Harry’s laused down. Professor 
“Hand, we — ” 
Prof


Update Step: 57500/256485, 22.418465017447414%
tensor(1.3950, device='cuda:0', grad_fn=<AddBackward0>)
Hbol talking — Madam Malfoy 
got hidden that stmash


Update Step: 58000/256485, 22.613408191512175%
tensor(1.3845, device='cuda:0', grad_fn=<AddBackward0>)
Har bowle.” 
“Cole,” 
Harry jake bost, Ron now. “CA


Update Step: 58500/256485, 22.808351365576936%
tensor(1.3722, device='cuda:0', grad_fn=<AddBackward0>)
Hr three sink. ... I mean Laven the fire?” 
“I bega


Update Step: 59000/256485, 23.003294539641693%
tensor(1.3864, device='cuda:0', grad_fn=<AddBackward0>)
HK SpURVEnE 
be onto his how in the Wanged on. Afte


Update Step: 59500/256485, 23.198237713706455%
tensor(1.37



Update Step: 75500/256485, 29.436419283778783%
tensor(1.4446, device='cuda:0', grad_fn=<AddBackward0>)
Har Porray. 
“They had loudly, after -brisies gare 


Update Step: 76000/256485, 29.63136245784354%
tensor(1.4431, device='cuda:0', grad_fn=<AddBackward0>)
HS up held of pouster quintlescable taking his turn


Update Step: 76500/256485, 29.8263056319083%
tensor(1.4041, device='cuda:0', grad_fn=<AddBackward0>)
HT WOW AN. No Ah.” 
Dumbledore was lass in the cove


Update Step: 77000/256485, 30.021248805973062%
tensor(1.3844, device='cuda:0', grad_fn=<AddBackward0>)
HoS, Moody, wanted. Asked turned about the 
drogned


Update Step: 77500/256485, 30.21619198003782%
tensor(1.3807, device='cuda:0', grad_fn=<AddBackward0>)
HARC’ OAY,” Hermione’s thinks. 
Moody something fou


Update Step: 78000/256485, 30.41113515410258%
tensor(1.4025, device='cuda:0', grad_fn=<AddBackward0>)
Hh aboous off in there’s obseds were lay flashy pal


Update Step: 78500/256485, 30.606078328167342%
tensor(1.4049,

“Where, then turned


Update Step: 94500/256485, 36.84425989823966%
tensor(1.3621, device='cuda:0', grad_fn=<AddBackward0>)
Harry squeaked through the Great Voldly desing him.


Update Step: 95000/256485, 37.039203072304424%
tensor(1.3485, device='cuda:0', grad_fn=<AddBackward0>)
Hve Neville, Harry, thought; the hat to sas no, hop


Update Step: 95500/256485, 37.23414624636918%
tensor(1.3624, device='cuda:0', grad_fn=<AddBackward0>)
HW. Whee ten all-,” said Bagman and 
said that I wa


Update Step: 96000/256485, 37.42908942043394%
tensor(1.3600, device='cuda:0', grad_fn=<AddBackward0>)
Hary — ” 
Hermione Harry fan perpaser. “Very whose 


Update Step: 96500/256485, 37.6240325944987%
tensor(1.3446, device='cuda:0', grad_fn=<AddBackward0>)
H.. Trought be toul DO H’ Thouse, a second, so disi


Update Step: 97000/256485, 37.81897576856346%
tensor(1.3319, device='cuda:0', grad_fn=<AddBackward0>)
HN.” 
“Harry Fies here, ’s nose since Klutch. ... A


Update Step: 97500/256485, 38.013918942628

In [None]:
# Prime the network with a context before generation,
# i.e only pass the hidden state forward, not the predicted
# character.
def synthesize_text2(model, prime, length):
    model.eval()
    generated_seq = [char for char in prime]
    hidden = model.init_hidden(1)
    
    #Used to prime the network
    for char in prime:
        x0 = np.zeros(K, dtype=np.float32)
        x0[c2i[char]] = 1
        x0 = torch.from_numpy(x0)
        x0 = x0.view(1,1,K).to(device)
        c, hidden = model(x0, hidden)
    
    c, i, x0 = get_next_char(c)
    generated_seq.append(i)
    
    #x0 = np.zeros(K, dtype=np.float32)
    x0 = x0.view(1,1,K).to(device)
    
    for i in range(length):
        #print("x0 shape: " + str(x0.shape))
        #print("hidden shape: " + str(hidden.shape))
        s, hidden = model(x0, hidden)
        i, c, x0 = get_next_char(s)
        generated_seq.append(c)
    model.train()
    
   # returnString = ""
    
   # for c in generated_seq:
       # returnString += str(c)
    
    return ''.join(generated_seq)

In [None]:
generated_seq = synthesize_text(model, 'H', 1000)
print(generated_seq)

In [None]:
generated_seq = synthesize_text2(model, 'Harry Potter ', 1000)
print(generated_seq)

In [None]:
import matplotlib.pyplot as plt
xaxis = range(len(lossarr))
plt.plot(xaxis,lossarr)
plt.show()
print(lossarr)

In [None]:
for str in strarr:
    print(str)
    print()

In [None]:
# change the name, for saving multiple files
model_name = 'lstm_2_layers_all_books.net'

checkpoint = {'hidden_size': model.hidden_size,
              'n_layers': model.n_layers,
              'state_dict': model.state_dict(),
              'tokens': chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [None]:
import re
#generated_words = generated_seq.split()
trials = 100
mean_actual_word_count = 0
f = open('goblet_book.txt').read()

for i in range(100):
    generated_words = re.findall(r"[\w']+", synthesize_text2(model, 'Harry Potter', 1000))
    #print(generated_words)
    actual_words_count = 0
    for word in generated_words: 
        if word in f:
            #print("hey! this word matched!")
            #print(word)
            actual_words_count += 1

    actual_words_ratio = actual_words_count/len(generated_words)
    mean_actual_word_count += actual_words_ratio
    #print("synthesized " + str(actual_words_count) + " real words, out of " + str(len(generated_words)) + " attempts.")
    #print("actual word ratio: " + str(actual_words_ratio))

print("Mean of correct words generated: ")
print(mean_actual_word_count/trials)
