In [2]:
# In this problem we will create a NN to write Shakespeare
# plays.
# The training data is included in the Training Data 
# subfolder, and was taken from here:
# https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
#
# The idea for the project came from this awesome blog post:
# https://karpathy.github.io/2015/05/21/rnn-effectiveness/ 
# I highly recommend you read it. It is great.

# Packages

import numpy as np
import torch
import os

In [15]:
### -------- Import Data and Data Preprocessing -------- ###
# you must include the appropriate data preprocessing steps

# Load the raw corpus as one string.
with open('Training Data/3-RNN_input.txt', 'r') as file:
    text = file.read()

# Build the character vocabulary. Sorting makes the char <-> index mapping
# deterministic: the iteration order of a set of strings can differ between
# interpreter sessions (string hash randomization), which would make a saved
# checkpoint decode to different characters after a kernel restart.
chars = sorted(set(text))
int2char = dict(enumerate(chars))
char2int = {char: index for index, char in int2char.items()}

# Encode the corpus as a 1-D tensor holding one vocabulary index per character.
data = torch.tensor([char2int[char] for char in text])

print(data.shape)

torch.Size([1115394])


In [16]:
### ------------ Exploratory Data Analysis ------------- ###
# Output two pieces of information that you found
# informative as well as a print statement of why they
# assisted you in choosing your model parameters

# Gather both statistics first, then report each with its justification.
vocabulary_size = len(chars)
corpus_length = len(data)

# Vocabulary size -> width of the classifier output.
print('The number of unique characters in the data is:', vocabulary_size)
print('This helps me set up the output of the data, as there need to be 65 choices')

# Corpus length -> how long a training window is affordable.
print('The length of the data is:', corpus_length)
print('I can choose a rather long sequence to learn from because I have a lot of data')

The number of unique characters in the data is: 65
This helps me set up the output of the data, as there need to be 65 choices
The length of the data is: 1115394
I can choose a rather long sequence to learn from because I have a lot of data


In [22]:
### ---------------- Model Definition ------------------ ###
# Use an LSTM

class Model(torch.nn.Module):
    """Character-level LSTM that scores the next character of a sequence.

    Every time step of the input is a single scalar feature (the character
    index cast to float, hence ``input_size=1``). The LSTM output at the last
    time step is projected to one logit per vocabulary entry.

    Args:
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        vocab_size: Number of output classes. When omitted, falls back to
            ``len(chars)``, the notebook-level vocabulary.
    """

    def __init__(self, hidden_size=256, num_layers=2, vocab_size=None):
        super(Model, self).__init__()
        if vocab_size is None:
            vocab_size = len(chars)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Pass num_layers through instead of hard-coding 2, so the constructor
        # argument actually controls the depth of the network.
        self.lstm = torch.nn.LSTM(input_size=1, hidden_size=hidden_size,
                                  num_layers=num_layers, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, vocab_size)
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=1):
        """Reset the recurrent state to zeros, store it and return it.

        Args:
            batch_size: Number of sequences the state must serve.

        Returns:
            The ``(h_0, c_0)`` tuple, each of shape
            ``(num_layers, batch_size, hidden_size)``, created with the same
            dtype and device as the model parameters.
        """
        reference = self.linear.weight
        shape = (self.num_layers, batch_size, self.hidden_size)
        self.hidden = (reference.new_zeros(shape), reference.new_zeros(shape))
        return self.hidden

    def forward(self, x):
        """Return next-character logits for a batch of index sequences.

        Args:
            x: Float tensor of shape ``(batch, seq_len, 1)``.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` taken from the last step.
        """
        batch_size = x.size(0)
        # The stored state is unusable if it was never created, was built for
        # another batch size, or lives on a different device than the input.
        stale = (
            self.hidden is None
            or self.hidden[0].size(1) != batch_size
            or self.hidden[0].device != x.device
        )
        # Training always starts each window from a zero state; evaluation
        # keeps the state between calls unless it is stale.
        if self.training or stale:
            self.init_hidden(batch_size)
        x, self.hidden = self.lstm(x, self.hidden)
        x = self.linear(x)
        return x[:, -1, :]

In [23]:
### --------- Optimizer and Loss Definition ------------ ###
# Output a print statement supporting your optimizer and
# loss function choices

# Length of the character window fed to the network for each prediction.
sequence_length = 50

# Network and the optimiser that updates its parameters.
model = Model(num_layers=3, hidden_size=512)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
print("I'm using Adam because it is the best optimizer for most problems")

# Training criterion, followed by the two-line justification for it.
loss_function = torch.nn.CrossEntropyLoss()
print(
    "Since we are doing a multivariate classification, cross entropy loss is a good choice",
    "It also does the one-hot encoding for us",
    sep="\n",
)

I'm using Adam because it is the best optimizer for most problems
Since we are doing a multivariate classification, cross entropy loss is a good choice
It also does the one-hot encoding for us


In [24]:
### ------------------ Batch Setup --------------------- ###

# Random permutation of every valid window start index. A window starting at
# s uses data[s:s + sequence_length] as input and data[s + sequence_length]
# as target, so starts range over [0, len(data) - sequence_length).
# The permutation is drawn once here, so every call to batches() walks the
# windows in this same order.
data_order = torch.randperm(len(data) - sequence_length)
def batches(batch_size=10):
    """Yield (input, target) mini-batches in the order given by data_order.

    Reads the notebook-level ``data`` (1-D tensor of character indices),
    ``data_order`` (window start indices) and ``sequence_length``.

    Args:
        batch_size: Number of windows per batch; the final batch may be
            smaller when the number of windows is not a multiple of it.

    Yields:
        x: Float tensor of shape ``(batch, sequence_length, 1)`` holding the
            character indices of each window.
        y: Tensor of shape ``(batch,)`` holding the index of the character
            that follows each window.
    """
    # Offsets 0..sequence_length-1 are the same for every batch; build once.
    offsets = torch.arange(sequence_length, device=data_order.device)
    for i in range(0, len(data_order), batch_size):
        starts = data_order[i:i + batch_size]
        # Broadcast (batch, 1) + (sequence_length,) into a (batch,
        # sequence_length) index grid and gather every window in one indexing
        # call instead of slicing and stacking sample by sample.
        window_index = starts.unsqueeze(1) + offsets
        x = data[window_index].unsqueeze(-1).float()
        y = data[starts + sequence_length]
        yield x, y

In [25]:
### ---------------- Training pt I --------------------- ###
# Train 10 epochs

# Matches the "Train 10 epochs" requirement stated above.
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for batch_num, (x, y) in enumerate(batches(batch_size=100)):
        optimizer.zero_grad()

        output = model(x)
        loss = loss_function(output, y)

        loss.backward()
        optimizer.step()

        # Report the loss of every 1000th batch.
        if batch_num % 1000 == 0:
            print(f'Epoch {epoch+1} Batch {batch_num} Loss: {loss.item()}')
    print(f'Epoch {epoch+1} completed\n')

    # Save the model after every epoch so that interrupting a long run
    # loses at most the epoch in progress.
    torch.save(model.state_dict(), 'rnn_model.pth')

Epoch 1 Batch 0 Loss: 4.168362140655518
Epoch 1 Batch 1000 Loss: 2.722783088684082
Epoch 1 Batch 2000 Loss: 2.526240110397339
Epoch 1 Batch 3000 Loss: 2.0892553329467773
Epoch 1 Batch 4000 Loss: 1.991926670074463
Epoch 1 Batch 5000 Loss: 1.8739547729492188
Epoch 1 Batch 6000 Loss: 1.8590413331985474
Epoch 1 Batch 7000 Loss: 1.7220540046691895
Epoch 1 Batch 8000 Loss: 1.71344792842865
Epoch 1 Batch 9000 Loss: 1.8991285562515259
Epoch 1 Batch 10000 Loss: 1.8734818696975708
Epoch 1 Batch 11000 Loss: 1.8212858438491821
Epoch 1 completed

Epoch 2 Batch 0 Loss: 1.7796648740768433
Epoch 2 Batch 1000 Loss: 1.818304419517517
Epoch 2 Batch 2000 Loss: 1.908031940460205
Epoch 2 Batch 3000 Loss: 1.6090906858444214
Epoch 2 Batch 4000 Loss: 1.6651911735534668
Epoch 2 Batch 5000 Loss: 1.5445263385772705
Epoch 2 Batch 6000 Loss: 1.6057806015014648
Epoch 2 Batch 7000 Loss: 1.4871100187301636
Epoch 2 Batch 8000 Loss: 1.473750352859497
Epoch 2 Batch 9000 Loss: 1.7852187156677246
Epoch 2 Batch 10000 Loss: 

KeyboardInterrupt: 

In [26]:
# Write the current weights to disk. The training cell only reaches its own
# save after the epoch loop finishes, so this persists the model when that
# loop is stopped early.
torch.save(model.state_dict(), 'rnn_model.pth')

In [27]:
### ---------------- Testing pt I ---------------------- ###
# Write an essay with a minimum of 2,000 characters and 
# save the output as a PDF named "RNN_pt1.pdf"

# model.load_state_dict(torch.load("rnn_model.pth"))

model.eval()
generated_chars = []

# Seed text, encoded as a (1, seq_len, 1) float tensor like the training input.
x = torch.tensor([char2int[char] for char in 'The ']).view(1, -1, 1).float()

# Inference needs no gradients; without no_grad the recurrent state would
# keep an ever-growing autograd graph alive across the 2000 steps.
with torch.no_grad():
    for _ in range(2000):
        # The whole sliding window is fed on every step, so start from a zero
        # state each time, exactly as in training. Carrying the state over
        # would make the LSTM process the same characters repeatedly.
        model.init_hidden()
        y = model(x)
        # Greedy decoding: always take the most likely next character.
        prediction = torch.argmax(y, dim=1)
        generated_chars.append(int2char[prediction.item()])
        # Append the prediction and keep only the last sequence_length steps.
        x = torch.cat([x, prediction.view(1, 1, 1).float()], dim=1)[:, -sequence_length:, :]

output = ''.join(generated_chars)

# This is the part I essay, so name the file accordingly.
with open('RNN_pt1.txt', 'w') as file:
    file.write(output)


In [None]:
### --------------- Training pt II --------------------- ###
# Train an ADDITIONAL 100 epochs

In [None]:
### --------------- Testing pt II ---------------------- ###
# Write an essay with a minimum of 2,000 characters and 
# save the output as a PDF named "RNN_pt2.pdf"

In [None]:
### -------------- Training pt III --------------------- ###
# Train until you can get it to write a good essay. Take
# advantage of the fact that pytorch doesn't reset your model
# unless you reinstantiate it in the "Model Definition" cell
#
# If after 3 hours it still doesn't make a meaningful essay
# then change some hyperparameters and try again. You can 
# look to the blog post for hyperparameter inspiration.

# No training code is kept in this cell; the line below only reports how far
# training got.
print("I was only able to train 15 epochs as it took 48 hours to do so.")

In [None]:
### -------------- Testing pt III ---------------------- ###
# Write an essay with a minimum of 2,000 characters and 
# save the output as a PDF named "RNN_pt3.pdf"
#
# Output a print statement commenting on whether or not you
# enjoyed this problem and why or why not.

# Closing reflection on the problem, as requested by the cell prompt.
print("I am happy with the output of my network, but I believe that it took to long to get there and took too much processing power and energy.")