In [None]:
import os
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import fileinput
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Lambda
from keras.layers import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random
import requests
from google.colab import output

# Preprocessing

In [None]:
def remove_punct(text):
  """Replace unwanted punctuation, line breaks and digits with spaces.

  Every character listed in `remove` is mapped to a single space, then
  leading/trailing whitespace is stripped. Runs of spaces are NOT collapsed
  here; the caller is expected to do that if needed.

  Args:
    text: the string to clean.

  Returns:
    The cleaned string.
  """
  # All ten digits are listed: the previous list skipped '0', which left
  # stray zeros (from numbers such as 10, 20, 100) in the cleaned text.
  remove = ',!:;^_`()"#$\r\n0123456789'
  # One translate pass instead of a replace() call per character.
  to_space = str.maketrans(remove, ' ' * len(remove))
  return text.translate(to_space).strip()

In [None]:
# Download the Shakespeare corpus and normalise it into one long lowercase string.
shakespeare_file = 'https://raw.githubusercontent.com/charlesincharge/Caltech-CS155-2022/main/miniprojects/miniproject3/data/shakespeare.txt'
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page (e.g. a 404 body) from silently becoming the training text.
response = requests.get(shakespeare_file, timeout=30)
response.raise_for_status()
text = response.text
text = text.lower() # make lowercase
text = remove_punct(text) # remove punctuation
text = ' '.join(text.split()) # make there a max of one space between words

In [None]:
# Build the training pairs: every window of len_seq characters (taken at
# every second position in the text) is an input, and the single character
# that immediately follows the window is its target.
len_seq = 40
window_starts = range(0, len(text) - len_seq, 2)

X_seqs = [text[start:start + len_seq] for start in window_starts]   # X: sequences
Y_chars = [text[start + len_seq] for start in window_starts]        # Y: next char after each sequence

In [None]:
# Map each distinct character of the corpus to its position in sorted order.
chars = sorted(set(text))
char_map = {ch: position for position, ch in enumerate(chars)}

In [None]:
def vectorize_data(char_map, char):
  """One-hot encode `char` using the character -> index table `char_map`.

  Returns a 1-D float array of length len(char_map) that is 1 at the index
  assigned to `char` and 0 elsewhere. Raises KeyError if `char` is not a key
  of `char_map`.
  """
  position = char_map[char]
  one_hot = np.zeros(len(char_map))
  one_hot[position] = 1
  return one_hot

In [None]:
# One-hot encode the data: each input sequence becomes a 2-D array with one
# encoded row per character, and each target character becomes a single row.
X_train = np.array([
    np.array([vectorize_data(char_map, char) for char in seq])
    for seq in X_seqs
])
Y_train = np.array([vectorize_data(char_map, target) for target in Y_chars])

# Training

In [None]:
# Character-level model: one LSTM layer with 125 units reads a window of
# len_seq one-hot encoded characters, and a softmax layer outputs a
# probability for every character in char_map.
model = Sequential()
# Use len_seq rather than a literal 40 so the input window always matches
# the length of the sequences built for training.
model.add(LSTM(125, input_shape = (len_seq, len(char_map))))
model.add(Dense(len(char_map), activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 125)               79000     
                                                                 
 dense (Dense)               (None, 32)                4032      
                                                                 
Total params: 83,032
Trainable params: 83,032
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Training hyperparameters, named so they are easy to find and tune.
EPOCHS = 75
BATCH_SIZE = 128

model.compile(optimizer = 'RMSprop', loss="categorical_crossentropy", metrics = ["accuracy"])
# Keep the returned History object: it holds the per-epoch loss/accuracy for
# later inspection, and assigning it stops its repr from being echoed as the
# cell output.
history = model.fit(X_train, Y_train, epochs = EPOCHS, batch_size = BATCH_SIZE)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.History at 0x7f45145c97f0>

# Generate Output

In [None]:
def predict_char(input, temp):
    """Sample one character from a probability vector with temperature scaling.

    Each probability p is re-weighted to p ** (1 / temp) and the result is
    renormalised, so temp < 1 sharpens the distribution and temp > 1 flattens it.

    Args:
        input: sequence of per-character probabilities, indexed like the values
            of the module-level `char_map` (expected to be the model's softmax
            output for one sample).
        temp: positive temperature.

    Returns:
        The character whose `char_map` index was drawn, or "" if no character
        has that index.
    """
    # Promote to float64: float32 model output can be rejected by
    # np.random.choice when rounding makes the probabilities not sum to 1.
    probs = np.asarray(input, dtype=np.float64)
    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # silence only the divide-by-zero warning it would otherwise raise.
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temp
    # Shift by the maximum before exponentiating so very small temperatures
    # do not underflow every entry to 0 (which would normalise to NaN).
    probs = np.exp(scaled - np.max(scaled))
    probs /= np.sum(probs)
    idx = np.random.choice(len(char_map), p=probs)
    # Reverse lookup: find the character that owns the sampled index.
    for ch, ch_idx in char_map.items():
        if ch_idx == idx:
            return ch
    return ""

In [None]:
def gen_sonnet(temp, num_lines, line_len):
    """Generate text one character at a time, starting from a fixed seed line.

    Args:
        temp: sampling temperature passed to predict_char.
        num_lines: number of lines' worth of characters to generate.
        line_len: characters per line; num_lines * line_len characters are
            appended to the seed.

    Returns:
        The seed followed by the generated characters, as one string with no
        line breaks.
    """
    sonnet = "shall i compare thee to a summer's day? "
    # The context window must match the sequence length the model was built
    # with, not the display line length; the two only coincide when
    # line_len happens to equal the model's input length.
    window_len = model.input_shape[1]
    for _ in range(num_lines * line_len):
        # One-hot encode the most recent window_len characters as the next input.
        window = sonnet[-window_len:]
        layer = np.zeros((1, window_len, len(char_map)))
        for i, char in enumerate(window):
            layer[0, i] = vectorize_data(char_map, char)
        # verbose=0 stops predict() from printing a progress bar per character.
        sonnet += predict_char(model.predict(layer, verbose=0)[0], temp)
    return sonnet

In [None]:
def print_sonnet(txt, num_lines, line_len):
    """Print `txt` as up to `num_lines` lines of `line_len` characters each.

    Only the first num_lines * line_len characters of `txt` are printed. If
    `txt` is shorter than that, printing stops once the text runs out.
    """
    # Slice whole lines instead of walking character by character: the old
    # loop stopped one index early, dropping the last character and the
    # final newline.
    for start in range(0, num_lines * line_len, line_len):
        chunk = txt[start:start + line_len]
        if not chunk:
            break
        print(chunk)

In [None]:
# Generate one 14-line, 40-characters-per-line sonnet per temperature, in
# increasing order of temperature, then clear the accumulated cell output.
t1, t2, t3 = [gen_sonnet(temp, 14, 40) for temp in (0.25, 0.75, 1.5)]
output.clear()

In [None]:
# Header and underline, followed by the low-temperature sonnet.
print('Sonnet with temp = 0.25', '----------------------', sep='\n')
print_sonnet(t1, 14, 40)

Sonnet with temp = 0.25
----------------------
shall i compare thee to a summer's day? 
thou my love thy state of thy shall of m
y dost did art and to self thee love the
 self mine eyes whe eeppeated to my self
 i am my love and that grain of thy self
 alons ad to the fearth make thoughts mo
rtelled with thengs your seep appert who
 hast beaven beate. thy should i leave w
hine eye hath a see in see in them false
 as forting of enten and hask the sweet 
beauty's from mightrow and though should
 you trought a agat for my love now the 
dearty might and yet the beauty the worl
d will be woos and stating hath to glea

In [None]:
# Header and underline, followed by the medium-temperature sonnet.
print('Sonnet with temp = 0.75', '----------------------', sep='\n')
print_sonnet(t2, 14, 40)

Sonnet with temp = 0.75
----------------------
shall i compare thee to a summer's day? 
thou art branse o nater doth past steres
e not bearows all to chat fan is fair gi
ve would tables bright as true thoo shal
t her wied sumpary for thy soul foow o n
o len accent to leade own reveit tought 
in a know the gave thou of but hin in th
e is self my soull asse inet. both heave
nts of eyss wait thou sen-till well of y
our self wor well be the death after bin
d. so worthy so that so that who herker 
bettired and beauty and were swares and 
beauty aboth to my self wearth destake t
heir raysoms treesscrengs to purse vink

In [None]:
# Blank line, header and underline, followed by the high-temperature sonnet.
print('', 'Sonnet with temp = 1.5', '----------------------', sep='\n')
print_sonnet(t3, 14, 40)


Sonnet with temp = 1.5
----------------------
shall i compare thee to a summer's day? 
and mune all wouthse pyoved tsill of thy
 heart's frome 'tong. whrich heart. then
 yet self men. i seent upind mour's had.
 on wrondleed hasd in impresing thy forr
ous offee so iftent with this play far f
lom that bebind hay refo but which alavi
ng heath deyoiess tingurieden bies chads
e ke conit unfrllomst ulfantresed vickun
t of a rfifute pispiod od far theil dowi
ng that wrut'st an dost fassing arind my
 love eneing not no to i de this poversu
llst wear gzeats both tiless peigeerye. 
but be apone to pitwance a cishoan conm