# In [None]:
# Imports
import numpy as np

import os

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

"""
Adjust sequence length
"""
# Number of consecutive characters in each training window. The pattern
# slicing, the LSTM input reshape and the output file names all read it.
seq_len = 50

# Loading in Shanties lyrics corpus.
# The context manager closes the file handle as soon as the text has been
# read instead of leaving it open for the rest of the run.
with open('shanties_all.txt', encoding='utf-8') as corpus_file:
    shanties = corpus_file.read()

# Converting characters to integers

# Sorted list of all unique characters; sorting makes the character -> integer
# assignment deterministic for a given corpus.
chars_list = sorted(set(shanties))

# Dictionary mapping each unique character to its position in chars_list
chars_to_ints = {char: index for index, char in enumerate(chars_list)}

# Checking length of corpus and unique characters
len_shanties = len(shanties)
n_chars = len(chars_list)

print(f'Total length of corpus  :  {len_shanties}')
print(f'Total unique characters :  {n_chars}')

# Building the training patterns for the entire corpus: every input is a
# window of seq_len characters (as integer codes) and its target is the code
# of the character that immediately follows that window.

# Encode the corpus once so each window is a plain slice of integers
encoded_corpus = [chars_to_ints[symbol] for symbol in shanties]

# One window starts at every position that still has a following character
window_starts = range(len_shanties - seq_len)

X_data = [encoded_corpus[start:start + seq_len] for start in window_starts]
y_data = [encoded_corpus[start + seq_len] for start in window_starts]

total_patterns = len(X_data)
print(f'Total number of {seq_len} character lenght patters: {total_patterns}')

# Reshaping data for use in LSTM networks: one row per pattern, seq_len time
# steps per row, and a single value (the character code) per time step
X = np.reshape(X_data, (total_patterns, seq_len, 1))

# Normalizing X data: codes run from 0 to n_chars - 1, so dividing by n_chars
# scales every value into [0, 1)
X = X / float(n_chars)

# One-hot encode the output variable.
# num_classes is pinned to the vocabulary size: left unset, to_categorical
# infers the width from the largest target value, which yields too few
# columns whenever the highest-indexed character never occurs as a target.
y = np_utils.to_categorical(y_data, num_classes=n_chars)

# Creating the directory that stores model weights.
# This must be exactly the folder the checkpoint path below points into,
# otherwise saving a checkpoint can fail on a missing directory.
# makedirs builds the nested folders in one call, and exist_ok=True lets the
# script be re-run without raising FileExistsError.
checkpoint_dir = f'./{seq_len}-char-model-weights/{seq_len}-char-sequence'
os.makedirs(checkpoint_dir, exist_ok=True)

# Creating a checkpoint to find best weights.
# The {epoch} / {loss} placeholders live in a plain (non-f) string on purpose:
# they are left for Keras to fill in when it writes each checkpoint file.
checkpoint_name = (
    checkpoint_dir
    + f'/{seq_len}-char-seq-weights-improvement-'
    + '{epoch:02d}-{loss:.4f}.hdf5'
)

# Monitor the training loss and only write a file when it reaches a new minimum
checkpoint = ModelCheckpoint(checkpoint_name, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# Defining LSTM model: two stacked 256-unit LSTM layers, each followed by 20%
# dropout, feeding a softmax layer with one unit per one-hot target column.
# The model is instantiated once; layers are appended in order below.
model = Sequential()

# Adding layers
# Input shape is (time steps, features) taken from X. return_sequences=True
# makes the first LSTM emit its output at every time step so the second LSTM
# receives a sequence rather than a single vector.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
# Output width follows y so predictions line up with the one-hot targets
model.add(Dense(y.shape[1], activation='softmax'))

# Compiling model
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model: 20 epochs in mini-batches of 128 patterns, with the
# checkpoint callback list passed to Keras for the duration of training
fit_settings = dict(epochs=20, batch_size=128, callbacks=callbacks_list)
model.fit(X, y, **fit_settings)

# Save the trained model; the file name embeds the sequence length
trained_model_path = f'{seq_len}-char-seq-shanty-writer.h5'
model.save(trained_model_path)