In [0]:
# Imports
import numpy as np
import pickle

from google.colab import files
from google.colab import drive

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [1]:
# Attach Google Drive to the Colab runtime; its contents appear under /content/drive
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Loading in Shanties lyrics corpus
# Read the whole UTF-8 text file into a single string. The context manager
# closes the file handle as soon as the read finishes rather than leaving it
# open for the rest of the session.
with open('/content/drive/My Drive/dsi/submissions/capstone/data/shanties_all.txt', encoding='utf-8') as corpus_file:
    shanties = corpus_file.read()

In [0]:
# Converting characters to integers

In [0]:
# Every distinct character that occurs in the corpus, in sorted order
chars_list = sorted(set(shanties))

In [0]:
# Lookup table: character -> its index within chars_list
chars_to_ints = {char: idx for idx, char in enumerate(chars_list)}

In [8]:
# Corpus size (in characters) and vocabulary size (distinct characters)
len_shanties, n_chars = len(shanties), len(chars_list)

# One print call, one line per statistic
print(
    f'Total length of corpus  :  {len_shanties}',
    f'Total unique characters :  {n_chars}',
    sep='\n',
)

Total length of corpus  :  698604
Total unique characters :  27


In [9]:
# Slide a fixed-width window over the corpus one character at a time.
# Each window of seq_len characters is an input pattern; the character
# immediately after the window is the value to predict.
seq_len = 50
window_starts = range(len_shanties - seq_len)

# Inputs: each window encoded as a list of integer character codes
X_data = [
    [chars_to_ints[ch] for ch in shanties[start:start + seq_len]]
    for start in window_starts
]
# Targets: integer code of the character that follows each window
y_data = [chars_to_ints[shanties[start + seq_len]] for start in window_starts]

total_patterns = len(X_data)
print(f'Total number of {seq_len} character lenght patters: {total_patterns}')

Total number of 50 character lenght patters: 698554


In [0]:
# Arrange the integer sequences as (patterns, seq_len, 1) for the LSTM input,
# then normalize: character codes run from 0 to n_chars - 1, so dividing by
# n_chars scales every value into [0, 1).
X = np.asarray(X_data).reshape(total_patterns, seq_len, 1) / float(n_chars)

# One-hot encode the target character codes
y = np_utils.to_categorical(y_data)

In [0]:
# Checkpoint callback: after an epoch whose training loss is lower than the
# best seen so far, write the model to a file named after the epoch and loss.
checkpoint_name = 'weights-improvement-{epoch:02d}-{loss:.4f}.hdf5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_name,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [12]:
# Defining LSTM model
# Two stacked 256-unit LSTM layers, each followed by dropout at rate 0.2,
# ending in a softmax layer with one unit per one-hot target column.
# The Sequential container is instantiated exactly once.
model = Sequential()

# Adding layers
# input_shape is (X.shape[1], X.shape[2]), i.e. the per-pattern shape of X.
# return_sequences=True makes the first LSTM emit its full output sequence,
# which the second LSTM layer consumes.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

# Compiling model
# Categorical cross-entropy matches the one-hot encoded targets in y.
model.compile(loss='categorical_crossentropy', optimizer='adam')





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [14]:
# Confirming the runtime is connected to a GPU
# `tf` is imported here because no earlier cell brings it into scope; without
# this import the cell raises NameError on a fresh kernel.
import tensorflow as tf

# Abort early if the first GPU device is not available, so a long training
# run is not started on a runtime without one.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

TensorFlow is already loaded. Please restart the runtime to change versions.
Found GPU at: /device:GPU:0


In [15]:
# Train for 20 epochs with a batch size of 128; the checkpoint callback in
# callbacks_list runs during training.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/20






Epoch 00001: loss improved from inf to 2.35164, saving model to weights-improvement-01-2.3516.hdf5
Epoch 2/20

Epoch 00002: loss improved from 2.35164 to 1.98712, saving model to weights-improvement-02-1.9871.hdf5
Epoch 3/20

Epoch 00003: loss improved from 1.98712 to 1.84423, saving model to weights-improvement-03-1.8442.hdf5
Epoch 4/20

Epoch 00004: loss improved from 1.84423 to 1.75508, saving model to weights-improvement-04-1.7551.hdf5
Epoch 5/20

Epoch 00005: loss improved from 1.75508 to 1.69250, saving model to weights-improvement-05-1.6925.hdf5
Epoch 6/20

Epoch 00006: loss improved from 1.69250 to 1.64330, saving model to weights-improvement-06-1.6433.hdf5
Epoch 7/20

Epoch 00007: loss improved from 1.64330 to 1.60766, saving model to weights-improvement-07-1.6077.hdf5
Epoch 8/20

Epoch 00008: loss improved from 1.60766 to 1.57835, saving model to weights-improveme

<keras.callbacks.History at 0x7f2115104f28>

In [0]:
# Pickling Model
# The context manager flushes and closes the file once the dump finishes, so
# the pickle on disk is complete before any later cell reads or downloads it.
# NOTE(review): pickling a Keras model object may not round-trip across
# library versions; confirm this file loads, or rely on model.save() instead.
with open('shanty_writer_collab.p', 'wb') as model_file:
    pickle.dump(model, model_file)

In [0]:
# Download the pickled model file from the Colab runtime to the local machine
files.download(filename='shanty_writer_collab.p')

In [0]:
# File name interpolated into the Google Drive path expression in the next cell.
# NOTE(review): the later model.save(...) call hard-codes a different name
# ('shanty_writer_collab_v2') and does not use this variable — confirm which is intended.
model_save_name = 'shanty_writer_collab.pt'


In [0]:
# Display the full Google Drive path built from model_save_name
'/content/drive/My Drive/dsi/submissions/capstone/code/model-building/' + model_save_name

In [0]:
# Save the trained model to Google Drive using Keras' own save routine
model.save(
    '/content/drive/My Drive/dsi/submissions/capstone/code/model-building/shanty_writer_collab_v2'
)