# This notebook provides the same functionality, but runs on Google Colab TPUs

In [None]:
# Clone the project repository into the runtime; the import cell adds its
# src/ directory (/content/harry-potter-remix-bot/src) to sys.path.
# NOTE(review): assumes the working directory is /content -- confirm.
!git clone https://github.com/bugsiesegal/harry-potter-remix-bot

In [41]:
import sys
sys.path.append('/content/harry-potter-remix-bot/src')
from numpy import array
import os
import numpy as np
import pandas as pd
from tensorflow import keras
import tensorflow as tf
import parameters
from data_preprocess import loaddata, remove_impuritys, one_hot_encode, one_hot_decode, number_encode, predict_text

In [None]:
#STEP: Upload data to colab runtime environment
# Manual prerequisite: 1.txt must already be uploaded to /content.
# This moves it into the cloned repository's data/raw directory
# (presumably where parameters.colab_datapath points -- TODO confirm).
%mv '/content/1.txt' '/content/harry-potter-remix-bot/data/raw'

In [7]:
#Pre-processing
# Load the raw book text and clean it with the project helpers.
books = loaddata(parameters.colab_datapath, 1)
text = remove_impuritys(books)

# Vocabulary: every distinct symbol in the cleaned text, in sorted order.
characters = sorted(set(text))

# Lookup tables between a symbol and its integer id (its index in `characters`).
n_to_char = dict(enumerate(characters))
char_to_n = {symbol: index for index, symbol in n_to_char.items()}

In [8]:
# Slide a window of `seq_length` symbols over the text, one step at a time.
# X holds each window as a list of integer ids; Y holds the id of the symbol
# that immediately follows the corresponding window.
seq_length = 100
length = len(text)
window_starts = range(length - seq_length)

X = [
    [char_to_n[symbol] for symbol in text[start:start + seq_length]]
    for start in window_starts
]
Y = [char_to_n[text[start + seq_length]] for start in window_starts]

In [9]:
# Arrange the integer-encoded windows as (samples, seq_length, 1), the
# per-sample shape the first LSTM layer is built from, and scale the ids
# by the vocabulary size.
X_modified = np.reshape(X, (len(X), seq_length, 1)) / float(len(characters))

# One-hot encode the next-character labels.
# `np_utils` is never imported in this notebook, so the original call raised
# NameError; use the equivalent helper from the already-imported tf keras.
# num_classes is pinned to the vocabulary size so the label width (and thus
# the softmax layer width) always matches `n_to_char`, even if the
# highest-id symbol never occurs as a label.
Y_modified = keras.utils.to_categorical(Y, num_classes=len(characters))

In [10]:
#Re-written with tf's keras package for error-free conversion into a tpu model
def create_model():
  """Build the uncompiled character-prediction network.

  The stack is two 256-unit LSTM layers (the first returns full sequences so
  the second can consume them), each followed by 20% dropout, then two Dense
  layers (128 and 64 units, no activation argument) and a softmax output.

  Reads the notebook globals `X_modified` (per-sample input shape) and
  `Y_modified` (number of output units = its second dimension).

  Returns:
    A `keras.Sequential` model.
  """
  sample_shape = X_modified[0].shape
  n_outputs = Y_modified.shape[1]

  layer_stack = [
    keras.layers.LSTM(256, input_shape=(sample_shape), name="LSTM_1", return_sequences=True),
    keras.layers.Dropout(0.2, name="Dropout_1"),
    keras.layers.LSTM(256, name="LSTM_2"),
    keras.layers.Dropout(0.2, name="Dropout_2"),
    keras.layers.Dense(128, name="Dense_1"),
    keras.layers.Dense(64, name="Dense_2"),
    keras.layers.Dense(n_outputs, activation=tf.nn.softmax, name="softmax"),
  ]
  return keras.Sequential(layer_stack)

# Instantiate the network and configure training: Adam optimizer with
# categorical cross-entropy, matching the one-hot labels in Y_modified.
model = create_model()
model.compile(optimizer='adam', loss='categorical_crossentropy')


AttributeError: 'Model' object has no attribute 'add'

In [39]:
#Setting up TPU connection and converting keras model to tpu
# The TPU address is read from the COLAB_TPU_ADDR environment variable;
# a KeyError here means that variable is not set in this runtime.
TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']

# Resolver -> distribution strategy -> TPU-backed wrapper around `model`.
tpu_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)
tpu_strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu_resolver)
tpu_model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)

In [40]:
# Ordered 80/20 split (no shuffling): the first 80% of windows train, the
# rest are held out. X_modified and Y_modified carry one row per window, so
# a single boundary index serves both arrays.
split_at = int(len(X_modified) * 0.8)

x_train, x_test = X_modified[:split_at], X_modified[split_at:]
y_train, y_test = Y_modified[:split_at], Y_modified[split_at:]

In [None]:
# Train the TPU-backed model; validation_split=0.1 reserves a tenth of the
# training arrays for validation.
tpu_model.fit(
    x_train,
    y_train,
    batch_size=1024,
    epochs=500,
    validation_split=0.1,
)

In [None]:
# Write the trained model to an HDF5 file in the runtime's working directory.
model_filename = 'Layers_7_Dropout_2_Epochs_500_.h5'
tpu_model.save(model_filename)

#Download to local machine
from google.colab import files
files.download(model_filename)

In [None]:
#Converting to CPU due to TPU batch size divisible by 8 requirement/error
# sync_to_cpu() hands back a CPU-side model from the TPU wrapper; it is bound
# to `cpu_model` here (presumably so small, non-multiple-of-8 batches such as
# a single sample can be run -- TODO confirm).
cpu_model = tpu_model.sync_to_cpu()

In [None]:
# Generate text seeded with the 100th training window (integer-encoded).
# Use `cpu_model`, the model returned by tpu_model.sync_to_cpu(), rather than
# the pre-conversion `model`: it is the object explicitly synchronised with
# the trained TPU weights, and it was otherwise left unused.
# NOTE(review): X[99] is the raw id sequence, not the scaled X_modified row --
# confirm this is the input form predict_text expects.
predict_text(cpu_model, X[99], n_to_char)