# Importar bibliotecas / Import Libraries

In [None]:
import itertools
import os
import time

import keras
import matplotlib.pyplot as plt
import matplotlib.pyplot as pyplot
import numpy as np
import pandas as pd
import sklearn
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

# scikit-learn no longer ships joblib under sklearn.externals in recent
# releases; prefer the standalone package and fall back to the bundled copy.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Cargar dataset / Loading dataset

In [None]:
# Read the prepared dataset; the first CSV column becomes the row index.
dataset_main = pd.read_csv(
    './output/new_dataset_1.csv',
    index_col=0,
)
# Preview the first 12 rows.
dataset_main.head(12)

In [None]:
# The last column of the dataset holds one label value per row.
labels = dataset_main.iloc[:, -1].values
# Keep only the single character located five positions from the end of each
# label (presumably the class id right before a 4-character suffix such as a
# file extension — confirm against the dataset).
y_train = [label[-5:-4] for label in labels]
# Spot-check the second extracted label.
y_train[1]

In [None]:
# Feature matrix: every column except the last two, as a NumPy array.
ds_x_train = dataset_main.iloc[:,:-2].values

In [None]:
# Per-row labels as a NumPy array so they can be indexed alongside the features.
ds_y_train = np.array(y_train)

# MinMax

In [None]:
# Rescale every feature column with a min-max scaler.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/validation split performed later, so validation rows influence the
# scaling — consider fitting on the training portion only.
scalar = MinMaxScaler()
# Show the raw feature values before they are scaled.
print(ds_x_train)
ds_x_train = scalar.fit_transform(ds_x_train)

# Guardar scalar para usar en inferencia / Save the scaler for use at inference

In [None]:
# Persist the fitted scaler so the same scaling can be re-applied later.
joblib.dump(scalar, './output/Bible_new_Minmaxscalar.pkl')

# Block creation

In [None]:
# One label per block of 12 consecutive rows: pick the label of the last row
# of every block (row indices 11, 23, 35, ...).
Y_train = np.array(ds_y_train[11::12])
# Number of blocks for which a label was collected.
count = len(Y_train)
count

In [None]:
# Cut the scaled rows into consecutive, equally sized blocks and stack them
# into one array with a leading block axis. np.split raises if the row count
# cannot be divided evenly into `blocks` parts.
blocks = len(ds_x_train) // 12
X_train = np.stack(np.split(ds_x_train, blocks))
X_train.shape

## Label Encoding

In [None]:
# Replace the block labels with integer class ids.
label_encoder = LabelEncoder()
Y_train = label_encoder.fit_transform(Y_train)

# Show the original label values known to the encoder.
label_encoder.classes_

In [None]:
# Save the label encoder to use in inference.
# NOTE(review): this file is written to the working directory, while the other
# artefacts in this notebook go to ./output/ — confirm this is intended.
joblib.dump(label_encoder, './Bible_label_john_adj_12d_aug3.pkl')

# Splitting the training and validation data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the blocks for validation; random_state pins the shuffle so
# the split is repeatable. Note that y_train, which earlier held the raw
# per-row label list, is rebound here to the training labels.
x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size = 0.2, random_state = 0)

In [None]:
# Save the split feature arrays in case they are needed later.
np.save('./output/x_test_18d_aug1.npy', x_test)
np.save('./output/x_train_18d_aug1.npy', x_train)

# One Hot encoding the labels

In [None]:
# One-hot encode the integer class ids. The width is taken from the fitted
# label encoder so both arrays always have one column per known class;
# without num_classes, to_categorical infers the width from the largest id it
# sees, which can differ between the two splits.
num_classes = len(label_encoder.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Keep the encoded labels on disk alongside the saved feature arrays.
np.save('./output/y_test_18d_aug1.npy', y_test)
np.save('./output/y_train_18d_aug1.npy', y_train)

# Checkpoint path and early stopping configuration

In [None]:
from keras.callbacks import EarlyStopping

# Write the model to disk during training, keeping only the best one so far.
# Fixed: the extension separator was a comma (',hdf5') instead of a dot.
callbacks1 = ModelCheckpoint('./output/new_dataset_SimpleLSTM.hdf5', save_best_only=True)

# Stop training once the monitored value has not improved for 100 epochs.
# Fixed: 'vebose' is not a valid keyword; the argument is called 'verbose'.
# NOTE(review): with metrics=['accuracy'] the validation key is 'val_acc' on
# older Keras releases and 'val_accuracy' on newer ones — confirm against the
# installed version.
callbacks2 = EarlyStopping(monitor='val_acc', patience=100, verbose=1)
callbacks = [callbacks1, callbacks2]

# Model architecture creation

## Model V1 : Simple LSTM

In [None]:
# Simple LSTM 8

model = Sequential()

# First recurrent layer returns the full sequence so a second LSTM can follow.
# Fixed: the input_shape tuple was missing its opening parenthesis, which made
# this cell a syntax error.
model.add(LSTM(34, input_shape=(12, 96), return_sequences=True, activation='sigmoid'))

# Second recurrent layer emits only its final output.
model.add(LSTM(34, activation='sigmoid'))

model.add(Dense(64))

# Softmax over 9 output classes.
model.add(Dense(9, activation='softmax'))

## Model V2: LSTM with Dropout

In [None]:
# 3 LSTM with dropout

model = Sequential()

# Fixed: the hard-coded input_shape (18, 12) cannot match the training array,
# whose blocks are 12 rows long; take (timesteps, features) from x_train so
# the model always agrees with the data passed to fit().
model.add(LSTM(256, input_shape=x_train.shape[1:], activation='sigmoid', return_sequences=True))

model.add(Dropout(0.1))

model.add(LSTM(128, return_sequences=True, activation='sigmoid'))

model.add(Dropout(0.1))

# Last recurrent layer emits only its final output.
model.add(LSTM(64, activation='sigmoid'))

model.add(Dense(64))

model.add(Dropout(0.1))

model.add(Dense(64))

# Softmax over 9 output classes.
model.add(Dense(9, activation='softmax'))


# Training phase

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as a metric.
model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Train for up to 600 epochs, evaluating on the held-out split after each one.
# validation_data is passed as a tuple, the documented form; some Keras
# releases do not unpack a list into (x, y).
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=600, callbacks=callbacks)

In [None]:
# Predict a label for every validation sample.
# Fixed: only x_test[1] was predicted, so y_pred held a single label and could
# not be compared with the full list of true labels in the metrics below.
# The arg-max over the softmax output is taken explicitly because
# Sequential.predict_classes is not available in recent Keras releases.
y_pred = label_encoder.inverse_transform(np.argmax(model.predict(x_test), axis=1))

In [None]:
# Recover the true label of every validation sample from its one-hot row.
# A single vectorised call replaces the per-sample loop and yields a 1-D array
# (the loop produced an (N, 1) column of one-element arrays).
output = label_encoder.inverse_transform(np.argmax(y_test, axis=1))

# Confusion Matrix

In [None]:
from sklearn.metrics import accuracy_score

# Score the predictions (y_pred) against the true labels (output).
accuracy_score(output, y_pred)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix with the true labels as first argument and the predictions
# as second.
cm = confusion_matrix(output, y_pred)

In [None]:
def plot_confusion_matrix(cm, classes, normalize = False, title = 'Confusion matrix', cmap = plt.cm.Blues,
                          save_path = './output/aug3_bible_3_16.png'):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D NumPy array, drawn with rows on the "True label" axis and
            columns on the "Predicted label" axis.
        classes: tick labels for both axes, one per class.
        normalize: when True, every row is divided by its sum before the
            matrix is printed and drawn.
        title: title placed above the plot.
        cmap: matplotlib colour map used for the heat map.
        save_path: file the finished figure is written to.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows with no samples would divide by zero and fill the plot with
        # NaN; leave those rows at 0 instead.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print('Normalized confusion matrix')
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of it, switching the text colour at half
    # of the maximum so it stays readable on dark and light cells alike.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else 'black')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig(save_path)

In [None]:
# Draw the matrix, labelling the axes with the sorted entry names of
# ./output/john_3_16/.
# NOTE(review): presumably these directory entries correspond one-to-one, and
# in the same sorted order, to the labels in the confusion matrix — confirm,
# or use label_encoder.classes_ instead.
plot_confusion_matrix(cm, sorted(os.listdir('./output/john_3_16/')))