In [None]:
import sys, os, inspect
# Put the parent of the notebook's working directory on sys.path so that the
# `src` package can be imported from this notebook.
# The working directory is used instead of
# inspect.getfile(inspect.currentframe()): inside a kernel a cell is not a real
# file on disk, and newer ipykernel versions report a temp-directory path for
# it, which would add the wrong directory. The relative '../data' and
# '../models' paths used later in this notebook already assume the kernel runs
# from the notebook's own directory.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Guard so that re-running this cell does not stack duplicate entries.
if parentdir not in sys.path:
    sys.path.insert(0, parentdir)

In [None]:
# from src.generator import data_generator, clip_generator, label_generator
from src.data import data_generator
from src.util import characters, id_from_char, char_from_id
from src.export import display_progress

from pymongo import MongoClient
from tensorflow import keras

import numpy as np
import pickle

In [None]:
# Show each entry of `characters` (imported from src.util) on its own line.
for symbol in characters:
    print(symbol)

# The Model

Source code for model creation is in src/models.py

In [None]:
# polynomial_schedule = keras.optimizers.schedules.PolynomialDecay(
#     initial_learning_rate = .1,
#     end_learning_rate = .0001,
#     decay_steps=100000,
#     power=.5)

# lr_schedule = keras.optimizers.schedules.PiecewiseConstantDecay(
#     boundaries = [ 10000, 20000 ],
#     values     = [.001, .0005, .0001 ])

In [None]:
# from tensorflow.keras.optimizers import Adam, Nadam
# adam = Adam(learning_rate=.0001)
# nadam = Nadam(learning_rate=.0001)

In [None]:
# # to make a new model
# from src.models import custom_mk5 as Model
# model = Model()

# Restore the previously saved model from disk instead of building a new one.
saved_model_path = '../models/custom_mk2'
model = keras.models.load_model(saved_model_path)

# from src.models import focal_loss, top_8_accuracy
# model.compile(loss=focal_loss,
#               optimizer=adam,
#               metrics=['accuracy', top_8_accuracy])

In [None]:
# Print the architecture summary of the model loaded above as a sanity check.
model.summary()

# Testing

In [None]:
# Generator over the test directory, consumed by model.evaluate in the next
# cell (batch_size=25, num_batches=100, shuffled).
test_generator_options = {
    'input_directory': '../data/character/test',
    'batch_size': 25,
    'num_batches': 100,
    'shuffle': True,
}
data_test = data_generator(**test_generator_options)

In [None]:
# Evaluate the loaded model on the held-out test generator.
# NOTE(review): indexing score[1] and score[2] assumes the model was compiled
# with two metrics (accuracy and top-8 accuracy, as in the commented-out
# compile call above) - confirm for the saved model being loaded.
score = model.evaluate(data_test, verbose=1)

print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')
print(f'Test top 8 categorical accuracy: {round(score[2]*100)}%')

# Confusion Matrix

In [None]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

In [None]:
def get_conf_matrix(labels_as_id, predictions_as_id, num_classes=27):
    """Count (true id, predicted id) pairs into a square confusion matrix.

    Parameters
    ----------
    labels_as_id : array-like of int
        True class ids, one per sample.
    predictions_as_id : array-like of int
        Predicted class ids, aligned element-for-element with ``labels_as_id``.
    num_classes : int, optional
        Side length of the matrix. Defaults to 27, the size that used to be
        hard-coded here.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, num_classes)``; entry ``[i, j]``
        is the number of samples whose true id is ``i`` and predicted id is
        ``j`` (rows = truth, columns = prediction).

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of ids.
    IndexError
        If an id does not fit inside the matrix.
    """
    real_ids = np.asarray(labels_as_id, dtype=np.intp).ravel()
    pred_ids = np.asarray(predictions_as_id, dtype=np.intp).ravel()
    if real_ids.shape != pred_ids.shape:
        # Misaligned inputs would produce a meaningless matrix; fail loudly
        # instead of silently truncating to the shorter sequence.
        raise ValueError(
            f'labels and predictions differ in length: '
            f'{real_ids.size} vs {pred_ids.size}'
        )

    conf_matrix = np.zeros((num_classes, num_classes))
    # np.add.at accumulates repeated (row, col) pairs; a plain fancy-indexed
    # `+= 1` would only count each distinct pair once.
    np.add.at(conf_matrix, (real_ids, pred_ids), 1)
    return conf_matrix

In [None]:
# Fresh test generator for the confusion matrix. onehot=False is requested
# here because the collected labels are later used directly as matrix indices.
# num_batches is kept as a global: the progress display in the next cell reads it.
num_batches = 100
confusion_generator_options = {
    'input_directory': '../data/character/test',
    'batch_size': 100,
    'num_batches': num_batches,
    'shuffle': True,
    'onehot': False,
}
data_test = data_generator(**confusion_generator_options)

In [None]:
# Placeholders keyed by ids 1..26; nothing in the visible notebook fills them in.
recalls = {k:0 for k in range(1,27)}
precisions = {k:0 for k in range(1,27)}

batch_preds = []
batch_labels = []
i = 0  # stays 0 if the generator yields nothing

# Run the model batch by batch, keeping the arg-max class id for every sample
# together with the label the generator supplied for it.
for i, (xi, yi) in enumerate(data_test, start=1):
    # verbose=0 stops Keras from printing its own progress output for every
    # batch, which would interleave with display_progress below.
    batch_preds.append(np.argmax(model.predict(xi, verbose=0), axis=1))
    batch_labels.append(yi)
    display_progress(i, num_batches)
# Final call shows the display as complete even if fewer batches were yielded.
display_progress(num_batches, num_batches)

pred = np.concatenate(batch_preds)
labels = np.concatenate(batch_labels)

conf_matrix = get_conf_matrix(labels, pred)
# Row/column 0 is dropped so the remaining block lines up with `characters`.
# NOTE(review): assumes id 0 is not a character and len(characters) == 26 - confirm.
conf_df = pd.DataFrame(conf_matrix[1:,1:], index=characters, columns=characters)

In [None]:
# sn.heatmap(conf_df, annot=False, mask=conf_df <= 0, cmap='viridis')
fig, ax = plt.subplots(figsize=(13,10))
# log(1 + count) compresses the range so that rare confusions remain visible
# next to the much larger diagonal counts; zero counts stay at zero.
sn.heatmap(np.log(1 + conf_df), annot=False, ax=ax, square=False,
           cbar_kws={'label': 'log(1 + count)'})
# get_conf_matrix indexes [real, predicted], so rows are the true characters
# and columns the predicted ones. Label them so the saved image stands alone.
ax.set_xlabel('Predicted character')
ax.set_ylabel('True character')
ax.set_title('Confusion matrix (log scale)')
fig.savefig('../images/confusion_matrix.png')

# Training

In [None]:
# Training-data generator for the first training round (batch_size=32,
# shuffled, repeat=True).
train_generator_options = {
    'input_directory': '../data/character/train',
    'batch_size': 32,
    'shuffle': True,
    'repeat': True,
}
data_train = data_generator(**train_generator_options)

In [None]:
# Training Loop: 10 epochs of 1000 steps each on the training generator.
model.fit(data_train,
          epochs=10,
          steps_per_epoch=1000,
          verbose=1)

# This evaluation draws 50 batches from data_train, so the numbers printed
# below are training-set metrics and are labelled as such (not as test results).
# NOTE(review): score[1]/score[2] assume two compiled metrics
# (accuracy, top-8 accuracy) - confirm for the loaded model.
score = model.evaluate(data_train, steps=50, verbose=0)
print('\nTrain score:', round(score[0], 3))
print(f'Train accuracy: {round(score[1]*100)}%')
print(f'Train top 8 accuracy: {round(score[2]*100)}%')

In [None]:
# Rebuild the training-data generator with a larger batch (batch_size=64,
# shuffled, repeat=True).
train_generator_options = {
    'input_directory': '../data/character/train',
    'batch_size': 64,
    'shuffle': True,
    'repeat': True,
}
data_train = data_generator(**train_generator_options)

In [None]:
# Training Loop: 10 epochs of 500 steps each on the training generator.
model.fit(data_train,
          epochs=10,
          steps_per_epoch=500,
          verbose=1)

# This evaluation draws 50 batches from data_train, so the numbers printed
# below are training-set metrics and are labelled as such (not as test results).
# NOTE(review): score[1]/score[2] assume two compiled metrics
# (accuracy, top-8 accuracy) - confirm for the loaded model.
score = model.evaluate(data_train, steps=50, verbose=0)
print('\nTrain score:', round(score[0], 3))
print(f'Train accuracy: {round(score[1]*100)}%')
print(f'Train top 8 accuracy: {round(score[2]*100)}%')

In [None]:
# Recreate the training-data generator for the next round (batch_size=64,
# shuffled, repeat=True).
train_generator_options = {
    'input_directory': '../data/character/train',
    'batch_size': 64,
    'shuffle': True,
    'repeat': True,
}
data_train = data_generator(**train_generator_options)

In [None]:
# Training Loop: 5 epochs of 500 steps each on the training generator.
model.fit(data_train,
          epochs=5,
          steps_per_epoch=500,
          verbose=1)

# This evaluation draws 50 batches from data_train, so the numbers printed
# below are training-set metrics and are labelled as such (not as test results).
# NOTE(review): score[1]/score[2] assume two compiled metrics
# (accuracy, top-8 accuracy) - confirm for the loaded model.
score = model.evaluate(data_train, steps=50, verbose=0)
print('\nTrain score:', round(score[0], 3))
print(f'Train accuracy: {round(score[1]*100)}%')
print(f'Train top 8 accuracy: {round(score[2]*100)}%')

---

Change learning rate to `.0001`

In [None]:
# from tensorflow.keras.optimizers import Adam, Nadam
# adam = Adam(learning_rate=.0001)
## nadam = Nadam(learning_rate=.0001)

# from src.models import focal_loss, top_8_accuracy
# model.compile(loss=focal_loss,
#               optimizer=adam,
#               metrics=['accuracy', top_8_accuracy])

In [None]:
# Training-data generator for the final round (batch_size=128, shuffled,
# repeat=True).
train_generator_options = {
    'input_directory': '../data/character/train',
    'batch_size': 128,
    'shuffle': True,
    'repeat': True,
}
data_train = data_generator(**train_generator_options)

In [None]:
# Training Loop: 5 epochs of 250 steps each on the training generator.
model.fit(data_train,
          epochs=5,
          steps_per_epoch=250,
          verbose=1)

# This evaluation draws 50 batches from data_train, so the numbers printed
# below are training-set metrics and are labelled as such (not as test results).
# NOTE(review): score[1]/score[2] assume two compiled metrics
# (accuracy, top-8 accuracy) - confirm for the loaded model.
score = model.evaluate(data_train, steps=50, verbose=0)
print('\nTrain score:', round(score[0], 3))
print(f'Train accuracy: {round(score[1]*100)}%')
print(f'Train top 8 accuracy: {round(score[2]*100)}%')

In [None]:
# Save the trained model under a new name; the model loaded at the top of this
# notebook came from '../models/custom_mk2', which this call does not overwrite.
model.save('../models/custom_mk4')