In [None]:
from src.data import player_data
from src.util import characters, display_progress
from src.transfer import replace_head
from tensorflow import keras
from tensorflow_addons.losses import SigmoidFocalCrossEntropy as Focal
import numpy as np
import pickle
import os

# visualization
import seaborn as sn
import pandas as pd
from sklearn.metrics import confusion_matrix

# Model with Pre-Trained Base

In [None]:
# Load the saved base network from disk and immediately swap its head via the
# project helper `replace_head`; only the re-headed model is kept as `model`.
model = replace_head(
    keras.models.load_model('models/SSBML-Base-Model')
)

In [None]:
# Print the Keras layer-by-layer summary of `model` (the re-headed network built above).
model.summary()

# The Players

In [None]:
# List the entries under data/player (presumably one folder per player; the
# path cell below builds its directories as data/player/<player name>).
!ls data/player

In [None]:
# how many clips does a player have?
# Counts the entries in the player's train folder. The player name is
# hard-coded here and is independent of `player_name`, which is only
# defined in a later cell.
!ls data/player/Blynde/train | wc -l

# The Player Data

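Player data is read from `data/player/<player name>`, which is split into `train` and `test` subdirectories.

Non-player data is taken from the large character dataset in `data/character`, which uses the same `train`/`test` split.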

In [None]:
# Player we want to train/test on.
player_name = 'Blynde'

# Root folder of each data source.
player_dir = os.path.join('data/player', player_name)
nonplayer_dir = 'data/character'

# Every root is read through its `train` and `test` subfolders.
player_train_dir, player_test_dir = (
    os.path.join(player_dir, split) for split in ('train', 'test')
)
nonplayer_train_dir, nonplayer_test_dir = (
    os.path.join(nonplayer_dir, split) for split in ('train', 'test')
)

In [None]:
# Echo the resolved directories as a quick sanity check before any data is read.
train_locations = f'Training Data Located at: \n\t- {player_train_dir} \n\t- {nonplayer_train_dir} \n'
print(train_locations)
test_locations = f'Testing Data Located at: \n\t- {player_test_dir} \n\t- {nonplayer_test_dir} \n'
print(test_locations)

# Testing

In [None]:
# Test stream for a single evaluation pass. With repeat=False no `steps`
# argument is given to evaluate() below (presumably the stream is finite and
# yields each clip once; confirm in src.data).
data_test_one_round = player_data(
    player_test_dir, nonplayer_test_dir,
    batch_size=32, repeat=False, ratio=1, onehot=True,
)

score = model.evaluate(data_test_one_round, verbose=1)

# score[0] is reported as the raw test score; entries 1-3 are reported as
# accuracy, precision and recall percentages, in that order.
print('\nTest score:', round(score[0], 3))
for position, metric in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric}: {round(score[position]*100)}%')

# Confusion Matrix

In [None]:
# Confusion matrix over the test split, built from per-batch predictions.
# repeat=False: the loop below relies on the stream ending on its own.
data_conf = player_data(
    player_test_dir,
    nonplayer_test_dir,
    batch_size = 10,
    repeat = False,
    onehot = True,
)

# Safety cap on the number of batches, in case the stream does not terminate.
max_batches = 50000

# NOTE(review): assumes one-hot index 1 means "player present" and index 0
# "not present" -- confirm against src.data.player_data and flip if needed.
positive_class = 1

batch_preds = []
batch_labels = []

for batch_index, (xi, yi) in enumerate(data_conf):
    if batch_index >= max_batches:
        break
    # predict_on_batch runs one forward pass without the per-call setup and
    # progress output that model.predict() incurs when called inside a loop.
    batch_preds.append(np.argmax(model.predict_on_batch(xi), axis=1))
    batch_labels.append(np.argmax(yi, axis=1))

pred = np.concatenate(batch_preds)
labels = np.concatenate(batch_labels)

# Pass the class order explicitly, positive class first, so that
#   * row 0 / column 0 really are "present" / "detected" (sklearn otherwise
#     sorts classes ascending, putting class 0 first), and
#   * the matrix is always 2x2 even if one class never occurs in the data.
# normalize='all' turns the counts into fractions of all evaluated samples.
conf_matrix = confusion_matrix(
    labels,
    pred,
    labels=[positive_class, 1 - positive_class],
    normalize='all',
)
conf_matrix = np.around(conf_matrix, 3)
conf_df = pd.DataFrame(conf_matrix, index=['present', 'not present'], columns=['detected', 'not detected'])

conf_df

# Training

In [None]:
batch_size = 32

# Options shared by all four streams below.
#   * repeat=True: consumers bound these streams with `steps_per_epoch` /
#     `steps` (presumably because the stream never ends; confirm in src.data).
#   * onehot=True: passed explicitly so the labels have the same encoding as
#     in the evaluation and confusion-matrix cells, which request one-hot
#     labels for this same model.
stream_options = dict(repeat=True, batch_size=batch_size, onehot=True)

# training data, ratio=1/10 (the "imbalanced" setting)
imbalanced_data = player_data(
    player_train_dir,
    nonplayer_train_dir,
    ratio=1/10,
    **stream_options,
)

# training data, ratio=1 (the "balanced" setting)
balanced_data = player_data(
    player_train_dir,
    nonplayer_train_dir,
    ratio=1,
    **stream_options,
)

# testing data, ratio=1 (balanced)
balanced_test_data = player_data(
    player_test_dir,
    nonplayer_test_dir,
    ratio=1,
    **stream_options,
)

# testing data, ratio=1/10 (imbalanced)
imbalanced_test_data = player_data(
    player_test_dir,
    nonplayer_test_dir,
    ratio=1/10,
    **stream_options,
)

In [None]:
# Two-stage schedule with identical epoch/verbosity settings; only the data
# stream and the number of steps per epoch differ between the stages.
fit_options = dict(epochs=2, verbose=1)

# stage 1: imbalanced classes
model.fit(imbalanced_data, steps_per_epoch=4000, **fit_options)

# stage 2: balanced classes (trailing `;` hides the returned History repr)
model.fit(balanced_data, steps_per_epoch=400, **fit_options);

In [None]:
# Balanced classes: evaluate 40 batches of the repeating balanced test stream.
score = model.evaluate(balanced_test_data, verbose=0, steps=40)

# score[0] is the raw test score; entries 1-3 are printed as percentages.
print('\nTest score:', round(score[0], 3))
for position, metric in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric}: {round(score[position]*100)}%')

In [None]:
# Imbalanced classes: evaluate 40 batches of the repeating imbalanced test stream.
score = model.evaluate(imbalanced_test_data, verbose=0, steps=40)

# score[0] is the raw test score; entries 1-3 are printed as percentages.
print('\nTest score:', round(score[0], 3))
for position, metric in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric}: {round(score[position]*100)}%')

# Fine Tuning

In [None]:
# unfreeze layers
# Setting `trainable` on the model applies to all of its layers. The change
# only takes effect once the model is compiled again, which happens below.
# NOTE(review): this unfreezes everything; narrow it to specific layers if
# only part of the base should be fine-tuned.
model.trainable = True

# Keep the objective the model is currently compiled with, so that only the
# learning rate changes for this stage. If no loss is attached, fall back to
# the focal loss imported at the top of the notebook.
# NOTE(review): confirm the fallback matches the loss used by replace_head.
loss = getattr(model, 'loss', None) or Focal()

# reduce learning rate
optimizer = keras.optimizers.Adam(
    learning_rate = .00005,
)

# compile
# The metric order matters: the reporting code after training reads score[1],
# score[2] and score[3] as accuracy, precision and recall respectively.
# NOTE(review): presumably mirrors the metrics configured before this
# recompile; confirm thresholds/settings against replace_head.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()],
)

In [None]:
# Training Loop
# Fine-tune on the imbalanced stream; steps_per_epoch bounds each epoch
# because the stream is created with repeat=True.
model.fit(
    imbalanced_data,
    epochs = 5,
    steps_per_epoch = 4000,
    verbose = 1,
)

score = model.evaluate(imbalanced_test_data, verbose=0, steps=400)

# evaluate() returns the loss followed by one value per compiled metric.
# Pair the labels with whatever metrics are actually present instead of
# indexing fixed positions, so the report does not raise IndexError when the
# model was compiled with fewer than three metrics. With all three present
# the output is unchanged.
print('\nTest score:', round(score[0], 3))
for label, value in zip(('accuracy', 'precision', 'recall'), score[1:]):
    print(f'Test {label}: {round(value*100)}%')