In [1]:
# local imports
from src.transfer import replace_head, METRICS
from src.data import player_data
from src.util import display_progress

# computation / deep learning imports
from tensorflow import keras
import numpy as np

# os / filesystem imports
import pickle
import os

# visualization imports
from sklearn.metrics import confusion_matrix
import pandas as pd

# Model with Pre-Trained Base

In [2]:
# Restore the saved base model from disk and pass it straight to
# replace_head; `model` is bound to whatever replace_head returns.
model = replace_head(
    keras.models.load_model('models/SSBML-Base-Model')
)



In [3]:
# print the layer-by-layer summary of the model returned by replace_head
model.summary()

Model: "SSBML-Transfer-Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
SSBML-Base-Model (Sequential (None, 512)               6537842   
_________________________________________________________________
Binary-Classifier (Sequentia (None, 1)                 83329     
Total params: 6,621,171
Trainable params: 82,817
Non-trainable params: 6,538,354
_________________________________________________________________


# The Players

In [4]:
# list the entries under data/player (player data lives in data/player/<player name>)
!ls data/player

Blynde	CuckDaddy  gh0st  ixwonkr  Lie0x  TCBL


# The Player Data

Player Data (clips played by our chosen player) can be found in data/player/\<player name\>

Nonplayer Data (clips not played by our chosen player) is taken from the large dataset data/character

In [5]:
# name of the player we want to train/test on
# (joined onto 'data/player' to locate this player's clips, so it must match
#  one of the directory names found there)
player_name = 'Blynde'

In [6]:
# Resolve every directory used for training/testing and count the player's clips.

# root directories: the chosen player's clips and the shared non-player dataset
player_dir = os.path.join('data/player', player_name)
nonplayer_dir = 'data/character'

# each root is expected to contain a 'train' and a 'test' subdirectory
player_train_dir, player_test_dir = (
    os.path.join(player_dir, split) for split in ('train', 'test')
)
nonplayer_train_dir, nonplayer_test_dir = (
    os.path.join(nonplayer_dir, split) for split in ('train', 'test')
)

# sample size = number of directory entries in each of the player's splits
player_train_sample_size = len(os.listdir(player_train_dir))
player_test_sample_size = len(os.listdir(player_test_dir))

# report the resolved locations and sizes, one message per line
print(
    f'Training Data Located at: \n\t- {player_train_dir} \n\t- {nonplayer_train_dir} \n',
    f'Testing Data Located at: \n\t- {player_test_dir} \n\t- {nonplayer_test_dir} \n',
    f'Player Training Data Sample size: \n\t- {player_train_sample_size} \n',
    f'Player Testing Data Sample size: \n\t- {player_test_sample_size} \n',
    sep='\n',
)

Training Data Located at: 
	- data/player/Blynde/train 
	- data/character/train 

Testing Data Located at: 
	- data/player/Blynde/test 
	- data/character/test 

Player Training Data Sample size: 
	- 4286 

Player Testing Data Sample size: 
	- 499 



# Training

Train the model on a mix of the chosen player's clips,
and random anonymous clips from the Melee Public SLP Dataset

In [7]:
# Adjustable Parameters
# =====================

batch_size = 16

# Anonymous clips : Chosen Player's clips
# (how many anonymous clips accompany each player clip in the imbalanced dataset)
imbalance_ratio = 10

# Affects batch_size and steps_per_epoch
# Example: ratio of 2 would effectively
#          double batch size and
#          cut steps_per_epoch in half
imbalanced_tuning_ratio = 1
balanced_tuning_ratio = 1

# =====================


def _steps_for_one_pass(player_sample_size, ratio, tuning_ratio, batch_size):
    """Return the number of batches that covers the player's clips once.

    One pass mixes every player clip with `ratio` anonymous clips, so it
    contains `player_sample_size * (1 + ratio)` clips in total. Each batch
    holds `tuning_ratio * batch_size` clips.

    Args:
        player_sample_size: number of clips belonging to the chosen player.
        ratio: anonymous clips per player clip (1 for the balanced case).
        tuning_ratio: multiplier applied to the base batch size.
        batch_size: base batch size before the tuning multiplier.

    Returns:
        The floor of total clips / effective batch size, but never less
        than 1: a step count of 0 would request no batches at all.
    """
    total_clips = player_sample_size * (1 + ratio)
    effective_batch_size = tuning_ratio * batch_size
    return max(1, total_clips // effective_batch_size)


# Calculate number of steps per epoch for balanced and unbalanced data.
# One Epoch should iterate through our player's clips once, mixing them
# with random anonymous clips at our given imbalance ratio (balanced ratio is 1:1)
imbalanced_steps = _steps_for_one_pass(
    player_train_sample_size, imbalance_ratio, imbalanced_tuning_ratio, batch_size
)

balanced_steps = _steps_for_one_pass(
    player_train_sample_size, 1, balanced_tuning_ratio, batch_size
)

imbalanced_test_steps = _steps_for_one_pass(
    player_test_sample_size, imbalance_ratio, imbalanced_tuning_ratio, batch_size
)

balanced_test_steps = _steps_for_one_pass(
    player_test_sample_size, 1, balanced_tuning_ratio, batch_size
)

# Build the four datasets. All of them are created with repeat=True, and the
# fit/evaluate calls that consume them pass an explicit step count.
# The balanced variants pass no `ratio`, so player_data's default applies
# (presumably 1:1 -- TODO confirm against src.data.player_data).

# training data
imbalanced_data = player_data(
    player_train_dir, nonplayer_train_dir,
    batch_size=batch_size * imbalanced_tuning_ratio,
    ratio=imbalance_ratio,
    repeat=True,
)

balanced_data = player_data(
    player_train_dir, nonplayer_train_dir,
    batch_size=batch_size * balanced_tuning_ratio,
    repeat=True,
)

# testing data
imbalanced_test_data = player_data(
    player_test_dir, nonplayer_test_dir,
    batch_size=batch_size * imbalanced_tuning_ratio,
    ratio=imbalance_ratio,
    repeat=True,
)

balanced_test_data = player_data(
    player_test_dir, nonplayer_test_dir,
    batch_size=batch_size * balanced_tuning_ratio,
    repeat=True,
)

## Training on Imbalanced Classes

In [8]:
# One epoch of `imbalanced_steps` batches drawn from the imbalanced dataset.
# The trailing semicolon hides the return value of fit in the cell output.
model.fit(imbalanced_data, steps_per_epoch=imbalanced_steps, epochs=1, verbose=1);



In [9]:
# Imbalanced Classes
score = model.evaluate(
    imbalanced_test_data,
    steps=imbalanced_test_steps,
    verbose=0,
)

# score[0] is reported as the test score; entries 1-4 are shown as whole percentages
print('\nTest score:', round(score[0], 3))
print(
    f'- accuracy: {round(score[1]*100)}%',
    f'- precision: {round(score[2]*100)}%',
    f'- recall: {round(score[3]*100)}%',
    f'- specificity: {round(score[4]*100)}%',
    sep='\n',
)


Test score: 0.023
- accuracy: 92%
- precision: 68%
- recall: 31%
- specificity: 99%


## Training on Balanced Classes

In [10]:
# Five epochs of `balanced_steps` batches each, drawn from the balanced dataset.
# The trailing semicolon hides the return value of fit in the cell output.
model.fit(balanced_data, steps_per_epoch=balanced_steps, epochs=5, verbose=1);

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
# Balanced Classes
score = model.evaluate(
    balanced_test_data,
    steps=balanced_test_steps,
    verbose=0,
)

# score[0] is reported as the test score; entries 1-4 are shown as whole percentages
print('\nTest score:', round(score[0], 3))
print(
    f'- accuracy: {round(score[1]*100)}%',
    f'- precision: {round(score[2]*100)}%',
    f'- recall: {round(score[3]*100)}%',
    f'- specificity: {round(score[4]*100)}%',
    sep='\n',
)


Test score: 0.042
- accuracy: 84%
- precision: 90%
- recall: 77%
- specificity: 91%


# Testing

Test the model on one pass of the given player's clips, 
at the given imbalance ratio

In [12]:
# Adjustable Parameters
# =====================

batch_size = 16

# Ratio of Anonymous clips : Chosen Player's clips
imbalance_ratio = 1

# =====================

# repeat is switched off here, and evaluate is called without a step count
data_test_one_round = player_data(
    player_test_dir, nonplayer_test_dir,
    ratio=imbalance_ratio,
    repeat=False,
    batch_size=batch_size,
)

score = model.evaluate(data_test_one_round, verbose=1)

# score[0] is reported as the test score; entries 1-4 are shown as whole percentages
print('\nTest score:', round(score[0], 3))
print(
    f'- accuracy: {round(score[1]*100)}%',
    f'- precision: {round(score[2]*100)}%',
    f'- recall: {round(score[3]*100)}%',
    f'- specificity: {round(score[4]*100)}%',
    sep='\n',
)


Test score: 0.037
- accuracy: 85%
- precision: 93%
- recall: 77%
- specificity: 94%


## Confusion Matrix

In [13]:
# Adjustable Parameters
# =====================

batch_size = 16

# Ratio of Anonymous clips : Chosen Player's clips
imbalance_ratio = 1

# =====================

# define data generation
# repeat is disabled explicitly: the loop below has no step limit, so it can
# only finish if the dataset ends after a single pass
data_conf = player_data(
    player_test_dir,
    nonplayer_test_dir,
    batch_size = batch_size,
    repeat = False,
    ratio = imbalance_ratio,
)

# predict over test data
batch_preds = []
batch_labels = []
i = 0
N = player_test_sample_size
for xi, yi in data_conf:
    # round the model output to a hard 0/1 decision and flatten it, so that
    # predictions and labels are both 1-D arrays of the same length;
    # verbose=0 keeps predict from printing its own per-batch progress
    batch_preds.append(model.predict(xi, verbose=0).round().astype(int).ravel())
    batch_labels.append(yi.astype(int).ravel())

    # progress bar: advance by the sum of this batch's labels
    # (assumes player clips are labelled 1, so the sum counts player clips)
    i = int(i + np.sum(yi))
    display_progress(i, N)
display_progress(N, N)
print('\n')

pred = np.concatenate(batch_preds)
labels = np.concatenate(batch_labels)

# create confusion matrix
# labels=[0, 1] pins the result to 2x2 even if one class never occurs;
# normalize='all' turns counts into fractions of all samples;
# reverse both axes so true positive is top left, true negative is bottom right
conf_matrix = confusion_matrix(labels, pred, labels=[0, 1], normalize='all')[::-1, ::-1]
conf_matrix = np.around(conf_matrix, 3)
conf_df = pd.DataFrame(conf_matrix, index=['present', 'not present'], columns=['detected', 'not detected'])

conf_df

[####################] 499 of 499 - 100.0% 



Unnamed: 0,detected,not detected
present,0.384,0.118
not present,0.037,0.461
