In [1]:
from src.data import player_data
from src.util import characters, display_progress
from src.transfer import ssbml_transfer_model, onehot_head, onehot_metrics
from tensorflow import keras
from tensorflow_addons.losses import SigmoidFocalCrossEntropy as Focal
import numpy as np
import pickle
import os

# visualization
import seaborn as sn
import pandas as pd
from sklearn.metrics import confusion_matrix

# Model with Pre-Trained Base

In [2]:
model = ssbml_transfer_model(
    head = onehot_head, 
    loss = Focal(),
    metrics = onehot_metrics)



In [3]:
model.summary()

Model: "SSBML-Transfer-Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
SSBML-Base-Model (Sequential (None, 512)               6537842   
_________________________________________________________________
onehot_binary_classifier (Se (None, 2)                 83458     
Total params: 6,621,300
Trainable params: 82,946
Non-trainable params: 6,538,354
_________________________________________________________________


# The Players

In [4]:
!ls data/player

Blynde	CuckDaddy  Lie0x  TCBL	gh0st  ixwonkr


In [5]:
# how many clips does a player have?
!ls data/player/CuckDaddy/train | wc -l

11758


# The Player Data

Player Data can be found in data/player/\<player name\>

Nonplayer Data is taken from the large dataset data/character

In [6]:
# name of the player we want to train/test on
player_name = 'CuckDaddy'

player_dir = os.path.join('data/player', player_name)
player_train_dir = os.path.join(player_dir, 'train')
player_test_dir = os.path.join(player_dir, 'test')

nonplayer_dir = 'data/character'
nonplayer_train_dir = os.path.join(nonplayer_dir, 'train')
nonplayer_test_dir = os.path.join(nonplayer_dir, 'test')

In [7]:
print(f'Training Data Located at: \n\t- {player_train_dir} \n\t- {nonplayer_train_dir} \n')
print(f'Testing Data Located at: \n\t- {player_test_dir} \n\t- {nonplayer_test_dir} \n')

Training Data Located at: 
	- data/player/CuckDaddy/train 
	- data/character/train 

Testing Data Located at: 
	- data/player/CuckDaddy/test 
	- data/character/test 



# Testing

In [10]:
# testing data
data_test_one_round = player_data(
    player_test_dir,
    nonplayer_test_dir,
    batch_size = 32,
    repeat = False,
    ratio = 1,
    onehot = True
)

score = model.evaluate(data_test_one_round, verbose=1)
print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')
print(f'Test precision: {round(score[2]*100)}%')
print(f'Test recall: {round(score[3]*100)}%')


Test score: 6.216
Test accuracy: 47%
Test precision: 49%
Test recall: 11%


# Confusion Matrix

In [12]:
data_conf = player_data(
    player_test_dir,
    nonplayer_test_dir,
    batch_size = 25,
    repeat = False,
#     ratio=.1,
)

batch_preds = []
batch_labels = []
i = 0

for xi, yi in data_conf:
    batch_preds.append(np.around(model.predict(xi)).astype(int).reshape((-1)))
    batch_labels.append(yi)
    
    # just in case
    i += 1
    if i > 50000:
        break
    
#     i+=1
#     display_progress(i, num_batches)
# display_progress(num_batches, num_batches)
    
pred = np.concatenate(batch_preds)
labels = np.concatenate(batch_labels)

conf_matrix = confusion_matrix(labels, pred, normalize='all') # reverse so true positive is top left, true negative is bottom right
conf_matrix = np.around(conf_matrix, 3)
conf_df = pd.DataFrame(conf_matrix, index=['true player', 'true nonplayer'], columns=['predicted player', 'predicted nonplayer'])

conf_df

Unnamed: 0,predicted player,predicted nonplayer
true player,0.048,0.432
true nonplayer,0.101,0.419


# Training

In [11]:
# training data
onehot = True
imbalanced_data = player_data(
    player_train_dir,
    nonplayer_train_dir,
    repeat = True,
    batch_size = 32,
    ratio=1/10,
    onehot = onehot,
)

# training data
balanced_data = player_data(
    player_train_dir,
    nonplayer_train_dir,
    repeat = True,
    batch_size = 32,
    ratio=1,
    onehot = onehot,
)

# testing data
balanced_test_data = player_data(
    player_test_dir,
    nonplayer_test_dir,
    repeat = True,
    batch_size = 25,
    ratio=1,
    onehot = onehot,
)

imbalanced_test_data = player_data(
    player_test_dir,
    nonplayer_test_dir,
    repeat = True,
    batch_size = 25,
    ratio=1/10,
    onehot = onehot,
)

In [12]:
# train on imbalance classes first
model.fit(
    imbalanced_data,
    epochs = 2,
    steps_per_epoch = 4000,
    verbose = 1,
)
# then train on balanced classes
model.fit(
    balanced_data,
    epochs = 2,
    steps_per_epoch = 400,
    verbose = 1,
);

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2


In [13]:
# Balanced Classes
score = model.evaluate(balanced_test_data, verbose=0, steps=40)
print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')
print(f'Test precision: {round(score[2]*100)}%')
print(f'Test recall: {round(score[3]*100)}%')


Test score: 0.072
Test accuracy: 88%
Test precision: 95%
Test recall: 82%


In [14]:
# Imbalance Classes
score = model.evaluate(balanced_test_data, verbose=0, steps=40)
print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')
print(f'Test precision: {round(score[2]*100)}%')
print(f'Test recall: {round(score[3]*100)}%')


Test score: 0.077
Test accuracy: 87%
Test precision: 95%
Test recall: 81%


Very interesting result. After training first on imbalanced and then balanced training data, the model performs the exactly same on balanced and imbalanced test data.

# Fine Tuning

In [None]:
# unfreeze layers

# reduce learning rate
optimizer = keras.optimizers.Adam(
    learning_rate = .00005,
)
# compile
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
)

In [None]:
# Training Loop
model.fit(
    imbalanced_data,
    epochs = 5,
    steps_per_epoch = 4000,
    verbose = 1,
)

score = model.evaluate(imbalanced_test_data, verbose=0, steps=400)
print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')
print(f'Test precision: {round(score[2]*100)}%')
print(f'Test recall: {round(score[3]*100)}%')