In [1]:
# local imports
from src.transfer import replace_head
from src.data import player_data
from src.util import display_progress

# computation / deep learning imports
from tensorflow import keras
import numpy as np

# os / filesystem imports
import pickle
import os

# visualization imports
from sklearn.metrics import confusion_matrix
import pandas as pd

# Model with Pre-Trained Base

In [2]:
# load the saved base model from disk and hand it straight to replace_head,
# keeping only the resulting transfer model bound to `model`
model = replace_head(
    keras.models.load_model('models/SSBML-Base-Model')
)



In [3]:
# print the layer-by-layer breakdown and trainable / non-trainable parameter
# counts of the model returned by replace_head
model.summary()

Model: "SSBML-Transfer-Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
SSBML-Base-Model (Sequential (None, 512)               6537842   
_________________________________________________________________
Binary-Classifier (Sequentia (None, 2)                 83458     
Total params: 6,621,300
Trainable params: 82,946
Non-trainable params: 6,538,354
_________________________________________________________________


# The Players

In [4]:
# list the entries of data/player; each entry name is a candidate value for
# `player_name`, which is joined onto this directory further down
!ls data/player

Blynde	CuckDaddy  gh0st  ixwonkr  Lie0x  TCBL


# The Player Data

Player data (games played by our chosen player) is located in `data/player/<player name>`.

Non-player data (games not played by our chosen player) is drawn from the larger dataset in `data/character`.

In [5]:
# name of the player we want to train/test on
player_name = 'CuckDaddy'

# the chosen player's games: data/player/<player name>/{train,test}
player_dir = os.path.join('data/player', player_name)
player_train_dir, player_test_dir = (
    os.path.join(player_dir, split) for split in ('train', 'test')
)

# the number of directory entries in each split is used as its sample count
player_train_sample_size, player_test_sample_size = (
    len(os.listdir(split_dir))
    for split_dir in (player_train_dir, player_test_dir)
)

# games not played by the chosen player: data/character/{train,test}
nonplayer_dir = 'data/character'
nonplayer_train_dir, nonplayer_test_dir = (
    os.path.join(nonplayer_dir, split) for split in ('train', 'test')
)

In [6]:
# one print call, one report line per argument; sep='\n' reproduces the
# newline that each separate print() would have emitted
print(
    f'Training Data Located at: \n\t- {player_train_dir} \n\t- {nonplayer_train_dir} \n',
    f'Testing Data Located at: \n\t- {player_test_dir} \n\t- {nonplayer_test_dir} \n',
    f'Player Training Data Sample size: \n\t- {player_train_sample_size}',
    f'Player Testing Data Sample size: \n\t- {player_test_sample_size}',
    sep='\n',
)

Training Data Located at: 
	- data/player/CuckDaddy/train 
	- data/character/train 

Testing Data Located at: 
	- data/player/CuckDaddy/test 
	- data/character/test 

Player Training Data Sample size: 
	- 11758
Player Testing Data Sample size: 
	- 1333


# Training

In [7]:
# Adjustable Parameters
# =====================

batch_size = 16
imbalance_ratio = 10
imbalanced_tuning_ratio = 1
balanced_tuning_ratio = 4

# =====================

# Effective batch size of each stream: the base batch size scaled by the
# stream's tuning ratio. The same value is used for the step counts and
# for the data streams so the two always agree.
imbalanced_batch_size = batch_size * imbalanced_tuning_ratio
balanced_batch_size = batch_size * balanced_tuning_ratio

# Samples consumed per epoch, expressed as a multiple of the player's sample
# count: every player game once, plus `imbalance_ratio` anonymous games per
# player game (imbalanced) or one anonymous game per player game (balanced).
imbalanced_multiplier = 1 + imbalance_ratio
balanced_multiplier = 2

# Steps per epoch = samples per epoch // effective batch size.
imbalanced_steps = (
    (player_train_sample_size * imbalanced_multiplier) // imbalanced_batch_size
)
balanced_steps = (
    (player_train_sample_size * balanced_multiplier) // balanced_batch_size
)
imbalanced_test_steps = (
    (player_test_sample_size * imbalanced_multiplier) // imbalanced_batch_size
)
balanced_test_steps = (
    (player_test_sample_size * balanced_multiplier) // balanced_batch_size
)

# Keyword arguments shared by the train and test stream of each kind.
# Every stream repeats; the step counts above bound how much is consumed.
imbalanced_kwargs = dict(
    repeat = True,
    batch_size = imbalanced_batch_size,
    ratio = 1/imbalance_ratio,
)
# the balanced streams do not pass `ratio` and rely on player_data's default
balanced_kwargs = dict(
    repeat = True,
    batch_size = balanced_batch_size,
)

# training data
imbalanced_data = player_data(player_train_dir, nonplayer_train_dir, **imbalanced_kwargs)
balanced_data = player_data(player_train_dir, nonplayer_train_dir, **balanced_kwargs)

# testing data
imbalanced_test_data = player_data(player_test_dir, nonplayer_test_dir, **imbalanced_kwargs)
balanced_test_data = player_data(player_test_dir, nonplayer_test_dir, **balanced_kwargs)

In [8]:
# Two-stage schedule, run in order on the same model:
#   1. imbalanced classes, 5 epochs
#   2. balanced classes,   2 epochs
# Each entry is (data stream, number of epochs, steps per epoch).
training_schedule = (
    (imbalanced_data, 5, imbalanced_steps),
    (balanced_data, 2, balanced_steps),
)

for stage_data, stage_epochs, stage_steps in training_schedule:
    model.fit(
        stage_data,
        epochs = stage_epochs,
        steps_per_epoch = stage_steps,
        verbose = 1,
    )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/2
Epoch 2/2


In [9]:
# Balanced Classes
# evaluate `balanced_test_steps` batches drawn from the balanced test stream
score = model.evaluate(
    balanced_test_data,
    steps = balanced_test_steps,
    verbose = 0,
)

# score[0] is reported as the test score; positions 1-3 are reported as
# percentages (assumes the model's metrics are accuracy, precision, recall
# in that order -- TODO confirm against replace_head)
print('\nTest score:', round(score[0], 3))
for position, metric_name in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric_name}: {round(score[position]*100)}%')


Test score: 0.062
Test accuracy: 90%
Test precision: 90%
Test recall: 92%


In [10]:
# Imbalanced Classes
# evaluate `imbalanced_test_steps` batches drawn from the imbalanced test stream
score = model.evaluate(
    imbalanced_test_data,
    steps = imbalanced_test_steps,
    verbose = 0,
)

# score[0] is reported as the test score; positions 1-3 are reported as
# percentages (assumes the model's metrics are accuracy, precision, recall
# in that order -- TODO confirm against replace_head)
print('\nTest score:', round(score[0], 3))
for position, metric_name in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric_name}: {round(score[position]*100)}%')


Test score: 0.066
Test accuracy: 89%
Test precision: 45%
Test recall: 92%


# Testing

In [11]:
# testing data: a non-repeating stream, so evaluate() needs no `steps`
# argument and stops when the stream is exhausted
one_round_kwargs = dict(
    batch_size = 32,
    repeat = False,
    ratio = 1,
    onehot = True,
)
data_test_one_round = player_data(player_test_dir, nonplayer_test_dir, **one_round_kwargs)

score = model.evaluate(data_test_one_round, verbose=1)

# score[0] is reported as the test score; positions 1-3 are reported as
# percentages (assumes the model's metrics are accuracy, precision, recall
# in that order -- TODO confirm against replace_head)
print('\nTest score:', round(score[0], 3))
for position, metric_name in enumerate(('accuracy', 'precision', 'recall'), start=1):
    print(f'Test {metric_name}: {round(score[position]*100)}%')


Test score: 0.063
Test accuracy: 90%
Test precision: 90%
Test recall: 92%


# Confusion Matrix

In [12]:
# Adjustable Parameters
# =====================

batch_size = 16

# Ratio of Anonymous games : Chosen Player's games
ratio = 1

# =====================

# define data generation
# `repeat` and `onehot` are spelled out (matching the single-pass evaluation
# of the test set) instead of relying on player_data's defaults: the loop
# below must terminate, and both argmax calls need 2-D one-hot labels.
data_conf = player_data(
    player_test_dir,
    nonplayer_test_dir,
    batch_size = batch_size,
    repeat = False,
    ratio = ratio,
    onehot = True,
)

# predict over test data, one batch at a time
batch_preds = []
batch_labels = []
i = 0                          # player samples seen so far
N = player_test_sample_size    # total player samples expected
for xi, yi in data_conf:
    # predict_on_batch does a single forward pass on this batch; calling
    # predict() per batch adds per-call overhead and may print its own
    # progress output over the bar drawn below
    batch_preds.append(np.argmax(model.predict_on_batch(xi), axis=1))
    batch_labels.append(np.argmax(yi, axis=1))

    # progress bar: column 0 of the one-hot labels is summed to count the
    # player samples in this batch
    i = int(i + np.sum(yi[:, 0]))
    display_progress(i, N)
# force the bar to 100% once the stream is exhausted
display_progress(N, N)

pred = np.concatenate(batch_preds)
labels = np.concatenate(batch_labels)

# create confusion matrix
# Rows are true classes, columns are predicted classes, and every cell is a
# fraction of all samples (normalize='all'). Passing labels=[0, 1] pins the
# matrix to 2x2 in class order 0, 1 even if one class never occurs in the
# labels or predictions, so it always fits the DataFrame below.
# NOTE(review): class index 0 is taken to be the chosen player (the progress
# counter above sums label column 0 against the player sample count), which
# puts the true positives top left and true negatives bottom right -- confirm
# against player_data.
conf_matrix = confusion_matrix(labels, pred, labels=[0, 1], normalize='all')
conf_matrix = np.around(conf_matrix, 3)
conf_df = pd.DataFrame(conf_matrix, index=['present', 'not present'], columns=['detected', 'not detected'])

conf_df

[####################] 1333 of 1333 - 100.0% 

Unnamed: 0,detected,not detected
present,0.453,0.042
not present,0.06,0.445
