In [1]:
from src.get_from_mongo import get_data
from sklearn.model_selection import train_test_split
from pymongo import MongoClient
import numpy as np
import tensorflow as tf
import pickle

In [2]:
# Characters the classifier knows about, listed in label order.
# Labels start at 1, so integer id 0 is never assigned to a character.
CHARACTER_NAMES = [
    'CAPTAIN_FALCON',
    'FOX',
    'MARTH',
    'PEACH',
    'PIKACHU',
    'ICE_CLIMBERS',
    'JIGGLYPUFF',
    'SAMUS',
    'SHEIK',
    'FALCO',
    'DR_MARIO',
    'GANONDORF',
]

# Forward lookup: character name -> integer class id (1..12).
id_from_char = {
    name: label for label, name in enumerate(CHARACTER_NAMES, start=1)
}

# Reverse lookup: integer class id -> character name.
char_from_id = dict(zip(id_from_char.values(), id_from_char.keys()))

In [3]:
# Names of the database and collection that hold the replay documents.
database_name, collection_name = 'slippi', 'melee_public_slp_dataset'

# Open a client against the MongoDB server on this machine (default port)
# and keep handles to the database and the collection.
# NOTE(review): `client`, `db` and `collection` are not referenced by the
# cells visible in this notebook; get_data() receives the names instead.
client = MongoClient(host='localhost', port=27017)
db = client[database_name]
collection = db[collection_name]

In [4]:
# Match any document whose "character" field is one of the characters the
# model can label.  The filter is derived from `id_from_char` so the set of
# queried characters cannot drift from the label mapping; because dicts keep
# insertion order, this builds the same "$or" clause list, in the same order,
# as writing the twelve names out by hand.
query = {"$or": [{"character": name} for name in id_from_char]}

# get_data returns two parallel sequences: the pickled input streams and the
# character name for each one.
# NOTE(review): `get=6000` presumably caps the number of documents fetched
# (the recorded output shows exactly 6000) -- confirm against get_data.
bytestreams, characters = get_data(database_name,
                                   collection_name,
                                   query=query,
                                   get=6000)
print(f'Documents retrieved: {len(characters)}')

Documents retrieved: 6000


In [5]:
# Clip geometry.  Defined once, outside the loop, so `F` exists even when no
# documents were retrieved (a later cell reads `F` as a notebook-level value).
T = 30      # clip length in seconds
F = T * 60  # clip length in frames

x = []  # one (F, n_features) array per clip
y = []  # integer character id of each clip, parallel to `x`
for bytestream, character in zip(bytestreams, characters):
    # NOTE(review): pickle.loads can execute arbitrary code; this is only
    # safe while the database contents are fully trusted.
    istream = pickle.loads(bytestream).toarray()

    # Cut the stream into consecutive, non-overlapping clips of F frames.
    # The `+ 1` makes the bound inclusive: a stream whose length is an exact
    # multiple of F also yields its final full clip (the previous strict
    # `f + F < len` test silently dropped it).  A trailing partial clip is
    # still discarded.
    for f in range(0, istream.shape[0] - F + 1, F):
        y.append(id_from_char[character])
        x.append(istream[f:f + F])

# Stack clips into one array of shape (n_clips, F, n_features).
# np.stack raises ValueError if no clip was produced at all.
X = np.stack(x, axis=0)
# 13 classes = character ids 1..12 plus the unused index 0.
Y = tf.one_hot(y, 13)

In [6]:
# Fixed seed so the 70/30 split (and every number computed from it further
# down) is the same on each Restart & Run All.
SEED = 42

# NOTE(review): the split is made per clip, and several clips are cut from the
# same replay, so clips of one replay can land in both train and test.  A
# replay-level (grouped) split would give a stricter estimate -- consider it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=SEED
)

# One-hot targets for Keras; the integer labels `y_train` / `y_test` are kept
# for the confusion matrix.  13 = ids 1..12 plus the unused index 0.
Y_train = tf.one_hot(y_train, 13)
Y_test  = tf.one_hot(y_test, 13)

In [7]:
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, InputLayer
from tensorflow.keras.layers import Conv1D, MaxPooling1D

In [8]:
# 1-D convolutional classifier over fixed-length clips.
# Each clip is a (frames, features) array; the convolutions slide along the
# frame axis.  Three conv/pool stages shrink the time axis before a small
# dense head produces one softmax score per class index (0..12).
model = Sequential([
    InputLayer(input_shape=X.shape[1:]),

    # Light dropout directly on the raw inputs.
    Dropout(.2),

    # Stage 1: 100 filters, each spanning 60 consecutive frames.
    Conv1D(filters=100, kernel_size=60, activation='relu'),
    Dropout(.2),
    MaxPooling1D(pool_size=5),

    # Stage 2: 80 filters over 30 pooled steps.
    Conv1D(filters=80, kernel_size=30, activation='relu'),
    MaxPooling1D(pool_size=5),

    # Stage 3: 60 filters over 15 pooled steps.
    Conv1D(filters=60, kernel_size=15, activation='relu'),
    MaxPooling1D(pool_size=5),

    # Dense head.
    Flatten(),
    Dense(units=40, activation='relu'),
    Dropout(.2),

    # Output layer: 13 units to match the one-hot targets.
    Dense(units=13, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [9]:
# Print the layer-by-layer output shapes and parameter counts of the compiled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            (None, 1800, 13)          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 1741, 100)         78100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1741, 100)         0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 348, 100)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 319, 80)           240080    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 63, 80)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 49, 60)            7

In [10]:
# Train for five epochs, reporting loss/accuracy on the held-out clips after
# every epoch so train and test error can be watched side by side.
# NOTE(review): the same held-out set is used for per-epoch validation and
# for the final score below.
holdout = (X_test, Y_test)
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=100,
    epochs=5,
    verbose=1,
    validation_data=holdout,
)

# evaluate() returns [loss, accuracy] for the metrics compiled into the model.
score = model.evaluate(*holdout, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('\nTest score:', round(test_loss, 3))
print(f'Test accuracy: {round(test_accuracy*100)}%')  # headline metric

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Test score: 0.51
Test accuracy: 84%


In [15]:
def get_conf_matrix(labels_as_id, predictions_as_id, num_classes=13):
    """Count (true id, predicted id) pairs into a confusion matrix.

    Parameters
    ----------
    labels_as_id : sequence of int
        True class id of each sample.
    predictions_as_id : sequence of int
        Predicted class id of each sample, parallel to ``labels_as_id``.
    num_classes : int, optional
        Side length of the square matrix.  Defaults to 13, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, num_classes)`` where entry
        ``[r, p]`` is the number of samples with true id ``r`` that were
        predicted as ``p``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same length (previously the longer
        one was silently truncated).
    IndexError
        If an id falls outside the matrix.
    """
    true_ids = np.asarray(labels_as_id, dtype=np.intp)
    pred_ids = np.asarray(predictions_as_id, dtype=np.intp)
    if true_ids.shape != pred_ids.shape:
        raise ValueError(
            f'labels and predictions differ in shape: '
            f'{true_ids.shape} vs {pred_ids.shape}'
        )

    conf_matrix = np.zeros((num_classes, num_classes))
    # np.add.at is unbuffered, so repeated (true, pred) pairs each add 1;
    # plain fancy-index `+=` would count every distinct pair only once.
    np.add.at(conf_matrix, (true_ids, pred_ids), 1)
    return conf_matrix

In [12]:
# Per-clip class scores from the trained network, one row per test clip ...
class_scores = model.predict(X_test)
# ... reduced to the index of the highest-scoring class for each clip.
pred = np.argmax(class_scores, axis=1)

In [16]:
# Cursor for the per-character report in the next cell; it is advanced there
# before use, so 0 means "no character shown yet".
char_id = 0

# Rows are true character ids, columns are predicted ids.
conf_matrix = get_conf_matrix(labels_as_id=y_test, predictions_as_id=pred)

In [28]:
# Re-run this cell to step through the characters one at a time.
# The cursor wraps from the last id back to 1, so running the cell more often
# than there are characters no longer raises IndexError / KeyError.
char_id = char_id % len(char_from_id) + 1

# Row of the confusion matrix for this true character: how often its clips
# were predicted as each class.
row = conf_matrix[char_id, :]
sorted_row_indices = np.argsort(row)[::-1]  # most frequent prediction first
print(f'{char_from_id[char_id]} : prediction frequencies\n-----------------')
# The loop variable is deliberately not called `i`: the inspection cells
# further down use a notebook-level `i`, which this loop used to overwrite.
for predicted_id in sorted_row_indices:
    if predicted_id > 0:  # index 0 is not a character
        print(f'{char_from_id[predicted_id]} : {row[predicted_id]}')

GANONDORF : prediction frequencies
-----------------
CAPTAIN_FALCON : 82.0
MARTH : 18.0
SHEIK : 5.0
SAMUS : 3.0
FOX : 3.0
FALCO : 2.0
GANONDORF : 0.0
DR_MARIO : 0.0
JIGGLYPUFF : 0.0
ICE_CLIMBERS : 0.0
PIKACHU : 0.0
PEACH : 0.0


In [29]:
def test(i, ii, clip_frames=None):
    """Classify one clip of one retrieved entry and print actual vs. detected.

    Parameters
    ----------
    i : int
        Index into the notebook-level ``bytestreams`` / ``characters``.
    ii : int
        Index of the clip within that entry.  Values past the last clip are
        clamped to the last clip; negative values count from the end and are
        clamped to the first clip.
    clip_frames : int, optional
        Clip length in frames.  Defaults to the notebook-level ``F``.

    Returns
    -------
    None
        Results are printed.  If the entry is too short to contain a single
        full clip, a message is printed instead.
    """
    if clip_frames is None:
        clip_frames = F

    character = characters[i]
    # NOTE(review): pickle.loads can execute arbitrary code; only safe while
    # the database contents are trusted.
    # presumably a scipy sparse matrix (it is row-sliced and has .toarray()).
    istream = pickle.loads(bytestreams[i])

    # Number of full, non-overlapping clips.  Floor division also counts the
    # final clip of a stream whose length is an exact multiple of clip_frames.
    n_clips = istream.shape[0] // clip_frames
    if n_clips == 0:
        # Previously this case recursed with ever-smaller `ii` until Python
        # raised RecursionError.
        print(f'Entry {i} has fewer than {clip_frames} frames; no full clip to classify.')
        return

    # Clamp once instead of retrying recursively (each retry re-unpickled the
    # stream), then resolve negative indices the way list indexing does.
    ii = max(-n_clips, min(ii, n_clips - 1)) % n_clips

    start = ii * clip_frames
    d = istream[start:start + clip_frames].toarray()

    # The model expects a batch axis: (1, frames, features).
    scores = model.predict(d.reshape(1, d.shape[0], d.shape[1]))
    best = int(np.argmax(scores))
    # Class index 0 has no character, so fall back to a placeholder rather
    # than raising KeyError.
    pred = char_from_id.get(best, f'<unknown id {best}>')


    print(f'''
    Actual Character:
    ---------------
    {character}
    ''')

    print(f'''
    Detected Character:
    ---------------
    {pred}
    ''')

In [None]:
# Selection for the inspection cells below:
#   i  -> which retrieved entry to look at (602 is another one worth trying)
#   ii -> which clip within that entry
i, ii = 630, 3

In [None]:
# Show the stored character label of the currently selected entry, and its index.
print(characters[i], '\n', f'i={i}')

In [None]:
# Classify the selected clip, then advance `i` so that re-running this cell
# steps through the retrieved entries one at a time.
test(i, ii)
i+=1

In [None]:
# Step back one entry (undoes the increment made after the last test() call).
i-=1