In [1]:
from src.get_from_mongo import get_data
from sklearn.model_selection import train_test_split
from pymongo import MongoClient
import numpy as np
import tensorflow as tf
import pickle

In [2]:
# Character name -> integer id. Ids are assigned by position in the tuple
# below, counting from 1, so the ORDER of the names is significant.
id_from_char = {
    name: char_id
    for char_id, name in enumerate(
        (
            'CAPTAIN_FALCON',   # 1
            'DONKEY_KONG',      # 2
            'FOX',              # 3
            'GAME_AND_WATCH',   # 4
            'KIRBY',            # 5
            'BOWSER',           # 6
            'LINK',             # 7
            'LUIGI',            # 8
            'MARIO',            # 9
            'MARTH',            # 10
            'MEWTWO',           # 11
            'NESS',             # 12
            'PEACH',            # 13
            'PIKACHU',          # 14
            'ICE_CLIMBERS',     # 15
            'JIGGLYPUFF',       # 16
            'SAMUS',            # 17
            'YOSHI',            # 18
            'ZELDA',            # 19
            'SHEIK',            # 20
            'FALCO',            # 21
            'YOUNG_LINK',       # 22
            'DR_MARIO',         # 23
            'ROY',              # 24
            'PICHU',            # 25
            'GANONDORF',        # 26
        ),
        start=1,
    )
}

# Inverse lookup: integer id -> character name.
char_from_id = dict(zip(id_from_char.values(), id_from_char.keys()))

In [3]:
# Where the clips live: database and collection names.
database_name = 'slippi'
collection_name = 'melee_clips_30s'

# Open a client against the MongoDB server on this machine (default port),
# then resolve the database and the clip collection from it.
client = MongoClient(host='localhost', port=27017)
db = client[database_name]
collection = db[collection_name]

In [14]:
def data_generator(clip_collection=collection, # collection containing clips
                   batch_size = 100
):
    """Yield ``(Xi, Yi)`` batches built from the documents of a clip collection.

    Documents are read in whatever order ``clip_collection.find()`` returns
    them; no shuffling is performed. Each document must provide:

    * ``'istream'``   -- pickled object exposing ``.toarray()``
      (presumably a scipy sparse matrix -- TODO confirm).
    * ``'character'`` -- a key of ``id_from_char``.

    Parameters
    ----------
    clip_collection : collection-like
        Object whose ``find()`` returns an iterable of clip documents.
    batch_size : int
        Number of clips per yielded batch. The last batch may be smaller
        when the collection size is not a multiple of ``batch_size``.

    Yields
    ------
    (Xi, Yi)
        ``Xi`` is ``np.stack`` of the decoded clips along a new leading axis,
        ``Yi`` is ``tf.one_hot`` of the character ids with depth 26.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    xi = []
    yi = []

    # Iterate the cursor directly. Calling next(cursor) by hand inside a
    # generator lets StopIteration escape when the cursor runs dry mid-batch,
    # which Python 3.7+ converts into a RuntimeError (PEP 479).
    for clip in clip_collection.find():
        # NOTE(review): pickle.loads can execute arbitrary code; only use this
        # on a database whose contents are trusted.
        xi.append(pickle.loads(clip['istream']).toarray())
        yi.append(id_from_char[clip['character']])

        if len(xi) == batch_size:
            # NOTE(review): id_from_char ids run 1..26 while the one-hot depth
            # is 26 (valid indices 0..25), so id 26 appears to encode as an
            # all-zero row. Changing this also requires changing the model's
            # output width -- confirm and fix both together.
            yield np.stack(xi, axis=0), tf.one_hot(yi, 26)
            xi = []
            yi = []

    # Flush the leftover clips as a final, shorter batch instead of crashing.
    if xi:
        yield np.stack(xi, axis=0), tf.one_hot(yi, 26)

In [9]:
def get_test_data(clip_collection=collection, # collection containing clips
                  size = 100
):
    """Fetch raw test data through ``get_data`` and return it.

    NOTE(review): ``clip_collection`` and ``size`` are accepted but not used
    by the body; the database name, collection name and ``get=6000`` are
    hard-coded. Confirm whether ``size`` was meant to replace ``get``.

    Returns
    -------
    tuple
        ``(bytestreams, characters)`` exactly as returned by
        ``get_data('slippi', 'melee_public_slp_dataset', get=6000)``.
    """
    bytestreams, characters = get_data('slippi',
                                       'melee_public_slp_dataset',
                                       get=6000)
    # Hand the data back to the caller; without this the fetched values were
    # bound to locals only and the function always returned None.
    return bytestreams, characters

In [10]:
# gen = data_generator(I_train, batch_size=10)

In [11]:
# Xi, Yi = next(gen)
# for Y in Yi:
#     print(char_from_id[np.argmax(Y)])

In [12]:
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, InputLayer
from tensorflow.keras.layers import Conv1D, MaxPooling1D

In [17]:
# 1-D convolutional classifier: three Conv1D + MaxPooling1D stages, then a
# small dense head ending in a 26-way softmax.
# NOTE(review): the softmax has 26 units (class indices 0..25) while
# id_from_char assigns ids 1..26 -- confirm the label encoding lines up.
model = Sequential([
    # first conv layer
    Conv1D(filters=100,      # num of features extracted from istream
           kernel_size=60,   # number of frames filter can see at once
           activation='relu'),
    MaxPooling1D(pool_size=5),

    # second conv stage
    Conv1D(filters=80, kernel_size=30, activation='relu'),
    MaxPooling1D(pool_size=5),

    # third conv stage
    Conv1D(filters=60, kernel_size=15, activation='relu'),
    MaxPooling1D(pool_size=5),

    # collapse the remaining (steps, channels) grid into one feature vector
    Flatten(),
    Dense(units=40, activation='relu'),

    # final output layer
    Dense(units=26, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [18]:
# Split the clip ids 0..N-1 into train and test sets.
# The document count is only an estimate, so it is scaled down by 0.1% to
# stay clear of ids that may not exist.
N = int(collection.estimated_document_count() * .999)
print(f'N = {N}')

I_train, I_test = train_test_split(
    np.arange(N),
    test_size=.3,
    random_state=11,
)

N = 915730


In [24]:
# Stream batches of 100 clips from the default collection. `data` is a
# one-shot generator: batches consumed by one consumer are not replayed
# for the next one.
data = data_generator(batch_size=100)

In [27]:
# Train for a single epoch made of 500 batches pulled from the generator.
model.fit(
    data,
    epochs=1,
    steps_per_epoch=500,
    verbose=1,
)

# NOTE(review): `data` is the same generator that fed fit(), so this score is
# computed on 10 further batches of that stream rather than on a separately
# held-out test split.
score = model.evaluate(
    data,
    steps=10,
    verbose=0,
)

# score[0] is the loss, score[1] the accuracy metric requested at compile time.
print('\nTest score:', round(score[0], 3))
print(f'Test accuracy: {round(score[1]*100)}%')  # this is the one we care about


Test score: 3.45
Test accuracy: 23%


In [None]:
def get_conf_matrix(labels_as_id, predictions_as_id, num_classes=27):
    """Count (true id, predicted id) pairs into a square confusion matrix.

    Parameters
    ----------
    labels_as_id : iterable of int
        True class ids, one per sample.
    predictions_as_id : iterable of int
        Predicted class ids, paired positionally with ``labels_as_id``.
        If the two iterables differ in length, the extra items of the longer
        one are ignored (``zip`` semantics).
    num_classes : int, optional
        Side length of the matrix. Defaults to 27, which leaves room for ids
        0..26 (row/column 0 is unused when ids start at 1).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, num_classes)`` where entry
        ``[r, p]`` is the number of samples with true id ``r`` that were
        predicted as ``p``.

    Raises
    ------
    IndexError
        If an id is ``>= num_classes``. Negative ids follow numpy indexing
        rules and wrap around from the end.
    """
    conf_matrix = np.zeros((num_classes, num_classes))
    for i_real, i_pred in zip(labels_as_id, predictions_as_id):
        conf_matrix[i_real, i_pred] += 1
    return conf_matrix

In [None]:
# Predicted class index per test sample: argmax over each row of the model output.
# NOTE(review): `X_test` is not defined in any cell visible here -- confirm where it is built.
pred = np.argmax(model.predict(X_test), axis = 1)

In [None]:
# Tally (true id, predicted id) pairs: rows are true ids, columns are predicted ids.
# NOTE(review): `y_test` is not assigned at notebook level in any cell visible here -- confirm its origin.
conf_matrix = get_conf_matrix(y_test, pred)
# Cursor for the per-character report cell, which increments it before each use.
char_id = 0

In [None]:
# Advance to the next character on every run of this cell, then list how often
# each character was predicted for it, most frequent first.
# NOTE: re-running this cell is intentionally not idempotent (char_id moves on).
char_id += 1
row = conf_matrix[char_id]
sorted_row_indices = np.flip(np.argsort(row))

print(f'{char_from_id[char_id]} : prediction frequencies\n-----------------')
for i in sorted_row_indices:
    # index 0 has no entry in char_from_id, so it is skipped
    if i <= 0:
        continue
    print(f'{char_from_id[i]} : {row[i]}')

In [None]:
def test(i, ii):
    """Classify one F-frame clip of entry ``i`` and print actual vs. detected character.

    Reads the notebook-level names ``characters``, ``bytestreams``, ``F``,
    ``model``, ``id_from_char`` and ``char_from_id``.

    Parameters
    ----------
    i : int
        Index into ``characters`` / ``bytestreams`` selecting the entry.
    ii : int
        Index of the clip (non-overlapping window of ``F`` frames) within that
        entry. Values past either end are clamped to the nearest available
        clip; negative values count from the end, as with list indexing.

    Returns
    -------
    None
        Results are printed. If the entry is too short to hold any clip, a
        short notice is printed instead.
    """
    character = characters[i]
    # NOTE(review): pickle.loads can execute arbitrary code; only use on trusted data.
    # The unpickled object must support .shape, slicing and .toarray()
    # (presumably a scipy sparse matrix -- TODO confirm).
    istream = pickle.loads(bytestreams[i])

    # Start frames of the available windows. The strict bound (f + F < number
    # of frames) is kept as it was, so a window ending exactly on the last
    # frame is not offered -- NOTE(review): confirm this is intended.
    starts = range(0, istream.shape[0] - F, F)
    if not starts:
        # The former retry-by-recursion never terminated in this situation.
        print(f'Entry {i} is too short to cut a {F}-frame clip from.')
        return

    # Clamp instead of retrying with ii-1 recursively: same clip is chosen for
    # an ii that is too large, without re-unpickling the stream on each retry.
    ii = max(-len(starts), min(ii, len(starts) - 1))
    f = starts[ii]
    d = istream[f:f+F].toarray()

    # The model expects a leading batch axis; argmax over the single output row
    # gives the predicted id.
    pred = char_from_id[np.argmax(model.predict(d.reshape(1,d.shape[0],d.shape[1])))]
    # Round-trip through the id tables so an unknown character still raises KeyError.
    actual = char_from_id[id_from_char[character]]

    print(f'''
    Actual Character:
    ---------------
    {actual}
    ''')

    print(f'''
    Detected Character:
    ---------------
    {pred}
    ''')

In [None]:
# Alternative entry kept for reference: i = 602
i = 630 # index into `characters` / `bytestreams`: which entry to check
ii = 3 # which clip within that entry to look at

In [None]:
# Show the labelled character of the currently selected entry together with its index.
# NOTE(review): `characters` is not assigned at notebook level in any cell visible here -- confirm its origin.
print(characters[i], '\n', f'i={i}')

In [None]:
# Classify clip `ii` of entry `i`, then advance `i` so that re-running this
# cell steps through consecutive entries (the cell is deliberately stateful).
test(i, ii)
i+=1

In [None]:
# Step back one entry (undoes one `i+=1`).
i-=1