In [55]:
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import json
from os import listdir
from os.path import isfile, join

In [56]:
files = [f for f in listdir("data") if isfile(join("data", f))]
data = []
print("Reading files")
for file in files:
    print(file)
    with open("data/" + file, "r", encoding='utf-8') as f:
        data += json.load(f)
# print(cleaned)
cleaned = [entry for entry in data if entry[2] < 0] # remove those entries where the ball is flying in the direction of the opponent because we don't do anything in that case anyway.
print(len(cleaned))

Reading files
080ec967-734d-4acc-9f92-c315bf12155c.json
0ed6797e-adaf-4fed-8de7-a4a7b2eafeb4.json
386f2394-0821-4e9f-9997-f10adfb05480.json
5136618e-f2a1-48dd-bb30-3fa14cb8f2f0.json
593e6938-77f1-48a8-8266-2f30d54f0c94.json
5ebdf527-214c-4302-b516-130d1af5f542.json
75c10d99-864d-434c-8ee8-60e0fbc74bfe.json
7ce583e4-f715-4349-b2a3-aafaa9ce3aa0.json
83f49c94-1004-4d9b-adde-32a4083e965e.json
a6312746-d888-4fe9-a513-83aa42f4c319.json
aa94cfee-a454-4fbf-ab9a-8ea13672077c.json
aea49f3e-de71-456b-af60-165a0aaf64da.json
cdd533ba-8114-40da-b4e2-fdf06f319ee3.json
d939b815-5ca6-4730-8fdf-2d08660a6fe7.json
dd7d22f3-1d2a-4f3e-9fc4-7c36e3b6fd7f.json
e21a3c31-3b37-4415-bc2c-bfc5febdb9f9.json
e40d22d7-7699-4b71-94d1-e78fcca74a3d.json
f4535cf2-111b-436c-ac78-c1c85fb4bf66.json
f9d1d1a0-e61e-497c-a0bd-bd6dff1ceffe.json
1930


In [57]:
X = np.array([[entry[0], entry[1], entry[2], entry[3], entry[4]] for entry in cleaned])
y = np.array([entry[6] for entry in cleaned])

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [59]:
scaler = StandardScaler()
scaler.fit(X_train)
scaler.transform(X_train)
scaler.transform(X_test)

array([[ 1.54533616,  1.36502781, -0.40901216,  0.91306381, -0.22883742],
       [ 0.32392761,  0.21691314,  0.88355421,  1.30790584,  1.55653389],
       [-0.47538786,  1.37176549,  0.72633045,  1.27226808, -1.30006021],
       ...,
       [-0.26068273,  0.95152751, -0.40901216, -0.85416191,  0.12823684],
       [-0.49580149, -0.94170798,  0.64940896,  1.2538786 , -0.22883742],
       [-0.39488881, -1.08495544,  0.64940896,  1.2538786 , -0.22883742]])

In [60]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [61]:
estimator_KNN = KNeighborsClassifier(algorithm='auto')

In [62]:
parameters_KNN = {
    'n_neighbors': (1,2,4,8,10,12,16,24),
    'leaf_size': (1, 4, 8, 12, 16, 20),
    'p': (1,2),
    'weights': ('uniform', 'distance'),
    'metric': ('minkowski', 'chebyshev')}
                   
grid_search_KNN = GridSearchCV(
    estimator=estimator_KNN,
    param_grid=parameters_KNN,
    scoring = 'f1_micro',
    n_jobs = -1,
    cv = 5
)
grid_search_KNN.fit(X_train, y_train)
print(grid_search_KNN.best_params_ )
print('Best Score - KNN:', grid_search_KNN.best_score_ )

{'leaf_size': 12, 'metric': 'minkowski', 'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}
Best Score - KNN: 0.8393540116841087


In [63]:
knn = KNeighborsClassifier(leaf_size=12, metric='minkowski', n_neighbors=1, p=1, weights='uniform')
knn.fit(X_train, y_train)

KNeighborsClassifier(leaf_size=12, n_neighbors=1, p=1)

In [64]:
y_pred = knn.predict(X_test)

In [65]:
f1 = f1_score(y_test, y_pred, average='micro')
print ("test f1 score: %.2f" % f1)

test f1 score: 0.84
