In [158]:
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import json
from os import listdir
from os.path import isfile, join
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.utils import resample
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [142]:
files = [f for f in listdir("data") if isfile(join("data", f))]
data = []
for file in files:
    with open("data/" + file, "r", encoding='utf-8') as f:
        data += json.load(f)
cleaned = [entry for entry in data if entry[2] < 0] # remove those entries where the ball is flying in the direction of the opponent because we don't do anything in that case anyway.

In [143]:
copy = cleaned.copy()
for entry in copy:
    action = entry[6]
    if action == [1, 0, 0]:
        entry[6] = 'nothing'
    elif action == [0, 1, 0]:
        entry[6] = 'up'
    else:
        entry[6] = 'down'
raw_data = np.array(copy)

In [144]:
data = {'ballX': raw_data[:,0], 'ballY': raw_data[:,1], 'velocityX': raw_data[:,2], 'velocityY': raw_data[:,3], 'paddlePos1': raw_data[:,4], 'paddlePos2': raw_data[:,5], 'action': raw_data[:,6]}
df = pd.DataFrame(data)
df.head()

Unnamed: 0,ballX,ballY,velocityX,velocityY,paddlePos1,paddlePos2,action
0,0.5,0.5,-0.5000000000000004,-0.8660254037844385,0.5,0.5,nothing
1,0.48125,0.4566987298107781,-0.5000000000000004,-0.8660254037844385,0.5,0.5,nothing
2,0.4625,0.4133974596215561,-0.5000000000000004,-0.8660254037844385,0.5,0.5,nothing
3,0.4437499999999999,0.3700961894323342,-0.5000000000000004,-0.8660254037844385,0.5,0.5,nothing
4,0.4249999999999999,0.3267949192431123,-0.5000000000000004,-0.8660254037844385,0.5,0.5,nothing


In [145]:
df.dtypes

ballX         object
ballY         object
velocityX     object
velocityY     object
paddlePos1    object
paddlePos2    object
action        object
dtype: object

In [146]:
df.ballX = df.ballX.astype(np.float32)
df.ballY = df.ballY.astype(np.float32)
df.velocityX = df.velocityX.astype(np.float32)
df.velocityY = df.velocityY.astype(np.float32)
df.paddlePos1 = df.paddlePos1.astype(np.float32)
df.paddlePos2 = df.paddlePos2.astype(np.float32)
df.action = df.action.astype('category')

In [147]:
df.dtypes

ballX          float32
ballY          float32
velocityX      float32
velocityY      float32
paddlePos1     float32
paddlePos2     float32
action        category
dtype: object

In [151]:
df_majority = df[df.action == 'nothing']
df_minority1 = df[df.action == 'up']
df_minority2 = df[df.action == 'down']
print(df_majority.shape)
print(df_minority1.shape)
print(df_minority2.shape)
df_majority_downsampled = resample(df_majority, 
                                 replace=False,
                                 n_samples=len(df_minority1),
                                 random_state=42)
df_downsampled = pd.concat([df_majority_downsampled, df_minority1, df_minority2])
df_downsampled.action.value_counts()

(1221, 7)
(346, 7)
(363, 7)


down       363
up         346
nothing    346
Name: action, dtype: int64

In [152]:
X = df.drop('action', 1)
y = df['action']

In [153]:
X.head()

Unnamed: 0,ballX,ballY,velocityX,velocityY,paddlePos1,paddlePos2
0,0.5,0.5,-0.5,-0.866025,0.5,0.5
1,0.48125,0.456699,-0.5,-0.866025,0.5,0.5
2,0.4625,0.413397,-0.5,-0.866025,0.5,0.5
3,0.44375,0.370096,-0.5,-0.866025,0.5,0.5
4,0.425,0.326795,-0.5,-0.866025,0.5,0.5


In [154]:
y.head()

0    nothing
1    nothing
2    nothing
3    nothing
4    nothing
Name: action, dtype: category
Categories (3, object): ['down', 'nothing', 'up']

In [155]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [157]:
scaler = StandardScaler()
scaler.fit(X_train)
scaler.transform(X_train)
scaler.transform(X_test)

array([[ 1.5453361 ,  1.3650279 , -0.40901205,  0.9130638 , -0.22883739,
        -0.22883739],
       [ 0.32392758,  0.21691322,  0.88355404,  1.3079059 ,  1.5565339 ,
         1.5565339 ],
       [-0.47538787,  1.3717656 ,  0.7263303 ,  1.272268  , -1.3000603 ,
        -1.3000603 ],
       ...,
       [-0.26068276,  0.9515275 , -0.40901205, -0.8541619 ,  0.12823692,
         0.12823692],
       [-0.4958015 , -0.94170797,  0.649409  ,  1.2538786 , -0.22883739,
        -0.22883739],
       [-0.39488885, -1.0849555 ,  0.649409  ,  1.2538786 , -0.22883739,
        -0.22883739]], dtype=float32)

In [159]:
estimator_KNN = KNeighborsClassifier(algorithm='auto')

In [160]:
parameters_KNN = {
    'n_neighbors': (1,2,4,8,10,12,16,24),
    'leaf_size': (1, 4, 8, 12, 16, 20),
    'p': (1,2),
    'weights': ('uniform', 'distance'),
    'metric': ('minkowski', 'chebyshev')}
                   
grid_search_KNN = GridSearchCV(
    estimator=estimator_KNN,
    param_grid=parameters_KNN,
    scoring = 'f1_micro',
    n_jobs = -1,
    cv = 5
)
grid_search_KNN.fit(X_train, y_train)
print(grid_search_KNN.best_params_ )
print('Best Score - KNN:', grid_search_KNN.best_score_ )

{'leaf_size': 8, 'metric': 'minkowski', 'n_neighbors': 1, 'p': 1, 'weights': 'uniform'}
Best Score - KNN: 0.8620203421174294


In [161]:
knn = KNeighborsClassifier(leaf_size=8, metric='minkowski', n_neighbors=1, p=1, weights='uniform')
knn.fit(X_train, y_train)

KNeighborsClassifier(leaf_size=8, n_neighbors=1, p=1)

In [162]:
y_pred = knn.predict(X_test)

In [163]:
f1 = f1_score(y_test, y_pred, average='micro')
print ("test f1 score: %.2f" % f1)

test f1 score: 0.87
