## Data Analysis from playstyle classification

## Imports

In [1]:
import os
import pickle
import requests
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, mean_squared_error, balanced_accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, StackingClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping


2023-05-14 15:15:30.350154: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# All five player CSVs are read from the same data-collection folder.
data_dir = '../../1-data-collection/data/'

boay = pd.read_csv(data_dir + 'boay00.csv')
cho = pd.read_csv(data_dir + 'cho_shmo.csv')
vinu = pd.read_csv(data_dir + 'vinu.csv')
oski = pd.read_csv(data_dir + 'oski.csv')
eden = pd.read_csv(data_dir + 'eden.csv')


In [3]:
# Remove the 'Unnamed: 0' column from every player frame, in place.
# No errors='ignore' is passed, so a frame without that column raises KeyError
# (which also means this cell cannot be re-run without reloading the CSVs).
for frame in (cho, boay, vinu, oski, eden):
    frame.drop(columns='Unnamed: 0', inplace=True)

Load the ET model produced in `3-modeling-playstyle/code/`; the pickle itself is read from `../../et_model_4.pkl`.

In [26]:
# NOTE(review): pickle.load can execute arbitrary code from the file, so this
# path must only ever point at a trusted, locally produced artifact.
et_model_path = '../../et_model_4.pkl'
with open(et_model_path, 'rb') as model_file:
    et_model = pickle.load(model_file)

In [5]:
# Columns removed from every player frame before prediction.
# The sub-lists below are only a readability aid based on the column names;
# concatenated in this order they give exactly the original flat list.
_match_stat_cols = [
    'shots_against',
    'goals_against',
    'shooting_percentage',
    'bpm',
]

_boost_named_cols = [
    'amount_stolen_big',
    'amount_stolen_small',
    'count_collected_big',
    'count_collected_small',
    'count_stolen_small',
    'count_stolen_big',
    'amount_overfill_stolen',
    'time_zero_boost',
    'time_full_boost',
    'time_boost_0_25',
    'time_boost_25_50',
    'time_boost_50_75',
    'time_boost_75_100',
]

_movement_named_cols = [
    'avg_speed',
    'total_distance',
    'time_supersonic_speed',
    'time_boost_speed',
    'time_slow_speed',
    'time_ground',
    'time_low_air',
    'time_high_air',
    'time_powerslide',
]

_positioning_named_cols = [
    'time_defensive_third',
    'time_neutral_third',
    'time_offensive_third',
    'time_defensive_half',
    'time_offensive_half',
    'time_behind_ball',
    'time_infront_ball',
    'time_most_back',
    'time_most_forward',
    'time_closest_to_ball',
    'time_farthest_from_ball',
]

# 'goals_against_while_last_defender' is deliberately NOT listed here: it is
# dropped in separate cells further down, and only for cho, vinu and eden.
_remaining_cols = [
    'mvp',
    'taken',
    'goals',
    'assists',
    'avg_distance_to_ball_possession',
    'shots',
    'amount_stolen',
]

to_drop = (
    _match_stat_cols
    + _boost_named_cols
    + _movement_named_cols
    + _positioning_named_cols
    + _remaining_cols
)

In [6]:
# Strip the columns listed in to_drop from each player frame, in place.
# Every listed column must exist in every frame, otherwise drop raises KeyError.
for frame in (boay, cho, vinu, oski, eden):
    frame.drop(columns=to_drop, inplace=True)


In [7]:
# Cast every column except the last one to float, frame by frame.
# The last column is skipped — presumably 'player_name', the only column the
# prediction cells drop later; verify against the CSV column order.
for frame in (boay, cho, vinu, oski, eden):
    for col in frame.columns[:-1]:
        frame[col] = frame[col].astype(float)

In [8]:
# This column is not part of to_drop, so it is removed here for cho and vinu only.
for frame in (cho, vinu):
    frame.drop(columns='goals_against_while_last_defender', inplace=True)

In [9]:
# Remove the column from eden in place; raises KeyError if it is missing.
del eden['goals_against_while_last_defender']

In [10]:
# Fresh, unfitted scaler and interaction-only polynomial transformer.
# NOTE(review): both names are rebound further down to the objects unpickled
# from '../../ss_ps.pkl' and '../../poly_ps.pkl' before any transform is called,
# so these instances appear to be unused placeholders.
ss = StandardScaler()
poly = PolynomialFeatures(interaction_only=True)

In [37]:
# Model input: every column of cho except 'player_name'.
cho_features = cho.drop(columns='player_name')

# Per-game probabilities from the ET model. The column labels are hard-coded and
# assumed to follow the model's class order — TODO confirm against et_model.classes_.
cho_preds = pd.DataFrame(
    et_model.predict_proba(cho_features),
    columns=['Monkeymoon', 'Oski', 'Vati'],
)

# How many games were assigned to each predicted class.
cho_labels = pd.DataFrame(et_model.predict(cho_features))
print(cho_labels.value_counts())

# Mean probability per class over all of cho's games.
print(cho_preds.mean())

M0nkey M00n    18
Oski            1
dtype: int64
Monkeymoon    0.458823
Oski          0.269122
Vati          0.272055
dtype: float64


- Cho Shmo is predicted to play like Monkey Moon in 18 of 19 games, while the mean probabilities still indicate some resemblance to Oski and Vatira.
- However, with a highest mean probability of only about 0.46, it is likely that Cho does not match any single playstyle too closely.

In [28]:
# Model input: every column of boay except 'player_name'.
boay_features = boay.drop(columns='player_name')

# Per-game probabilities from the ET model. The column labels are hard-coded and
# assumed to follow the model's class order — TODO confirm against et_model.classes_.
boay_preds = pd.DataFrame(
    et_model.predict_proba(boay_features),
    columns=['Monkeymoon', 'Oski', 'Vati'],
)

# How many games were assigned to each predicted class.
boay_labels = pd.DataFrame(et_model.predict(boay_features))
print(boay_labels.value_counts())

# Mean probability per class over all of boay's games.
print(boay_preds.mean())

M0nkey M00n    11
Vati            6
Oski            2
dtype: int64
Monkeymoon    0.407955
Oski          0.233010
Vati          0.359034
dtype: float64


- Boay matches Monkey Moon and Vatira more strongly than Oski; however, since all three mean probabilities are close in value, Boay does not match any playstyle too closely.

In [29]:
# Model input: every column of vinu except 'player_name'.
vinu_features = vinu.drop(columns='player_name')

# Per-game probabilities from the ET model. The column labels are hard-coded and
# assumed to follow the model's class order — TODO confirm against et_model.classes_.
vinu_preds = pd.DataFrame(
    et_model.predict_proba(vinu_features),
    columns=['Monkeymoon', 'Oski', 'Vati'],
)

# How many games were assigned to each predicted class.
vinu_labels = pd.DataFrame(et_model.predict(vinu_features))
print(vinu_labels.value_counts())

# Mean probability per class over all of vinu's games.
print(vinu_preds.mean())

M0nkey M00n    28
Vati            5
Oski            1
dtype: int64
Monkeymoon    0.469814
Oski          0.233730
Vati          0.296456
dtype: float64


- Vinu most strongly resembles Monkey Moon (28 of 34 games) and shares a minor match with Vatira.

In [30]:
# Keep only the oski rows without missing values (rebinding instead of inplace=True).
oski = oski.dropna()

In [31]:
# Model input: every column of oski except 'player_name'.
oski_features = oski.drop(columns='player_name')

# Per-game probabilities from the ET model. The column labels are hard-coded and
# assumed to follow the model's class order — TODO confirm against et_model.classes_.
oski_preds = pd.DataFrame(
    et_model.predict_proba(oski_features),
    columns=['Monkeymoon', 'Oski', 'Vati'],
)

# How many games were assigned to each predicted class.
oski_labels = pd.DataFrame(et_model.predict(oski_features))
print(oski_labels.value_counts())

# Mean probability per class over all of oski's games.
print(oski_preds.mean())

Oski           310
Vati            11
M0nkey M00n      2
dtype: int64
Monkeymoon    0.028905
Oski          0.878778
Vati          0.092318
dtype: float64


- As expected, Oski's games are predicted overwhelmingly as Oski's own class.

In [32]:
# Keep only the eden rows without missing values (rebinding instead of inplace=True).
eden = eden.dropna()

In [33]:
# Model input: every column of eden except 'player_name'.
eden_features = eden.drop(columns='player_name')

# Per-game probabilities from the ET model. The column labels are hard-coded and
# assumed to follow the model's class order — TODO confirm against et_model.classes_.
eden_preds = pd.DataFrame(
    et_model.predict_proba(eden_features),
    columns=['Monkeymoon', 'Oski', 'Vati'],
)

# How many games were assigned to each predicted class.
eden_labels = pd.DataFrame(et_model.predict(eden_features))
print(eden_labels.value_counts())

# Mean probability per class over all of eden's games.
print(eden_preds.mean())

Vati           23
M0nkey M00n    20
Oski            6
dtype: int64
Monkeymoon    0.345987
Oski          0.245163
Vati          0.408850
dtype: float64


- Eden has no clear connection to any of the three playstyles, based on these games and the ET model.
- Eden is most similar to Vatira and Monkey Moon.

In [40]:
# Saved Keras model used for the neural-network predictions below.
nn_model_path = '../../tf_save_model_3.h5'
model = load_model(nn_model_path)

In [38]:
# Replace ss with the object stored in the pickle; it is used via ss.transform below.
# NOTE(review): pickle.load can execute arbitrary code — trusted files only.
scaler_path = '../../ss_ps.pkl'
with open(scaler_path, 'rb') as scaler_file:
    ss = pickle.load(scaler_file)

In [39]:
# Replace poly with the object stored in the pickle; it is used via poly.transform below.
# NOTE(review): pickle.load can execute arbitrary code — trusted files only.
poly_path = '../../poly_ps.pkl'
with open(poly_path, 'rb') as poly_file:
    poly = pickle.load(poly_file)

In [46]:
# Scale first, then apply the polynomial transformer, then run the network.
boay_scaled = ss.transform(boay.drop(columns='player_name'))
boay_expanded = poly.transform(boay_scaled)
boay_nn_raw = model.predict(boay_expanded)

# One row per game; the column labels are hard-coded and assumed to match the
# order of the network's three outputs — TODO confirm against the training notebook.
boay_nn_preds = pd.DataFrame(boay_nn_raw, columns=['monkeymoon', 'oski', 'vatira'])

# Mean model output per class across boay's games.
boay_nn_preds.mean()



monkeymoon    0.329779
oski          0.197065
vatira        0.473156
dtype: float32

In [47]:
# Scale first, then apply the polynomial transformer, then run the network.
cho_scaled = ss.transform(cho.drop(columns='player_name'))
cho_expanded = poly.transform(cho_scaled)
cho_nn_raw = model.predict(cho_expanded)

# One row per game; the column labels are hard-coded and assumed to match the
# order of the network's three outputs — TODO confirm against the training notebook.
cho_nn_preds = pd.DataFrame(cho_nn_raw, columns=['monkeymoon', 'oski', 'vatira'])

# Mean model output per class across cho's games.
cho_nn_preds.mean()



monkeymoon    0.280151
oski          0.261441
vatira        0.458407
dtype: float32

In [49]:
# Scale first, then apply the polynomial transformer, then run the network.
oski_scaled = ss.transform(oski.drop(columns='player_name'))
oski_expanded = poly.transform(oski_scaled)
oski_nn_raw = model.predict(oski_expanded)

# One row per game; the column labels are hard-coded and assumed to match the
# order of the network's three outputs — TODO confirm against the training notebook.
oski_nn_preds = pd.DataFrame(oski_nn_raw, columns=['monkeymoon', 'oski', 'vatira'])

# Mean model output per class across oski's games.
oski_nn_preds.mean()



monkeymoon    0.072459
oski          0.745346
vatira        0.182195
dtype: float32

In [52]:
# Scale first, then apply the polynomial transformer, then run the network.
vinu_scaled = ss.transform(vinu.drop(columns='player_name'))
vinu_expanded = poly.transform(vinu_scaled)
vinu_nn_raw = model.predict(vinu_expanded)

# One row per game; the column labels are hard-coded and assumed to match the
# order of the network's three outputs — TODO confirm against the training notebook.
vinu_nn_preds = pd.DataFrame(vinu_nn_raw, columns=['monkeymoon', 'oski', 'vatira'])

# Mean model output per class across vinu's games.
vinu_nn_preds.mean()



monkeymoon    0.483403
oski          0.188337
vatira        0.328260
dtype: float32

In [50]:
# Scale first, then apply the polynomial transformer, then run the network.
eden_scaled = ss.transform(eden.drop(columns='player_name'))
eden_expanded = poly.transform(eden_scaled)
eden_nn_raw = model.predict(eden_expanded)

# One row per game; the column labels are hard-coded and assumed to match the
# order of the network's three outputs — TODO confirm against the training notebook.
eden_nn_preds = pd.DataFrame(eden_nn_raw, columns=['monkeymoon', 'oski', 'vatira'])

# Mean model output per class across eden's games.
eden_nn_preds.mean()



monkeymoon    0.256505
oski          0.275255
vatira        0.468240
dtype: float32

## Neural Network Conclusions
---

- Cho, Boay, and Eden all have closer similarities to Vatira on average than to the other two pros, Oski and Monkey Moon.
- Oski has the closest match with Oski.
- Vinu has the closest match with Monkey Moon, unlike any of the other players analysed here.

In [73]:
# Save the processed boay frame (columns already dropped and cast above).
# index=True is the pandas default, spelled out here: the row index is written
# too, and read_csv will bring it back as an 'Unnamed: 0' column.
boay_out_path = '../../3-modeling-playstyle/data/boay_.csv'
boay.to_csv(boay_out_path, index=True)