# Setup

In [993]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats

from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, RepeatedKFold, cross_val_score
import sklearn.metrics as skm
from sklearn.metrics import accuracy_score
from sklearn.cluster import AffinityPropagation as AP
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from xgboost import XGBRegressor, XGBClassifier

# Keras specific
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical 

import re
import time
import datetime as dt

import sys
import os
sys.path.append(os.path.abspath
                (os.path.join
                 (os.path.dirname("constants.py"), '..')))
from constants import *
from scripts import *

import warnings
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

hide_toggle()

# Load

In [1081]:
teamDataTable = pd.read_pickle("Data/raw_data/teamDataTable.pkl")
playerDataTable = pd.read_pickle("Data/raw_data/playerDataTable.pkl")

matchList = pd.read_pickle("Data/raw_data/matchList.pkl")
teamMatchList = pd.read_pickle("Data/raw_data/teamMatchList.pkl")
playerMatchList = pd.read_pickle("Data/raw_data/playerMatchList.pkl")

In [1082]:
df = matchList.copy()

offCols = ['Blue', 'Red', 'Tournament', 'TOP_Blue', 'JNG_Blue',
           'MID_Blue', 'ADC_Blue', 'SUP_Blue', 'TOP_Red', 'JNG_Red', 'MID_Red',
           'ADC_Red', 'SUP_Red', 'matchCode', 'blueKills', 'redKills', 'TournamentRegion',
           'totalKills', 'Semester', 'Split', 'Year', 'realSemester', 'realYear']
    
currentTarget = 'Score'

print(len(df))

29602


In [1083]:
dfToSplit = df.copy()
dfToSplit = dfToSplit.sort_values(by=['Date'],ascending=True)

if 'Team_Blue_Win_rate' in  dfToSplit.columns:
    dfToSplit.dropna(subset='Team_Blue_Win_rate',inplace=True)
    dfToSplit.dropna(subset='Team_Red_Win_rate',inplace=True)
    
elif 'TOP_Blue_Win_rate' in  dfToSplit.columns:
    dfToSplit.dropna(subset='TOP_Blue_Win_rate',inplace=True)
    dfToSplit.dropna(subset='TOP_Red_Win_rate',inplace=True)
    
if 'Score' in dfToSplit.columns: dfToSplit = dfToSplit[dfToSplit['Score']!=2]

for col in dfToSplit.columns:
    dfToSplit[col] = dfToSplit[col].fillna(0)
    
featCols = dfToSplit.drop(currentTarget,axis=1).columns
regions = dfToSplit['TournamentRegion'].unique()
regions = [x for x in regions if len(dfToSplit[dfToSplit['TournamentRegion']==x])>=300]

print(len(dfToSplit))

29276


# KERAS

In [876]:
dfKeras = dfToSplit.copy()
dfKeras = dfKeras.sort_values(by=['Date'],ascending=True)

dfKeras = dfKeras.drop(offCols,axis=1)

In [877]:
to_remove = 'Date','totalKills','Score'

target_column = targets[r]
predictors = list(dfKeras.columns)
predictors = [i for i in predictors if i not in to_remove]
print(len(dfKeras.dropna()))
#dfKeras[predictors] = dfKeras[predictors]/dfKeras[predictors].max()

15512


In [878]:
x = dfKeras[predictors].values
y = dfKeras[target_column].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=40, shuffle=False)

print(str(len(x_train))+'\n'+str(len(x_test)))

10858
4654


In [879]:
if r==0:
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    count_classes = y_test.shape[1]
    print(count_classes)

2


In [880]:
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=len(predictors)))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', 
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=128,
          epochs=20,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x11a969b3790>

In [881]:
pred_train= model.predict(x_train)
scores = model.evaluate(x_train, y_train, verbose=0)
print('Accuracy on training data: {}% \nError on training data: {}\n'.format(scores[1], 1 - scores[1]))   
 
pred_test= model.predict(x_test)
scores2 = model.evaluate(x_test, y_test, verbose=0)
print('Accuracy on test data: {}% \n Error on test data: {}\n'.format(scores2[1], 1 - scores2[1]))

y_pred = model.predict(x_test)
teste = pd.DataFrame(y_test).round(0)[0]
prev = pd.DataFrame(y_pred).round(0)[0]
print(f'acc score: {accuracy_score(teste, prev)}')
print(f'len: {len(y_pred)}')

Accuracy on training data: 0.8678393959999084% 
Error on training data: 0.13216060400009155

Accuracy on test data: 0.5801461338996887% 
 Error on test data: 0.4198538661003113

acc score: 0.5801461108723679
len: 4654


# Notes