In [1]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, plot_confusion_matrix
from sklearn.linear_model import LogisticRegression

import sys

# Directory added to the import path so the `from functions import ...` line
# that follows can resolve. The original guard tested for '.../Jake' but
# appended '.../jake', so the check never matched and a duplicate entry was
# appended on every re-run of this cell; one constant keeps the two in sync.
# NOTE(review): the path is relative to the kernel's working directory — confirm
# it matches where the notebook is actually launched from.
FUNCTIONS_DIR = 'Notebooks/Individual/jake'
if FUNCTIONS_DIR not in sys.path:
    sys.path.append(FUNCTIONS_DIR)
from functions import ScoreModel, FeatureImp

import IPython
# Audio clip played by the last cell of the notebook.
sound_file = '../../../archive/sounds/puzzle_solved_jingle.wav'

# Show every column when a DataFrame is rendered (no "..." truncation).
pd.options.display.max_columns = None

In [2]:
# Load the two processed match tables in one pass: pro games first, then
# solo-queue games.
df_pro, df_soloq = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../../archive/processed/pro.csv",
        "../../../archive/processed/soloq.csv",
    )
)

# Sanity check: preview the first three rows of each table.
pro_preview = df_pro.head(3)
soloq_preview = df_soloq.head(3)
display(pro_preview, soloq_preview)

Unnamed: 0,blueWins,blueFirstBlood,blueTotalExperience,blueGoldDiff,blueExperienceDiff,blueCSDiff,blueKills,blueAssists,redKills,redTotalExperience,redAssists,blueCSPerMin,redCSPerMin,blueGoldPerMin,redGoldPerMin,blueAvgPickrate,redAvgPickrate,bluePredWinrate,redPredWinrate
0,1,1.0,20459.0,1793.0,2365.0,65.0,1.0,0.0,0.0,18094.0,0.0,38.2,31.7,1629.1,1449.8,0.092679,0.158275,0.48367,0.487527
1,0,1.0,19210.0,759.0,171.0,-8.0,1.0,3.0,0.0,19039.0,0.0,34.0,34.8,1562.3,1486.4,0.24274,0.129162,0.502444,0.481707
2,0,1.0,16987.0,73.0,-1.0,-24.0,3.0,3.0,2.0,16988.0,4.0,27.5,29.9,1504.3,1497.0,0.1446,0.107876,0.492119,0.498776


Unnamed: 0,blueWins,blueFirstBlood,blueKills,blueAssists,blueTotalExperience,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redKills,redAssists,redTotalExperience,redCSPerMin,redGoldPerMin,bluePredWinrate,redPredWinrate,blueAvgPickrate,redAvgPickrate,blueCSDiff
0,0,1,9,11,17039,643,-8,19.5,1721.0,6,8,17047,19.7,1656.7,0.489394,0.509859,0.08679,0.103614,-2.0
1,0,0,5,5,16265,-2908,-1173,17.4,1471.2,5,2,17438,24.0,1762.0,0.492787,0.492763,0.124203,0.121591,-66.0
2,0,0,7,4,16221,-1172,-1033,18.6,1611.3,11,14,17254,20.3,1728.5,0.489751,0.49858,0.086608,0.112683,-17.0


In [3]:
# DataFrame.info() prints its report itself and returns None, so wrapping the
# calls in display() only appended two stray "None" lines to the output.
df_pro.info()
df_soloq.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8278 entries, 0 to 8277
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   blueWins             8278 non-null   int64  
 1   blueFirstBlood       8278 non-null   float64
 2   blueTotalExperience  7728 non-null   float64
 3   blueGoldDiff         7728 non-null   float64
 4   blueExperienceDiff   7728 non-null   float64
 5   blueCSDiff           7728 non-null   float64
 6   blueKills            7728 non-null   float64
 7   blueAssists          7728 non-null   float64
 8   redKills             7728 non-null   float64
 9   redTotalExperience   7728 non-null   float64
 10  redAssists           7728 non-null   float64
 11  blueCSPerMin         7728 non-null   float64
 12  redCSPerMin          7728 non-null   float64
 13  blueGoldPerMin       7728 non-null   float64
 14  redGoldPerMin        7728 non-null   float64
 15  blueAvgPickrate      8278 non-null   f

None

None

In [4]:
# pd.concat aligns frames on column names, so stacking soloq on top of pro and
# slicing the pro rows back out gives df_pro with df_soloq's column order
# (presumably so the features line up when a model fit on one table is scored
# on the other — confirm).
df_temp = pd.concat([df_soloq, df_pro], ignore_index=True)

# Slice at len(df_soloq) instead of a hard-coded row count, so the split stays
# correct if soloq.csv is regenerated with a different number of rows.
df_pro = df_temp.iloc[len(df_soloq):, :]

# Guard the invariant this cell exists to establish.
assert list(df_pro.columns) == list(df_soloq.columns), "pro/soloq columns differ"

# A bare `df_pro.head(3)` in the middle of a cell is never rendered; display it
# explicitly so the sanity check is actually visible.
display(df_pro.head(3))
df_pro.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8278 entries, 9879 to 18156
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   blueWins             8278 non-null   int64  
 1   blueFirstBlood       8278 non-null   float64
 2   blueKills            7728 non-null   float64
 3   blueAssists          7728 non-null   float64
 4   blueTotalExperience  7728 non-null   float64
 5   blueGoldDiff         7728 non-null   float64
 6   blueExperienceDiff   7728 non-null   float64
 7   blueCSPerMin         7728 non-null   float64
 8   blueGoldPerMin       7728 non-null   float64
 9   redKills             7728 non-null   float64
 10  redAssists           7728 non-null   float64
 11  redTotalExperience   7728 non-null   float64
 12  redCSPerMin          7728 non-null   float64
 13  redGoldPerMin        7728 non-null   float64
 14  bluePredWinrate      8278 non-null   float64
 15  redPredWinrate       8278 non-null

In [5]:
# Experiment 1: tune on solo-queue games, keep the pro games as the test set.
X_test = df_pro.drop("blueWins", axis=1)
y_test = df_pro["blueWins"]

X = df_soloq.drop("blueWins", axis=1)
y = df_soloq["blueWins"]

# Split the solo-queue games into train / validation (default split size).
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=57)

# "binary:logistic" is XGBoost's binary-classification objective;
# "reg:logistic" is the regression-flavoured variant and is not the intended
# objective for a classifier on a 0/1 target.
boost_model = XGBClassifier(random_state=57, objective="binary:logistic")

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 70, 100]
}

# The grid has 4 * 4 * 6 * 4 * 4 = 1536 candidates, i.e. 4608 fits with cv=3.
# The recorded single-process run (n_jobs=1) ended in a KeyboardInterrupt, so
# use all available cores instead.
gridsearch = GridSearchCV(boost_model, param_grid, cv=3, scoring="accuracy", n_jobs=-1)
gridsearch.fit(X_train, y_train)

best_parameters = gridsearch.best_params_

print("Best Parameters: ")
print(best_parameters)

KeyboardInterrupt: 

In [None]:
# Report the tuned solo-queue model on its own training and validation splits.
for split_name, split_X, split_y in (
    ("Training Scores", X_train, y_train),
    ("Validation Scores", X_val, y_val),
):
    print(split_name)
    print(ScoreModel(gridsearch, split_X, split_y))

In [None]:
# Score the solo-queue-trained search on the held-out pro games.
print("Test Scores")
pro_test_scores = ScoreModel(gridsearch, X_test, y_test)
print(pro_test_scores)

In [None]:
# Experiment 2 (roles swapped): tune on pro games, keep the solo-queue games as
# the test set.
X_test2 = df_soloq.drop("blueWins", axis=1)
y_test2 = df_soloq["blueWins"]

X2 = df_pro.drop("blueWins", axis=1)
y2 = df_pro["blueWins"]

# Split the pro games into train / validation (default split size).
X_train2, X_val2, y_train2, y_val2 = train_test_split(X2, y2, random_state=57)

# "binary:logistic" is XGBoost's binary-classification objective;
# "reg:logistic" is the regression-flavoured variant and is not the intended
# objective for a classifier on a 0/1 target.
boost_model2 = XGBClassifier(random_state=57, objective="binary:logistic")

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 70, 100]
}

# 4 * 4 * 6 * 4 * 4 = 1536 candidates, i.e. 4608 fits with cv=3: run the search
# on all available cores rather than a single process.
gridsearch2 = GridSearchCV(boost_model2, param_grid, cv=3, scoring="accuracy", n_jobs=-1)
gridsearch2.fit(X_train2, y_train2)

best_parameters = gridsearch2.best_params_

print("Best Parameters: ")
print(best_parameters)

In [None]:
# Report the tuned pro-game model on its own training and validation splits.
for split_name, split_X, split_y in (
    ("Training Scores", X_train2, y_train2),
    ("Validation Scores", X_val2, y_val2),
):
    print(split_name)
    print(ScoreModel(gridsearch2, split_X, split_y))

In [None]:
# Score the pro-trained search on the held-out solo-queue games.
print("Test Scores")
soloq_test_scores = ScoreModel(gridsearch2, X_test2, y_test2)
print(soloq_test_scores)

In [None]:
# Play a jingle once the notebook has run to the end.
# The path is passed as `filename=` so it is always treated as a file to load;
# given positionally it lands in the `data` parameter. The previous `rate=1000`
# was dropped because `rate` only applies to raw sample arrays, not audio files.
IPython.display.Audio(filename=sound_file, autoplay=True)