In [1]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

pd.set_option('display.max_columns', None)

In [7]:
df = pd.read_csv("../../../archive/2021_LoL_esports_match_data_from_YaBoi_20210713.csv")
df.head()

Unnamed: 0,gameId,blueChampions,blueWins,blueFirstBlood,blueFirstBloodAssist,blueTotalGold,blueTotalExperience,blueTotalCS,blueGoldDiff,blueExperienceDiff,blueCsdiffat10,blueKills,blueAssists,blueDeaths,redChampions,redFirstBloodAssist,redTotalGold,redTotalExperience,redTotalCS,redAssists
0,ESPORTSTMNT03/1632489,"[('Mordekaiser', 'top'), ('Graves', 'jng'), ('...",1,1.0,0.0,16291.0,20459.0,382.0,1793.0,2365.0,65.0,1.0,0.0,0.0,"[('Ornn', 'top'), ('Kindred', 'jng'), ('Oriann...",0.0,14498.0,18094.0,317.0,0.0
1,ESPORTSTMNT03/1632500,"[('Gragas', 'top'), ('Graves', 'jng'), ('Rumbl...",0,1.0,0.0,15623.0,19210.0,340.0,759.0,171.0,-8.0,1.0,3.0,0.0,"[('Ornn', 'top'), ('Olaf', 'jng'), ('Syndra', ...",0.0,14864.0,19039.0,348.0,0.0
2,ESPORTSTMNT03/1632502,"[('Gragas', 'top'), ('Graves', 'jng'), ('Zoe',...",0,1.0,0.0,15043.0,16987.0,275.0,73.0,-1.0,-24.0,3.0,3.0,2.0,"[('Rumble', 'top'), ('Nidalee', 'jng'), ('Yone...",0.0,14970.0,16988.0,299.0,4.0
3,6909-9183,"[('Karma', 'top'), ('Pantheon', 'jng'), ('Synd...",0,1.0,0.0,16177.0,19640.0,380.0,732.0,75.0,20.0,1.0,1.0,0.0,"[('Aatrox', 'top'), ('Nidalee', 'jng'), ('Vikt...",0.0,15445.0,19565.0,360.0,0.0
4,6909-9184,"[('Jax', 'top'), ('Hecarim', 'jng'), ('Orianna...",0,0.0,0.0,16752.0,20020.0,361.0,1502.0,1164.0,40.0,2.0,2.0,1.0,"[('Camille', 'top'), ('Lillia', 'jng'), ('Vikt...",0.0,15250.0,18856.0,321.0,1.0


In [8]:
[('blueFirstBlood', 1.25),
 ('blueDragons', 4.9),
 ('blueHeralds', 2.96),
 ('blueAvgLevel', 4.19),
 ('blueTotalExperience', 2.6499999),
 ('blueTotalJungleMinionsKilled', 2.8799999),
 ('blueGoldDiff', 32.41),
 ('blueExperienceDiff', 13.500001),
 ('blueGoldPerMin', 3.05),
 ('redDragons', 4.5),
 ('redHeralds', 3.6200001),
 ('redAvgLevel', 1.73),
 ('redTotalExperience', 2.76),
 ('redTotalJungleMinionsKilled', 1.9),
 ('redGoldPerMin', 3.81),
 ('bluePredWinrate', 4.4700003),
 ('redPredWinrate', 3.99),
 ('blueAvgPickrate', 2.6399999),
 ('redAvgPickrate', 2.78)]

[('blueFirstBlood', 1.25),
 ('blueDragons', 4.9),
 ('blueHeralds', 2.96),
 ('blueAvgLevel', 4.19),
 ('blueTotalExperience', 2.6499999),
 ('blueTotalJungleMinionsKilled', 2.8799999),
 ('blueGoldDiff', 32.41),
 ('blueExperienceDiff', 13.500001),
 ('blueGoldPerMin', 3.05),
 ('redDragons', 4.5),
 ('redHeralds', 3.6200001),
 ('redAvgLevel', 1.73),
 ('redTotalExperience', 2.76),
 ('redTotalJungleMinionsKilled', 1.9),
 ('redGoldPerMin', 3.81),
 ('bluePredWinrate', 4.4700003),
 ('redPredWinrate', 3.99),
 ('blueAvgPickrate', 2.6399999),
 ('redAvgPickrate', 2.78)]

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8278 entries, 0 to 8277
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   gameId                8274 non-null   object 
 1   blueChampions         8278 non-null   object 
 2   blueWins              8278 non-null   int64  
 3   blueFirstBlood        8278 non-null   float64
 4   blueFirstBloodAssist  8278 non-null   float64
 5   blueTotalGold         7728 non-null   float64
 6   blueTotalExperience   7728 non-null   float64
 7   blueTotalCS           7728 non-null   float64
 8   blueGoldDiff          7728 non-null   float64
 9   blueExperienceDiff    7728 non-null   float64
 10  blueCsdiffat10        7728 non-null   float64
 11  blueKills             7728 non-null   float64
 12  blueAssists           7728 non-null   float64
 13  blueDeaths            7728 non-null   float64
 14  redChampions          8278 non-null   object 
 15  redFirstBloodAssist  

In [12]:
col = ["gameId", "blueWins", "blueChampions", "redChampions"]
X = df.drop(columns = col, axis = 1)
y = df["blueWins"]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=57)

boost_model = XGBClassifier(random_state=57, objective="reg:logistic",
                            learning_rate= 0.05, max_depth = 3, min_child_weight = 5,
                            n_estimators = 100, subsample = 0.6)
boost_model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=3,
              min_child_weight=5, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='reg:logistic', random_state=57, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=0.6,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [14]:
train_pred = boost_model.predict(X_train)
test_pred = boost_model.predict(X_test)

train_acc = accuracy_score(y_train, train_pred)

print("Training Scores")
print("Accuracy: ", train_acc)

test_acc = accuracy_score(y_test, test_pred)

print("Test Scores")
print("Accuracy: ", test_acc)

Training Scores
Accuracy:  0.7222938144329897
Test Scores
Accuracy:  0.6956521739130435


In [15]:
features = list(zip(X_train.columns, 100*(np.round(boost_model.feature_importances_, 4))))
features

[('blueFirstBlood', 6.45),
 ('blueFirstBloodAssist', 0.0),
 ('blueTotalGold', 3.6),
 ('blueTotalExperience', 3.7500002),
 ('blueTotalCS', 4.2),
 ('blueGoldDiff', 40.28),
 ('blueExperienceDiff', 8.91),
 ('blueCsdiffat10', 6.79),
 ('blueKills', 3.5300002),
 ('blueAssists', 3.4),
 ('blueDeaths', 2.96),
 ('redFirstBloodAssist', 0.0),
 ('redTotalGold', 4.7),
 ('redTotalExperience', 3.72),
 ('redTotalCS', 3.44),
 ('redAssists', 4.27)]

In [18]:
boost_model2 = XGBClassifier(random_state=57, objective="reg:logistic")

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 100]
}

gridsearch = GridSearchCV(boost_model, param_grid, cv=3, scoring="accuracy", n_jobs=1)
gridsearch.fit(X_train, y_train)
gridsearch.best_params_

KeyboardInterrupt: 

In [17]:
train_pred = gridsearch.predict(X_train)
test_pred = gridsearch.predict(X_test)

train_acc = accuracy_score(y_train, train_pred)

print("Training Scores")
print("Accuracy: ", train_acc)

test_acc = accuracy_score(y_test, test_pred)

print("Test Scores")
print("Accuracy: ", test_acc)

Training Scores
Accuracy:  0.7200386597938144
Test Scores
Accuracy:  0.6980676328502415
