In [1]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, plot_confusion_matrix
from sklearn.linear_model import LogisticRegression


pd.set_option('display.max_columns', None)

In [2]:
df = pd.read_csv("../../../archive/with_rates.csv")

df.head()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueAssists,blueDragons,blueHeralds,blueTowersDestroyed,blueAvgLevel,blueTotalExperience,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redWardsPlaced,redWardsDestroyed,redKills,redAssists,redDragons,redHeralds,redTowersDestroyed,redAvgLevel,redTotalExperience,redTotalJungleMinionsKilled,redCSPerMin,redGoldPerMin,champions,blueChamps,redChamps,bluePredWinrate,redPredWinrate,blueAvgPickrate,redAvgPickrate
0,4519157822,0,28,2,1,9,11,0,0,0,6.6,17039,36,643,-8,19.5,1721.0,15,6,6,8,0,0,0,6.8,17047,55,19.7,1656.7,"[111, 91, 236, 76, 54, 26, 235, 82, 421, 69]","[111, 91, 236, 76, 54]","[26, 235, 82, 421, 69]",0.489394,0.509859,0.08679,0.103614
1,4523371949,0,12,1,0,5,5,0,0,0,6.6,16265,43,-2908,-1173,17.4,1471.2,12,1,5,2,1,1,1,6.8,17438,52,24.0,1762.0,"[4, 350, 75, 145, 875, 110, 59, 53, 134, 39]","[4, 350, 75, 145, 875]","[110, 59, 53, 134, 39]",0.492787,0.492763,0.124203,0.121591
2,4521474530,0,15,0,0,7,4,1,0,0,6.4,16221,46,-1172,-1033,18.6,1611.3,15,3,11,14,0,0,0,6.8,17254,28,20.3,1728.5,"[421, 84, 236, 43, 45, 145, 154, 92, 134, 223]","[421, 84, 236, 43, 45]","[145, 154, 92, 134, 223]",0.489751,0.49858,0.086608,0.112683
3,4524384067,0,43,1,0,4,5,0,1,0,7.0,17954,55,-1321,-7,20.1,1515.7,15,2,5,10,0,0,0,7.0,17961,47,23.5,1647.8,"[64, 134, 516, 432, 429, 110, 412, 58, 245, 238]","[64, 134, 516, 432, 429]","[110, 412, 58, 245, 238]",0.501475,0.508625,0.162425,0.144974
4,4436033771,0,75,4,0,6,6,0,0,0,7.0,18543,57,-1004,230,21.0,1640.0,17,2,6,7,1,0,0,7.0,18313,67,22.5,1740.4,"[101, 266, 245, 143, 21, 51, 86, 69, 235, 32]","[101, 266, 245, 143, 21]","[51, 86, 69, 235, 32]",0.494971,0.511632,0.12228,0.077761


In [3]:
df.describe()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueAssists,blueDragons,blueHeralds,blueTowersDestroyed,blueAvgLevel,blueTotalExperience,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redWardsPlaced,redWardsDestroyed,redKills,redAssists,redDragons,redHeralds,redTowersDestroyed,redAvgLevel,redTotalExperience,redTotalJungleMinionsKilled,redCSPerMin,redGoldPerMin,bluePredWinrate,redPredWinrate,blueAvgPickrate,redAvgPickrate
count,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0
mean,4500084000.0,0.499038,22.288288,2.824881,0.504808,6.183925,6.645106,0.36198,0.187974,0.051422,6.916004,17928.110133,50.509667,14.414111,-33.620306,21.669956,1650.345551,22.367952,2.72315,6.137666,6.662112,0.413098,0.160036,0.043021,6.925316,17961.730438,51.313088,21.734923,1648.90414,0.50008,0.49992,0.110263,0.110744
std,27573280.0,0.500024,18.019177,2.174998,0.500002,3.011028,4.06452,0.480597,0.390712,0.244369,0.305146,1200.523764,9.898282,2453.349179,1920.370438,2.185844,153.544664,18.457427,2.138356,2.933818,4.060612,0.492415,0.366658,0.2169,0.305311,1198.583912,10.027885,2.191167,149.088841,0.011808,0.011932,0.028147,0.027866
min,4295358000.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.6,10098.0,0.0,-10830.0,-9333.0,9.0,1073.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,4.8,10465.0,4.0,10.7,1121.2,0.442163,0.447961,0.02893,0.029963
25%,4483301000.0,0.0,14.0,1.0,0.0,4.0,4.0,0.0,0.0,0.0,6.8,17168.0,44.0,-1585.5,-1290.5,20.2,1541.55,14.0,1.0,4.0,4.0,0.0,0.0,0.0,6.8,17209.5,44.0,20.3,1542.75,0.492353,0.492279,0.089594,0.090728
50%,4510920000.0,0.0,16.0,3.0,1.0,6.0,6.0,0.0,0.0,0.0,7.0,17951.0,50.0,14.0,-28.0,21.8,1639.8,16.0,2.0,6.0,6.0,0.0,0.0,0.0,7.0,17974.0,51.0,21.8,1637.8,0.500203,0.500119,0.109667,0.110355
75%,4521733000.0,1.0,20.0,4.0,1.0,8.0,9.0,1.0,0.0,0.0,7.2,18724.0,56.0,1596.0,1212.0,23.2,1745.9,20.0,4.0,8.0,9.0,1.0,0.0,0.0,7.2,18764.5,57.0,23.3,1741.85,0.508213,0.508051,0.129709,0.129446
max,4527991000.0,1.0,250.0,27.0,1.0,22.0,29.0,1.0,1.0,4.0,8.0,22224.0,92.0,11467.0,8348.0,28.3,2370.1,276.0,24.0,22.0,28.0,1.0,1.0,2.0,8.2,22269.0,92.0,28.9,2273.2,0.542263,0.54656,0.231623,0.213686


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9879 entries, 0 to 9878
Data columns (total 36 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   gameId                        9879 non-null   int64  
 1   blueWins                      9879 non-null   int64  
 2   blueWardsPlaced               9879 non-null   int64  
 3   blueWardsDestroyed            9879 non-null   int64  
 4   blueFirstBlood                9879 non-null   int64  
 5   blueKills                     9879 non-null   int64  
 6   blueAssists                   9879 non-null   int64  
 7   blueDragons                   9879 non-null   int64  
 8   blueHeralds                   9879 non-null   int64  
 9   blueTowersDestroyed           9879 non-null   int64  
 10  blueAvgLevel                  9879 non-null   float64
 11  blueTotalExperience           9879 non-null   int64  
 12  blueTotalJungleMinionsKilled  9879 non-null   int64  
 13  blu

In [5]:
df["blueWins"].value_counts()

0    4949
1    4930
Name: blueWins, dtype: int64

In [11]:
drop_col = ["blueWins", "gameId", "champions", "blueChamps", "redChamps"]
y = df["blueWins"]
X = df.drop(columns=drop_col, axis=1)

ss = StandardScaler()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=57)

X_train_ss = ss.fit_transform(X_train)
X_test_ss = ss.transform(X_test)

boost_model = XGBClassifier(random_state=57, objective="reg:logistic")

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 100]
}

gridsearch = GridSearchCV(boost_model, param_grid, cv=3, scoring="accuracy", n_jobs=1)
gridsearch.fit(X_train, y_train)
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
best_parameters = gridsearch.best_params_

print("Best Parameters: ")
print(best_parameters)

Best Parameters: 
{'learning_rate': 0.05, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 100, 'subsample': 0.6}


In [12]:
train_pred = gridsearch.predict(X_train)
test_pred = gridsearch.predict(X_test)

train_acc = accuracy_score(y_train, train_pred)
train_f1 = f1_score(y_train, train_pred)

print("Training Scores")
print("Accuracy: ", train_acc)
print("F1: ", train_f1)

test_acc = accuracy_score(y_test, test_pred)
test_f1 = f1_score(y_test, test_pred)

print("Test Scores")
print("Accuracy: ", test_acc)
print("F1: ", test_f1)

Training Scores
Accuracy:  0.7534080172762856
F1:  0.7494858083093376
Test Scores
Accuracy:  0.7388663967611336
F1:  0.7366271947733769


In [18]:
logreg = LogisticRegression().fit(X_train, y_train)

logreg_train = logreg.predict(X_train)
logreg_test = logreg.predict(X_test)

acc = accuracy_score(y_train, logreg_train)
f1 = f1_score(y_train, logreg_train)

print("Training Scores")
print("Accuracy: ", acc)
print("F1: ", f1)

acc = accuracy_score(y_test, logreg_test)
f1 = f1_score(y_test, logreg_test)

print("Testing Scores")
print("Accuracy: ", acc)
print("F1: ", f1)

Training Scores
Accuracy:  0.730597921446889
F1:  0.7281394715336421
Testing Scores
Accuracy:  0.7230769230769231
F1:  0.7237479806138933


In [19]:
# learning_rate': 0.05, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 100, 'subsample': 0.6

boost_model2 = XGBClassifier(random_state=57, objective="reg:logistic",
                            learning_rate= 0.05, max_depth = 3, min_child_weight = 5,
                            n_estimators = 100, subsample = 0.6)

boost_model2.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=3,
              min_child_weight=5, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='reg:logistic', random_state=57, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=0.6,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [21]:
boost_model2.feature_importances_

array([0.01580316, 0.01697099, 0.01612252, 0.03066567, 0.01527598,
       0.03873337, 0.02960031, 0.        , 0.03121008, 0.02297754,
       0.02422382, 0.2648157 , 0.11158644, 0.01387334, 0.02722628,
       0.01310936, 0.01534835, 0.01443308, 0.01514474, 0.03649735,
       0.0284653 , 0.        , 0.02035023, 0.02412195, 0.01736547,
       0.0151382 , 0.02873145, 0.03485028, 0.03286865, 0.02136004,
       0.0231303 ], dtype=float32)

In [22]:
X_train.columns

Index(['blueWardsPlaced', 'blueWardsDestroyed', 'blueFirstBlood', 'blueKills',
       'blueAssists', 'blueDragons', 'blueHeralds', 'blueTowersDestroyed',
       'blueAvgLevel', 'blueTotalExperience', 'blueTotalJungleMinionsKilled',
       'blueGoldDiff', 'blueExperienceDiff', 'blueCSPerMin', 'blueGoldPerMin',
       'redWardsPlaced', 'redWardsDestroyed', 'redKills', 'redAssists',
       'redDragons', 'redHeralds', 'redTowersDestroyed', 'redAvgLevel',
       'redTotalExperience', 'redTotalJungleMinionsKilled', 'redCSPerMin',
       'redGoldPerMin', 'bluePredWinrate', 'redPredWinrate', 'blueAvgPickrate',
       'redAvgPickrate'],
      dtype='object')

In [30]:
features = list(zip(X_train.columns, 100*(np.round(boost_model2.feature_importances_, 4))))
features

[('blueWardsPlaced', 1.5799999),
 ('blueWardsDestroyed', 1.7),
 ('blueFirstBlood', 1.61),
 ('blueKills', 3.07),
 ('blueAssists', 1.53),
 ('blueDragons', 3.87),
 ('blueHeralds', 2.96),
 ('blueTowersDestroyed', 0.0),
 ('blueAvgLevel', 3.12),
 ('blueTotalExperience', 2.3),
 ('blueTotalJungleMinionsKilled', 2.42),
 ('blueGoldDiff', 26.480001),
 ('blueExperienceDiff', 11.16),
 ('blueCSPerMin', 1.39),
 ('blueGoldPerMin', 2.72),
 ('redWardsPlaced', 1.3100001),
 ('redWardsDestroyed', 1.53),
 ('redKills', 1.4399999),
 ('redAssists', 1.51),
 ('redDragons', 3.6499999),
 ('redHeralds', 2.85),
 ('redTowersDestroyed', 0.0),
 ('redAvgLevel', 2.04),
 ('redTotalExperience', 2.41),
 ('redTotalJungleMinionsKilled', 1.74),
 ('redCSPerMin', 1.51),
 ('redGoldPerMin', 2.87),
 ('bluePredWinrate', 3.4899998),
 ('redPredWinrate', 3.2900002),
 ('blueAvgPickrate', 2.14),
 ('redAvgPickrate', 2.31)]

In [31]:
nonimportant = ["blueTowersDestroyed", "redTowersDestroyed", "blueWardsPlaced", "blueWardsDestroyed",
               "blueKills", "blueAssists", "blueCSPerMin", "redWardsPlaced", "redWardsDestroyed",
               "redKills", "redAssists", "redCSPerMin"]

X_non = X.drop(columns=nonimportant, axis=1)

X_train2, X_test2, y_train2, y_test = train_test_split(X_non, y, random_state=57)

boost_model_non = XGBClassifier(random_state=57, objective="reg:logistic")

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 100]
}

gridsearch_non = GridSearchCV(boost_model, param_grid, cv=3, scoring="accuracy", n_jobs=1)
gridsearch_non.fit(X_train2, y_train2)
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
best_parameters = gridsearch.best_params_

print("Best Parameters: ")
print(best_parameters)

Best Parameters: 
{'learning_rate': 0.05, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 100, 'subsample': 0.6}


In [34]:
train_pred = gridsearch_non.predict(X_train2)
test_pred = gridsearch_non.predict(X_test2)

train_acc = accuracy_score(y_train2, train_pred)
train_f1 = f1_score(y_train2, train_pred)

print("Training Scores")
print("Accuracy: ", train_acc)
print("F1: ", train_f1)

test_acc = accuracy_score(y_test, test_pred)
test_f1 = f1_score(y_test, test_pred)

print("Test Scores")
print("Accuracy: ", test_acc)
print("F1: ", test_f1)

Training Scores
Accuracy:  0.7443649615332704
F1:  0.7395489548954896
Test Scores
Accuracy:  0.7412955465587044
F1:  0.73992673992674


In [36]:
boost_model_non = XGBClassifier(random_state=57, objective="reg:logistic",
                            learning_rate= 0.05, max_depth = 3, min_child_weight = 5,
                            n_estimators = 100, subsample = 0.6)
boost_model_non.fit(X_train2, y_train2)

features = list(zip(X_train2.columns, 100*(np.round(boost_model_non.feature_importances_, 4))))
features

[('blueFirstBlood', 1.25),
 ('blueDragons', 4.9),
 ('blueHeralds', 2.96),
 ('blueAvgLevel', 4.19),
 ('blueTotalExperience', 2.6499999),
 ('blueTotalJungleMinionsKilled', 2.8799999),
 ('blueGoldDiff', 32.41),
 ('blueExperienceDiff', 13.500001),
 ('blueGoldPerMin', 3.05),
 ('redDragons', 4.5),
 ('redHeralds', 3.6200001),
 ('redAvgLevel', 1.73),
 ('redTotalExperience', 2.76),
 ('redTotalJungleMinionsKilled', 1.9),
 ('redGoldPerMin', 3.81),
 ('bluePredWinrate', 4.4700003),
 ('redPredWinrate', 3.99),
 ('blueAvgPickrate', 2.6399999),
 ('redAvgPickrate', 2.78)]

In [38]:
new_df = pd.read_csv("../../../archive/with_rates_and_spec_gods.csv")
new_df.head()

Unnamed: 0,gameId,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueAssists,blueDragons,blueHeralds,blueTowersDestroyed,blueAvgLevel,blueTotalExperience,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redWardsPlaced,redWardsDestroyed,redKills,redAssists,redDragons,redHeralds,redTowersDestroyed,redAvgLevel,redTotalExperience,redTotalJungleMinionsKilled,redCSPerMin,redGoldPerMin,blueChamps,redChamps,bluePredWinrate,redPredWinrate,blueAvgPickrate,redAvgPickrate,blueHas62,redHas62,blueHas111,redHas111,blueHas145,redHas145,blueHas39,redHas39,blueHas517,redHas517
0,4519157822,0,28,2,1,9,11,0,0,0,6.6,17039,36,643,-8,19.5,1721.0,15,6,6,8,0,0,0,6.8,17047,55,19.7,1656.7,"[111, 91, 236, 76, 54]","[26, 235, 82, 421, 69]",0.489394,0.509859,0.08679,0.103614,False,False,True,False,False,False,False,False,False,False
1,4523371949,0,12,1,0,5,5,0,0,0,6.6,16265,43,-2908,-1173,17.4,1471.2,12,1,5,2,1,1,1,6.8,17438,52,24.0,1762.0,"[4, 350, 75, 145, 875]","[110, 59, 53, 134, 39]",0.492787,0.492763,0.124203,0.121591,False,False,False,False,True,False,False,True,False,False
2,4521474530,0,15,0,0,7,4,1,0,0,6.4,16221,46,-1172,-1033,18.6,1611.3,15,3,11,14,0,0,0,6.8,17254,28,20.3,1728.5,"[421, 84, 236, 43, 45]","[145, 154, 92, 134, 223]",0.489751,0.49858,0.086608,0.112683,False,False,False,False,False,True,False,False,False,False
3,4524384067,0,43,1,0,4,5,0,1,0,7.0,17954,55,-1321,-7,20.1,1515.7,15,2,5,10,0,0,0,7.0,17961,47,23.5,1647.8,"[64, 134, 516, 432, 429]","[110, 412, 58, 245, 238]",0.501475,0.508625,0.162425,0.144974,False,False,False,False,False,False,False,False,False,False
4,4436033771,0,75,4,0,6,6,0,0,0,7.0,18543,57,-1004,230,21.0,1640.0,17,2,6,7,1,0,0,7.0,18313,67,22.5,1740.4,"[101, 266, 245, 143, 21]","[51, 86, 69, 235, 32]",0.494971,0.511632,0.12228,0.077761,False,False,False,False,False,False,False,False,False,False


In [43]:
drop_col = ["blueWins", "gameId", "blueChamps", "redChamps"]
X_new = new_df.drop(drop_col, axis=1)
X_new = X_new.drop(nonimportant, axis=1)
y_new = new_df["blueWins"]

X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(X_new, y_new, random_state=57)

boost_model_new = XGBClassifier(random_state=57, objective="reg:logistic",
                            learning_rate= 0.05, max_depth = 3, min_child_weight = 5,
                            n_estimators = 100, subsample = 0.6)
boost_model_new.fit(X_train_new, y_train_new)



train_pred = boost_model_new.predict(X_train_new)
test_pred = boost_model_new.predict(X_test_new)

train_acc = accuracy_score(y_train_new, train_pred)
train_f1 = f1_score(y_train_new, train_pred)

print("Training Scores")
print("Accuracy: ", train_acc)
print("F1: ", train_f1)

test_acc = accuracy_score(y_test_new, test_pred)
test_f1 = f1_score(y_test_new, test_pred)

print("Test Scores")
print("Accuracy: ", test_acc)
print("F1: ", test_f1)

Training Scores
Accuracy:  0.755027669051154
F1:  0.7504468582428158
Test Scores
Accuracy:  0.7404858299595142
F1:  0.7378323108384457


In [44]:
features = list(zip(X_train_new.columns, 100*(np.round(boost_model_new.feature_importances_, 4))))
features

[('blueFirstBlood', 1.3199999),
 ('blueDragons', 3.74),
 ('blueHeralds', 1.8),
 ('blueAvgLevel', 2.83),
 ('blueTotalExperience', 2.3),
 ('blueTotalJungleMinionsKilled', 1.97),
 ('blueGoldDiff', 25.78),
 ('blueExperienceDiff', 10.31),
 ('blueGoldPerMin', 2.33),
 ('redDragons', 4.1499996),
 ('redHeralds', 3.52),
 ('redAvgLevel', 1.99),
 ('redTotalExperience', 2.34),
 ('redTotalJungleMinionsKilled', 1.23),
 ('redGoldPerMin', 2.8200002),
 ('bluePredWinrate', 3.39),
 ('redPredWinrate', 3.04),
 ('blueAvgPickrate', 2.04),
 ('redAvgPickrate', 2.19),
 ('blueHas62', 0.0),
 ('redHas62', 2.53),
 ('blueHas111', 2.38),
 ('redHas111', 2.17),
 ('blueHas145', 2.84),
 ('redHas145', 2.09),
 ('blueHas39', 2.87),
 ('redHas39', 3.34),
 ('blueHas517', 1.26),
 ('redHas517', 1.4499999)]