In [1]:
import pandas as pd
import numpy as np

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, plot_confusion_matrix
from sklearn.linear_model import LogisticRegression


# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [2]:
# Read the dataset; the path is relative to the notebook's working directory.
csv_path = "../../../archive/blazed_diamond_ranked_10min.csv"
df = pd.read_csv(csv_path)

# Preview the first five rows.
df.head()

Unnamed: 0,gameId,blueWins,blueWardsDestroyed,blueFirstBlood,blueKills,blueAssists,blueDragons,blueHeralds,blueTowersDestroyed,blueAvgLevel,blueTotalExperience,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redWardsDestroyed,redKills,redAssists,redDragons,redHeralds,redTowersDestroyed,redAvgLevel,redTotalExperience,redTotalJungleMinionsKilled,redCSPerMin,redGoldPerMin,blueVisionScore,redVisionScore
0,4519157822,0,2,1,9,11,0,0,0,6.6,17039,36,643,-8,19.5,1721.0,6,6,8,0,0,0,6.8,17047,55,19.7,1656.7,28,15
1,4523371949,0,1,0,5,5,0,0,0,6.6,16265,43,-2908,-1173,17.4,1471.2,1,5,2,1,1,1,6.8,17438,52,24.0,1762.0,12,12
2,4521474530,0,0,0,7,4,1,0,0,6.4,16221,46,-1172,-1033,18.6,1611.3,3,11,14,0,0,0,6.8,17254,28,20.3,1728.5,15,15
3,4524384067,0,1,0,4,5,0,1,0,7.0,17954,55,-1321,-7,20.1,1515.7,2,5,10,0,0,0,7.0,17961,47,23.5,1647.8,43,15
4,4436033771,0,4,0,6,6,0,0,0,7.0,18543,57,-1004,230,21.0,1640.0,2,6,7,1,0,0,7.0,18313,67,22.5,1740.4,75,17


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,gameId,blueWins,blueWardsDestroyed,blueFirstBlood,blueKills,blueAssists,blueDragons,blueHeralds,blueTowersDestroyed,blueAvgLevel,blueTotalExperience,blueTotalJungleMinionsKilled,blueGoldDiff,blueExperienceDiff,blueCSPerMin,blueGoldPerMin,redWardsDestroyed,redKills,redAssists,redDragons,redHeralds,redTowersDestroyed,redAvgLevel,redTotalExperience,redTotalJungleMinionsKilled,redCSPerMin,redGoldPerMin,blueVisionScore,redVisionScore
count,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0,9879.0
mean,4500084000.0,0.499038,2.824881,0.504808,6.183925,6.645106,0.36198,0.187974,0.051422,6.916004,17928.110133,50.509667,14.414111,-33.620306,21.669956,1650.345551,2.72315,6.137666,6.662112,0.413098,0.160036,0.043021,6.925316,17961.730438,51.313088,21.734923,1648.90414,22.288288,22.367952
std,27573280.0,0.500024,2.174998,0.500002,3.011028,4.06452,0.480597,0.390712,0.244369,0.305146,1200.523764,9.898282,2453.349179,1920.370438,2.185844,153.544664,2.138356,2.933818,4.060612,0.492415,0.366658,0.2169,0.305311,1198.583912,10.027885,2.191167,149.088841,18.019177,18.457427
min,4295358000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.6,10098.0,0.0,-10830.0,-9333.0,9.0,1073.0,0.0,0.0,0.0,0.0,0.0,0.0,4.8,10465.0,4.0,10.7,1121.2,5.0,6.0
25%,4483301000.0,0.0,1.0,0.0,4.0,4.0,0.0,0.0,0.0,6.8,17168.0,44.0,-1585.5,-1290.5,20.2,1541.55,1.0,4.0,4.0,0.0,0.0,0.0,6.8,17209.5,44.0,20.3,1542.75,14.0,14.0
50%,4510920000.0,0.0,3.0,1.0,6.0,6.0,0.0,0.0,0.0,7.0,17951.0,50.0,14.0,-28.0,21.8,1639.8,2.0,6.0,6.0,0.0,0.0,0.0,7.0,17974.0,51.0,21.8,1637.8,16.0,16.0
75%,4521733000.0,1.0,4.0,1.0,8.0,9.0,1.0,0.0,0.0,7.2,18724.0,56.0,1596.0,1212.0,23.2,1745.9,4.0,8.0,9.0,1.0,0.0,0.0,7.2,18764.5,57.0,23.3,1741.85,20.0,20.0
max,4527991000.0,1.0,27.0,1.0,22.0,29.0,1.0,1.0,4.0,8.0,22224.0,92.0,11467.0,8348.0,28.3,2370.1,24.0,22.0,28.0,1.0,1.0,2.0,8.2,22269.0,92.0,28.9,2273.2,250.0,276.0


In [4]:
# Print each column's non-null count and dtype to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9879 entries, 0 to 9878
Data columns (total 29 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   gameId                        9879 non-null   int64  
 1   blueWins                      9879 non-null   int64  
 2   blueWardsDestroyed            9879 non-null   int64  
 3   blueFirstBlood                9879 non-null   int64  
 4   blueKills                     9879 non-null   int64  
 5   blueAssists                   9879 non-null   int64  
 6   blueDragons                   9879 non-null   int64  
 7   blueHeralds                   9879 non-null   int64  
 8   blueTowersDestroyed           9879 non-null   int64  
 9   blueAvgLevel                  9879 non-null   float64
 10  blueTotalExperience           9879 non-null   int64  
 11  blueTotalJungleMinionsKilled  9879 non-null   int64  
 12  blueGoldDiff                  9879 non-null   int64  
 13  blu

In [5]:
# Count the rows per value of blueWins (the column used as the target below)
# to check how balanced the classes are.
df["blueWins"].value_counts()

0    4949
1    4930
Name: blueWins, dtype: int64

In [None]:
# Separate the target from the predictors. gameId is dropped together with
# the target so that it is not used as a feature.
drop_col = ["blueWins", "gameId"]
X = df.drop(columns=drop_col)
y = df["blueWins"]

# Hold out a test split (train_test_split's default proportions), seeded so
# the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=57)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
ss = StandardScaler()
ss.fit(X_train)
X_train_ss = ss.transform(X_train)
X_test_ss = ss.transform(X_test)

# Gradient-boosted classifier for the binary blueWins target.
# "binary:logistic" is XGBoost's binary-classification objective; the
# previous "reg:logistic" is the regression-task name for the same loss.
boost_model = XGBClassifier(random_state=57, objective="binary:logistic")

# Exhaustive grid: 4 * 4 * 6 * 4 * 3 = 1152 candidates, each fitted on 3 folds.
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 4, 5],
    'min_child_weight': [1, 2, 3, 4, 5, 6],
    'subsample': [0.4, 0.5, 0.6, 0.7],
    'n_estimators': [30, 50, 100]
}

# NOTE(review): X_train_ss was standardized with statistics from the whole
# training split before cross-validation, so every validation fold has
# contributed to the scaler. Putting the scaler and model in a Pipeline and
# searching over that would remove this leak, but requires the later cells
# to pass unscaled data to `gridsearch`.
gridsearch = GridSearchCV(boost_model, param_grid, cv=3, scoring="accuracy", n_jobs=1)
gridsearch.fit(X_train_ss, y_train)

# Hyper-parameter combination with the best mean cross-validated accuracy.
best_parameters = gridsearch.best_params_

print("Best Parameters: ")
print(best_parameters)

In [None]:
# Predict both splits with the fitted grid search (standardized features in,
# class labels out).
train_pred, test_pred = (
    gridsearch.predict(features) for features in (X_train_ss, X_test_ss)
)

# Accuracy and F1 for each split.
train_acc, train_f1 = accuracy_score(y_train, train_pred), f1_score(y_train, train_pred)
test_acc, test_f1 = accuracy_score(y_test, test_pred), f1_score(y_test, test_pred)

# Report training scores first, then test scores.
for heading, acc_value, f1_value in (
    ("Training Scores", train_acc, train_f1),
    ("Test Scores", test_acc, test_f1),
):
    print(heading)
    print("Accuracy: ", acc_value)
    print("F1: ", f1_value)

In [None]:
# Baseline: logistic regression fitted on the standardized features
# (X_train_ss / X_test_ss), the same inputs the XGBoost grid search uses.
# The default L2 penalty and the solver's convergence both depend on feature
# scale, so fitting on the raw columns (whose magnitudes range from 0/1 flags
# to five-digit totals) penalizes coefficients unevenly and converges slowly.
logreg = LogisticRegression(max_iter=5000).fit(X_train_ss, y_train)

logreg_train = logreg.predict(X_train_ss)
logreg_test = logreg.predict(X_test_ss)

# Training-split metrics.
acc = accuracy_score(y_train, logreg_train)
f1 = f1_score(y_train, logreg_train)

print("Training Scores")
print("Accuracy: ", acc)
print("F1: ", f1)

# Test-split metrics (acc / f1 are reused, so they end up holding the test values).
acc = accuracy_score(y_test, logreg_test)
f1 = f1_score(y_test, logreg_test)

print("Testing Scores")
print("Accuracy: ", acc)
print("F1: ", f1)