In [45]:
# Pandas, numpy, matplotlib and seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib as joblib

# RiotWatcher
from riotwatcher import LolWatcher, ApiError

# OS tools
import os
import shutil
import json
import sys
import dill
import ipython_genutils

# Custom scripts
from extract_players_performance import extract_players_performance
from remove_perks import remove_perks
from cleaner import replace_champ_names_with_tags
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, cross_val_score, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from joblib import dump, load

In [46]:
champ_data = {}
with open('../champion_data/champion.json','r',encoding='utf-8') as f :
    champions = json.load(f)
    for champion in champions['data'] :
        champ_data[str.lower(champion)] = champions['data'][champion]
    



def add(old,new_df) :
    appended_df = pd.concat([old,new_df],ignore_index=True)
    return appended_df

def champ_name_replacer(champ_name) :
    return champ_data[str.lower(champ_name)]['tags'][0]

In [47]:
df = pd.read_csv('../ml_data/full_ml_data.csv')
df = df[:1000]
columns = list(df.columns)
champ_name_columns = [c for c in columns if c.startswith('team_comp') or c.startswith('dmg_carry') or c.startswith('obj_carry')]


for c in champ_name_columns :
    df[c] = df[c].apply(lambda x : champ_name_replacer(x))

# Drop all team composition related rows
df = df.drop(['dmg_carry_0', 'obj_carry_0', 'team_comp_0_champ_1', 'team_comp_0_champ_2', 'team_comp_0_champ_3', 'team_comp_0_champ_4', 'team_comp_0_champ_5', 'dmg_carry_1', 'obj_carry_1', 'team_comp_1_champ_1', 'team_comp_1_champ_2', 'team_comp_1_champ_3', 'team_comp_1_champ_4', 'team_comp_1_champ_5'], axis=1)

print(f"dataframe shape:\n{df.shape}\n")
print(f"dataframe sample:\n{df.head(1)}\n")

dataframe shape:
(1000, 37)

dataframe sample:
   total_gold_earned_0  total_gold_spent_0  total_baron_kills_0  \
0               102871              112435                    3   

   total_dragon_kills_0  total_inhibitor_kils_0  total_kills_0  \
0                     3                       4             50   

   total_deaths_0  total_damage_dealt_to_champions_0  \
0              67                             251493   

   total_damage_dealt_to_objectives_0  total_damage_taken_0  ...  \
0                              114384                277343  ...   

   average_champion_experience_1  gameLengthMin  dmg_to_champs_winner  \
0                          26031              0                     0   

   dmg_to_obj_winner  vision_winner  cs_winner  champ_experience_winner  \
0                  0              0          1                        1   

   wards_placed_winner  gold_spender_winner  final_match_winner  
0                    1                    0                   1  

[1 r

In [48]:
data = df.drop("final_match_winner",axis=1)
labels = df["final_match_winner"].copy()

X_train,X_test,y_train,y_test= train_test_split(data,labels,test_size=0.33,random_state=42)

print('Shape of training data and labels :',X_train.shape,y_train.shape)
print('Shape of test data and labels :',X_test.shape,y_test.shape)

Shape of training data and labels : (670, 36) (670,)
Shape of test data and labels : (330, 36) (330,)


In [49]:
missing_value_row =  data[data.isnull().any(axis=1)].head()
print(f'Rows with missing values:\n {missing_value_row}')
print(f'Missing value row shape: {missing_value_row.shape}')

Rows with missing values:
 Empty DataFrame
Columns: [total_gold_earned_0, total_gold_spent_0, total_baron_kills_0, total_dragon_kills_0, total_inhibitor_kils_0, total_kills_0, total_deaths_0, total_damage_dealt_to_champions_0, total_damage_dealt_to_objectives_0, total_damage_taken_0, average_vision_score_0, total_wards_placed_0, average_creep_score_0, average_champion_experience_0, total_gold_earned_1, total_gold_spent_1, total_baron_kills_1, total_dragon_kills_1, total_inhibitor_kils_1, total_kills_1, total_deaths_1, total_damage_dealt_to_champions_1, total_damage_dealt_to_objectives_1, total_damage_taken_1, average_vision_score_1, total_wards_placed_1, average_creep_score_1, average_champion_experience_1, gameLengthMin, dmg_to_champs_winner, dmg_to_obj_winner, vision_winner, cs_winner, champ_experience_winner, wards_placed_winner, gold_spender_winner]
Index: []

[0 rows x 36 columns]
Missing value row shape: (0, 36)


In [50]:
print('Training data shape (with dummy) :',X_train.shape)
print('Columns of training data :',X_train.columns)

std = StandardScaler()
X_train_prepared = std.fit_transform(X_train)
size = len(X_train_prepared)
X_train_prepared = X_train_prepared[:size]
print('Training data shape (without dummy) :',X_train_prepared.shape)

Training data shape (with dummy) : (670, 36)
Columns of training data : Index(['total_gold_earned_0', 'total_gold_spent_0', 'total_baron_kills_0',
       'total_dragon_kills_0', 'total_inhibitor_kils_0', 'total_kills_0',
       'total_deaths_0', 'total_damage_dealt_to_champions_0',
       'total_damage_dealt_to_objectives_0', 'total_damage_taken_0',
       'average_vision_score_0', 'total_wards_placed_0',
       'average_creep_score_0', 'average_champion_experience_0',
       'total_gold_earned_1', 'total_gold_spent_1', 'total_baron_kills_1',
       'total_dragon_kills_1', 'total_inhibitor_kils_1', 'total_kills_1',
       'total_deaths_1', 'total_damage_dealt_to_champions_1',
       'total_damage_dealt_to_objectives_1', 'total_damage_taken_1',
       'average_vision_score_1', 'total_wards_placed_1',
       'average_creep_score_1', 'average_champion_experience_1',
       'gameLengthMin', 'dmg_to_champs_winner', 'dmg_to_obj_winner',
       'vision_winner', 'cs_winner', 'champ_experience_

In [51]:
# create the following four different models with their default hyperparameter values to be trained using the preprocessed data
# Support Vector Machine
svm_model = SVC()

# Decision Trees
dt_model = DecisionTreeClassifier()

# Random Forests
rf_model = RandomForestClassifier()

# Naive Bayes
nb_model = GaussianNB()

In [52]:
# use sklearn GridSearchCV to train selected model with hyperparameter tuning
# parameters for SVC:
    # C -> e.g., 10, 100
    # gamma ->  e.g., 0.001, 0.0001
    # kernel -> 'rbf' or 'linear' 

svm_params = [
    {'C':[10,100],'gamma':[0.001,0.0001],'kernel':['rbf','linear']}
]

# parameters for DecisionTreeClassifier: 
    # max_depth ->  e.g., 3, 4
    # min_samples_split -> 5, 10
    # min_samples_leaf -> 10, 20
dt_params = [
    {'max_depth':[3,4],'min_samples_split':[5,10],'min_samples_leaf':[10,20]}
]

# parameters for RandomForestClassifier: 
    # n_estimators -> 100, 200
    # max_depth -> 3, 5
    # bootstrap -> True, False
rf_params = [
    {'n_estimators':[100,200],'max_depth':[3,5],'bootstrap':[True,False]}
]

nb_params = [{}]
# initialize gridsearch with the required parameters, including the following scoring methods and refit='bal_accuracy' (2)
scoring = {"accuracy": "accuracy", "bal_accuracy": "balanced_accuracy", "F1_macro": "f1_macro"}
grid_search_svc = GridSearchCV(svm_model,svm_params,scoring=scoring,cv=5,refit='bal_accuracy' ,return_train_score=True)
grid_search_dt = GridSearchCV(dt_model,dt_params,scoring=scoring,cv=5,refit='bal_accuracy' ,return_train_score=True)
grid_search_rf = GridSearchCV(rf_model,rf_params,scoring=scoring,cv=5,refit='bal_accuracy' ,return_train_score=True)
grid_search_nb = GridSearchCV(nb_model,nb_params,scoring=scoring,cv=5,refit='bal_accuracy' ,return_train_score=True)

# fit the training data (0.5)
grid_search_svc.fit(X_train_prepared,y_train)
grid_search_dt.fit(X_train_prepared,y_train)
grid_search_rf.fit(X_train_prepared,y_train)
grid_search_nb.fit(X_train_prepared,y_train)

# print the best parameters (0.5)
print(f'SVC best params:\n{grid_search_svc.best_params_}')
print(f'Decision Tree best params:\n{grid_search_dt.best_params_}')
print(f'Random Forest best params:\n{grid_search_rf.best_params_}')
print(f'Naive Bayes best params:\n{grid_search_nb.best_params_}')

# print the best estimator (0.5)
print(f'SVC best estimator:\n{grid_search_svc.best_estimator_}')
print(f'Decision Tree best estimator:\n{grid_search_dt.best_estimator_}')
print(f'Random Forest best estimator:\n{grid_search_rf.best_estimator_}')
print(f'Naive Bayes best estimator:\n{grid_search_nb.best_estimator_}')

# print the best score from trained GridSearchCV model (0.5)
print(f'SVC best score:\n{grid_search_svc.best_score_}')
print(f'Decision Tree best score:\n{grid_search_dt.best_score_}')
print(f'Random Forest best score:\n{grid_search_rf.best_score_}')
print(f'Naive Bayes best score:\n{grid_search_nb.best_score_}')

# Save session to "notebook_env.db"
dill.dump_session("no_teamcomp_notebook_env.db")

SVC best params:
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Decision Tree best params:
{'max_depth': 3, 'min_samples_leaf': 10, 'min_samples_split': 5}
Random Forest best params:
{'bootstrap': False, 'max_depth': 5, 'n_estimators': 100}
Naive Bayes best params:
{}
SVC best estimator:
SVC(C=10, gamma=0.001)
Decision Tree best estimator:
DecisionTreeClassifier(max_depth=3, min_samples_leaf=10, min_samples_split=5)
Random Forest best estimator:
RandomForestClassifier(bootstrap=False, max_depth=5)
Naive Bayes best estimator:
GaussianNB()
SVC best score:
0.985109873076625
Decision Tree best score:
0.9700160666273197
Random Forest best score:
0.9864699622628011
Naive Bayes best score:
0.9653176065196526


In [53]:
# Prepare X_test dataset based on previous method for X_train
print('Training data shape (with dummy) :',X_test.shape)
print('Columns of training data :',X_test.columns)


X_test_prepared = std.transform(X_test)
print('Testing data shape (without dummy) :',X_test_prepared.shape)

Training data shape (with dummy) : (330, 36)
Columns of training data : Index(['total_gold_earned_0', 'total_gold_spent_0', 'total_baron_kills_0',
       'total_dragon_kills_0', 'total_inhibitor_kils_0', 'total_kills_0',
       'total_deaths_0', 'total_damage_dealt_to_champions_0',
       'total_damage_dealt_to_objectives_0', 'total_damage_taken_0',
       'average_vision_score_0', 'total_wards_placed_0',
       'average_creep_score_0', 'average_champion_experience_0',
       'total_gold_earned_1', 'total_gold_spent_1', 'total_baron_kills_1',
       'total_dragon_kills_1', 'total_inhibitor_kils_1', 'total_kills_1',
       'total_deaths_1', 'total_damage_dealt_to_champions_1',
       'total_damage_dealt_to_objectives_1', 'total_damage_taken_1',
       'average_vision_score_1', 'total_wards_placed_1',
       'average_creep_score_1', 'average_champion_experience_1',
       'gameLengthMin', 'dmg_to_champs_winner', 'dmg_to_obj_winner',
       'vision_winner', 'cs_winner', 'champ_experience_

In [54]:
# Testing models with test data

# Using the following existing variables:
# X_test_prepared: test data
# y_test: test labels
# Models:
# grid_search_svc
# grid_search_dt
# grid_search_rf
# grid_search_nb

# Predict using models' best estimators
prediction_svc = grid_search_svc.best_estimator_.predict(X_test_prepared)
prediction_dt = grid_search_dt.best_estimator_.predict(X_test_prepared)
prediction_rf = grid_search_rf.best_estimator_.predict(X_test_prepared)
prediction_nb = grid_search_nb.best_estimator_.predict(X_test_prepared)

In [55]:
# Create and print classification reports for all models

# SVC
conf_matrix_svc = confusion_matrix(y_test, prediction_svc)
class_report_svc = classification_report(y_test, prediction_svc, output_dict=True)
print(f'Confusion matrix: SVC:\n{conf_matrix_svc}\n')

# Decision Tree
conf_matrix_dt = confusion_matrix(y_test, prediction_dt)
class_report_dt = classification_report(y_test, prediction_dt, output_dict=True)
print(f'Confusion matrix: DT:\n{conf_matrix_dt}\n')

# Random Forest
conf_matrix_rf = confusion_matrix(y_test, prediction_rf)
class_report_rf = classification_report(y_test, prediction_rf, output_dict=True)
print(f'Confusion matrix: RF:\n{conf_matrix_rf}\n')

# Naive Bayes
conf_matrix_nb = confusion_matrix(y_test, prediction_nb)
class_report_nb = classification_report(y_test, prediction_nb, output_dict=True)
print(f'Confusion matrix: NB:\n{conf_matrix_nb}\n')

Confusion matrix: SVC:
[[169   4]
 [  5 152]]

Confusion matrix: DT:
[[166   7]
 [  4 153]]

Confusion matrix: RF:
[[168   5]
 [  6 151]]

Confusion matrix: NB:
[[160  13]
 [  7 150]]



In [56]:
# Print our classification reports

# SVC
df_svc = pd.DataFrame(class_report_svc).transpose().drop(['0', '1'])
print(f"{df_svc}\n")

# DT
df_dt = pd.DataFrame(class_report_dt).transpose().drop(['0', '1'])
print(f"{df_dt}\n")

# RF
df_rf = pd.DataFrame(class_report_rf).transpose().drop(['0', '1'])
print(f"{df_rf}\n")

# NB
df_nb = pd.DataFrame(class_report_nb).transpose().drop(['0', '1'])
print(f"{df_nb}\n")

              precision    recall  f1-score     support
accuracy       0.972727  0.972727  0.972727    0.972727
macro avg      0.972812  0.972516  0.972655  330.000000
weighted avg   0.972737  0.972727  0.972723  330.000000

              precision    recall  f1-score     support
accuracy       0.966667  0.966667  0.966667    0.966667
macro avg      0.966360  0.967030  0.966615  330.000000
weighted avg   0.966850  0.966667  0.966679  330.000000

              precision    recall  f1-score     support
accuracy       0.966667  0.966667  0.966667    0.966667
macro avg      0.966733  0.966441  0.966578  330.000000
weighted avg   0.966674  0.966667  0.966661  330.000000

              precision    recall  f1-score     support
accuracy       0.939394  0.939394  0.939394    0.939394
macro avg      0.939165  0.940135  0.939338  330.000000
weighted avg   0.940082  0.939394  0.939427  330.000000



In [57]:
# Save session to "notebook_env.db"
dill.dump_session("no_teamcomp_notebook_env.db")

In [58]:
joblib.dump(grid_search_svc, '../models/model_noteam_comp_svc.pkl')
joblib.dump(grid_search_dt, '../models/model_noteam_comp_dt.pkl')
joblib.dump(grid_search_nb, '../models/model_noteam_comp_nb.pkl')
joblib.dump(grid_search_rf, '../models/model_noteam_comp_rf.pkl')
joblib.dump(std,'../models/standard_scaler.pkl')

['../models/standard_scaler.pkl']

In [59]:
model = joblib.load('../models/model_noteam_comp_svc.pkl')
scaler = joblib.load('../models/standard_scaler.pkl')
test = {
   "total_gold_earned_0":[10000],
   "total_gold_spent_0":[10000],
   "total_baron_kills_0":[2],
   "total_dragon_kills_0":[2],
   "total_inhibitor_kils_0":[2],
   "total_kills_0":[20],
   "total_deaths_0":[15],
   "total_damage_dealt_to_champions_0":[10000],
   "total_damage_dealt_to_objectives_0":[10000],
   "total_damage_taken_0":[10000],
   "average_vision_score_0":[35],
   "total_wards_placed_0":[30],
   "average_creep_score_0":[100],
   "average_champion_experience_0":[10000],
   "total_gold_earned_1":[7500],
   "total_gold_spent_1":[7500],
   "total_baron_kills_1":[0],
   "total_dragon_kills_1":[0],
   "total_inhibitor_kils_1":[0],
   "total_kills_1":[15],
   "total_deaths_1":[20],
   "total_damage_dealt_to_champions_1":[7500],
   "total_damage_dealt_to_objectives_1":[7500],
   "total_damage_taken_1":[7500],
   "average_vision_score_1":[25],
   "total_wards_placed_1":[25],
   "average_creep_score_1":[90],
   "average_champion_experience_1":[7500],
   "gameLengthMin":[25],
   "dmg_to_champs_winner":[1],
   "dmg_to_obj_winner":[1],
   "vision_winner":[1],
   "cs_winner":[1],
   "champ_experience_winner":[1],
   "wards_placed_winner":[1],
   "gold_spender_winner":[1],
} 
df = pd.DataFrame(test)
print(df.columns)
print(df.shape)
X_test_prepared = scaler.transform(df)
size = len(X_test_prepared)
X_test_prepared = X_test_prepared[:size]
prediction = model.predict(X_test_prepared)
print(prediction)

Index(['total_gold_earned_0', 'total_gold_spent_0', 'total_baron_kills_0',
       'total_dragon_kills_0', 'total_inhibitor_kils_0', 'total_kills_0',
       'total_deaths_0', 'total_damage_dealt_to_champions_0',
       'total_damage_dealt_to_objectives_0', 'total_damage_taken_0',
       'average_vision_score_0', 'total_wards_placed_0',
       'average_creep_score_0', 'average_champion_experience_0',
       'total_gold_earned_1', 'total_gold_spent_1', 'total_baron_kills_1',
       'total_dragon_kills_1', 'total_inhibitor_kils_1', 'total_kills_1',
       'total_deaths_1', 'total_damage_dealt_to_champions_1',
       'total_damage_dealt_to_objectives_1', 'total_damage_taken_1',
       'average_vision_score_1', 'total_wards_placed_1',
       'average_creep_score_1', 'average_champion_experience_1',
       'gameLengthMin', 'dmg_to_champs_winner', 'dmg_to_obj_winner',
       'vision_winner', 'cs_winner', 'champ_experience_winner',
       'wards_placed_winner', 'gold_spender_winner'],
      dty