In [87]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [141]:
# Import data
# Rank -> coarse skill label. Ranks mapped to None are left out of the
# analysis entirely (they are filtered away before the lookup is applied).
skill_lookup = {
    1: "Novice",
    2: "Novice",
    3: None,
    4: "Proficient",
    5: "Proficient",
    6: None,
    7: "Expert"
}
# Rank -> league name. Not used in this cell.
league_lookup = {
    1: "Bronze",
    2: "Silver",
    3: "Gold",
    4: "Platinum",
    5: "Diamond",
    6: "Master",
    7: "Grandmaster"
}
# Number of rows drawn per skill level, and the seed that makes the draw
# repeatable: without a fixed seed every re-run of this cell produces a
# different sample and therefore different downstream scores.
SAMPLES_PER_SKILL = 3000
SAMPLE_SEED = 0

data = pd.read_csv("scouting_stats_cluster.csv")
# keep only the ranks that skill_lookup assigns a skill level to
# (derived from the dict so the filter and the mapping cannot drift apart)
kept_ranks = [rank for rank, skill in skill_lookup.items() if skill is not None]
data_uid = data[data.Rank.isin(kept_ranks)].copy()
# add a "Skill" column mapping each rank to its skill level (vectorised lookup)
data_uid["Skill"] = data_uid["Rank"].map(skill_lookup)
# average the numeric stats so there is one row per (UID, Skill, Win) combination;
# numeric_only=True keeps the aggregation from failing on non-numeric columns
data_uid = data_uid.groupby(["UID", "Skill", "Win"], as_index = False).mean(numeric_only = True)
# draw the same number of rows from every skill level, reproducibly;
# the index is reset to a plain 0..n-1 range afterwards
data_uid = (
    data_uid.groupby("Skill")
    .sample(n = SAMPLES_PER_SKILL, random_state = SAMPLE_SEED)
    .reset_index(drop = True)
)
# data_uid

In [161]:
def gradientBoosting(df, predictors, responses, cv = 10):
    """Cross-validate and fit a gradient boosting classifier, printing the results.

    Prints two things, in order:
      1. the mean of the scores returned by ``cross_val_score``;
      2. the ``feature_importances_`` of a classifier fitted on all rows of
         ``df`` (one value per entry of ``predictors``, in the same order).

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the predictor and the response columns.
    predictors : list of str
        Column names used as features.
    responses : list of str
        Column name(s) holding the class label; the selected values are
        flattened to 1-D with ``ravel()`` before fitting.
    cv : optional
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.
        Defaults to 10, the value that used to be hard-coded here.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Feature matrix and flattened label vector as plain numpy arrays
    X = df[predictors].values
    y = df[responses].values.ravel()
    # Boosted decision stumps (max_depth = 1) with a fixed seed so repeated
    # calls on the same data give the same numbers
    clf = GradientBoostingClassifier(n_estimators = 100, learning_rate = 1.0,
                                     max_depth = 1, random_state = 0)
    # Cross-validation with `cv` folds (10 by default) -- this is NOT
    # leave-one-out CV, despite what the previous comment said
    scores = cross_val_score(clf, X, y, cv = cv)
    # Results
    print(np.mean(scores))
    # cross_val_score works on clones of clf, so fit it here on the full data
    # before reading the importances
    clf.fit(X, y)
    print(clf.feature_importances_)

In [163]:
# Feature subsets to evaluate, one model per entry. The order of the entries
# and of the columns inside each entry is significant: the importances printed
# for a model follow the column order of its subset.
predictors = [
    ['ScoutingFrequency', 'APM'],
    ['ScoutingFrequency', 'CPS'],
    ['CPS', 'APM'],
    ['CPS'],
    ['APM'],
    ['ScoutingFrequency'],
    ['ScoutingFrequency', 'APM', 'CPS'],
]
# Score every subset against the 'Skill' label; each call prints its mean
# cross-validation score followed by the feature importances.
for predictor in predictors:
    gradientBoosting(data_uid, predictor, ['Skill'])

0.7741111111111111
[0.01168686 0.98831314]
0.7692222222222223
[0.02336506 0.97663494]
0.7895555555555556
[0.66372396 0.33627604]
0.7672222222222222
[1.]
0.7712222222222221
[1.]
0.5031111111111111
[1.]
0.7901111111111111
[0.01102836 0.33180231 0.65716934]
