In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
import pickle

In [2]:
matches = pd.read_csv("matches.csv")
participants = pd.read_csv("participants.csv")
stats1 = pd.read_csv("stats1.csv")
stats2 = pd.read_csv("stats2.csv")
stats = pd.concat([stats1,stats2])


  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# merge into a single DataFrame
a = pd.merge(participants, matches, left_on="matchid", right_on="id")
allstats_orig = pd.merge(a, stats, left_on="matchid", right_on="id")
allstats = allstats_orig.copy()

# drop games that lasted less than 10 minutes
allstats = allstats.loc[allstats["duration"] >= 10*60,:]

# leave only s8 games
allstats = allstats.loc[allstats["seasonid"] == 8,:]

# Convert string-based categories to numeric values
cat_cols = ["role", "position", "version", "platformid"]
for c in cat_cols:
    allstats[c] = allstats[c].astype('category')
    allstats[c] = allstats[c].cat.codes
    print(c)
allstats["wardsbought"] = allstats["wardsbought"].astype(np.int32)

# filling missing values
allstats = allstats.fillna(0)

X = allstats.drop(["win", "id_x", "matchid", "id_y", "platformid", "seasonid", "creation", "version", "id", "role", "position"], axis=1)
y = allstats["win"]

# convert all features we want to consider as rates
rate_features = [
    "kills", "deaths", "assists", "killingsprees", "doublekills",
    "triplekills", "quadrakills", "pentakills", "legendarykills",
    "totdmgdealt", "magicdmgdealt", "physicaldmgdealt", "truedmgdealt",
    "totdmgtochamp", "magicdmgtochamp", "physdmgtochamp", "truedmgtochamp",
    "totheal", "totunitshealed", "dmgtoobj", "timecc", "totdmgtaken",
    "magicdmgtaken" , "physdmgtaken", "truedmgtaken", "goldearned", "goldspent",
    "totminionskilled", "neutralminionskilled", "ownjunglekills",
    "enemyjunglekills", "totcctimedealt", "pinksbought", "wardsbought",
    "wardsplaced", "wardskilled"
]
for feature_name in rate_features:
    X[feature_name] /= X["duration"] / 60 # per minute rate

# convert to fraction of game
X["longesttimespentliving"] /= X["duration"]

# define friendly names for the features
full_names = {
    "kills": "Kills per min.",
    "deaths": "Deaths per min.",
    "assists": "Assists per min.",
    "killingsprees": "Killing sprees per min.",
    "longesttimespentliving": "Longest time living as % of game",
    "doublekills": "Double kills per min.",
    "triplekills": "Triple kills per min.",
    "quadrakills": "Quadra kills per min.",
    "pentakills": "Penta kills per min.",
    "legendarykills": "Legendary kills per min.",
    "totdmgdealt": "Total damage dealt per min.",
    "magicdmgdealt": "Magic damage dealt per min.",
    "physicaldmgdealt": "Physical damage dealt per min.",
    "truedmgdealt": "True damage dealt per min.",
    "totdmgtochamp": "Total damage to champions per min.",
    "magicdmgtochamp": "Magic damage to champions per min.",
    "physdmgtochamp": "Physical damage to champions per min.",
    "truedmgtochamp": "True damage to champions per min.",
    "totheal": "Total healing per min.",
    "totunitshealed": "Total units healed per min.",
    "dmgtoobj": "Damage to objects per min.",
    "timecc": "Time spent with crown control per min.",
    "totdmgtaken": "Total damage taken per min.",
    "magicdmgtaken": "Magic damage taken per min.",
    "physdmgtaken": "Physical damage taken per min.",
    "truedmgtaken": "True damage taken per min.",
    "goldearned": "Gold earned per min.",
    "goldspent": "Gold spent per min.",
    "totminionskilled": "Total minions killed per min.",
    "neutralminionskilled": "Neutral minions killed per min.",
    "ownjunglekills": "Own jungle kills per min.",
    "enemyjunglekills": "Enemy jungle kills per min.",
    "totcctimedealt": "Total crown control time dealt per min.",
    "pinksbought": "Pink wards bought per min.",
    "wardsbought": "Wards bought per min.",
    "wardsplaced": "Wards placed per min.",
    "turretkills": "# of turret kills",
    "inhibkills": "# of inhibitor kills",
    "dmgtoturrets": "Damage to turrets"
}
feature_names = [full_names.get(n, n) for n in X.columns]
X.columns = feature_names

role
position
version
platformid


In [4]:
#splitting data on train and test
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

#scaling 
#scaler = MinMaxScaler()
#x_train = scaler.fit_transform(x_train.values)
#x_test = scaler.transform(x_test.values)

In [None]:
#parameters optimization 
log_reg = LogisticRegression()
penalty = ['l1', 'l2']
C = np.logspace(0, 3, 5)
hyperparameters = dict(C=C, penalty=penalty)
clf = GridSearchCV(log_reg, hyperparameters, cv=5, verbose=0)
best_model = clf.fit(X, y)
print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])
print('Best C:', best_model.best_estimator_.get_params()['C'])

In [108]:
log_reg = LogisticRegression()
log_reg.fit(x_train, y_train)
acc_score = log_reg.score(x_test, y_test)
print("Logistic Regression win/lose Accuracy Score: {}".format(acc_score))

Logistic Regression win/lose Accuracy Score: 0.8721519841721823


In [109]:
predictors = [[3, 24, 4, 11, 196141822, 400, 1082, 3077, 2031, 3111, 1419, 1043, 1037, 3340, 2, 5, 3, 0, 1, 0, 454, 0, 0, 0, 0, 0, 98250, 21110, 71036, 6103, 0, 3187, 903, 1927, 356, 4088, 1, 10150, 2320, 0, 8, 3, 16882, 4198, 11977, 707, 7147, 7075, 0, 0, 58, 70, 56, 0, 464, 12, 0, 0, 4, 0, 0]]
#a = log_reg.predict(predictors)
#print(a)

In [110]:
pickle.dump(log_reg, open("model.pkl","wb"))

In [111]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(max_depth=10,random_state=42)
rfc.fit(x_train, y_train)
acc_score = rfc.score(x_test, y_test)

print("Random Forest Classifier win/lose Accuracy Score: {}".format(acc_score))

Random Forest Classifier:

Random Forest Classifier win/lose Accuracy Score: 0.8534387115009007


In [66]:
list(X)

['id_x',
 'matchid',
 'player',
 'championid',
 'ss1',
 'ss2',
 'role',
 'position',
 'id_y',
 'gameid',
 'platformid',
 'queueid',
 'seasonid',
 'duration',
 'creation',
 'version',
 'id',
 'item1',
 'item2',
 'item3',
 'item4',
 'item5',
 'item6',
 'trinket',
 'Kills per min.',
 'Deaths per min.',
 'Assists per min.',
 'largestkillingspree',
 'largestmultikill',
 'Killing sprees per min.',
 'Longest time living as % of game',
 'Double kills per min.',
 'Triple kills per min.',
 'Quadra kills per min.',
 'Penta kills per min.',
 'Legendary kills per min.',
 'Total damage dealt per min.',
 'Magic damage dealt per min.',
 'Physical damage dealt per min.',
 'True damage dealt per min.',
 'largestcrit',
 'Total damage to champions per min.',
 'Magic damage to champions per min.',
 'Physical damage to champions per min.',
 'True damage to champions per min.',
 'Total healing per min.',
 'Total units healed per min.',
 'dmgselfmit',
 'Damage to objects per min.',
 'Damage to turrets',
 'vis

In [68]:
allstats_orig.id_x

0                9
1               10
2               11
3               12
4               13
5               14
6               15
7               16
8               17
9               18
10              19
11              20
12              21
13              22
14              23
15              24
16              25
17              26
18              27
19              28
20              29
21              30
22              31
23              32
24              33
25              34
26              35
27              36
28              37
29              38
            ...   
1579380    1865575
1579381    1865576
1579382    1865577
1579383    1865578
1579384    1865579
1579385    1865580
1579386    1865581
1579387    1865582
1579388    1865583
1579389    1865584
1579390    1865585
1579391    1865586
1579392    1865587
1579393    1865588
1579394    1865589
1579395    1865590
1579396    1865591
1579397    1865592
1579398    1865593
1579399    1865594
1579400    1865595
1579401    1