In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score

import pymc3 as pm
from scipy.special import expit



In [2]:
train = pd.read_csv("../data/league_train.csv").sample(frac=1.0)
test = pd.read_csv("../data/league_test.csv")

f_cols = [col for col in train.columns if col not in ["matchId", "blue_win"]]

In [3]:
def get_predictions(x):
    return [1 if xi >= 0.5 else 0 for xi in x]

In [4]:
mms = MinMaxScaler(feature_range=(0.001, 0.999))
X_train = mms.fit_transform(train[f_cols])
y_train = train["blue_win"].values
X_test = mms.transform(test[f_cols])
y_test = test["blue_win"].values

In [5]:
f_cols

['blueGold',
 'blueMinionsKilled',
 'blueJungleMinionsKilled',
 'blueAvgLevel',
 'redGold',
 'redMinionsKilled',
 'redJungleMinionsKilled',
 'redAvgLevel',
 'blueChampKills',
 'blueHeraldKills',
 'blueTowersDestroyed',
 'redChampKills',
 'redHeraldKills',
 'redTowersDestroyed']

### Logistic regression MLE

In [6]:
preds_train = np.zeros(len(y_test))
preds_test = np.zeros(len(y_train))

In [7]:
clf = LogisticRegression(random_state=0, C=1.).fit(X_train, y_train)
preds_test = clf.predict_proba(X_test)
preds_train = clf.predict_proba(X_train)

In [8]:
print(f"Models AUC score: {roc_auc_score(y_train, np.argmax(preds_train, axis=1))}")
print(classification_report(y_train, np.argmax(preds_train, axis=1)))

Models AUC score: 0.792
              precision    recall  f1-score   support

           0       0.80      0.78      0.79       125
           1       0.79      0.80      0.79       125

    accuracy                           0.79       250
   macro avg       0.79      0.79      0.79       250
weighted avg       0.79      0.79      0.79       250



In [9]:
print(f"Models AUC score: {roc_auc_score(y_test, np.argmax(preds_test, axis=1))}")
print(classification_report(y_test, np.argmax(preds_test, axis=1)))

Models AUC score: 0.7773934679348092
              precision    recall  f1-score   support

           0       0.78      0.77      0.77     23937
           1       0.78      0.78      0.78     24464

    accuracy                           0.78     48401
   macro avg       0.78      0.78      0.78     48401
weighted avg       0.78      0.78      0.78     48401



### L2 Logistic regression C=0.3

In [10]:
clf = LogisticRegression(random_state=0, C=0.3).fit(X_train, y_train)
preds_test = clf.predict_proba(X_test)
preds_train = clf.predict_proba(X_train)

In [11]:
print(f"Models AUC score: {roc_auc_score(y_train, np.argmax(preds_train, axis=1))}")
print(classification_report(y_train, np.argmax(preds_train, axis=1)))

Models AUC score: 0.78
              precision    recall  f1-score   support

           0       0.79      0.76      0.78       125
           1       0.77      0.80      0.78       125

    accuracy                           0.78       250
   macro avg       0.78      0.78      0.78       250
weighted avg       0.78      0.78      0.78       250



In [12]:
print(f"Models AUC score: {roc_auc_score(y_test, np.argmax(preds_test, axis=1))}")
print(classification_report(y_test, np.argmax(preds_test, axis=1)))

Models AUC score: 0.7817045293341828
              precision    recall  f1-score   support

           0       0.78      0.78      0.78     23937
           1       0.78      0.78      0.78     24464

    accuracy                           0.78     48401
   macro avg       0.78      0.78      0.78     48401
weighted avg       0.78      0.78      0.78     48401



### Bayesian Logistic regression

In [13]:
with pm.Model() as model:
    # Alpha is the interception
    alpha = pm.Normal("alpha", mu=0, sd=10)
    # The prior for the features varibles which are included
    #beta = pm.Normal("beta", mu=0, sd=3, shape=X.shape[1])
    b_gold = pm.Beta("b_gold", alpha=3, beta=3, observed=X_train[:,f_cols.index("blueGold")])
    b_minion = pm.Beta("b_minion", alpha=3, beta=3, observed=X_train[:,f_cols.index("blueMinionsKilled")])
    b_jungle_camps = pm.Beta("b_camps", alpha=3, beta=3, observed=X_train[:,f_cols.index("blueJungleMinionsKilled")])
    b_avg_level = pm.Beta("b_level", alpha=3, beta=3, observed=X_train[:,f_cols.index("blueAvgLevel")])
    b_champ_kills = pm.Beta("b_kills", alpha=3, beta=3, observed=X_train[:,f_cols.index("blueChampKills")])
    b_towers_destroyed = pm.Beta("b_towers", alpha=3, beta=5, observed=X_train[:,f_cols.index("blueTowersDestroyed")])
    b_herald_kills = pm.Beta("b_heralds", alpha=3, beta=5, observed=X_train[:,f_cols.index("blueHeraldKills")])
    
    r_gold = pm.Beta("r_gold", alpha=3, beta=3, observed=X_train[:,f_cols.index("redGold")])
    r_minion = pm.Beta("r_minion", alpha=3, beta=3, observed=X_train[:,f_cols.index("redMinionsKilled")])
    r_jungle_camps = pm.Beta("r_camps", alpha=3, beta=3, observed=X_train[:,f_cols.index("redJungleMinionsKilled")])
    r_avg_level = pm.Beta("r_level", alpha=3, beta=3, observed=X_train[:,f_cols.index("redAvgLevel")])
    r_champ_kills = pm.Beta("r_kills", alpha=3, beta=3, observed=X_train[:,f_cols.index("redChampKills")])
    r_towers_destroyed = pm.Beta("r_towers", alpha=3, beta=5, observed=X_train[:,f_cols.index("redTowersDestroyed")])
    r_herald_kills = pm.Beta("r_heralds", alpha=3, beta=5, observed=X_train[:,f_cols.index("redHeraldKills")])
    
    #beta = pm.Normal("beta", mu=0, sd=10, shape=X_train.shape[1])
    beta_b_gold = pm.Normal("beta_b_gold", mu=6, sd=3)
    beta_b_minion = pm.Normal("beta_b_minion", mu=2, sd=1)
    beta_b_jungle_camps = pm.Normal("beta_b_camps", mu=2, sd=1)
    beta_b_avg_level = pm.Normal("beta_b_level", mu=2, sd=1)
    beta_b_champ_kills = pm.Normal("beta_b_kills", mu=2, sd=1)
    beta_b_towers_destroyed = pm.Normal("beta_b_towers", mu=2, sd=1)
    beta_b_herald_kills = pm.Normal("beta_b_heralds", mu=2, sd=1)


    beta_r_gold = pm.Normal("beta_r_gold", mu=-6, sd=3)
    beta_r_minion = pm.Normal("beta_r_minion", mu=-2, sd=1)
    beta_r_jungle_camps = pm.Normal("beta_r_camps", mu=-2, sd=1)
    beta_r_avg_level = pm.Normal("beta_r_level", mu=-2, sd=1)
    beta_r_champ_kills = pm.Normal("beta_r_kills", mu=-2, sd=1)
    beta_r_towers_destroyed = pm.Normal("beta_r_towers", mu=-2, sd=1)
    beta_r_herald_kills = pm.Normal("beta_r_heralds", mu=-2, sd=1)

    # Deterministic function
    #p = pm.math.dot(X,beta)
    p = (b_gold * beta_b_gold +
         b_minion * beta_b_minion +
         b_jungle_camps * beta_b_jungle_camps +
         b_avg_level * beta_b_avg_level +
         b_champ_kills * beta_b_champ_kills +
         b_towers_destroyed * beta_b_towers_destroyed +
         b_herald_kills * beta_b_herald_kills +
         r_gold * beta_r_gold +
         r_minion * beta_r_minion +
         r_jungle_camps * beta_r_jungle_camps +
         r_avg_level * beta_r_avg_level +
         r_champ_kills * beta_r_champ_kills +
         r_towers_destroyed * beta_r_towers_destroyed +
         r_herald_kills * beta_r_herald_kills
        )

    y_obs = pm.Bernoulli("y_obs", pm.invlogit(p + alpha), observed=y_train)
    

In [14]:
with model:
    trace = pm.sample(tune=2000, draws = 2000, random_seed = 0, cores = 1, progressbar = True, chains = 1)

  trace = pm.sample(tune=2000, draws = 2000, random_seed = 0, cores = 1, progressbar = True, chains = 1)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (1 chains in 1 job)
NUTS: [beta_r_heralds, beta_r_towers, beta_r_kills, beta_r_level, beta_r_camps, beta_r_minion, beta_r_gold, beta_b_heralds, beta_b_towers, beta_b_kills, beta_b_level, beta_b_camps, beta_b_minion, beta_b_gold, alpha]


Sampling 1 chain for 2_000 tune and 2_000 draw iterations (2_000 + 2_000 draws total) took 16 seconds.
Only one chain was sampled, this makes it impossible to run some convergence checks


In [15]:
results = pd.DataFrame({'beta_b_gold':np.apply_along_axis(np.mean, 0, trace['beta_b_gold']),
                        'beta_b_minion':np.apply_along_axis(np.mean, 0, trace['beta_b_minion']),
                        'beta_b_camps':np.apply_along_axis(np.mean, 0, trace['beta_b_camps']),
                        'beta_b_level':np.apply_along_axis(np.mean, 0, trace['beta_b_level']),
                        'beta_b_kills':np.apply_along_axis(np.mean, 0, trace['beta_b_kills']),
                        'beta_b_towers':np.apply_along_axis(np.mean, 0, trace['beta_b_towers']),
                        'beta_b_heralds': np.apply_along_axis(np.mean, 0, trace['beta_b_heralds']),
                        'beta_r_gold':np.apply_along_axis(np.mean, 0, trace['beta_r_gold']),
                        'beta_r_minion':np.apply_along_axis(np.mean, 0, trace['beta_r_minion']),
                        'beta_r_camps':np.apply_along_axis(np.mean, 0, trace['beta_r_camps']),
                        'beta_r_level':np.apply_along_axis(np.mean, 0, trace['beta_r_level']),
                        'beta_r_kills':np.apply_along_axis(np.mean, 0, trace['beta_r_kills']),
                        'beta_r_towers':np.apply_along_axis(np.mean, 0, trace['beta_r_towers']),
                        'beta_r_heralds': np.apply_along_axis(np.mean, 0, trace['beta_r_heralds']),
                        'alpha':np.apply_along_axis(np.mean, 0, trace['alpha'])
                       }, index=[0])
results.T

Unnamed: 0,0
beta_b_gold,3.722213
beta_b_minion,1.510552
beta_b_camps,1.317596
beta_b_level,0.51581
beta_b_kills,1.735135
beta_b_towers,0.940893
beta_b_heralds,0.215242
beta_r_gold,-7.967846
beta_r_minion,-2.024957
beta_r_camps,-1.72305


In [17]:
preds_train = np.apply_along_axis(np.mean, 1, expit(trace['alpha'] + np.dot(X_train, np.transpose(results.drop("alpha", axis=1)) )) )
print(f"Models AUC score: {roc_auc_score(y_train, preds_train)}")
print(classification_report(y_train, get_predictions(preds_train)))

Models AUC score: 0.814208
              precision    recall  f1-score   support

           0       0.64      0.91      0.75       125
           1       0.85      0.48      0.61       125

    accuracy                           0.70       250
   macro avg       0.74      0.70      0.68       250
weighted avg       0.74      0.70      0.68       250



In [18]:
preds_test = np.apply_along_axis(np.mean, 1, expit(trace['alpha'] + np.dot(X_test, np.transpose(results.drop("alpha", axis=1)) )) )
print(f"Models AUC score: {roc_auc_score(y_test, preds_test)}")
print(classification_report(y_test, get_predictions(preds_test)))

Models AUC score: 0.8141042202754106
              precision    recall  f1-score   support

           0       0.62      0.91      0.74     23937
           1       0.84      0.46      0.59     24464

    accuracy                           0.68     48401
   macro avg       0.73      0.68      0.67     48401
weighted avg       0.73      0.68      0.67     48401



In [19]:
map_estimate = pm.find_MAP(model=model)




In [20]:
preds_train = np.apply_along_axis(np.mean, 1, expit(trace['alpha'] + np.dot(X_train, np.transpose(pd.DataFrame(map_estimate, index=[0]).drop("alpha", axis=1)) )) )
print(f"Models AUC score: {roc_auc_score(y_train, preds_train)}")
print(classification_report(y_train, get_predictions(preds_train)))

Models AUC score: 0.813952
              precision    recall  f1-score   support

           0       0.65      0.88      0.75       125
           1       0.82      0.54      0.65       125

    accuracy                           0.71       250
   macro avg       0.74      0.71      0.70       250
weighted avg       0.74      0.71      0.70       250



In [21]:
preds_test = np.apply_along_axis(np.mean, 1, expit(trace['alpha'] + np.dot(X_test, np.transpose(pd.DataFrame(map_estimate, index=[0]).drop("alpha", axis=1)) )) )
print(f"Models AUC score: {roc_auc_score(y_test, preds_test)}")
print(classification_report(y_test, get_predictions(preds_test)))

Models AUC score: 0.8133059395776568
              precision    recall  f1-score   support

           0       0.64      0.89      0.74     23937
           1       0.82      0.50      0.62     24464

    accuracy                           0.69     48401
   macro avg       0.73      0.70      0.68     48401
weighted avg       0.73      0.69      0.68     48401



### random forest

### decision tree

### naive bayes