In [536]:
import pandas as pd
import glob
import numpy as np
import altair as alt
import json
import math
import matplotlib as mpp
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Notebook display settings: show every column, cap printed rows at 40.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 40)

# Load the processed match table and discard the 'Year.1' column
# (presumably a duplicate of 'Year' produced when the CSV was written/read -- TODO confirm).
filename = "processed/quantleague.csv"
quantleague = pd.read_csv(filename, index_col=None).drop(columns='Year.1')

# Default feature set: game length, early/mid gold figures, neutral objectives
# per side, and the year. Late-game gold columns are intentionally not listed.
defaultxcols = [
    'gamelength',
    'earlygoldblue', 'midgoldblue',
    'earlygoldred', 'midgoldred',
    'earlygolddiff', 'midgolddiff',
    'bDragons', 'bBarons', 'bHeralds',
    'rDragons', 'rBarons', 'rHeralds',
    'Year',
]
# Default target column.
defaultycols = ['bluewin']

In [537]:

def createDataset(xcols, ycols, df):
    """Build a standardized 80/20 train/test split from ``df``.

    Parameters
    ----------
    xcols : list of str
        Feature column names selected from ``df``.
    ycols : list of str
        Target column name(s) selected from ``df``.
    df : pandas.DataFrame
        Source table.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]``. The feature arrays are
        standardized; the targets keep the ``(n_samples, len(ycols))`` shape
        produced by ``df[ycols].to_numpy()``.

    Notes
    -----
    The scaler is fitted on the training rows only and then applied to both
    splits. Fitting it on the full table before splitting would leak the test
    set's mean/variance into the training features.
    """
    x = df[xcols]
    y = df[ycols].to_numpy()
    # Split first (same seed and proportions as before), scale afterwards.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, train_size=.8, random_state=42)
    scaler = StandardScaler().fit(x_train)
    return [scaler.transform(x_train), scaler.transform(x_test), y_train, y_test]


In [538]:
# Build the default train/test split: default feature columns -> 'bluewin' target.
x_train, x_test, y_train, y_test  = createDataset(defaultxcols, defaultycols, quantleague)

In [539]:
def visualizePCA(x_train, y_train, y_name, max_points=5000, random_state=42):
    """Fit a 2-component PCA on ``x_train`` and display a scatter of the projection.

    Parameters
    ----------
    x_train : array-like of shape (n_samples, n_features)
        Feature matrix to project.
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
        Target values used to colour the points.
    y_name : str
        Column name (and legend title) used for the target in the chart.
    max_points : int, default 5000
        Upper bound on the number of points plotted. The original hard-coded
        5000 presumably matches Altair's default row limit -- TODO confirm.
    random_state : int, default 42
        Seed for the down-sampling so the figure is reproducible.

    Returns
    -------
    sklearn.decomposition.PCA
        The fitted 2-component PCA.
    """
    pca = PCA(n_components=2)
    principalcomponents = pca.fit_transform(x_train)
    pcs = pd.DataFrame(data=principalcomponents, columns=['PC1', 'PC2'])
    # Flatten so an (n, 1) target column vector is stored as a plain column.
    pcs[y_name] = np.ravel(y_train)
    # Only down-sample when there are more rows than we want to draw;
    # DataFrame.sample raises if asked for more rows than exist.
    if len(pcs) > max_points:
        plotted = pcs.sample(n=max_points, random_state=random_state)
    else:
        plotted = pcs
    alt.Chart(plotted).mark_point(size=1).encode(x='PC1', y='PC2', color=y_name).display()
    return pca


In [540]:
# Project the training features onto two principal components, coloured by the target.
pca = visualizePCA(x_train, y_train, 'Blue win')


In [541]:
# Total share of variance captured by the two principal components fitted above.
sum(pca.explained_variance_ratio_)
# This is great news! Around 60% of the variance of our data can be explained with just two principal components.
# Let's look at how many components we would need to capture 95% of the variance:

0.5965303653796767

In [542]:


def visualizePCAComponents(x_train):
    """Display cumulative explained variance against the number of PCA components.

    A PCA with as many components as ``x_train`` has columns is fitted, and
    the running sum of its explained-variance ratios is drawn as an
    interactive Altair line chart. Nothing is returned.
    """
    n_features = x_train.shape[1]
    fitted = PCA(n_features).fit(x_train)

    # One row per component count: 1..n_features -> cumulative variance share.
    component_counts = list(range(1, n_features + 1))
    cumulative_share = np.cumsum(fitted.explained_variance_ratio_)
    pca_variance = pd.DataFrame({'x': component_counts, 'y': cumulative_share})

    x_axis = alt.X('x', title='Number of components')
    y_axis = alt.Y(
        'y',
        title='Cumulative explained variance',
        scale=alt.Scale(domain=(0.3, 1.02)),
    )
    chart = alt.Chart(pca_variance).mark_line().encode(x=x_axis, y=y_axis)
    chart.interactive().display()

# To explain >=95% of the variance, we need to use 11 components. TODO: spell out what this means for the feature set.
# First, we try creating a model to predict the probability of winning based on the early-to-mid game state, i.e. we look at gold differences and neutral objectives taken.
# Let's try doing a PCA to find which variables are most indicative of winning:


In [543]:
 # Plot cumulative explained variance for the training features.
 visualizePCAComponents(x_train)

In [544]:
def LogReg(x_train, y_train, **kwargs):
    """Fit and return a scikit-learn ``LogisticRegression``.

    Parameters
    ----------
    x_train : array-like of shape (n_samples, n_features)
        Training features.
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
        Training targets. A single-column 2-D target is flattened to 1-D,
        the shape the estimator expects; any other shape is passed through
        unchanged.
    **kwargs
        Optional keyword arguments forwarded to ``LogisticRegression``
        (for example ``max_iter``). With none given, the defaults are used.

    Returns
    -------
    sklearn.linear_model.LogisticRegression
        The fitted model.
    """
    y = np.asarray(y_train)
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()
    return LogisticRegression(**kwargs).fit(x_train, y)

def evaluateModel(model, x_test, y_test):
    """Return ``model.score(x_test, y_test)`` for the held-out data."""
    test_score = model.score(x_test, y_test)
    return test_score

In [545]:
# Fit the blue-win classifier on the training split and score it on the test split.
blueWinModel = LogReg(x_train, y_train)
evaluateModel(blueWinModel, x_test, y_test)

0.8976377952755905

In [546]:
def confusion(model, x_test, y_test, labels):
    """Compute a confusion matrix for ``model`` on the test data.

    The matrix is normalized over the true labels (``normalize='true'``) and
    its rows/columns follow the order given in ``labels``.
    """
    y_pred = model.predict(x_test)
    matrix = metrics.confusion_matrix(
        y_true=y_test,
        y_pred=y_pred,
        labels=labels,
        normalize='true',
    )
    return matrix

def confusion_graph(confusion_matrix, labels):
    """Build an Altair heatmap of a confusion matrix.

    Parameters
    ----------
    confusion_matrix : array-like of shape (n_labels, n_labels)
        Matrix whose entry ``[i, j]`` is plotted at target ``labels[i]`` and
        predicted ``labels[j]``.
    labels : array-like of shape (n_labels,)
        Class labels, in the same order as the matrix rows/columns.

    Returns
    -------
    altair.Chart
        A 200x200 ``mark_rect`` chart with predicted labels on x, target
        labels on y, and the cell value mapped to colour.

    Raises
    ------
    ValueError
        If the matrix is not square with one row/column per label.
    """
    matrix = np.asarray(confusion_matrix)
    labels = np.asarray(labels)
    expected_shape = (len(labels), len(labels))
    if matrix.shape != expected_shape:
        raise ValueError(
            f"confusion_matrix has shape {matrix.shape}, expected {expected_shape} "
            "to match labels"
        )
    # With the default 'xy' indexing, x varies along columns (predicted)
    # and y along rows (target), matching the matrix layout after ravel().
    x, y = np.meshgrid(labels, labels)
    source = pd.DataFrame({
        'x': x.ravel(),
        'y': y.ravel(),
        'z': matrix.ravel()
    })
    return alt.Chart(source).mark_rect().encode(
        x=alt.X('x:N', title="Predicted"),
        y=alt.Y('y:N', title="Target"),
        color='z:Q'
    ).properties(width=200, height=200)

In [547]:
# Heatmap of the confusion matrix for the blue-win model on the test split,
# using the distinct values found in y_test as labels.
confusion_graph(confusion(blueWinModel, x_test, y_test, np.unique(y_test)), np.unique(y_test))

In [678]:
def createCoeffs(labels, values, classes):
    """Arrange model coefficients into a classes x features DataFrame.

    Parameters
    ----------
    labels : sequence of str
        Feature names; become the columns.
    values : array-like of shape (n_rows, n_features)
        Coefficient rows, e.g. a fitted model's ``coef_``.
    classes : sequence
        Class labels; become the index.

    Returns
    -------
    pandas.DataFrame
        One row per class, one column per feature.

    Notes
    -----
    A binary scikit-learn classifier exposes a single coefficient row that
    describes the second (positive) class. When ``values`` has one row and
    ``classes`` has two entries, that row is assigned to ``classes[1]`` and
    its negation to ``classes[0]``. Previously this case raised a length
    mismatch, and the sign convention attached the positive-class
    coefficients to ``classes[0]``. Inputs that already have one row per
    class are used as given.
    """
    coefs = np.asarray(values, dtype=float)
    if coefs.ndim == 1:
        coefs = coefs.reshape(1, -1)
    if coefs.shape[0] == 1 and len(classes) == 2:
        coefs = np.vstack([-coefs[0], coefs[0]])
    return pd.DataFrame(coefs, index=classes, columns=list(labels))

In [679]:
def pipeline(xcols, ycols, df=quantleague):
    """Run the full workflow: split, fit, score, and show a confusion matrix.

    Parameters
    ----------
    xcols : list of str
        Feature column names.
    ycols : list of str
        Target column name(s).
    df : pandas.DataFrame, optional
        Source table; defaults to the ``quantleague`` frame that exists when
        this function is defined.

    Prints progress and the test accuracy, then displays the confusion-matrix
    heatmap. Returns ``None``.
    """
    print('Creating data...')
    x_train, x_test, y_train, y_test = createDataset(xcols, ycols, df)
    print('Creating model...')
    model = LogReg(x_train, y_train)
    print('Evaluating model...')
    # Use every class known to the model as well as every class present in the
    # test split. Taking labels from y_test alone silently drops predictions of
    # classes that happen to be missing from the test rows.
    labels = np.union1d(model.classes_, y_test)
    print('Accuracy:', evaluateModel(model, x_test, y_test))
    # print('Coefficients:')
    # display(createCoeffs(xcols, model.coef_, labels))
    print('Displaying confusion matrix')
    confusion_graph(confusion(model, x_test, y_test, labels), labels).display()


In [680]:
# Baseline run: default features predicting 'bluewin' on the full table.
pipeline(defaultxcols, defaultycols)

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.8976377952755905
Coefficients:


Unnamed: 0,gamelength,earlygoldblue,midgoldblue,earlygoldred,midgoldred,earlygolddiff,midgolddiff,bDragons,bBarons,bHeralds,rDragons,rBarons,rHeralds,Year
0,0.456919,-0.003758,-0.064918,0.001802,-0.311468,-0.020671,0.641232,0.649796,1.117982,0.038407,-0.633143,-1.254191,-0.037293,0.04978
1,-0.456919,0.003758,0.064918,-0.001802,0.311468,0.020671,-0.641232,-0.649796,-1.117982,-0.038407,0.633143,1.254191,0.037293,-0.04978


Displaying confusion matrix


In [681]:
# Predict 'League' from the full feature list (gold totals incl. late game,
# kills, towers, inhibitors, neutral objectives, and year).
pipeline(['gamelength', 'totalgoldblue', 'earlygoldblue', 'midgoldblue',
       'lategoldblue', 'totalgoldred', 'earlygoldred', 'midgoldred',
       'lategoldred', 'earlygolddiff', 'midgolddiff', 'lategolddiff', 'bKills',
       'bTowers', 'bInhibs', 'bDragons', 'bBarons', 'bHeralds', 'rKills',
       'rTowers', 'rInhibs', 'rDragons', 'rBarons', 'rHeralds',
       'Year'], ['League'])

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.23818897637795275
Coefficients:


Unnamed: 0,gamelength,totalgoldblue,earlygoldblue,midgoldblue,lategoldblue,totalgoldred,earlygoldred,midgoldred,lategoldred,earlygolddiff,midgolddiff,lategolddiff,bKills,bTowers,bInhibs,bDragons,bBarons,bHeralds,rKills,rTowers,rInhibs,rDragons,rBarons,rHeralds,Year
CBLoL,1.308894,-0.562771,-0.140288,-0.666192,0.362281,0.545013,-0.106997,-0.66266,0.354471,-0.116271,0.012918,-0.011673,-0.204981,0.351992,0.084125,0.408388,-0.271488,0.210489,-0.283431,-0.096613,0.353042,0.285941,-0.402956,0.265746,0.388343
CLS,1.216116,-0.671462,0.376755,-0.572967,-0.217978,-0.417614,0.328646,-0.497816,-0.351715,0.164547,-0.172679,0.27216,0.493675,0.387761,-0.210447,0.560878,-0.096535,0.069046,0.235757,0.502095,0.124697,0.42032,-0.042898,0.043642,1.61818
EULCS,-0.392063,-0.057179,0.322316,0.792004,-0.873702,-0.070517,0.301824,0.768506,-0.771707,0.062521,0.029089,-0.132729,-0.05529,0.381041,-0.098363,-0.379014,0.353694,-0.116562,-0.170739,0.59954,-0.132453,-0.575981,0.284422,-0.121166,-0.478568
IEM,-2.269956,0.881474,0.164386,0.237491,0.071637,1.063332,0.195826,0.226636,0.063755,-0.115149,0.019825,0.010351,0.11212,-0.665477,0.224981,-0.369788,-0.240882,-0.150286,0.005619,-0.608897,0.042545,-0.436644,-0.08532,-0.216138,-1.176354
LCK,-0.656765,0.667265,0.012254,-0.272086,0.399066,0.509193,0.00097,-0.260725,0.456389,0.048179,-0.017133,-0.138783,-0.569921,-0.665102,0.074966,0.086396,-0.026697,-0.13378,-0.580774,-0.808869,0.1116,-0.007226,0.058154,-0.11267,-0.654086
LCL,1.046148,-0.290534,-0.211982,-0.55612,0.82062,-0.698269,-0.225708,-0.579478,0.790288,0.042952,0.083284,-0.001599,0.13645,-0.060788,-0.094131,-0.034113,-0.368959,0.169134,0.212855,-0.163407,-0.134803,0.290543,-0.274657,0.199978,0.259223
LJL,2.304058,-1.131422,-0.03691,-0.632317,0.08788,0.299417,-0.072905,-0.587888,0.031847,0.137933,-0.092867,0.10161,-0.105189,-0.177186,-0.107412,-0.097791,0.000824,0.17061,-0.148358,-0.489708,-0.176043,0.07076,-0.121976,0.255001,0.383089
LLN,2.096365,-0.473241,0.03885,-0.516627,0.174163,-1.811114,0.023594,-0.41406,0.184711,0.052657,-0.245067,-0.032435,0.25441,0.183273,0.51532,0.132007,-0.218733,-0.10956,0.007944,0.823227,0.133574,0.269199,-0.092434,-0.113158,1.385222
LMS,0.54148,0.103953,0.133614,-0.340922,0.298301,-0.459334,0.131195,-0.341882,0.192579,0.007849,0.018147,0.180592,-0.163528,-0.608428,-0.016359,-0.194363,0.107155,-0.005566,-0.090948,-0.187943,0.000591,-0.256126,0.146881,-0.175774,-0.407319
MSI,-1.248514,0.578047,-0.261354,0.606516,-0.024745,-0.262182,-0.253465,0.451169,0.134321,-0.031093,0.37907,-0.303867,0.027724,-0.529154,-0.24542,0.370538,0.176042,0.057121,0.182802,-0.440044,-0.247948,0.446339,-0.053189,0.174797,-0.536358


Displaying confusion matrix


In [664]:
# Same feature list as the League run, now predicting 'Season'.
pipeline(['gamelength', 'totalgoldblue', 'earlygoldblue', 'midgoldblue',
       'lategoldblue', 'totalgoldred', 'earlygoldred', 'midgoldred',
       'lategoldred', 'earlygolddiff', 'midgolddiff', 'lategolddiff', 'bKills',
       'bTowers', 'bInhibs', 'bDragons', 'bBarons', 'bHeralds', 'rKills',
       'rTowers', 'rInhibs', 'rDragons', 'rBarons', 'rHeralds',
       'Year'], ['Season'])

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.5872703412073491
Coefficients:
gamelength
totalgoldblue
earlygoldblue
midgoldblue
lategoldblue
totalgoldred
earlygoldred
midgoldred
lategoldred
earlygolddiff
midgolddiff
lategolddiff
bKills
bTowers
bInhibs
bDragons
bBarons
bHeralds
rKills
rTowers
rInhibs
rDragons
rBarons
rHeralds
Year
2


None

Displaying confusion matrix


In [665]:
# Predict 'Year'; 'Year' is therefore left out of the feature list here.
pipeline(['gamelength', 'totalgoldblue', 'earlygoldblue', 'midgoldblue',
       'lategoldblue', 'totalgoldred', 'earlygoldred', 'midgoldred',
       'lategoldred', 'earlygolddiff', 'midgolddiff', 'lategolddiff', 'bKills',
       'bTowers', 'bInhibs', 'bDragons', 'bBarons', 'bHeralds', 'rKills',
       'rTowers', 'rInhibs', 'rDragons', 'rBarons', 'rHeralds',
       ], ['Year'])

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.7152230971128609
Coefficients:
gamelength
totalgoldblue
earlygoldblue
midgoldblue
lategoldblue
totalgoldred
earlygoldred
midgoldred
lategoldred
earlygolddiff
midgolddiff
lategolddiff
bKills
bTowers
bInhibs
bDragons
bBarons
bHeralds
rKills
rTowers
rInhibs
rDragons
rBarons
rHeralds
5


None

Displaying confusion matrix


In [616]:
# Same feature list (including 'Year'), now predicting 'Type'.
pipeline(['gamelength', 'totalgoldblue', 'earlygoldblue', 'midgoldblue',
       'lategoldblue', 'totalgoldred', 'earlygoldred', 'midgoldred',
       'lategoldred', 'earlygolddiff', 'midgolddiff', 'lategolddiff', 'bKills',
       'bTowers', 'bInhibs', 'bDragons', 'bBarons', 'bHeralds', 'rKills',
       'rTowers', 'rInhibs', 'rDragons', 'rBarons', 'rHeralds',
       'Year'], ['Type'])

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.7637795275590551
[[-2.46954244e+00  1.28012941e+00 -1.39455523e-01  8.62037573e-01
  -8.43822815e-01  1.59203701e+00 -1.26825310e-01  8.21855413e-01
  -8.17223290e-01 -4.86111889e-02  7.12902239e-02  1.04416764e-02
   4.91818102e-03 -2.19946864e-01  1.14585613e-01  8.48216489e-02
   2.79471673e-02 -3.24596587e-02 -5.85570696e-02 -2.98246745e-01
   8.90574438e-02  1.60587090e-01 -6.35539132e-02  2.32357463e-02
  -6.36693341e-01]
 [ 3.98479657e-01 -6.41551428e-01  2.30430693e-01 -2.55448055e-01
   1.67919998e-01 -9.87800331e-02  2.34252845e-01 -2.62085889e-01
   2.02836344e-01 -1.62101555e-02  2.79181481e-02 -7.77533162e-02
   2.26630961e-01 -6.25507958e-02  8.20965549e-02 -3.76571102e-02
   4.71850587e-02  1.66530681e-01  2.72064114e-02  6.19050034e-02
  -9.61338881e-02 -4.67451083e-02 -9.33670543e-02  1.12327819e-01
   5.60315364e-02]
 [ 3.04022415e+00 -1.47033905e+00 -7.77848076e-02 -5.11362619e-01
   3.86228835e-01 -1

Unnamed: 0,0
gamelength,-2.469542
totalgoldblue,1.280129
earlygoldblue,-0.139456
midgoldblue,0.862038
lategoldblue,-0.843823
totalgoldred,1.592037
earlygoldred,-0.126825
midgoldred,0.821855
lategoldred,-0.817223
earlygolddiff,-0.048611


Displaying confusion matrix


In [617]:
# So it seems that League is very hard for the model to classify, as there are 14 classes and most of them don't have much data. Let's try comparing only the 4 leagues that we have the most data for, in order to see if we can draw more meaningful conclusions. First we need to filter our dataset to only include these four leagues:
# League  samples
# LCK     1445
# NALCS   1272
# EULCS   1099
# LMS     778

def gL(name):
    """Boolean mask over ``quantleague`` rows whose 'League' equals ``name``.

    Small wrapper that makes combining several league selections easier.
    """
    league_column = quantleague['League']
    return league_column.eq(name)

# Keep only the rows belonging to the four selected leagues, then summarize them.
top4 = quantleague.loc[quantleague['League'].isin(['NALCS', 'LCK', 'EULCS', 'LMS'])]
top4.describe()


Unnamed: 0,gamelength,totalgoldblue,earlygoldblue,midgoldblue,lategoldblue,totalgoldred,earlygoldred,midgoldred,lategoldred,earlygolddiff,midgolddiff,lategolddiff,bKills,bTowers,bInhibs,bDragons,bBarons,bHeralds,rKills,rTowers,rInhibs,rDragons,rBarons,rHeralds,bluewin,Year,League_CBLoL,League_CLS,League_EULCS,League_IEM,League_LCK,League_LCL,League_LJL,League_LLN,League_LMS,League_MSI,League_NALCS,League_OPL,League_RR,League_TCL,League_WC,Season_Spring,Season_Summer,Type_International,Type_Playoffs,Type_Promotion,Type_Regional,Type_Season
count,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0,4594.0
mean,37.492599,63438.196996,8416.601001,28478.826731,50690.442534,62512.894645,8382.662386,28241.624946,50197.15172,33.927079,237.206356,493.277536,25.013931,6.76687,1.232042,1.914889,0.74162,0.304528,23.51502,5.949282,1.015455,1.935133,0.74162,0.228559,0.54397,2016.194819,0.0,0.0,0.239225,0.0,0.314541,0.0,0.0,0.0,0.169351,0.0,0.276883,0.0,0.0,0.0,0.0,0.464954,0.535046,0.0,0.093165,0.085111,0.031128,0.790596
std,7.959814,14836.535018,2072.981388,7070.004809,12150.388926,15689.561009,2066.126611,7159.366295,12628.680882,566.144396,2733.882267,6649.187788,13.081608,3.596613,1.36443,1.38038,0.754449,0.532212,13.882398,3.750468,1.323053,1.460166,0.837077,0.478589,0.498117,0.869023,0.0,0.0,0.426657,0.0,0.464383,0.0,0.0,0.0,0.375103,0.0,0.447507,0.0,0.0,0.0,0.0,0.498825,0.498825,0.0,0.290695,0.279077,0.173681,0.406927
min,19.0,24947.0,3701.0,11449.0,18859.0,24625.0,3864.0,12122.0,20271.0,-2464.0,-12432.0,-16736.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32.0,53450.0,7069.25,23561.5,42227.5,51342.0,7002.0,23301.75,41252.5,-291.0,-1674.5,-5196.25,14.0,3.0,0.0,1.0,0.0,0.0,12.0,2.0,0.0,1.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
50%,36.0,62041.0,8147.0,27526.5,49169.5,61388.5,8180.5,27362.0,49108.5,30.0,234.0,1131.5,24.0,8.0,1.0,2.0,1.0,0.0,22.0,6.0,1.0,2.0,1.0,0.0,1.0,2016.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
75%,42.0,72089.75,9500.75,32274.25,57255.25,71721.0,9478.0,32099.75,57247.5,346.5,2124.0,6069.75,34.0,10.0,2.0,3.0,1.0,1.0,34.0,10.0,2.0,3.0,1.0,0.0,1.0,2017.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
max,95.0,177698.0,24252.0,78494.0,145273.0,173712.0,24082.0,90926.0,141750.0,2499.0,9319.0,17347.0,112.0,16.0,14.0,8.0,5.0,3.0,82.0,14.0,14.0,7.0,6.0,3.0,1.0,2018.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0


In [618]:
def getNonLeague(l, exclude=('Year', 'Season', 'Type')):
    """Return the column names usable as features when predicting the league.

    Parameters
    ----------
    l : iterable of str
        Candidate column names.
    exclude : iterable of str, default ('Year', 'Season', 'Type')
        Names dropped by exact match. Names that are not present are simply
        ignored (the previous ``list.remove`` calls raised ``ValueError``).

    Returns
    -------
    list of str
        The names from ``l``, in order, that do not contain the substring
        'League' and are not listed in ``exclude``. Matching on ``exclude``
        is exact, so columns such as 'Season_Spring' are kept. The input is
        not modified.
    """
    excluded = set(exclude)
    return [col for col in l if 'League' not in col and col not in excluded]

# Predict 'League' within the top4 subset, using every column that getNonLeague keeps.
pipeline(getNonLeague(list(top4.columns)), ['League'], df=top4)

Creating data...
Creating model...
Evaluating model...
Accuracy: 0.381936887921654
[[-2.58382616e-01 -1.29497360e-01  1.92088870e-01  6.23558450e-01
  -7.33934002e-01 -1.68096932e-01  1.78438327e-01  6.17190118e-01
  -6.67792391e-01  4.79415668e-02 -4.27111926e-03 -7.33174415e-02
   1.29805559e-01  5.43607438e-01 -9.92288961e-02 -2.07905731e-01
   2.33019850e-01  2.93331489e-02  7.29300757e-02  6.31067878e-01
  -3.16969732e-02 -3.39424042e-01  1.64643412e-01  6.21827422e-02
  -2.41098881e-02  1.42363679e-02 -1.42363679e-02  0.00000000e+00
   8.59592460e-02  1.04550984e-01 -1.55542312e-02 -1.26470413e-01]
 [ 4.09431996e-01  2.24772391e-01 -1.53106438e-01 -7.88657526e-01
   8.32711967e-01  8.99402919e-02 -1.47447558e-01 -7.68305781e-01
   8.77378183e-01 -1.43144341e-02 -2.76908645e-02 -1.44082322e-01
  -3.03920353e-01 -5.08563063e-01  1.78900623e-01  1.42843542e-01
  -1.55886419e-01 -6.01774283e-02 -3.27543914e-01 -7.96398672e-01
   1.58256813e-01  1.83011861e-01 -1.48670246e-01  4.12954

Unnamed: 0,0
gamelength,-0.258383
totalgoldblue,-0.129497
earlygoldblue,0.192089
midgoldblue,0.623558
lategoldblue,-0.733934
totalgoldred,-0.168097
earlygoldred,0.178438
midgoldred,0.61719
lategoldred,-0.667792
earlygolddiff,0.047942


Displaying confusion matrix
