# Learning on League of Legends Data

In [6]:
from __future__ import division
import numpy as np
import csv
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 12 days


In [7]:
def getData(filename):
    features = []
    x = []
    y = []
    with open(filename, "r") as f:
        reader = csv.reader(f)
        r = 0
        for row in reader:
            if r == 0:
                features = row[1:-2]
                r += 1
                continue
            x.append(list(map(int, row[1:-2])))
            y.append(list(map(int, row[-2]))[0])
#         features = x[0]
    return np.array(features), np.array(x), np.array(y)

In [8]:
def testCorrect(trainsize, x, y):
    correct = 0
    for i, x1 in enumerate(x[trainsize:]):
        ans = clf.predict([x1])
    #     print(ans[0], int(y[trainsize + i]))
        if int(ans[0]) == int(y[trainsize + i]):
            correct += 1
    print("{} of {}".format(correct, len(x[trainsize:])))
    print(correct/len(x[trainsize:]))

In [9]:
def getTopFeatures(clf, n):
    indeces = clf.coef_[0].argsort()[-10:][::-1]
    for ind in indeces:
        print(clf.coef_[0][ind], features[ind])

In [10]:
def getWorstFeatures(clf, n):
    indeces = clf.coef_[0].argsort()[:10]
    for ind in indeces:
        print(clf.coef_[0][ind], features[ind])


### Diamond

# Stochastic Gradient Descent

In [11]:
features, x, y = getData("diamond.csv")
trainsize = int(.8 * len(x))

In [12]:
len(x)

3017

In [13]:
clf = SGDClassifier(loss="log", penalty="l2", n_iter=50)
clf.fit(x[:trainsize], y[:trainsize])

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='log', n_iter=50, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [14]:
testCorrect(trainsize, x, y)

502 of 604
0.831125827815


In [15]:
getTopFeatures(clf, 10)

(4.3079200245459868, 't100firstinhibitor')
(3.1770429984117214, 't200:p9:champ110')
(2.9859959172447637, 't200:p6:champ223')
(2.7801233970130323, 't100:p3:champ91')
(2.4727813911553294, 't100:p5:champ78')
(2.4045464522461693, 't100:p3:champ77')
(2.3863047352468585, 't200:p7:champ76')
(2.3311039009310988, 't100:p1:champ57')
(2.2438580293218555, 't200:p8:champ77')
(2.1278318895829869, 't200:p10:champ254')


The SGD algorithm put the largest weight on who took first tower, which makes a lot of sense, but I'm surprised it isn't something else, like first baron, or inhibitor.

# Support Vector Machine

In [21]:
clf = SVC(kernel="linear")
clf.fit(x, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [22]:
testCorrect(trainsize, x, y)

589 of 604
0.975165562914


The SVM was way slower, but with almost perfect predictions

In [23]:
getTopFeatures(clf, 10)

(1.7465775795365692, 't100firsttower')
(1.4068040538306388, 't100:p1:champ91')
(1.3640275072050074, 't100:p3:champ90')
(1.2641980920968474, 't200:p6:champ222')
(1.2559923791942502, 't200:p6:champ22')
(1.2337688509449767, 't200:p8:spell232')
(1.2187162597646937, 't100:p1:champ56')
(1.2082441027740243, 't200:p8:champ49')
(1.1736835081241792, 't200:p8:champ88')
(1.1468130584329403, 't200:p9:champ30')


### Bronze

# SGD

In [16]:
features, x, y = getData("bronze.csv")
trainsize = int(.8 * len(x))

In [17]:
clf = SGDClassifier(loss="log", penalty="l2", n_iter=50)
clf.fit(x[:trainsize], y[:trainsize])

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='log', n_iter=50, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [18]:
testCorrect(trainsize, x, y)

103 of 122
0.844262295082


In [19]:
getTopFeatures(clf, 10)

(7.6022716603335905, 't100firstinhibitor')
(3.9800115921041614, 't100firstbaron')
(3.0540862893988652, 't100:p1:champ75')
(2.8845837015081846, 't100:p3:champ421')
(2.4179736586344256, 't200:p10:champ236')
(2.4178146777537397, 't200:p9:spell211')
(2.3346093816353966, 't100:p2:champ268')
(2.3201373101404923, 't200:p6:champ33')
(2.3074999088060997, 't200:p6:champ64')
(2.2242731607977362, 't200:p10:champ63')


# SVM

In [20]:
clf = SVC(kernel="linear")
clf.fit(x, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [21]:
testCorrect(trainsize, x, y)

122 of 122
1.0


In [22]:
getTopFeatures(clf, 10)

(0.93895450590366503, 't100firstinhibitor')
(0.58420703478535874, 't100:p3:champ421')
(0.56132346159532798, 't200:p6:champ33')
(0.48592351565329556, 't100:p4:champ203')
(0.48081343823012712, 't200:p9:champ103')
(0.48046295930944088, 't200:p10:champ63')
(0.44448225458723811, 't200:p10:champ236')
(0.44075905699406204, 't100firstbaron')
(0.43372511984986006, 't100:p4:champ236')
(0.42939849589842083, 't100:p2:champ268')


In [23]:
getWorstFeatures(clf, 10)

(-0.74804227413276081, 't200firstinhibitor')
(-0.5018316274683674, 't100:p3:champ238')
(-0.45115502398830204, 't200:p8:champ51')
(-0.42820934627212814, 't200:p6:champ98')
(-0.41096143862446533, 't100:p4:champ222')
(-0.39544300484856854, 't100:p5:champ236')
(-0.39085578723990111, 't200firstriftherald')
(-0.37577918076775307, 't200:p10:champ114')
(-0.36265654142910947, 't100:p3:champ10')
(-0.35892139846849508, 't100:p5:champ201')


### Challenger

In [32]:
features, x, y = getData("challenger.csv")
trainsize = int(.8 * len(x))

In [33]:
clf = SGDClassifier(loss="log", penalty="l2", n_iter=50)
clf.fit(x[:trainsize], y[:trainsize])

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='log', n_iter=50, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [34]:
testCorrect(trainsize, x, y)

171 of 198
0.863636363636


In [35]:
getTopFeatures(clf, 10)

(6.0694348355002958, 't100firsttower')
(3.8255868820643735, 't200:p10:spell232')
(2.8220233327997537, 't200:p9:champ75')
(2.7126666703425379, 't100:p5:champ244')
(2.4582815256772341, 't200:p7:champ202')
(2.3320826863313511, 't200:p6:champ41')
(2.2738707524524626, 't100:p3:champ104')
(2.2168316264238834, 't200:p8:champ59')
(2.2016336899961946, 't100:p1:champ420')
(2.1587471009361328, 't200:p8:champ222')


In [36]:
getWorstFeatures(clf, 10)

(-4.1221418926230529, 't200firsttower')
(-2.8426305444352851, 't100firstblood')
(-2.646924995639893, 't200:p9:champ91')
(-2.4515796696460979, 't100:p1:champ237')
(-2.4170192328829945, 't100:p4:spell22')
(-2.1920659162433256, 't100:p1:champ40')
(-2.0185531143594395, 't200:p10:champ132')
(-2.0002442498575821, 't100:p2:champ75')
(-1.991718603711496, 't100:p4:champ130')
(-1.9072903915780279, 't100:p1:champ201')


# SVM

In [37]:
clf = SVC(kernel="linear")
clf.fit(x, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [38]:
testCorrect(trainsize, x, y)

198 of 198
1.0


In [39]:
getTopFeatures(clf, 10)

(1.3418352903279653, 't100firsttower')
(0.89170317545111133, 't200:p7:champ202')
(0.66010965767307306, 't100:p3:champ104')
(0.65505864519926926, 't100:p5:champ244')
(0.63362494884247922, 't200:p9:champ75')
(0.62968350720651989, 't200:p10:spell232')
(0.60699710069900403, 't200:p6:champ201')
(0.59968610631808938, 't200:p7:champ27')
(0.59132766970157635, 't200:p8:champ222')
(0.5655781457316349, 't100:p1:champ47')
