In [104]:
import pandas as pd

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Lars, ElasticNet, Lasso, Ridge, BayesianRidge
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

import math
import numpy as np
import pygal
import os
from IPython.display import display, HTML
%matplotlib inline

base_html = """
<!DOCTYPE html>
<html>
  <head>
  <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
  <script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js""></script>
  </head>
  <body>
    <figure>
      {rendered_chart}
    </figure>
  </body>
</html>
"""

In [105]:

df=pd.read_csv('./csv/data.csv')

In [106]:
df["gameDuration"]

0      1411
1      1506
2      2267
3      1707
4       759
       ... 
253    1215
254    1082
255    1297
256    1450
257    1203
Name: gameDuration, Length: 258, dtype: int64

In [107]:
def plot_hist(df,value,route=''):
    info=df[value].describe()
    r=info["max"]-info["min"]
    k=math.ceil(1+3.322*math.log10(info["count"]))
    a=int(round(r/k))
    ranges=[]
    for i in range(int(info["min"]),int(info["max"]+2),a):
        ranges.append(i)
    aux=np.histogram(df[value], bins=ranges)
    histograma=[]
    for i in range(len(aux[0])):
        histograma.append([aux[0][i],aux[1][i],aux[1][i+1]])
    hist = pygal.Histogram()
    hist.add(value,histograma)
    if route!='':
        return hist.render_to_png(route)
    display(HTML(base_html.format(rendered_chart=hist.render(is_unicode=True))))

In [108]:
plot_hist(df[df["gameDuration"]>600],"gameDuration")

In [109]:
df=df[df["gameDuration"]>600]

In [110]:
for i, j in zip(df.isna().sum().index, df.isna().sum()):
    print(i,j)

gameId 0
platformId 0
gameCreation 0
gameDuration 0
queueId 0
mapId 0
seasonId 0
gameVersion 0
gameMode 0
gameType 0
teamId 0
win 0
firstBlood 0
firstTower 0
firstInhibitor 0
firstBaron 0
firstDragon 0
firstRiftHerald 0
towerKills 0
inhibitorKills 0
baronKills 0
dragonKills 0
vilemawKills 0
riftHeraldKills 0
dominionVictoryScore 0
participantId 0
championId 0
spell1Id 0
spell2Id 0
item0 0
item1 0
item2 0
item3 0
item4 0
item5 0
item6 0
kills 0
deaths 0
assists 0
largestKillingSpree 0
largestMultiKill 0
killingSprees 0
longestTimeSpentLiving 0
doubleKills 0
tripleKills 0
quadraKills 0
pentaKills 0
unrealKills 0
totalDamageDealt 0
magicDamageDealt 0
physicalDamageDealt 0
trueDamageDealt 0
largestCriticalStrike 0
totalDamageDealtToChampions 0
magicDamageDealtToChampions 0
physicalDamageDealtToChampions 0
trueDamageDealtToChampions 0
totalHeal 0
totalUnitsHealed 0
damageSelfMitigated 0
damageDealtToObjectives 0
damageDealtToTurrets 0
visionScore 0
timeCCingOthers 0
totalDamageTaken 0
magic

In [111]:
tgt="gameDuration"
ls_pred=["kills","deaths","assists","turretKills","totalDamageDealt",
"magicDamageDealt",
"physicalDamageDealt",
"trueDamageDealt",
"wardsPlaced",
"wardsKilled",
"totalDamageTaken",
"magicalDamageTaken",
"physicalDamageTaken",
"trueDamageTaken",
"totalDamageDealt",
"magicDamageDealt",
"physicalDamageDealt",
"trueDamageDealt",
"firstBlood",
"firstTower",
"firstInhibitor",
"firstBaron",
"firstDragon",
"firstRiftHerald",
"towerKills",
"inhibitorKills",
"baronKills",
"dragonKills",
"riftHeraldKills",
"largestKillingSpree",
"largestMultiKill",
"killingSprees",
"longestTimeSpentLiving",
"doubleKills",
"tripleKills",
"quadraKills",
"pentaKills"]

In [112]:
X_train, X_test, y_train, y_test = train_test_split(df[ls_best], df[tgt], test_size=0.33, random_state=42)

In [113]:
reg_grid = {
    'copy_X':[True,False],
    'normalize':[True,False]
}

In [114]:
linReg = LinearRegression()
clf = GridSearchCV(linReg, reg_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2")
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Best score: 0.8789529734513368


In [115]:
from sklearn.feature_selection import SelectKBest, f_regression

In [116]:
kb = SelectKBest(k=10, score_func=f_regression)

In [117]:
kb.fit(X = df[ls_pred], y = df[tgt])

SelectKBest(k=10, score_func=<function f_regression at 0x7f91cc9af050>)

In [118]:
ls_best = [x for x, y in zip(ls_pred, kb.get_support()) if y]

In [119]:
ls_best

['deaths',
 'assists',
 'totalDamageDealt',
 'wardsPlaced',
 'totalDamageTaken',
 'magicalDamageTaken',
 'physicalDamageTaken',
 'trueDamageTaken',
 'totalDamageDealt',
 'baronKills']

In [120]:
rid_grid = {
    'alpha': [.01,.02,.05,.08,.09,.003 ,.004 ,.05, .3, .5, .8 ,.08 ,.09],
    'copy_X':[True,False],
    'normalize':[True,False],
    'solver':['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
}

In [121]:
ridgereg = Ridge()
clf = GridSearchCV(ridgereg, rid_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2")
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Best score: 0.8807001990009352




In [122]:
las_reg = {
    'alpha': [.01,.02,.003 ,.004 ,.05, .3, .5, .8 ,.08 ,.09],
    'precompute': [True,False],
    'selection': ["random","cyclic"],
    'positive': [True,False],
    'warm_start': [True,False]
}

In [123]:
lassreg = Lasso()
clf = GridSearchCV(lassreg, las_reg, cv=4, error_score=-1000, n_jobs=-1, scoring="r2")
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Best score: 0.8823774894236531


In [132]:
elas_net = {
    'alpha': [1.0,.01,.02,.003 ,.004 ,.05, .3, .5, .8 ,.08 ,.09],
    'fit_intercept': [True,False],
    'precompute': [True,False],
    'selection': ["random","cyclic"],
    'positive': [True,False],
    'warm_start': [True,False],
    'random_state': [True,False]
}

In [133]:
elasnet = ElasticNet()
clf = GridSearchCV(elasnet, elas_net, cv=4, error_score=-1000, n_jobs=-1, scoring="r2")
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Best score: 0.8822642761103556




In [134]:
clf.best_estimator_

ElasticNet(alpha=0.003, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=True, precompute=True,
           random_state=False, selection='random', tol=0.0001, warm_start=True)

In [135]:
elasnet = ElasticNet()
elasnet.fit(X_train, y_train)

  positive)


ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [136]:
elasnet.score(X_train, y_train)

0.8959855040729043