## Importamos Pandas para trabajar con DataFrames

In [1]:
import pandas as pd

## Leemos el fichero en un DataFrame

In [2]:
df = pd.read_excel('C:\\Users\\david\\Downloads\\2018-spring-match-data-OraclesElixir-2018-05-02.xlsx')

### Vemos qué columnas tiene

In [3]:
df.columns

Index(['gameid', 'url', 'league', 'split', 'date', 'week', 'game', 'patchno',
       'playerid', 'side', 'position', 'player', 'team', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'k', 'd', 'a',
       'teamkills', 'teamdeaths', 'doubles', 'triples', 'quadras', 'pentas',
       'fb', 'fbassist', 'fbvictim', 'fbtime', 'kpm', 'okpm', 'ckpm', 'fd',
       'fdtime', 'teamdragkills', 'oppdragkills', 'elementals',
       'oppelementals', 'firedrakes', 'waterdrakes', 'earthdrakes',
       'airdrakes', 'elders', 'oppelders', 'herald', 'heraldtime', 'ft',
       'fttime', 'firstmidouter', 'firsttothreetowers', 'teamtowerkills',
       'opptowerkills', 'fbaron', 'fbarontime', 'teambaronkills',
       'oppbaronkills', 'dmgtochamps', 'dmgtochampsperminute', 'dmgshare',
       'earnedgoldshare', 'wards', 'wpm', 'wardshare', 'wardkills', 'wcpm',
       'visionwards', 'visionwardbuys', 'visiblewardclearrate',
       'invisiblewardclearrate', 'totalgold', 'earnedg

### Quitamos la liga China porque no da información

In [4]:
df2 = df.loc[df.league != 'LPL']

### Nos quedamos sólo con datos de los jugadores y no agregados de equipos

In [5]:
not_team_df = df2.loc[df.player != 'Team']

### Quitamos algunas columnas que no valen

In [6]:
not_team_df.drop(['url', 'league', 'split', 'date', 'week', 'game'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


### Empezamos a quedarnos con lo que nos interesa

#### Ordenamos los datos un poco

In [102]:
df3 = not_team_df.sort_values(by=['gameid', 'playerid'])

#### Creamos un diccionario con los picks de cada partida 

In [80]:
dict1 = {gameid: list(df3.loc[df3.gameid == gameid]['champion']) for gameid in df3.gameid.unique()}

#### Transformamos ese diccionario en un DataFrame

In [88]:
df4 = pd.concat([pd.DataFrame(dict1[gameid]).T for gameid in dict1.keys()])

#### Añadimos el gameId como columna

In [90]:
df4['gameid'] = dict1.keys()

#### Ordenamos las columnas y nos quedamos con las necesarias

In [96]:
df5 = df4[['gameid', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]

In [112]:
df3.ft = df3.ft.astype(int)

## Machine Learning

### Preparamos los datos para los modelos de aprendizaje automático

In [122]:
df_train = df3[['gameid', 'ft']].merge(df5, left_on='gameid', right_on='gameid')[::10].set_index('gameid')

In [163]:
df_train

Unnamed: 0_level_0,ft,0,1,2,3,4,5,6,7,8,9
gameid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
210112,1,Camille,Olaf,Orianna,Kai'Sa,Shen,Ornn,Skarner,Ryze,Ezreal,Tahm Kench
210117,1,Gnar,Olaf,Taliyah,Ezreal,Braum,Gangplank,Skarner,Zoe,Caitlyn,Taric
210118,0,Ornn,Skarner,Taliyah,Caitlyn,Alistar,Camille,Sejuani,Orianna,Ashe,Thresh
210119,1,Gangplank,Camille,Cassiopeia,Kai'Sa,Shen,Vladimir,Sejuani,Ryze,Xayah,Rakan
210133,1,Renekton,Olaf,Taliyah,Jhin,Thresh,Swain,Trundle,Azir,Caitlyn,Braum
210140,1,Cho'gath,Zac,Azir,Tristana,Alistar,Sion,Olaf,Orianna,Kalista,Braum
210141,1,Ornn,Trundle,Ryze,Caitlyn,Morgana,Vladimir,Skarner,Azir,Ashe,Thresh
210148,0,Gnar,Trundle,Cassiopeia,Ezreal,Braum,Swain,Elise,Zoe,Kai'Sa,Alistar
210169,0,Shen,Trundle,Cassiopeia,Ezreal,Tahm Kench,Ornn,Elise,Azir,Kai'Sa,Thresh
210170,0,Ornn,Skarner,Azir,Vayne,Braum,Maokai,Zac,Vladimir,Kai'Sa,Tahm Kench


### Separamos objetivo de variables de entrenamiento

In [124]:
y = df_train['ft']

#### Transformamos variables categóricas en dummies 

In [129]:
X = pd.get_dummies(df_train.drop('ft', axis=1))

### Empezamos a probar resultados con distintos modelos

#### SVC

In [137]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score

In [154]:
model1 = SVC()

In [155]:
y_pred1 = cross_val_predict(model1, X, y)

In [156]:
accuracy_score(y_true=y, y_pred=y_pred1)

0.54298150163220893

#### MLP

In [139]:
from sklearn.neural_network import MLPClassifier

In [141]:
model2 = MLPClassifier()

In [142]:
y_pred2 = cross_val_predict(model2, X, y)



In [143]:
accuracy_score(y_true=y, y_pred=y_pred2)

0.5125136017410229

#### NB

In [145]:
from sklearn.naive_bayes import BernoulliNB

In [148]:
model3 = BernoulliNB()

In [149]:
y_pred3 = cross_val_predict(model3, X, y)

In [150]:
accuracy_score(y_true=y, y_pred=y_pred3)

0.55059847660500549

#### RF

In [158]:
from sklearn.ensemble import RandomForestClassifier

In [160]:
model4 = RandomForestClassifier()

In [161]:
y_pred4 = cross_val_predict(model4, X, y)

In [162]:
accuracy_score(y_true=y, y_pred=y_pred4)

0.50924918389553864