# Importamos Pandas para trabajar con conjuntos de datos en formato DataFrame 

In [1]:
import pandas as pd

# Leemos los datos que hemos descargado de Oracle's Elixir

In [255]:
df = pd.read_excel('2018-spring-match-data-OraclesElixir-2018-05-02.xlsx')

## Eliminamos las filas de la LPL por falta de información y nos quedamos sólo con las estadísticas de equipos

In [3]:
df2 = df.loc[df.league != 'LPL']

In [4]:
df3 = df2.loc[df.player == 'Team']

## Debido a que los identificadores de partida no son únicos y hay ligas que los comparten, creamos un identificador único a partir del nombre de la liga y los identificadores de partida

In [165]:
df3['gameid_league'] = df3.gameid.astype(str) + '_' + df3.league

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


# Empezamos a seleccionar los datos deseados que son: Win Rate (WR), WR Azul, WR Rojo, Media de Oro al 15, Media de Kills por minuto

In [166]:
df4 = df3.groupby('team').mean()

In [167]:
dict1= {}
dict1['win_rate'] = df4.result

In [168]:
df5 = df3.groupby(['team', 'side']).mean()

In [169]:
df6 = df5.reset_index().set_index('team')

In [170]:
df7 = df6.loc[df6.side == 'Blue']
df8 = df6.loc[df6.side == 'Red']

In [171]:
dict1['wr_blue'] = df7.result
dict1['wr_red'] = df8.result

In [172]:
dict1['gold_at_15'] = df4.goldat15

In [173]:
dict1['kills_per_game_lenght'] = df4.teamkills / df4.gamelength

In [174]:
team_data_pool = pd.DataFrame(dict1).reset_index()

In [175]:
df9 = df3.groupby(['gameid_league', 'team', 'side']).max()

In [176]:
df10 = df3.merge(team_data_pool, left_on='team', right_on='team')[['gameid_league', 'team', 'side', 'gold_at_15', 'kills_per_game_lenght', 'win_rate', 'wr_blue', 'wr_red', 'result']]

In [177]:
df11 = df10.groupby(['gameid_league', 'team', 'side']).max()

In [178]:
df12 = df11.reset_index().sort_values(['gameid_league', 'side'])

In [179]:
df13 = df12[::2].reset_index(drop=True)
df14 = df12[1::2].reset_index(drop=True)
df13.columns = [column + '_blue' for column in df13.columns]
df14.columns = [column + '_red' for column in df14.columns]

In [180]:
df15 = pd.concat([df13, df14], axis=1)

In [324]:
df16 = df15.drop(['gameid_league_red', 'side_blue', 'side_red', 'result_red'], axis=1)
df_result = df16.set_index(['gameid_league_blue', 'team_blue', 'team_red'])

# [OPCIONAL] Añadimos las composiciones de campeones

#### Leemos los datos del otro Notebook 

In [192]:
%store -r df_train

In [199]:
df_train.drop('ft', axis=1, inplace=True)

In [222]:
df_train = pd.get_dummies(df_train)

In [223]:
df17 = df16.merge(df_train.reset_index(), left_on='gameid_league_blue', right_on='gameid_league').drop('gameid_league_blue', axis=1)

In [224]:
df_result = df17.set_index(['gameid_league', 'team_blue', 'team_red'])

## Separamos predictores de resultados

In [325]:
X = df_result.drop('result_blue', axis=1)

In [326]:
y = df_result.result_blue.to_frame()

# Empezamos a predecir con modelos de ML 

In [327]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import accuracy_score

In [228]:
model1 = SVC()

In [229]:
y_pred1 = cross_val_predict(model1, X, y)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [230]:
accuracy_score(y_true=y, y_pred=y_pred1)

0.588683351468988

In [231]:
from sklearn.neural_network import MLPClassifier

In [232]:
model2 = MLPClassifier()

In [251]:
y_pred2 = cross_val_predict(model2, X, y)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [252]:
accuracy_score(y_true=y, y_pred=y_pred2)

0.5059847660500544

In [235]:
from sklearn.naive_bayes import BernoulliNB

In [236]:
model3 = BernoulliNB()

In [237]:
y_pred3 = cross_val_predict(model3, X, y)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [238]:
accuracy_score(y_true=y, y_pred=y_pred3)

0.5310119695321001

In [328]:
from sklearn.ensemble import RandomForestClassifier

In [329]:
model4 = RandomForestClassifier()

In [330]:
y_pred4 = cross_val_predict(model4, X, y)

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)


In [331]:
accuracy_score(y_true=y, y_pred=y_pred4)

0.6507072905331882

# Simulación 

### Preparamos los datos de los equipos

In [304]:
fnc = team_data_pool.loc[team_data_pool.team == 'Fnatic'].reset_index(drop=True)

In [305]:
fw = team_data_pool.loc[team_data_pool.team == 'Flash Wolves'].reset_index(drop=True)

In [306]:
tl = team_data_pool.loc[team_data_pool.team == 'Team Liquid'].reset_index(drop=True)

In [307]:
kz = team_data_pool.loc[team_data_pool.team == 'Kingzone DragonX'].reset_index(drop=True)

### Generamos el conjunto de datos para el modelo 

In [338]:
msi_games_dict = {'FNC-FW': pd.concat([fnc, fw], axis=1), 
                  'TL-FNC': pd.concat([tl, fnc], axis=1), 
                  'FNC-KZ': pd.concat([fw, tl], axis=1), 
                  'FW-TL': pd.concat([fw, tl], axis=1), 
                  'FW-KZ': pd.concat([fw, kz], axis=1), 
                  'FNC-TL': pd.concat([fnc, tl], axis=1), 
                  'FW-KZ_semis': pd.concat([fw, kz], axis=1)}

In [340]:
df18 = pd.concat([data for game, data in msi_games_dict.items()])

In [341]:
df18.drop('team', axis=1, inplace=True)

### Hacemos las predicciones

In [408]:
model = RandomForestClassifier(n_estimators=10)

In [409]:
model.fit(X=X, y=y)

  """Entry point for launching an IPython kernel.


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [410]:
simul_pred = list(model.predict(df18))

In [411]:
simul_real = [1, 1, 0, 0, 1, 1, 0]

In [412]:
simul_pred

[0, 0, 1, 1, 0, 1, 0]