# Word2Vec Encoding of Champions

1. Import modules and load data
2. Create word2vec training set of champ pairs
3. Convert to OHE (and possible downsample for memory reasons)
4. Train neural net
5. Get weights from neural net
6. Encode champs with weights from neural net and train neural net to predict match outcomes

### 1. Import modules and load data

In [1]:
import keras
from keras.layers import Dense
from keras.models import Sequential
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import os
import sys
sys.path.append('../src/')
# Project modules
import get_modeling_data
import model_evaluation.model_performance_functions as mpf
import features.win_rates as wr
import data_constants as dc
import warnings
warnings.filterwarnings('ignore')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train = get_modeling_data.get_train()
validation = get_modeling_data.get_validation()
train = train.fillna(0)
validation = validation.fillna(0)

### 2. Create word2vec training set of champ pairs

In [3]:
word2vec_train = pd.DataFrame({'input': [], 'output': []})
team_100_cols = ['100_TOP_SOLO', '100_MIDDLE_SOLO', '100_JUNGLE_NONE', '100_BOTTOM_DUO_CARRY', '100_BOTTOM_DUO_SUPPORT']
word2vec_temp = pd.DataFrame({'input': [], 'output': []})
blue_wins = train[train['team_100_win'] == 1]
for col1 in team_100_cols:
    for col2 in team_100_cols:
        if col1 != col2:
            word2vec_temp['input'] = blue_wins[col1]
            word2vec_temp['output'] = blue_wins[col2]
            word2vec_train = word2vec_train.append(word2vec_temp, ignore_index=True)

team_200_cols = ['200_TOP_SOLO', '200_MIDDLE_SOLO', '200_JUNGLE_NONE', '200_BOTTOM_DUO_CARRY', '200_BOTTOM_DUO_SUPPORT']
red_wins = train[train['team_100_win'] == 0]
for col1 in team_200_cols:
    for col2 in team_200_cols:
        if col1 != col2:
            word2vec_temp['input'] = blue_wins[col1]
            word2vec_temp['output'] = blue_wins[col2]
            word2vec_train = word2vec_train.append(word2vec_temp, ignore_index=True)
print(word2vec_train.shape)
print(word2vec_train.head())

(3386320, 2)
  input output
0  Jarv   Kata
1  Shac   Malz
2  Kled   Twis
3  Pant   Morg
4  Morg   Anni


### 3. Convert to OHE (and possible downsample for memory reasons)

In [4]:
w2v_ts = word2vec_train.sample(500000)

In [5]:
champs = dc.get_champs_four_letters()
for champ in champs:
    w2v_ts[champ + '_in'] = np.where(w2v_ts['input'] == champ, 1, 0)
    w2v_ts[champ + '_out'] = np.where(w2v_ts['output'] == champ, 1, 0)

### 4. Train neural net

In [6]:
in_cols = [x for x in w2v_ts.columns if x[-3:] == '_in']
out_cols = [x for x in w2v_ts.columns if x[-4:] == '_out']

In [7]:
model = Sequential()
model.add(Dense(10, activation='sigmoid', input_shape=(w2v_ts[in_cols].shape[1],)))
model.add(Dense(w2v_ts[out_cols].shape[1]))
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(w2v_ts[in_cols], w2v_ts[out_cols])

Epoch 1/1


<keras.callbacks.History at 0xfac1824828>

In [8]:
#train_pred = model.predict(w2v_ts[in_cols])

### 5. Get weights from neural net

In [9]:
weights = pd.DataFrame(model.get_layer(index=0).get_weights()[0], index=champs)
weights.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Aatr,-0.089569,-0.084701,0.179452,-0.097304,-0.005922,0.150035,0.078898,-0.041835,-0.005252,-0.141666
Ahri,0.004493,-0.01336,0.123808,0.128273,0.005684,-0.127973,-0.112503,-0.131317,-0.2098,0.038655
Akal,-0.184045,-0.192603,0.025008,-0.035363,0.117062,0.155325,0.028803,0.065332,0.141745,0.147642
Alis,0.107325,-0.026489,0.103717,0.012351,0.010533,-0.121635,0.093064,0.184175,-0.181098,-0.049597
Amum,0.028132,-0.039928,0.195703,-0.108458,0.10381,0.036709,0.100832,0.179343,-0.086603,-0.154195


In [10]:
weights.to_csv('../data/interim/w2vec_weights.csv')

### Encode champs with weights from neural net and train neural net to predict match outcomes

In [11]:
weights = pd.read_csv('../data/interim/w2vec_weights.csv', index_col=0)

In [12]:
champ_cols = ['100_TOP_SOLO', '100_MIDDLE_SOLO', '100_JUNGLE_NONE', '100_BOTTOM_DUO_CARRY', '100_BOTTOM_DUO_SUPPORT',
                              '200_TOP_SOLO', '200_MIDDLE_SOLO', '200_JUNGLE_NONE', '200_BOTTOM_DUO_CARRY', '200_BOTTOM_DUO_SUPPORT']
for col in champ_cols:
    weights.columns = [col + '_' + str(x) for x in range(weights.shape[1])]
    train = pd.merge(train, weights, how='left', left_on=col, right_index=True)
    validation = pd.merge(validation, weights, how='left', left_on=col, right_index=True)

In [13]:
encoded_cols = [x + '_0' for x in champ_cols] + [x + '_1' for x in champ_cols] \
               + [x + '_2' for x in champ_cols] + [x + '_3' for x in champ_cols] + [x + '_4' for x in champ_cols]
X_train = train[encoded_cols]
X_validation = validation[encoded_cols]
Y_train = train['team_100_win']
Y_validation = validation['team_100_win']

In [16]:
model = Sequential()
model.add(Dense(100, activation='sigmoid', input_shape=(X_train.shape[1],)))
model.add(Dense(50, activation='sigmoid'))
model.add(Dense(20, activation='sigmoid'))
model.add(Dense(10, activation='sigmoid'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train)
train_pred = model.predict(X_train)
print("Train ks and gini: " + str(mpf.ks_gini(Y_train, train_pred[:,0])))
print("Train accuracy: " + str(mpf.correct_prediction_rate(Y_train, train_pred[:,0])))
valid_pred = model.predict(X_validation)
print("Validation ks and gini: " + str(mpf.ks_gini(Y_validation, valid_pred[:,0])))
print("Validation accuracy: " + str(mpf.correct_prediction_rate(Y_validation, valid_pred[:,0])))

Epoch 1/1
Train ks and gini: {'ks': 0.03996083579904153, 'gini': 0.027794769427040136}
Train accuracy: 0.5057798743607108
Validation ks and gini: {'ks': 0.04120129507526804, 'gini': 0.02795874408253728}
Validation accuracy: 0.5080307568353388


In [15]:
thresholds = [x/100 for x in range(40, 60)]
for t in thresholds:
    print("Train accuracy: " + str(mpf.correct_prediction_rate(Y_train, train_pred[:,0], t)))
    print("Validation accuracy: " + str(mpf.correct_prediction_rate(Y_validation, valid_pred[:,0], t)))

Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.4942551551808692
Validation accuracy: 0.4919692431646612
Train accuracy: 0.49427850820858926
Validation accuracy: 0.4920042737288284
Train accuracy: 0.49493239298475045
Validation accuracy: 0.49231954880633355
Train accuracy: 0.4989724667803181
Validation accuracy: 0.4951044786576288
Train accuracy: 0.5078991616263049
Validation accuracy: 0.50605152995989
Train accuracy: 0.5183204502463744
Validation accuracy: 0.5181721051617536
Train accuracy: 0.5174914177623129
Validation accuracy: 0.5195733277284431
Train accuracy: 0.508325