# Word2Vec Encoding of Champions

1. Import modules and load data
2. Create word2vec training set of champ pairs
3. Convert to OHE (and possible downsample for memory reasons)
4. Train neural net
5. Get weights from neural net
6. Encode champs with weights from neural net and train neural net to predict match outcomes

### 1. Import modules and load data

In [1]:
import keras
from keras.layers import Dense
from keras.models import Sequential
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import os
import sys
sys.path.append('../src/')
# Project modules
import get_modeling_data
import model_evaluation.model_performance_functions as mpf
import features.win_rates as wr
import data_constants as dc
import warnings
warnings.filterwarnings('ignore')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train = get_modeling_data.get_train()
validation = get_modeling_data.get_validation()
train = train.fillna(0)
validation = validation.fillna(0)

### 2. Create word2vec training set of champ pairs

In [3]:
word2vec_train = pd.DataFrame({'input': [], 'output': []})
team_100_cols = ['100_TOP_SOLO', '100_MIDDLE_SOLO', '100_JUNGLE_NONE', '100_BOTTOM_DUO_CARRY', '100_BOTTOM_DUO_SUPPORT']
word2vec_temp = pd.DataFrame({'input': [], 'output': []})
blue_wins = train[train['team_100_win'] == 1]
for col1 in team_100_cols:
    for col2 in team_100_cols:
        if col1 != col2:
            word2vec_temp['input'] = blue_wins[col1]
            word2vec_temp['output'] = blue_wins[col2]
            word2vec_train = word2vec_train.append(word2vec_temp, ignore_index=True)

team_200_cols = ['200_TOP_SOLO', '200_MIDDLE_SOLO', '200_JUNGLE_NONE', '200_BOTTOM_DUO_CARRY', '200_BOTTOM_DUO_SUPPORT']
red_wins = train[train['team_100_win'] == 0]
for col1 in team_200_cols:
    for col2 in team_200_cols:
        if col1 != col2:
            word2vec_temp['input'] = blue_wins[col1]
            word2vec_temp['output'] = blue_wins[col2]
            word2vec_train = word2vec_train.append(word2vec_temp, ignore_index=True)
print(word2vec_train.shape)
print(word2vec_train.head())

(3386320, 2)
  input output
0  Jarv   Kata
1  Shac   Malz
2  Kled   Twis
3  Pant   Morg
4  Morg   Anni


### 3. Convert to OHE (and possible downsample for memory reasons)

In [4]:
w2v_ts = word2vec_train.sample(500000)

In [5]:
champs = dc.get_champs_four_letters()
for champ in champs:
    w2v_ts[champ + '_in'] = np.where(w2v_ts['input'] == champ, 1, 0)
    w2v_ts[champ + '_out'] = np.where(w2v_ts['output'] == champ, 1, 0)

### 4. Train neural net

In [6]:
in_cols = [x for x in w2v_ts.columns if x[-3:] == '_in']
out_cols = [x for x in w2v_ts.columns if x[-4:] == '_out']

In [7]:
model = Sequential()
model.add(Dense(5, activation='sigmoid', input_shape=(w2v_ts[in_cols].shape[1],)))
model.add(Dense(w2v_ts[out_cols].shape[1]))
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(w2v_ts[in_cols], w2v_ts[out_cols])

Epoch 1/1


<keras.callbacks.History at 0x6602fdaa20>

In [8]:
train_pred = model.predict(w2v_ts[in_cols])

### 5. Get weights from neural net

In [37]:
weights = pd.DataFrame(model.get_layer(index=0).get_weights()[0], index=champs)
weights.head()

Unnamed: 0,0,1,2,3,4
Aatr,-0.045449,0.193867,-0.143152,0.04702,-0.166744
Ahri,-0.017459,0.116397,-0.076675,-0.122162,0.126722
Akal,-0.057801,-0.210331,0.026305,-0.005185,0.01128
Alis,0.161005,0.01236,0.053184,-0.009253,-0.082782
Amum,-0.115155,-0.220798,-0.125745,-0.110866,-0.036024


In [36]:
weights.to_csv('../data/interim/w2vec_weights.csv')

### Encode champs with weights from neural net and train neural net to predict match outcomes

In [38]:
weights = pd.read_csv('../data/interim/w2v')

Unnamed: 0,match_id,game_version,queue_id,game_duration,team_100_win,100_TOP_SOLO,100_JUNGLE_NONE,100_MIDDLE_SOLO,100_BOTTOM_DUO_CARRY,100_BOTTOM_DUO_SUPPORT,...,100_BOTTOM_DUO_SUPPORT_JUNGLE_NONE_wr,200_BOTTOM_DUO_SUPPORT_JUNGLE_NONE_wr,100_JUNGLE_NONE_TOP_SOLO_wr,200_JUNGLE_NONE_TOP_SOLO_wr,100_JUNGLE_NONE_MIDDLE_SOLO_wr,200_JUNGLE_NONE_MIDDLE_SOLO_wr,100_JUNGLE_NONE_BOTTOM_DUO_CARRY_wr,200_JUNGLE_NONE_BOTTOM_DUO_CARRY_wr,100_JUNGLE_NONE_BOTTOM_DUO_SUPPORT_wr,200_JUNGLE_NONE_BOTTOM_DUO_SUPPORT_wr
1,2726789592,8.4.218.8787,420,1539,0,Rene,Mast,Fizz,Cait,Lux,...,0.475588,0.527778,0.512702,0.567376,0.557813,0.525316,0.518097,0.506106,0.475588,0.527778
2,2726784050,8.4.218.8787,420,1380,0,Kled,Reng,Ryze,Varu,Sora,...,0.511156,0.5,0.431373,0.508584,0.424324,0.520646,0.497542,0.512665,0.511156,0.5
5,2725983971,8.4.218.8787,420,2021,1,Jarv,Malp,Kata,Jinx,Thre,...,0.516129,0.61,0.2,0.436364,0.475,0.507042,0.494624,0.489971,0.516129,0.61
6,2725894847,8.4.218.8787,400,2071,1,Shac,Lee,Malz,Varu,Zile,...,0.44186,0.413462,0.478261,0.48,0.439883,0.333333,0.470056,0.485294,0.44186,0.413462
7,2725869066,8.4.218.8787,420,1345,1,Kled,Mast,Twis,Tris,Tari,...,0.565062,0.465021,0.538462,0.45283,0.526906,0.418831,0.51986,0.443418,0.565062,0.465021


In [60]:
champ_cols = ['100_TOP_SOLO', '100_MIDDLE_SOLO', '100_JUNGLE_NONE', '100_BOTTOM_DUO_CARRY', '100_BOTTOM_DUO_SUPPORT',
                              '200_TOP_SOLO', '200_MIDDLE_SOLO', '200_JUNGLE_NONE', '200_BOTTOM_DUO_CARRY', '200_BOTTOM_DUO_SUPPORT']
for col in champ_cols:
    print(col)
    weights.columns = [col + '_0', col + '_1', col + '_2', col + '_3', col + '_4']
    train = pd.merge(train, weights, how='left', left_on=col, right_index=True)
    validation = pd.merge(validation, weights, how='left', left_on=col, right_index=True)

100_TOP_SOLO
100_MIDDLE_SOLO
100_JUNGLE_NONE
100_BOTTOM_DUO_CARRY
100_BOTTOM_DUO_SUPPORT
200_TOP_SOLO
200_MIDDLE_SOLO
200_JUNGLE_NONE
200_BOTTOM_DUO_CARRY
200_BOTTOM_DUO_SUPPORT


In [63]:
encoded_cols = [x + '_0' for x in champ_cols] + [x + '_1' for x in champ_cols] \
               + [x + '_2' for x in champ_cols] + [x + '_3' for x in champ_cols] + [x + '_4' for x in champ_cols]


In [64]:
model = Sequential()
model.add(Dense(100, activation='sigmoid', input_shape=(train[encoded_cols].shape[1],)))
model.add(Dense(50, activation='sigmoid'))
model.add(Dense(20, activation='sigmoid'))
model.add(Dense(10, activation='sigmoid'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(train[encoded_cols], Y_train)
train_pred = model.predict(train[encoded_cols])
print("Train ks and gini: " + str(mpf.ks_gini(Y_train, train_pred[:,0])))
print("Train accuracy: " + str(mpf.correct_prediction_rate(Y_train, train_pred[:,0])))
valid_pred = model.predict(X_validation6)
print("Validation ks and gini: " + str(mpf.ks_gini(Y_validation, valid_pred[:,0])))
print("Validation accuracy: " + str(mpf.correct_prediction_rate(Y_validation, valid_pred[:,0])))

['100_TOP_SOLO_0',
 '100_MIDDLE_SOLO_0',
 '100_JUNGLE_NONE_0',
 '100_BOTTOM_DUO_CARRY_0',
 '100_BOTTOM_DUO_SUPPORT_0',
 '200_TOP_SOLO_0',
 '200_MIDDLE_SOLO_0',
 '200_JUNGLE_NONE_0',
 '200_BOTTOM_DUO_CARRY_0',
 '200_BOTTOM_DUO_SUPPORT_0',
 '100_TOP_SOLO_1',
 '100_MIDDLE_SOLO_1',
 '100_JUNGLE_NONE_1',
 '100_BOTTOM_DUO_CARRY_1',
 '100_BOTTOM_DUO_SUPPORT_1',
 '200_TOP_SOLO_1',
 '200_MIDDLE_SOLO_1',
 '200_JUNGLE_NONE_1',
 '200_BOTTOM_DUO_CARRY_1',
 '200_BOTTOM_DUO_SUPPORT_1',
 '100_TOP_SOLO_2',
 '100_MIDDLE_SOLO_2',
 '100_JUNGLE_NONE_2',
 '100_BOTTOM_DUO_CARRY_2',
 '100_BOTTOM_DUO_SUPPORT_2',
 '200_TOP_SOLO_2',
 '200_MIDDLE_SOLO_2',
 '200_JUNGLE_NONE_2',
 '200_BOTTOM_DUO_CARRY_2',
 '200_BOTTOM_DUO_SUPPORT_2',
 '100_TOP_SOLO_3',
 '100_MIDDLE_SOLO_3',
 '100_JUNGLE_NONE_3',
 '100_BOTTOM_DUO_CARRY_3',
 '100_BOTTOM_DUO_SUPPORT_3',
 '200_TOP_SOLO_3',
 '200_MIDDLE_SOLO_3',
 '200_JUNGLE_NONE_3',
 '200_BOTTOM_DUO_CARRY_3',
 '200_BOTTOM_DUO_SUPPORT_3',
 '100_TOP_SOLO_4',
 '100_MIDDLE_SOLO_4',
 '100_J