In [1]:
import os
import sys
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))
from database import DB
from analysis import MLP, xyFeature
from analysis.feature import expand_N_last_game, num_win_N_last_game, win_rate_N_last_game
from utils import normal

In [2]:
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.max_columns', None)

In [3]:
db = DB()
db.initialise()

temp_df = db.get_table('view_set_match_info_concat')

In [4]:
col_to_use = ['league_name', 'year', 'season', 'date', 'set_number', 
              'team_1', 'team_2', 'corresponding_team',
              'wdl', 'side', 'game_length','team_kills',
              'team_deaths','team_baron_kills', 'team_dragon_kills',
              'team_rift_kills', 'team_tower_kills', 'team_inhib_kills',
             'team_total_gold', 'team_minion_kills']
df = temp_df[col_to_use]
df = df.dropna(subset=col_to_use)
#, 'top_player', 'jg_player', 'mid_player', 'bot_player', 'sup_player'

In [5]:
list(temp_df.columns)

['league_name',
 'year',
 'season',
 'date',
 'set_number',
 'team_1',
 'team_2',
 'corresponding_team',
 'tiebreaker',
 'match_round',
 'wdl',
 'side',
 'game_length',
 'top_player',
 'jg_player',
 'mid_player',
 'bot_player',
 'sup_player',
 'ban1',
 'ban2',
 'ban3',
 'ban4',
 'ban5',
 'team_kills',
 'team_deaths',
 'team_double',
 'team_triple',
 'team_quadra',
 'team_penta',
 'team_kpm',
 'team_first_blood',
 'team_first_baron',
 'team_first_dragon',
 'team_first_elder',
 'team_first_rift',
 'team_first_tower',
 'team_first_inhib',
 'team_first_midtower',
 'team_first_three_towers',
 'team_baron_kills',
 'team_dragon_kills',
 'team_elder_kills',
 'team_rift_kills',
 'team_tower_kills',
 'team_inhib_kills',
 'team_total_gold',
 'team_earned_gold',
 'team_minion_kills',
 'team_monster_kills',
 'team_goldat10',
 'team_goldat15',
 'team_golddiffat10',
 'team_golddiffat15',
 'team_csat10',
 'team_csat15',
 'team_csdiffat10',
 'team_csdiffat15']

In [6]:
gamelength_norm_cols = ['team_kills','team_deaths','team_baron_kills', 
                        'team_dragon_kills','team_rift_kills', 
                        'team_total_gold','team_minion_kills']
df[gamelength_norm_cols] = normal(df, gamelength_norm_cols, 'game_length')


min_max_cols = ['team_tower_kills', 'team_inhib_kills']
df[min_max_cols] = normal(df, min_max_cols, 'minmax')

In [7]:
df = expand_N_last_game(df, gamelength_norm_cols+min_max_cols, 3)
#df = expand_N_last_game(df, ['top_player', 'jg_player', 'mid_player', 'bot_player', 'sup_player', 'side'], 1)

In [8]:
df.head()

Unnamed: 0,league_name,year,season,date,set_number,team_1,team_2,corresponding_team,wdl,side,game_length,team_kills,team_deaths,team_baron_kills,team_dragon_kills,team_rift_kills,team_tower_kills,team_inhib_kills,team_total_gold,team_minion_kills,team_kills__1,team_deaths__1,team_baron_kills__1,team_dragon_kills__1,team_rift_kills__1,team_total_gold__1,team_minion_kills__1,team_tower_kills__1,team_inhib_kills__1,team_kills__2,team_deaths__2,team_baron_kills__2,team_dragon_kills__2,team_rift_kills__2,team_total_gold__2,team_minion_kills__2,team_tower_kills__2,team_inhib_kills__2,team_kills__3,team_deaths__3,team_baron_kills__3,team_dragon_kills__3,team_rift_kills__3,team_total_gold__3,team_minion_kills__3,team_tower_kills__3,team_inhib_kills__3
0,BL,2020,spring,2020-01-21,1,Sector One,Aethra,Sector One,W,blue,1611.0,0.819367,0.297952,0.037244,0.111732,0.037244,1.0,0.1875,1977.653631,23.836127,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,BL,2020,spring,2020-01-21,1,Sector One,Timeout Esports,Sector One,W,blue,1887.0,0.540541,0.476948,0.063593,0.063593,0.031797,0.909091,0.1875,1869.63434,23.879173,0.819367,0.297952,0.037244,0.111732,0.037244,1977.653631,23.836127,1.0,0.1875,,,,,,,,,,,,,,,,,,
2,BL,2020,spring,2020-01-28,1,RSCA,Sector One,Sector One,W,red,1598.0,1.314143,0.375469,0.037547,0.075094,0.075094,0.909091,0.125,2147.684606,22.866083,0.540541,0.476948,0.063593,0.063593,0.031797,1869.63434,23.879173,0.909091,0.1875,0.819367,0.297952,0.037244,0.111732,0.037244,1977.653631,23.836127,1.0,0.1875,,,,,,,,,
3,BL,2020,spring,2020-02-04,1,KV Mechelen,Sector One,Sector One,W,blue,1465.0,0.737201,0.327645,0.040956,0.122867,0.081911,0.909091,0.125,1998.634812,25.96587,1.314143,0.375469,0.037547,0.075094,0.075094,2147.684606,22.866083,0.909091,0.125,0.540541,0.476948,0.063593,0.063593,0.031797,1869.63434,23.879173,0.909091,0.1875,0.819367,0.297952,0.037244,0.111732,0.037244,1977.653631,23.836127,1.0,0.1875
4,BL,2020,spring,2020-02-11,1,Brussels Guardians,Sector One,Sector One,W,red,1252.0,1.102236,0.239617,0.0,0.095847,0.047923,0.818182,0.125,2103.833866,26.166134,0.737201,0.327645,0.040956,0.122867,0.081911,1998.634812,25.96587,0.909091,0.125,1.314143,0.375469,0.037547,0.075094,0.075094,2147.684606,22.866083,0.909091,0.125,0.540541,0.476948,0.063593,0.063593,0.031797,1869.63434,23.879173,0.909091,0.1875


In [9]:
df = num_win_N_last_game(df, rolling_window=3)

In [10]:
df = win_rate_N_last_game(df, rolling_window=3)
df = win_rate_N_last_game(df, opponent=True)

In [11]:
df.shape

(23464, 50)

In [12]:
df = df.dropna()

In [13]:
drop_cols = ['league_name', 'year', 'season', 'date', 'set_number', 
             'team_1', 'team_2' ,'corresponding_team', 'game_length',
            'team_kills', 'team_deaths', 'team_baron_kills', 
            'team_dragon_kills', 'team_rift_kills', 'team_total_gold',
            'team_minion_kills', 'team_tower_kills', 'team_inhib_kills']
#cat_cols = ['side', 'top_player', 'jg_player', 'mid_player', 'bot_player', 'sup_player']
cat_cols = ['side']

y_col = ['wdl']

cont_cols = list(set(list(df.columns)) - set(drop_cols) - set(cat_cols) - set(y_col))

In [14]:
xy = xyFeature(df, cat_cols, cont_cols, y_col, gpu=False)
cats, conts, y = xy.prepare_XY()
emb_szs, n_cont, out_sz = xy.get_size()

In [15]:
torch.manual_seed(327)
model = MLP(emb_szs, n_cont, out_sz, [200, 100, 100], p=0.5)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [16]:
#batch_size = temp_df.shape[0]
train_size = df[df['year']!=2020].shape[0]
val_size = df[(df['year']==2020) & (df['season']=='spring')].shape[0]
test_size = df[(df['year']==2020) & (df['season']=='summer')].shape[0]

cat_train = cats[:train_size]
con_train = conts[:train_size]
y_train = y[:train_size]

cat_valid = cats[train_size: train_size+val_size]
con_valid = conts[train_size: train_size+val_size]
y_valid = y[train_size: train_size+val_size]

cat_test = cats[train_size+val_size: train_size+val_size+test_size]
con_test = conts[train_size+val_size: train_size+val_size+test_size]
y_test = y[train_size+val_size: train_size+val_size+test_size]

In [17]:
print(train_size, val_size, test_size)

12999 2295 2597


In [18]:
import time
start_time = time.time()

epochs = 10000
costs = []
val_costs = []
count_epsilon = 0

for epoch in range(epochs):
    y_pred = model.forward(cat_train, con_train)
    cost = criterion(y_pred, y_train)
    costs.append(cost)
    
    if epoch % 100 == 0:
        print(f'epoch: {epoch}, cost: {cost:.8f}')
        
        with torch.no_grad():
            y_pred = model.forward(cat_valid, con_valid)
            cost = criterion(y_pred, y_valid)
            
            if len(val_costs) > 0:
                val_costs.append(cost)
                if torch.abs(val_costs[-1] - cost) < 0.00001:
                    count_epsilon += 1
                    if count_epsilon > 20:
                        optimizer.param_groups[0]['lr'] *= 0.7
                        count_epsilon = 0
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
print(f'Duration: {(time.time() - start_time):.0f} seconds')

epoch: 0, cost: 0.71821171


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
plt.plot(range(epochs), costs)
plt.ylabel('CE Cost')
plt.xlabel('epoch')

In [None]:
with torch.no_grad():
    y_pred = model(cat_test, con_test)
    cost = criterion(y_pred, y_test)
print(f'Cross Entropy Loss: {cost:.8f}')
    

In [None]:
rows = 2000
correct = 0
print(f'{"Model output":36} ARGMAX    Y_test')
for i in range(rows):
    print(f'{str(y_pred[i]):36}  {y_pred[i].argmax():^7}{y_test[i]:^7}')
    if y_pred[i].argmax().item() == y_test[i]:
        correct += 1
print(f'\n{correct} out of {rows} = {100*correct/rows:.2f}% correct')


In [None]:
import time
start_time = time.time()

epochs = 3000
costs = []

for epoch in range(epochs):
    model.train()
    y_pred = model.forward(cat_train, con_train)
    cost = criterion(y_pred, y_train)
    costs.append(cost)
    
    if epoch % 100 == 0:
        print(f'epoch: {epoch}, cost: {cost:.8f}')
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
print(f'Duration: {(time.time() - start_time):.0f} seconds')