In [1]:
import sys
sys.path.append('../')

import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.database import MongoDB

from sklearn.preprocessing import OneHotEncoder

# Allow up to 100 columns to be shown when a DataFrame is displayed.
pd.options.display.max_columns = 100

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
def hotencode(df):
    """One-hot encode the distinct values found anywhere in ``df``.

    Every distinct value across all columns becomes one output column. A cell
    is 1 when that value occurs at least once in the corresponding input row
    and 0 otherwise.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Values to encode. A Series is treated as a single-column frame.

    Returns
    -------
    pandas.DataFrame
        Integer indicator frame sharing ``df``'s index, with one column per
        distinct value in the sorted order returned by ``np.unique``. An
        input without rows yields a frame without columns.
    """
    values = np.asarray(df)
    if values.ndim == 1:
        # Without a column axis the fancy-index assignment below would
        # broadcast to (n, n) and flag every value on every row.
        values = values[:, None]

    unq, idx = np.unique(values, return_inverse=True)
    # np.unique works on the flattened data; put the inverse codes back into
    # the (row, column) layout of the input.
    codes = idx.reshape(values.shape)

    # Size the output by the number of distinct values instead of
    # codes.max() + 1, which raises on empty input.
    out = np.zeros((len(codes), len(unq)), dtype=int)
    out[np.arange(len(codes))[:, None], codes] = 1
    return pd.DataFrame(out, columns=unq, index=df.index)

In [3]:
# Names of the ten hero-pick columns; the first five are assigned to
# radiant_cols and the last five to dire_cols.
hero_cols = ['hero_%d' % slot for slot in range(10)]
radiant_cols = hero_cols[:5]
dire_cols = hero_cols[5:]

## Overview

In [4]:
# Load the matches table from CSV; the trailing expression displays its
# (rows, columns) shape.
df = pd.read_csv('../data/matches_data.csv')
df.shape

(35835, 12)

In [5]:
# Preview the first rows to see the column layout.
df.head()

Unnamed: 0,match_id,hero_0,hero_1,hero_2,hero_3,hero_4,hero_5,hero_6,hero_7,hero_8,hero_9,radiant_win
0,4154000815,83,64,93,99,22,110,32,41,92,11,False
1,4152494309,23,120,82,18,26,108,31,7,14,44,False
2,4152425012,121,18,106,21,86,10,82,112,110,14,False
3,4152417604,77,93,45,26,7,119,14,4,106,70,False
4,4152114308,9,114,37,56,96,74,6,2,112,121,False


### Any null values?

In [6]:
# Count missing values per column.
df.isna().sum()

match_id       0
hero_0         0
hero_1         0
hero_2         0
hero_3         0
hero_4         0
hero_5         0
hero_6         0
hero_7         0
hero_8         0
hero_9         0
radiant_win    0
dtype: int64

### Percentage of radiants winning

In [7]:
# The mean of the boolean outcome column is the fraction of matches with
# radiant_win set to True.
df['radiant_win'].mean()

0.5386354123064043

Radiant wins about 54% of the matches, which is close to an even split, so the side a team plays on does not seem to make a major difference.

### Are all heroes used in the data matches?

In [8]:
radiants = df[radiant_cols]
dires = df[dire_cols]

# np.unique flattens the 2-D pick table, so its size is the number of
# distinct hero ids that appear anywhere on that side.
print('Number of unique heroes used in radiants:', np.unique(radiants.values).size)
print('Number of unique heroes used in dires:', np.unique(dires.values).size)

Number of unique heroes used in radiants: 116
Number of unique heroes used in dires: 116


Apparently, all heroes have been used on both the Radiant and the Dire side.

### Check if there are any matches with the same sets of heroes that have different outcomes.

In [9]:
# Build an order-independent key for each team: sort the five hero ids of a
# row and join them into one comma-separated string.
df['radiants_team'] = [
    ', '.join(map(str, sorted(team)))
    for team in df[radiant_cols].values.tolist()
]

df['dires_team'] = [
    ', '.join(map(str, sorted(team)))
    for team in df[dire_cols].values.tolist()
]

In [10]:
# Keep one row per (radiants_team, dires_team) pair; an unchanged row count
# means no pair of line-ups occurs more than once.
df.drop_duplicates(subset=['radiants_team', 'dires_team']).shape

(35835, 14)

After dropping duplicates based on `radiants_team` and `dires_team`, we still have the original number of rows (35835), which means that every match has a distinct combination of Radiant and Dire heroes.

# Hero

In [11]:
# Read both hero JSON files into DataFrames indexed by hero id.
with open('../data/hero_stats.json', 'r') as f:
    heroes_stats = pd.DataFrame(json.load(f)).set_index('id')

with open('../data/hero_data.json', 'r') as f:
    heroes_data = pd.DataFrame(json.load(f)).set_index('id')

# Both sources are expected to contain the same number of heroes.
assert len(heroes_data) == len(heroes_stats)

no_heroes = len(heroes_stats)

# Index-on-index merge; columns present in both frames receive the default
# _x / _y suffixes.
heroes = pd.merge(heroes_stats, heroes_data, left_index=True, right_index=True)
heroes.head()

Unnamed: 0_level_0,1_pick,1_win,2_pick,2_win,3_pick,3_win,4_pick,4_win,5_pick,5_win,6_pick,6_win,7_pick,7_win,8_pick,8_win,agi_gain,attack_range,attack_rate,attack_type_x,base_agi,base_armor,base_attack_max,base_attack_min,base_health,base_health_regen,base_int,base_mana,base_mana_regen,base_mr,base_str,cm_enabled,hero_id,icon,img,int_gain,legs_x,localized_name_x,move_speed,name_x,primary_attr_x,pro_ban,pro_pick,pro_win,projectile_speed,roles_x,str_gain,turn_rate,attack_type_y,legs_y,localized_name_y,name_y,primary_attr_y,roles_y
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
1,3819,1861,17546,8841,38115,19515,54088,28335,38746,20537,10384,5525,1565,824,149,83,2.8,150,1.4,Melee,22,-1.0,33,29,200,1.75,12,75,0.9,25,23,True,1.0,/apps/dota2/images/heroes/antimage_icon.png,/apps/dota2/images/heroes/antimage_full.png?,1.8,2,Anti-Mage,310,npc_dota_hero_antimage,agi,20.0,17.0,10.0,0,"[Carry, Escape, Nuker]",1.3,0.5,Melee,2,Anti-Mage,npc_dota_hero_antimage,agi,"[Carry, Escape, Nuker]"
2,6126,3331,26952,14620,51597,27905,68232,36490,48797,25890,14492,7616,2839,1425,339,166,2.2,150,1.7,Melee,20,-2.0,28,24,200,4.25,18,75,0.9,25,25,True,2.0,/apps/dota2/images/heroes/axe_icon.png,/apps/dota2/images/heroes/axe_full.png?,1.6,2,Axe,290,npc_dota_hero_axe,str,43.0,57.0,33.0,900,"[Initiator, Durable, Disabler, Jungler]",2.8,0.6,Melee,2,Axe,npc_dota_hero_axe,str,"[Initiator, Durable, Disabler, Jungler]"
3,712,337,2982,1324,6645,2902,10286,4727,9229,4308,3691,1819,1186,577,319,174,2.4,400,1.7,Ranged,23,1.0,41,35,200,1.5,23,75,0.9,25,23,True,3.0,/apps/dota2/images/heroes/bane_icon.png,/apps/dota2/images/heroes/bane_full.png?,2.4,4,Bane,310,npc_dota_hero_bane,int,48.0,87.0,44.0,900,"[Support, Disabler, Nuker, Durable]",2.4,0.6,Ranged,4,Bane,npc_dota_hero_bane,int,"[Support, Disabler, Nuker, Durable]"
4,6735,3505,24397,12680,41077,21296,50922,26689,35503,18452,12009,6279,2948,1555,514,276,3.0,150,1.7,Melee,24,0.0,39,33,200,1.5,18,75,0.9,25,24,True,4.0,/apps/dota2/images/heroes/bloodseeker_icon.png,/apps/dota2/images/heroes/bloodseeker_full.png?,1.7,2,Bloodseeker,285,npc_dota_hero_bloodseeker,agi,89.0,53.0,26.0,900,"[Carry, Disabler, Jungler, Nuker, Initiator]",2.7,0.5,Melee,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"[Carry, Disabler, Jungler, Nuker, Initiator]"
5,4001,2191,18742,10161,39066,21039,58829,31605,48403,25501,17035,8948,3931,2096,675,368,1.6,600,1.7,Ranged,16,0.0,32,26,200,1.5,14,75,0.9,25,18,True,5.0,/apps/dota2/images/heroes/crystal_maiden_icon.png,/apps/dota2/images/heroes/crystal_maiden_full....,2.9,2,Crystal Maiden,275,npc_dota_hero_crystal_maiden,int,5.0,67.0,41.0,900,"[Support, Disabler, Nuker, Jungler]",2.0,0.5,Ranged,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"[Support, Disabler, Nuker, Jungler]"
