# Hero/Item Analysis

Visualizes and analyzes win rate for a subset of the data, based on hero and items. The database must contain matches where a single hero appears in each match; see the command line options in ``fetch.py`` for details.

In [None]:
import sqlite3
import meta
import os
import ujson as json
import pandas as pd

## Parameters

In [None]:
# Parameters for analysis
# DB_FILES: SQLite database files to read matches from; rows from all files
# are concatenated. Alternative file sets are kept commented out.
#DB_FILES = ['matches_2_2019122810.db', 'matches_2_2019122912.db']
DB_FILES = ['matches_3_2019123112.db']
# HERO: hero name; it is lower-cased and looked up in meta.REVERSE_HERO_DICT.
HERO = 'juggernaut'
# ITEMS: item names to look for on the hero; each must be a key of meta.ITEMS.
# Alternative item sets are kept commented out.
#ITEMS = ['phase_boots', 'power_treads', 'travel_boots', 'boots']
#ITEMS = ['diffusal_blade']
ITEMS = ['bfury']
#ITEMS = ['maelstrom','mjollnir']


In [None]:
# Load the raw match rows from every configured database file into ``rows``.
# The table name is taken from the DOTA_SQL_STATS_TABLE environment variable;
# it is formatted into the statement because SQLite cannot bind identifiers
# as query parameters.
rows = []
for db_file in DB_FILES:
    conn = sqlite3.connect(db_file)
    try:
        c = conn.cursor()
        c.execute("SELECT match_id, radiant_heroes, dire_heroes, items, gold_spent, radiant_win FROM {}".format(
            os.environ['DOTA_SQL_STATS_TABLE']))
        rows.extend(c.fetchall())
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

print("{0} matches found in database".format(len(rows)))

In [None]:
# Debug aid: display the item table from ``meta`` to look up the item names
# that can be used in ITEMS.
meta.ITEMS

In [None]:
# Pair every tracked item name with its numeric id from the meta item table,
# as (item_id, item_name) tuples, then display the result.
items_tuple = []
for item_name in ITEMS:
    item_id = meta.ITEMS[item_name]['id']
    items_tuple.append((item_id, item_name))
items_tuple

In [None]:
# Process all matches: build one tuple per match describing which side the
# target hero played on, the gold spent by the hero, by the rest of its team
# and by the enemy team, whether the hero's team won, and which tracked item
# (if any) the hero held.
dd = []
target_hero_num = meta.REVERSE_HERO_DICT[HERO.lower()]
# The per-hero JSON columns are keyed by the hero id rendered as a string.
hero_key = str(target_hero_num)
for match_id, radiant_heroes, dire_heroes, items, gold_spent, radiant_win in rows:

    # Load team and enemy
    rhs = json.loads(radiant_heroes)
    dhs = json.loads(dire_heroes)
    hero_radiant = target_hero_num in rhs

    if hero_radiant:
        team, enemy = rhs, dhs
        team_won = radiant_win
    else:
        # The hero is treated as playing dire whenever it is not on radiant.
        team, enemy = dhs, rhs
        team_won = not radiant_win

    # Metrics on spent gold
    gs = json.loads(gold_spent)
    # Read the hero's own gold directly so that a value from a previous
    # match can never be carried over into this row.
    hero_gold_spent = gs[hero_key]
    # Compare hero ids by value; an identity test (``is not``) on ints only
    # happens to work for small cached integers.
    team_gold_spent = sum(gs[str(th)] for th in team if th != target_hero_num)
    enemy_gold_spent = sum(gs[str(eh)] for eh in enemy)

    # Presence of selected items, default to not having item in list.
    # When several tracked items are present, the one listed last wins.
    hero_items = json.loads(items)[hero_key]
    item_value = 'none'
    for item_num, item_name in items_tuple:
        if item_num in hero_items:
            item_value = item_name

    # Create row for the dataframe
    team_win = 'yes' if team_won else 'no'

    dd.append((match_id,
               hero_radiant,
               hero_gold_spent,
               team_gold_spent,
               enemy_gold_spent,
               team_win,
               item_value))

In [None]:
# Build the per-match dataframe and derive two extra metrics:
#   gold_spent_ratio - share of the combined team + enemy gold that was spent
#                      by the hero's team (the 'team_gold_spent' column)
#   total_gold_spent - combined team + enemy gold spent
df = pd.DataFrame(
    dd,
    columns=[
        'match_id',
        'hero_radiant',
        'hero_gold_spent',
        'team_gold_spent',
        'enemy_gold_spent',
        'team_win',
        'item',
    ],
).assign(
    gold_spent_ratio=lambda d: d['team_gold_spent'] / (d['team_gold_spent'] + d['enemy_gold_spent']),
    total_gold_spent=lambda d: d['team_gold_spent'] + d['enemy_gold_spent'],
)

In [None]:
import plotly.express as px
# Scatter plot of gold share against total gold spent, coloured by match
# outcome, with one facet per item value and violin marginals on both axes.
fig = px.scatter(
    df,
    x='gold_spent_ratio',
    y='total_gold_spent',
    color='team_win',
    facet_col='item',
    width=1000,
    height=700,
    marginal_x="violin",
    marginal_y="violin",
)
# Marker size and outline, applied only to traces matched by mode='markers'.
fig.update_traces(
    marker={'size': 6, 'line': {'width': 1, 'color': 'DarkSlateGrey'}},
    selector={'mode': 'markers'},
)

fig.show()

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [None]:
# Classifier and categorical encoder, both created with default settings.
lr=LogisticRegression()
ohe=OneHotEncoder()

In [None]:
# Fit the encoder on the item column, reshaped into a single-column 2-D array.
ohe.fit(df['item'].values.reshape(-1,1))

In [None]:
# One-hot encode the item column as a plain 2-D ndarray. ``toarray()`` is used
# instead of ``todense()`` because the latter returns ``np.matrix``, which
# recent scikit-learn releases reject as estimator input.
X2 = ohe.transform(df['item'].values.reshape(-1, 1)).toarray()

In [None]:
# Numeric feature: the gold_spent_ratio column as an array.
X1=df['gold_spent_ratio'].values

In [None]:
# Feature matrix: the gold ratio as the first column, followed by the one-hot
# item columns.
X = np.concatenate((X1[:, np.newaxis], X2), axis=1)
# Target vector: the team_win labels.
y = df['team_win'].values

In [None]:
# Fit the logistic regression on the feature matrix X and the labels y.
lr.fit(X,y)

In [None]:
# Display the fitted coefficients.
lr.coef_

In [None]:
# Names for the feature columns: the gold ratio first, then the categories
# learned by the one-hot encoder.
labels = ['gold_spent_ratio', *ohe.categories_[0].tolist()]

In [None]:
# Print each feature label next to its fitted coefficient.
coefficients = lr.coef_.flatten().tolist()
for label, coefficient in zip(labels, coefficients):
    print("{0:20} {1:7.4f}".format(label, coefficient))