In [1]:
import json
import numpy as np
import pandas as pd
from itertools import combinations

from atod import Hero, Heroes

In [2]:
n_heroes = 115

In [3]:
with open('data/players_in_matches.json', 'r') as fp:
    players_in_matches = json.load(fp)

In [4]:
''' Print some info about dataset:
    * first match date
    * last match date
    * number of matches
'''

' Print some info about dataset:\n    * first match date\n    * last match date\n    * number of matches\n'

In [5]:
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10

In [6]:
# crete and fill 
matches_played = np.zeros((n_heroes, n_heroes))
matches_won = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in combinations(sorted(match['winners']), 2):
        matches_played[hero1][hero2] += 1
        matches_won[hero1][hero2] += 1
        
    for hero1, hero2 in combinations(sorted(match['loosers']), 2):
        matches_played[hero1][hero2] += 1

In [7]:
# minimum number of matches for pair of heroes to be included in dataset
min_matches_played = 10
max_matches_played = max([max(a) for a in matches_played])
were_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_played])

# if combination of 2 heroes were used less than `min_matches` times,
# don't count their winrate (it would be NaN in `winrate` matrix)
matches_played[matches_played < min_matches_played] = np.NaN
print(matches_played)
become_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_played])

print(become_nulls - were_nulls)

[[  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan  142. ...,  142.   nan   20.]
 [  nan   nan   nan ...,  109.   nan   51.]
 ..., 
 [  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan   nan ...,   nan   nan   nan]]
-6952


In [8]:
# find maximum amount of matches played by 2 heroes
max_matches_played = np.nanmax([np.nanmax(hero) 
                                for hero in matches_played])

# some combinations were played more than another, so
# there is more confidence in picking this kind of heroes (tiny-wi)

  app.launch_new_instance()


In [9]:
winrate_ = (matches_won / matches_played) * (1 + matches_played / max_matches_played)
winrate = pd.DataFrame(winrate_)
winrate.dropna(axis=0, how='all', inplace=True)
winrate.dropna(axis=1, how='all', inplace=True)
winrate.head()

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,105,106,107,108,109,110,111,112,113,114
1,0.532759,0.563166,,0.56124,,0.634433,0.549627,0.598962,,0.613611,...,,0.47444,0.471726,,,0.483847,0.510187,0.60997,,0.304073
2,,0.588971,0.30083,0.639464,0.635565,0.497665,0.680133,0.68867,0.644553,0.637646,...,,0.501392,0.551679,,0.581264,0.513059,0.512738,0.472954,,0.507168
3,,,0.478933,0.590084,0.624485,0.55261,0.579205,0.883181,0.481909,0.685494,...,,0.608398,0.610273,0.503394,0.68296,0.394246,0.385183,0.482209,,
4,,,,0.504976,,0.552376,0.527425,0.449778,0.366352,0.493585,...,0.310408,0.655077,0.60611,,,0.494054,0.56861,0.496734,,
5,,,,,0.537561,0.554786,0.766948,0.612663,0.547021,0.711811,...,,0.669742,0.695146,0.592865,0.707216,0.548887,0.504073,0.464729,,0.585847


In [10]:
n = winrate.shape[0]
# how many heroes pairs don't have enough matches to have
# meaningful winrate
n_bad_pairs = n**2 - winrate.count().sum() - (n**2 - n)/2
n_pairs = (n**2 - n)/2
print('Percent of pairs with not enough matches to count them:', 
      n_bad_pairs / n_pairs)

Percent of pairs with not enough matches to count them: 0.0346955796497


## Building a pick
Idea: user gives 2 heroes as input, after that algorithms searches for the best next hero till there are 5 of them. The best hero would be choosen by maximazing the weight of edges in heroes graph. Heroes graph -- vertices are rows in winrate matrix and edges are winrates of heroes pairs.

In [11]:
def get_next_hero(opening):
    best_connection = 0
    next_pick = 0

    for hero_id in winrate.index:
        # if this hero is not in the opening
        if hero_id not in opening:
            # check if this hero fits the best
            ids = sorted([*opening, hero_id])

            # count sum of all created edges
            total_connection = sum([winrate.loc[hero1][hero2]
                                    for hero1, hero2 in combinations(ids, 2)])

            if total_connection > best_connection:
                next_pick = hero_id
                best_connection = total_connection

    return next_pick.item()

In [14]:
pick = Heroes()
pick.add(Hero.from_name('Centaur Warrunner'))
pick.add(Hero.from_name('Shadow Fiend'))

while len(pick) < 5:
    next_hero = get_next_hero(pick.get_ids())
    pick.add(Hero(next_hero))
    
print(pick.get_names())

['Centaur Warrunner', 'Shadow Fiend', 'Rubick', 'Clockwerk', 'Juggernaut']
