In [1]:
import json
import numpy as np
import pandas as pd
from itertools import combinations

In [2]:
n_heroes = 115

In [3]:
with open('data/players_in_matches.json', 'r') as fp:
    players_in_matches = json.load(fp)

In [4]:
''' Print some info about dataset:
    * first match date
    * last match date
    * number of matches
'''

' Print some info about dataset:\n    * first match date\n    * last match date\n    * number of matches\n'

In [5]:
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10

In [6]:
# crete and fill 
matches_played = np.zeros((n_heroes, n_heroes))
matches_won = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in combinations(sorted(match['winners']), 2):
        matches_played[hero1][hero2] += 1
        matches_won[hero1][hero2] += 1
        
    for hero1, hero2 in combinations(sorted(match['loosers']), 2):
        matches_played[hero1][hero2] += 1

In [7]:
# minimum number of matches for pair of heroes to be included in dataset
min_matches = 10
were_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_played])

# if combination of 2 heroes were used less than `min_matches` times,
# don't count their winrate (it would be NaN in `winrate` matrix)
matches_played[matches_played < min_matches] = np.NaN
print(matches_played)
become_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_played])

print(become_nulls - were_nulls)

[[  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan  142. ...,  142.   nan   20.]
 [  nan   nan   nan ...,  109.   nan   51.]
 ..., 
 [  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan   nan ...,   nan   nan   nan]
 [  nan   nan   nan ...,   nan   nan   nan]]
-6952


In [8]:
# find maximum amount of matches played by 2 heroes
print(max([max(hero) for hero in matches_played]))

nan


In [9]:
winrate_ = matches_won / matches_played
winrate = pd.DataFrame(winrate_)
winrate.dropna(axis=0, how='all', inplace=True)
winrate.dropna(axis=1, how='all', inplace=True)
winrate.head()

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,105,106,107,108,109,110,111,112,113,114
1,0.485915,0.514286,,0.50625,,0.52649,0.533333,0.541935,,0.558621,...,,0.45,0.441176,,,0.465517,0.482353,0.556338,,0.3
2,,0.554348,0.292683,0.572254,0.577181,0.425703,0.512448,0.55493,0.567839,0.507979,...,,0.439614,0.485149,,0.537815,0.466216,0.4625,0.440367,,0.490196
3,,,0.46875,0.556818,0.580357,0.489474,0.492308,0.583113,0.45679,0.557864,...,,0.510638,0.581081,0.5,0.666667,0.37931,0.375,0.45098,,
4,,,,0.489362,,0.503497,0.517241,0.430769,0.363636,0.474576,...,0.307692,0.642857,0.6,,,0.483871,0.5625,0.472973,,
5,,,,,0.519231,0.469925,0.571429,0.490463,0.488636,0.582822,...,,0.557047,0.596708,0.580645,0.657658,0.531915,0.5,0.454545,,0.539683


In [22]:
n = winrate.shape[0]
# how many heroes pairs don't have enough matches to have
# meaningful winrate
n_bad_pairs = n**2 - winrate.count().sum() - (n**2 - n)/2
n_pairs = (n**2 - n)/2
print('Percent of pairs with not enough matches to count them:', 
      n_bad_pairs / n_pairs)

Percent of pairs with not enough matches to count them: 0.0346955796497


In [40]:
print(winrate.loc[26,:].argmax())
print(matches_played[26][66], winrate_[26][66])

66
269.0 0.620817843866
