In [1]:
import json

from zipfile import ZipFile, ZipInfo

import scipy as sp
import scipy.sparse
import numpy as np
import pandas as pd

In [7]:
# data_path = "../data/dota_games.zip"
# with ZipFile(data_path) as z:
#     z.extract('dota_games/5607724594.json', "../data/test_match.json")

### reconstruct heroes dict


In [8]:
# heroes_path = "../data/heroes.json"
# with open(heroes_path, 'r') as fp:
#     heroes = json.load(fp)
    
# new_heroes_d = {}
# for hero in heroes:
#     new_heroes_d[int(hero["id"])] = hero["name"]
    
# with open("../data/heroes_clean.json", "w") as fp:
#     fp.write(json.dumps(new_heroes_d))

In [10]:
n_matches = 10000
n_heroes = 119
max_hero_id = 128

test_match_path = "../data/test_match_5607724594.json"
heroes_path = "../data/heroes_clean.json"

with open(test_match_path, 'r') as fp:
    match = json.load(fp)
    
with open(heroes_path, 'r') as fp:
    heroes = json.load(fp)

In [15]:
def is_match_valid(match):
    # check if hero ids is within range
    players = match["result"]["players"]
    hero_ids = [True if 0 < x["hero_id"] and x["hero_id"] < max_hero_id else False for x in players]
    if not all(hero_ids):
        return False
    
    # check matchtype is not all pick, random draft and ranked matchmaking
    valid_game_modes = {1, 3, 22}
    if not match["result"]["game_mode"] in valid_game_modes:
        return False
    
    # check match duration less than 10 minutes
    if match["result"]["duration"] <= 600: 
        return False
    
    # check if match is not 10 players
    if not len(match["result"]["players"]) == 10:
        return False
    
    # check for leavers
    no_dc = [True if x["leaver_status"] in [0, 1] else False for x in players]
    if not all(no_dc):
        return False
    
    return True

In [17]:
data_path = "../data/dota_games.zip"
training_examples = []
n_samples = 10000

with ZipFile(data_path) as z:
    for item in z.filelist[1:n_samples]:
        match = z.read(item.filename)
        match = json.loads(match)
        players = match["result"]["players"]
    
        # check if match req is fulfilled
        if not is_match_valid(match):
            continue

        heroes_onehot = np.zeros(128, dtype=np.float)

        for player in players:
            hero_id = player["hero_id"]
            is_dire = 1 if player["player_slot"] < 7 else -1
            heroes_onehot[hero_id] = is_dire

        # stack X
        s_x = sp.sparse.csr_matrix(heroes_onehot)
        training_examples.append(s_x)
    
    sparse_training = sp.sparse.vstack(training_examples)

    # save 
    sp.sparse.save_npz("../data/training.npz", sparse_training)

    print(sparse_training)

  (0, 9)	-1.0
  (0, 16)	-1.0
  (0, 27)	1.0
  (0, 31)	1.0
  (0, 35)	-1.0
  (0, 36)	1.0
  (0, 41)	1.0
  (0, 67)	-1.0
  (0, 98)	1.0
  (0, 103)	-1.0
  (1, 10)	1.0
  (1, 14)	1.0
  (1, 18)	1.0
  (1, 28)	-1.0
  (1, 40)	1.0
  (1, 63)	-1.0
  (1, 67)	-1.0
  (1, 75)	-1.0
  (1, 97)	1.0
  (1, 100)	-1.0
  (2, 14)	-1.0
  (2, 22)	1.0
  (2, 41)	-1.0
  (2, 49)	1.0
  (2, 63)	-1.0
  :	:
  (6895, 44)	-1.0
  (6895, 60)	-1.0
  (6895, 71)	-1.0
  (6895, 99)	1.0
  (6895, 112)	1.0
  (6896, 14)	1.0
  (6896, 16)	-1.0
  (6896, 40)	1.0
  (6896, 45)	-1.0
  (6896, 51)	-1.0
  (6896, 52)	-1.0
  (6896, 54)	1.0
  (6896, 61)	1.0
  (6896, 94)	-1.0
  (6896, 121)	1.0
  (6897, 5)	-1.0
  (6897, 8)	-1.0
  (6897, 13)	-1.0
  (6897, 14)	1.0
  (6897, 60)	1.0
  (6897, 67)	1.0
  (6897, 94)	1.0
  (6897, 98)	-1.0
  (6897, 106)	-1.0
  (6897, 110)	1.0


### test code

In [118]:
training_examples = []

for match in matches:
    players = match["result"]["players"]
    
    # check if match req is fulfilled
    if not is_match_valid(match):
        continue
    
    heroes_onehot = np.zeros(128, dtype=np.float)
    
    for player in players:
        hero_id = player["hero_id"]
        is_dire = 1 if player["player_slot"] < 7 else -1
        heroes_onehot[hero_id] = is_dire
        
    # stack X
    s_x = sp.sparse.csr_matrix(heroes_onehot)
    training_examples.append(s_x)
    
     
sparse_training = sp.sparse.vstack(training_examples)

# save 
sp.sparse.save_npz("../data/training.npz", sparse_training)

print(sparse_training)

  (0, 9)	-1.0
  (0, 16)	-1.0
  (0, 27)	1.0
  (0, 31)	1.0
  (0, 35)	-1.0
  (0, 36)	1.0
  (0, 41)	1.0
  (0, 67)	-1.0
  (0, 98)	1.0
  (0, 103)	-1.0


In [119]:
a = sp.sparse.load_npz("../data/training.npz")
print(a)

  (0, 9)	-1.0
  (0, 16)	-1.0
  (0, 27)	1.0
  (0, 31)	1.0
  (0, 35)	-1.0
  (0, 36)	1.0
  (0, 41)	1.0
  (0, 67)	-1.0
  (0, 98)	1.0
  (0, 103)	-1.0


In [108]:
# check if hero_id matches the match info
row, col = sparse_training.nonzero()
    
for i in col:
    print(heroes[str(i)])

Mirana
Sand King
Shadow Shaman
Lich
Sniper
Necrophos
Faceless Void
Spectre
Timbersaw
Skywrath Mage
Elder Titan
