In [1]:
import os
import time
import gzip
import json
import numpy as np
import pickle 
import tarfile
from collections import defaultdict
from pprint import pprint
from collections import Counter
from itertools import permutations
from multiprocessing import Pool, TimeoutError

class PositionOptimizer:
    def __init__(self, clfs_path: str, hero_path: str) -> None:
        self.clfs_path = clfs_path
        self.hero_path = hero_path
        self.opendota_data = {}
        self.clfs = defaultdict(list)
        self.role_counts = defaultdict(dict)
        self.hero_data = {}

        self._load_clfs(clfs_path)
        self._load_hero_data(hero_path)
        self._load_opendota_data()
        self.hid_to_name = {h["id"]: h["localized_name"] for h in self.hero_data}

    def find_optimal_roles(self, match):
        players = match["players"]
        t0 = [p for p in players if p["player_slot"] < 128]
        t1 = [p for p in players if p["player_slot"] >= 128]
        team_optimal_positions = {}

        for team, marker in zip([t0, t1], [0, 1]):
            # !Ranks
            attributes = ["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"]
            ranks = {attr: sorted([(p["hero_id"], p[attr]) for p in team], key=lambda x: x[1], reverse=True) for attr in attributes}
            hids = [p["hero_id"] for p in team]
            team_position_proba = defaultdict(list)

            # !Create c
            for p in team:
                features = []
                hid = p["hero_id"]

                teammates = np.zeros((136))
                for team_hid in hids:
                    if team_hid != hid:
                        teammates[team_hid] = 1.
                
                features.append(teammates)
                
                for rank in ranks: 
                    r = ranks[rank].index((hid, p[rank]))
                    features.append(self._get_rank(r))

                x = np.concatenate(features)
                y_pred = self.clfs[hid].predict_log_proba(x.reshape(1, -1))
                # updated_y_pred = self._remove_unplayed_roles(hid, y_pred.ravel())         
                team_position_proba[hid] = y_pred.ravel()
            best_log_p = -np.inf
            best_comp = None

            # Optimal 
            for comp in permutations(range(5), 5):
                # comp_with_heroid = [(comp[i], hid, self.hid_to_name[hid], round(team_position_proba[hid][comp[i]], 2)) for i, hid in enumerate(hids)]
                comp_with_heroid = [(comp[i], hid, self.hid_to_name[hid], team_position_proba[hid][comp[i]]) for i, hid in enumerate(hids)]
                
                comp_with_heroid_dict = {c[1]: (c[0], c[2], c[3]) for c in comp_with_heroid}
                
                log_p = np.array([team_position_proba[hid][comp[i]] for i, hid in enumerate(hids)]).sum()
                if log_p > best_log_p:
                    best_log_p = log_p
                    best_comp = comp_with_heroid_dict
                    # best_comp = sorted(comp_with_heroid, key=lambda x: x[0])
            
            team_optimal_positions[marker] = best_comp
            # print("{} => {:.2f}".format(best_comp, best_log_p))

        # Return both teams
        return team_optimal_positions

    def _remove_unplayed_roles(self, hid, y_pred, threshold=200):
        ys = self.opendota_data["ys"]
        role_counts = dict(Counter(ys[hid]))
        updated_y_pred = np.zeros(y_pred.shape)

        for k in range(0,5):
            if role_counts[k + 1] < threshold:
                updated_y_pred[k] = -1000.
            else:
                updated_y_pred[k] = y_pred[k]
        return updated_y_pred

    def _get_rank(self, rank):
        oh = np.zeros(5)
        oh[rank] = 1
        return oh

    def _load_clfs(self, clf_path):
        """
        Load clfs from pickle file
        """
        with open(clf_path, 'rb') as f:
            self.clfs = pickle.load(f)

    def _load_opendota_data(self) -> None:
        """
        Load opendota data from data/opendota_data.json
        """
        with open('../position_optimizer/data/dataset_positions_all.pkl', 'rb') as f:
            self.opendota_data = pickle.load(f)


    def _load_hero_data(self, hero_path) -> None:
        """
        Load hero data from data/heroes.json
        """
        with open(hero_path, 'r') as f:
            self.hero_data = json.load(f)

    def get_all_hids(self):
        return [h["id"] for h in self.hero_data]


Annotate each hero with position

In [2]:
def is_valid_match(match, all_hids):
    if "match_id" not in match:
        return False
    if not match["lobby_type"] == 7:
        # print("Invalid lobby type:", match["lobby_type"])
        return False
    if match["duration"] < 60 * 20 and match["duration"] > 60 * 55:
        # print("Invalid duration:", match["duration"])
        return False
    if not match["game_mode"] in {1, 2, 16, 22}:
        # print("Invalid game mode:", match["game_mode"])
        return False

    for p in match["players"]:
        if p["hero_id"] not in all_hids:
            # print("Invalid hero id:", p["hero_id"])
            return False

    return True

def process_matches(lines, n_examples, po, all_hids):
    out_path = "../data/examples/test/example_{}.pkl".format(n_examples)
    training_data = []
    winners = []

    # for line in tqdm.tqdm(fp):
    match_array = np.zeros(shape=(121, 5))
    for line in lines:
        match_array.fill(0)
        match = json.loads(line)       
         
        # Check if match is valid
        if not is_valid_match(match, all_hids):
            continue
        
        players = match["players"]
        t0 = [p for p in players if p["player_slot"] < 128]
        t1 = [p for p in players if p["player_slot"] >= 128]

        optimal_positions = po.find_optimal_roles(match)
        for i, h in enumerate(po.hero_data):
            participant_hero_lane = np.zeros(shape=(5))
            for p in t0:
                hid = p['hero_id']
                pos = optimal_positions[0][hid][0]
                if hid == h['id']:
                    participant_hero_lane[pos] = 1
                    match_array[i] = participant_hero_lane
                    print("i:", i, "hid:", h["id"], "team_hid", hid, "pos:", pos)
            for p in t1:
                hid = p['hero_id']
                pos = optimal_positions[1][hid][0]
                if hid == h['id']:
                    participant_hero_lane[pos] = -1
                    match_array[i] = participant_hero_lane
                    print("i:", i, "hid:", h["id"], "team_hid", hid, "pos:", pos)


        training_data.append(np.concatenate(match_array))
        winners.append(match["radiant_win"])
            
        
    x = np.vstack(training_data).astype(np.float32)
    y = np.array(winners)

    dataset = {
        "x": x,
        "y": y
    }
    
    with open(out_path, "wb") as f:
        pickle.dump(dataset, f, protocol=pickle.HIGHEST_PROTOCOL)

    print("saved to: ", out_path)

# FOR THE SMALL
buffer_size = 10000
n_examples = 0
po = PositionOptimizer('../data/clfs/logreg_clfs_all.pkl', '../data/heroes.json')
all_hids = po.get_all_hids()

with gzip.open("../data/raw/dataset_batch1_900k.gz", "r") as fp:
    buffer = []
    for line in fp:
        n_examples += 1
        buffer.append(line)
        if len(buffer) == buffer_size:
            
            process_matches(buffer, n_examples, po, all_hids)
            buffer = []

    # process remaining
    if len(buffer) > 0:
        process_matches(buffer, n_examples, po, all_hids)



# # FOR THE BIG
# if __name__ == "__main__":
#     buffer_size = 100000
#     n_examples = 0
#     po = PositionOptimizer('../data/clfs/logreg_clfs_all.pkl', '../data/heroes.json')
#     all_hids = po.get_all_hids()
#     with Pool(processes=os.cpu_count()-4) as pool:
#         with tarfile.open("../data/dota2_matches_30669739_samples.tar.gz", 'r:gz') as tar:       
#             buffer = []
#             for batch_info in tar:
#                 for line in tar.extractfile(batch_info):                    
#                     n_examples += 1
#                     buffer.append(line)
#                     if len(buffer) == buffer_size:
#                         kwds = {
#                             "lines": buffer,
#                             "n_examples": n_examples,
#                             "po": po,
#                             "all_hids": all_hids
#                         }
#                         pool.apply_async(process_matches, kwds=kwds)
#                         buffer = []

#                 # process remaining
#                 if len(buffer) > 0:
#                     process_matches(buffer, n_examples, po, all_hids)



i: 6 hid: 7 team_hid 7 pos: 3
i: 23 hid: 25 team_hid 25 pos: 1
i: 39 hid: 41 team_hid 41 pos: 0
i: 40 hid: 42 team_hid 42 pos: 0
i: 65 hid: 67 team_hid 67 pos: 1
i: 73 hid: 75 team_hid 75 pos: 4
i: 75 hid: 77 team_hid 77 pos: 2
i: 79 hid: 81 team_hid 81 pos: 2
i: 110 hid: 112 team_hid 112 pos: 4
i: 119 hid: 129 team_hid 129 pos: 3
i: 8 hid: 9 team_hid 9 pos: 3
i: 9 hid: 10 team_hid 10 pos: 0
i: 13 hid: 14 team_hid 14 pos: 3
i: 27 hid: 29 team_hid 29 pos: 4
i: 33 hid: 35 team_hid 35 pos: 1
i: 51 hid: 53 team_hid 53 pos: 2
i: 68 hid: 70 team_hid 70 pos: 2
i: 72 hid: 74 team_hid 74 pos: 1
i: 91 hid: 93 team_hid 93 pos: 0
i: 113 hid: 119 team_hid 119 pos: 4
i: 8 hid: 9 team_hid 9 pos: 3
i: 9 hid: 10 team_hid 10 pos: 0
i: 13 hid: 14 team_hid 14 pos: 3
i: 27 hid: 29 team_hid 29 pos: 4
i: 33 hid: 35 team_hid 35 pos: 1
i: 51 hid: 53 team_hid 53 pos: 2
i: 68 hid: 70 team_hid 70 pos: 2
i: 72 hid: 74 team_hid 74 pos: 1
i: 91 hid: 93 team_hid 93 pos: 0
i: 113 hid: 119 team_hid 119 pos: 4
i: 8 hid:

KeyboardInterrupt: 

In [47]:
with open("../data/virtual_loss_training_data/example_10000.pkl", "rb") as f:
    dataset = pickle.load(f)
    x = dataset["x"]
    y = dataset["y"]
    print(x.shape, y.shape)

(7463, 605) (7463,)


In [10]:
with open("../data/heroes.json", "r") as fp:
    heroes = json.load(fp)

hid_to_name = {x["id"]:x["localized_name"] for x in heroes}

id_to_hid = {x["id"]:i for i, x in enumerate(heroes)}


id_to_hid[108]

# i: 6 hid: 7 team_hid 7 pos: 3
# i: 23 hid: 25 team_hid 25 pos: 1
# i: 39 hid: 41 team_hid 41 pos: 0
# i: 40 hid: 42 team_hid 42 pos: 0
# i: 65 hid: 67 team_hid 67 pos: 1
# i: 73 hid: 75 team_hid 75 pos: 4
# i: 75 hid: 77 team_hid 77 pos: 2
# i: 79 hid: 81 team_hid 81 pos: 2
# i: 110 hid: 112 team_hid 112 pos: 4

106