Todo: 

* Classifier per hero × role

In [1]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter

In [2]:
# Earlier experiment: open the archive in a `with` block and print the first
# unpickled match. Kept for reference only.
# with tarfile.open("../data/parsed/opendota_parsed_matches.tar.gz", 'r:gz') as zfp:
#     for filename in zfp.getnames():
#         match = pickle.load(zfp.extractfile(filename))
#         print(match)
#         break

# Open the gzip-compressed tar archive of matches. The handle is deliberately
# left open at module level because the main loop further down reads members
# from `zfp` one at a time.
zfp = tarfile.open("../data/opendota_parsed_matches.tar.gz", 'r:gz')
# Names of every member in the archive, in archive order.
all_filenames = zfp.getnames()
# Drop the first member.
# NOTE(review): presumably the first entry is the archive's top-level
# directory rather than a match file -- confirm against the archive layout.
filenames = all_filenames[1:]

# Manual check: load a single match from the archive.
# fp = zfp.extractfile(filenames[0])
# fp.seek(0)
# match = pickle.loads(fp.read())

In [3]:
def get_rank(rank: int, size: int = 5):
    """Return a one-hot vector marking position `rank`.

    Args:
        rank: Zero-based index of the entry to set to 1.
        size: Length of the returned vector. Defaults to 5, the value that
            was previously hard-coded.

    Returns:
        A 1-D float numpy array of length `size` that is 0 everywhere except
        for a 1 at index `rank`.

    Raises:
        IndexError: If `rank` is outside the bounds of the vector.
    """
    one_hot = np.zeros(size)
    one_hot[rank] = 1
    return one_hot

def kda(kills, deaths, assists):
    """Return the kill/death/assist ratio: (kills + assists) / (deaths + 1).

    The numerator is parenthesised explicitly; without the parentheses the
    division binds tighter than the addition and only the assists would be
    divided by the death count.

    Args:
        kills: Number of kills.
        deaths: Number of deaths. One is added to it so that a deathless
            game does not divide by zero.
        assists: Number of assists.

    Returns:
        The ratio as a float.
    """
    return (kills + assists) / (deaths + 1)

def create_wards_dict(team):
    """Count ward item uses for every player on a team.

    Args:
        team: Iterable of player dicts. Each must provide "hero_id" and an
            "item_uses" mapping from item name to use count.

    Returns:
        A dict mapping hero id to the summed use counts of "ward_observer",
        "ward_dispenser" and "ward_sentry"; items missing from a player's
        "item_uses" contribute nothing.
    """
    ward_items = ("ward_observer", "ward_dispenser", "ward_sentry")
    counts = {}
    for member in team:
        uses = member["item_uses"]
        counts[member["hero_id"]] = sum(uses[name] for name in ward_items if name in uses)
    return counts

In [4]:
# Destination of the pickled dataset written at the end of the main loop.
output_path = "../data/dataset_positions_all.pkl" 

# Refuse to continue when the dataset already exists so an earlier export is
# never silently overwritten. An explicit raise is used rather than `assert`
# because assertions are removed when Python runs with -O.
if os.path.exists(output_path):
    raise FileExistsError(
        "{} already exists; move it away or pick another output path".format(output_path)
    )

def is_valid_match(match):
    """Decide whether a match passes the dataset filters.

    A match is accepted when its "lobby_type" is one of {0, 5, 6, 7}, its
    "duration" is not below 60 * 20 (presumably seconds, i.e. 20 minutes --
    confirm against the data source) and its "game_mode" is one of
    {1, 2, 16, 22}. The checks run in that order and stop at the first
    failure, so later keys are not read once an earlier check fails.

    Args:
        match: Match dict providing the keys above.

    Returns:
        True if every filter passes, otherwise False.
    """
    allowed_lobbies = {0, 5, 6, 7}
    allowed_modes = {1, 2, 16, 22}
    min_duration = 60 * 20
    return (
        match["lobby_type"] in allowed_lobbies
        and not (match["duration"] < min_duration)
        and match["game_mode"] in allowed_modes
    )

def extract_data_point(match, loop_index):
    """Build per-hero feature vectors and position labels for one match.

    The players are split into two teams by their "isRadiant" flag. For each
    team the function:

    1. ranks the players on eight stats (GPM, XPM, kills, ...),
    2. counts ward item uses per hero via `create_wards_dict`,
    3. buckets the players by a team-normalised lane number (1-5),
    4. assigns each hero one of the roles 1-5 with a cascade of heuristics
       (highest GPM per lane for roles 1-3, most wards for roles 4/5, then
       several fallbacks for roles still open),
    5. emits one feature vector and one label per hero.

    Args:
        match: Match dict. Reads "players" and "radiant_win"; every player
            dict must provide "isRadiant", "hero_id", "lane", "item_uses"
            and the eight stats listed in `attributes` below.
        loop_index: Index of the match in the caller's loop. Only referenced
            by commented-out debugging code.

    Returns:
        A tuple `(xs, ys, unassigned_roles_counter)` where `xs` maps hero id
        to a 1-D numpy array (a length-136 teammate indicator followed by one
        `get_rank` one-hot vector per ranked stat), `ys` maps hero id to its
        assigned role number, and `unassigned_roles_counter` counts, per
        role, how often a fallback heuristic had to fill that role.
        If all players of a team were bucketed into the same lane the tuple
        `("invalid_match", None, None)` is returned instead.

    Raises:
        Exception: "No more heroes to assign" if a role is open while no
            candidate is left, or "Not all roles assigned" if a team ends up
            with other than five assigned roles.
    """
    players = match["players"]
    # t0 = Radiant players, t1 = Dire players.
    t0 = list(filter(lambda p: p["isRadiant"], players))
    t1 = list(filter(lambda p: not p["isRadiant"], players))
    xs = {}
    ys = {}
    unassigned_roles_counter = Counter()
    # NOTE(review): winning_teams is filled here but never read afterwards.
    winning_teams = []
    if match["radiant_win"]:
        winning_teams.append(t0)
    else:
        winning_teams.append(t1)

    for team in [t0, t1]:
        hids = [p["hero_id"] for p in team]

        # !Ranks
        # For every stat keep the team's (hero_id, value) pairs sorted from
        # highest to lowest; a hero's index in that list is its in-team rank.
        attributes = ["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"]
        ranks = {}
        
        for attr in attributes:
            ranks[attr] = sorted([(p["hero_id"], p[attr]) for p in team], key=lambda x: x[1], reverse=True)

        # !Wards
        wards_placed = create_wards_dict(team)

        # !Find roles
        support_candidates_4 = []
        support_candidates_5 = []
        unassigned_candidates = []
        # role number (1-5) -> hero id
        final_roles = {}
        # normalised lane number -> list of {"hid", "gpm", "wards"} dicts
        lanes = defaultdict(list)
        # Lane translation tables. The Dire table swaps 1<->3 and 4<->5 while
        # the Radiant table is the identity.
        # NOTE(review): presumably this mirrors the map so the same lane
        # number means the same kind of lane for both teams -- confirm
        # against the lane ids used by the data source.
        dire_team_lanes = {
            1: 3,
            2: 2,
            3: 1,
            4: 5,
            5: 4
        }
        radiant_team_lanes = {
            1: 1,
            2: 2,
            3: 3,
            4: 4,
            5: 5
        } 

        # Indexed by int(isRadiant): 0 -> Dire table, 1 -> Radiant table.
        team_to_lane = {0: dire_team_lanes, 1: radiant_team_lanes}
        # NOTE: this rebinds the function-level `players` name to a per-team
        # stats lookup (hero id -> gpm / last_hits / wards). t0 and t1 were
        # already built from the original list, so the loop is unaffected.
        players = defaultdict(dict)
        for p in team:
            # A "lane" value outside 1-5 raises KeyError here.
            players_lane = team_to_lane[int(p["isRadiant"])][p["lane"]]
            lanes[players_lane].append({"hid": p["hero_id"], "gpm": p["gold_per_min"], "wards": wards_placed[p["hero_id"]]})
            players[p["hero_id"]] = {"gpm": p["gold_per_min"], "last_hits": p["last_hits"], "wards": wards_placed[p["hero_id"]]}
        
        # Reject the match when five players of this team share one lane.
        for lane in lanes.items():
            if len(lane[1]) == 5:
                return "invalid_match", None, None

        # Lanes 1-3: the highest-GPM hero in the lane takes the role with the
        # same number as the lane; the other heroes in the lane become support
        # candidates (lane 1 -> role 5 candidates, lanes 2 and 3 -> role 4
        # candidates).
        # NOTE: `lanes` is a defaultdict, so the `lanes[n]` lookups below also
        # create an empty entry for every lane nobody played. The later
        # `for role in lanes` loops rely on keys 1-5 all being present.
        if len(lanes[1]) > 1:
            highest_gpm_hero = max(lanes[1], key=lambda x: x["gpm"])
            support_candidates = [x["hid"] for x in lanes[1] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_5.extend(support_candidates)
            final_roles[1] = highest_gpm_hero["hid"]
        elif len(lanes[1]) == 1:
            final_roles[1] = lanes[1][0]["hid"]
            
        if len(lanes[2]) > 1:
            highest_gpm_hero = max(lanes[2], key=lambda x: x["gpm"])
            support_candidates = [x["hid"] for x in lanes[2] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_4.extend(support_candidates)
            final_roles[2] = highest_gpm_hero["hid"]
        elif len(lanes[2]) == 1:
            final_roles[2] = lanes[2][0]["hid"]

        if len(lanes[3]) > 1:
            highest_gpm_hero = max(lanes[3], key=lambda x: x["gpm"])
            support_candidates = [x["hid"] for x in lanes[3] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_4.extend(support_candidates)
            final_roles[3] = highest_gpm_hero["hid"]
        elif len(lanes[3]) == 1:
            final_roles[3] = lanes[3][0]["hid"]

        # if len(final_roles) != 3:
        #     print("=" * 20)
        #     print("final roles: ", final_roles)
        #     pprint(lanes)

        # Everyone bucketed into lane 4 is a role-4 candidate and everyone in
        # lane 5 a role-5 candidate.
        all_hero_ids = [x["hid"] for x in lanes[4]]
        support_candidates_4.extend(all_hero_ids)
        
        all_hero_ids = [x["hid"] for x in lanes[5]]
        support_candidates_5.extend(all_hero_ids)

        # ! Assign pos 4 and 5
        # De-duplicate the candidate lists. The resulting order is the set's
        # iteration order, which decides ties in the max() calls below and
        # the order of `unassigned_candidates`.
        support_candidates_4 = list(set(support_candidates_4))
        support_candidates_5 = list(set(support_candidates_5))

        # The candidate with the most ward uses takes the role; the remaining
        # candidates go to the shared unassigned pool.
        if len(support_candidates_5) > 1:
            support_candidates_5_wards_max = max(support_candidates_5, key=lambda x: wards_placed[x])
            final_roles[5] = support_candidates_5_wards_max
            unassigned_candidates.extend([x for x in support_candidates_5 if x != support_candidates_5_wards_max])
            # support_candidates_5.remove(support_candidates_5_wards_max)
        elif len(support_candidates_5) == 1: 
            final_roles[5] = support_candidates_5[0]

        if len(support_candidates_4) > 1:
            support_candidates_4_wards_max = max(support_candidates_4, key=lambda x: wards_placed[x])
            final_roles[4] = support_candidates_4_wards_max
            unassigned_candidates.extend([x for x in support_candidates_4 if x != support_candidates_4_wards_max])
        elif len(support_candidates_4) == 1:
            final_roles[4] = support_candidates_4[0]

        # ! Assign pos 1 if length of support_candidates_5 is 1
        # if len(unassigned_candidates) > 0:
        #     unassigned_candidates = list(set(unassigned_candidates))
        #     unassigned_heroes_gpm_rank = max(unassigned_candidates, key=lambda x: players[x]["gpm"])
        #     if len(support_candidates_5) == 1 and unassigned_heroes_gpm_rank == support_candidates_5[0]:
        #         final_roles[1] = support_candidates_5[0]

        # ! check if all roles are assigned
        # First fallback pass over the lane keys (used here as role numbers),
        # visited in the order the keys were inserted into `lanes`.
        # NOTE(review): discard_dp is never set to True anywhere (the code
        # that did so is commented out), so the `if discard_dp` return below
        # cannot trigger.
        discard_dp = False
        for role in lanes:
            # if discard_dp:
            #     break
            if role not in final_roles:
                # if len(unassigned_candidates) == 1:
                #     final_roles[role] = unassigned_candidates[0]
                #     unassigned_candidates.pop()
                # elif len(unassigned_candidates) > 1:
                #     discard_dp = True
                #     break

                
                if len(unassigned_candidates) == 1:
                    # Exactly one hero left: it takes the open role. This
                    # path does not touch unassigned_roles_counter.
                    final_roles[role] = unassigned_candidates[0]
                    unassigned_candidates.pop()
                    # print("Assigned {} to role {}".format(final_roles[role], role))
                elif len(unassigned_candidates) > 1:
                    # Several heroes left: roles 1 and 3 take the one with
                    # the highest GPM, role 5 the one with the most wards.
                    # Roles 2 and 4 have no branch here and stay open for
                    # the later fallbacks.
                    # unassigned_candidates_gpm_wards = [(x, players[x]["gpm"], players[x]["wards"]) for x in unassigned_candidates]
                    # final_roles_gpm_wards = [(x, players[x]["gpm"], players[x]["wards"]) for x in final_roles.values()]
                    
                    if role == 1: 
                        max_gpm_hero = max(unassigned_candidates, key=lambda x: players[x]["gpm"])
                        final_roles[role] = max_gpm_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_gpm_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_gpm_hero, players[max_gpm_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)

                    elif role == 5: 
                        max_wards_hero = max(unassigned_candidates, key=lambda x: wards_placed[x])
                        final_roles[role] = max_wards_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_wards_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_wards_hero, players[max_wards_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)

                    elif role == 3:
                        max_gpm_hero = max(unassigned_candidates, key=lambda x: players[x]["gpm"])
                        final_roles[role] = max_gpm_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_gpm_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_gpm_hero, players[max_gpm_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)
                else:
                    # A role is open but the candidate pool is empty.
                    raise Exception("No more heroes to assign")
       

        if discard_dp:
            return "invalid_match", None, None

        # ! Assign pos 1, 2, 3
        # Second fallback pass: if a role is still open and exactly one hero
        # is left, assign it, count it and print a notice.
        for role in lanes:
            if role not in final_roles:
                if len(unassigned_candidates) == 1:
                    final_roles[role] = unassigned_candidates[0]
                    unassigned_candidates.pop()
                    unassigned_roles_counter[role] += 1

                    print("Assigned {} to role {}".format(final_roles[role], role))
        
        #! fixing 
        # Last fallback: hand the remaining heroes, ordered by last hits
        # (highest first), to the roles still open, lowest role number first.
        hero_id_last_hits_tuple_list_unassigned_roles = [(x, players[x]["last_hits"]) for x in unassigned_candidates]
        hero_id_last_hits_tuple_list_unassigned_roles.sort(key=lambda x: x[1], reverse=True)
        for role_id in range(1,6):
            if role_id not in final_roles:
                final_roles[role_id] = hero_id_last_hits_tuple_list_unassigned_roles[0][0]
                unassigned_roles_counter[role_id] += 1
                hero_id_last_hits_tuple_list_unassigned_roles.pop(0)

        # Sanity check: dump the intermediate state before failing.
        if len(final_roles) != 5:
            print("final roles: ", final_roles)
            print("support candidates 4: ", support_candidates_4)
            print("support candidates 5: ", support_candidates_5)
            print("wards placed: ", wards_placed)
            raise Exception("Not all roles assigned")

        # if final_roles[1] == 2:
        #     print("axe got assigned to 1", loop_index, match["match_id"])
        #     pprint(lanes)
        #     assert False


        # !Add features
        for p in team:
            features = []
            hid = p["hero_id"]

            # Indicator vector over hero ids with a 1 for every teammate
            # (the hero itself stays 0). Hero ids must be below 136,
            # otherwise the indexing raises IndexError.
            teammates = np.zeros((136))
            for team_hid in hids:
                if team_hid != hid:
                    teammates[team_hid] = 1.
            
            features.append(teammates)
            
            # One one-hot vector per ranked stat; `rank` is the stat name and
            # the list index of (hid, value) is the hero's in-team rank.
            for rank in ranks: 
                features.append(get_rank(ranks[rank].index((hid, p[rank]))))

            #! Add labels
            
            # Invert role -> hero into hero -> role (rebuilt for every
            # player although it does not change within a team).
            flipped_final_roles = {v: k for k, v in final_roles.items()}
            position = flipped_final_roles[hid]                
            
            xs[hid] = np.concatenate(features)
            ys[hid] = position
    
    return xs, ys, unassigned_roles_counter

# !Main loop
# Walk every member of the match archive, keep the matches that pass
# is_valid_match(), turn each into per-hero samples with
# extract_data_point() and collect them per hero id.
trainingset = defaultdict(list)  # hero id -> list of feature vectors
labels = defaultdict(list)       # hero id -> list of role labels
skips = 0                        # matches rejected by either filter
un_counter = Counter()           # role -> number of fallback assignments

# tqdm wraps the list itself (not the enumerate object) so it can read the
# length and display a total / ETA instead of a bare iteration counter.
for k, fname in enumerate(tqdm.tqdm(filenames)):
    fp = zfp.extractfile(fname)
    if fp is None:
        # extractfile() returns None for members that are not regular files
        # (e.g. directories); those are not matches, so just move on.
        continue

    # NOTE(security): unpickling can execute arbitrary code. Only run this
    # on archives produced by a trusted pipeline.
    match = pickle.loads(fp.read())
    if not is_valid_match(match):
        skips += 1
        continue

    xs, ys, ur = extract_data_point(match, k)
    
    # !Skip invalid matches
    # extract_data_point signals a rejected match with a string sentinel in
    # place of the feature dict.
    if xs == "invalid_match":
        skips += 1
        continue

    # !Accumulate how often each role had to be filled by a fallback rule
    un_counter += ur
    
    # !Add to trainingset
    for hid, x in xs.items():
        trainingset[hid].append(x)
        labels[hid].append(ys[hid])

# Persist the dataset as {"xs": hero id -> features, "ys": hero id -> labels}.
with open(output_path, 'wb') as fp: 
    pickle.dump({"xs": trainingset, "ys": labels}, fp)

print("=" * 20)
print(un_counter.most_common())
print("skips", skips)  

50000it [05:05, 163.74it/s]


[(1, 58), (3, 8)]
skips 9749
