Todo: 

* Classifier per hero * role

In [1]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter

In [None]:
# with tarfile.open("../data/parsed/opendota_parsed_matches.tar.gz", 'r:gz') as zfp:
#     for filename in zfp.getnames():
#         match = pickle.load(zfp.extractfile(filename))
#         print(match)
#         break

# Open the gzipped tar archive of parsed matches. The handle is deliberately
# left open: the dataset-building cell below pulls members out of `zfp` one
# at a time via `zfp.extractfile(...)`.
zfp = tarfile.open("../data/opendota_parsed_matches.tar.gz", 'r:gz')
all_filenames = zfp.getnames()
# Skip the first archive entry (presumably the top-level directory entry
# rather than a pickled match — TODO confirm against the archive layout).
filenames = all_filenames[1:]

# fp = zfp.extractfile(filenames[0])
# fp.seek(0)
# match = pickle.loads(fp.read())

In [None]:
# Destination of the pickled {"xs": ..., "ys": ...} dataset produced by this cell.
output_path = "../data/dataset_positions.pkl"

# Refuse to overwrite an existing dataset. An explicit raise is used instead
# of `assert` so the guard is still enforced when Python runs with -O.
if os.path.exists(output_path):
    raise FileExistsError(
        "{} already exists; delete it before regenerating the dataset".format(output_path)
    )

def get_rank(rank: int):
    """Return a length-5 float vector that is 1 at index ``rank`` and 0 elsewhere.

    An index outside the valid range for a length-5 array raises ``IndexError``.
    """
    # Row ``rank`` of the 5x5 identity matrix is exactly the one-hot vector.
    identity = np.eye(5)
    return identity[rank]

def kda(kills, deaths, assists):
    """Return the kill/death/assist ratio ``(kills + assists) / (deaths + 1)``.

    The ``+ 1`` in the denominator keeps the ratio finite for a deathless game.

    The previous expression ``kills + assists / (deaths + 1)`` divided only
    ``assists`` because ``/`` binds tighter than ``+``; the numerator is now
    parenthesised explicitly.
    """
    return (kills + assists) / (deaths + 1)

def extract_data_point(match):
    """Build per-hero feature vectors and heuristic position labels for one match.

    The players are split into two teams by their ``"isRadiant"`` flag. For
    each team the function

    1. ranks the five players on eight per-player statistics,
    2. counts ward item uses per hero,
    3. assigns each hero a role id 1-5 with a lane / GPM / wards / last-hits
       heuristic, and
    4. emits one feature vector and one role label per hero.

    Args:
        match: Mapping with a ``"players"`` list. Each player mapping is read
            for ``"isRadiant"``, ``"hero_id"``, ``"lane"``, ``"item_uses"``
            and the eight statistics listed in ``attributes`` below.

    Returns:
        ``(xs, ys, unassigned_roles_counter)`` where ``xs`` maps hero id to a
        1-D array (136 teammate indicators followed by eight length-5 rank
        one-hots), ``ys`` maps hero id to its assigned role id, and the
        counter records how often each role had to be filled by a fallback
        rule. If any single lane value holds exactly five players of a team,
        ``("invalid_match", None, None)`` is returned instead.

    Raises:
        Exception: "No more heroes to assign" when a role is missing and no
            leftover candidate exists, or "Not all roles assigned" when the
            final mapping does not contain exactly five entries.
    """
    players = match["players"]
    t0 = list(filter(lambda p: p["isRadiant"], players))
    t1 = list(filter(lambda p: not p["isRadiant"], players))
    xs = {}
    ys = {}
    unassigned_roles_counter = Counter()

    for team in [t0, t1]:
        hids = [p["hero_id"] for p in team]

        # !Ranks
        # For every statistic, keep the team's (hero_id, value) pairs sorted
        # from highest to lowest; a hero's index in that list is its rank.
        attributes = ["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"]
        ranks = {}
        
        for attr in attributes:
            ranks[attr] = sorted([(p["hero_id"], p[attr]) for p in team], key=lambda x: x[1], reverse=True)

        # Total ward-type item uses per hero (observer + dispenser + sentry).
        wards_placed = {}
        for p in team:
            item_uses = p["item_uses"]
            c = 0
            if "ward_observer" in item_uses:
                c += item_uses["ward_observer"]
            if "ward_dispenser" in item_uses:
                c += item_uses["ward_dispenser"]
            if "ward_sentry" in item_uses:
                c += item_uses["ward_sentry"]
                
            wards_placed[p["hero_id"]] = c

        # !Find roles

        support_candidates_4 = []
        support_candidates_5 = []
        unassigned_candidates = []
        final_roles = {}  # role id -> hero id
        lanes = defaultdict(list)
        # NOTE(review): this rebinds `players`, shadowing the match's player
        # list read above; t0/t1 were already built, so the loop is unaffected.
        players = defaultdict(dict)
        for p in team:
            lanes[p["lane"]].append({"hid": p["hero_id"], "gpm": p["gold_per_min"], "wards": wards_placed[p["hero_id"]]})
            players[p["hero_id"]] = {"gpm": p["gold_per_min"], "last_hits": p["last_hits"], "wards": wards_placed[p["hero_id"]]}
        

        # A lane value shared by all five players makes the heuristic
        # meaningless, so the whole match is rejected.
        for lane in lanes.items():
            if len(lane[1]) == 5:
                return "invalid_match", None, None

        # Lanes 1-3: the highest-GPM hero in the lane takes the role with the
        # same id as the lane; the rest become support candidates (lane 1 ->
        # position 5, lanes 2 and 3 -> position 4).
        # NOTE(review): the lane value is used directly as the role id —
        # confirm that matches the intended meaning of p["lane"] for both teams.
        if len(lanes[1]) > 1:
            highest_gpm_hero = max(lanes[1], key=lambda x: x["gpm"])
            everyone_except_highest_gpm_hero_id = [x["hid"] for x in lanes[1] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_5.extend(everyone_except_highest_gpm_hero_id)
            final_roles[1] = highest_gpm_hero["hid"]
        elif len(lanes[1]) == 1:
            final_roles[1] = lanes[1][0]["hid"]
            
        if len(lanes[2]) > 1:
            highest_gpm_hero = max(lanes[2], key=lambda x: x["gpm"])
            everyone_except_highest_gpm_hero_id = [x["hid"] for x in lanes[2] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_4.extend(everyone_except_highest_gpm_hero_id)
            final_roles[2] = highest_gpm_hero["hid"]
        elif len(lanes[2]) == 1:
            final_roles[2] = lanes[2][0]["hid"]

        if len(lanes[3]) > 1:
            highest_gpm_hero = max(lanes[3], key=lambda x: x["gpm"])
            everyone_except_highest_gpm_hero_id = [x["hid"] for x in lanes[3] if x["hid"] != highest_gpm_hero["hid"]]
            support_candidates_4.extend(everyone_except_highest_gpm_hero_id)
            final_roles[3] = highest_gpm_hero["hid"]
        elif len(lanes[3]) == 1:
            final_roles[3] = lanes[3][0]["hid"]

        # Everyone recorded in lane 4 / lane 5 is a position 4 / 5 candidate.
        all_hero_ids = [x["hid"] for x in lanes[4]]
        support_candidates_4.extend(all_hero_ids)
        
        all_hero_ids = [x["hid"] for x in lanes[5]]
        support_candidates_5.extend(all_hero_ids)

        # ! Assign pos 4 and 5
        # Among the candidates for each support position the hero with the
        # most ward uses wins; the others go to the leftover pool.
        support_candidates_4 = list(set(support_candidates_4))
        support_candidates_5 = list(set(support_candidates_5))

        if len(support_candidates_5) > 1:
            support_candidates_5_wards_max = max(support_candidates_5, key=lambda x: wards_placed[x])
            final_roles[5] = support_candidates_5_wards_max
            unassigned_candidates.extend([x for x in support_candidates_5 if x != support_candidates_5_wards_max])
        elif len(support_candidates_5) == 1: 
            final_roles[5] = support_candidates_5[0]

        if len(support_candidates_4) > 1:
            support_candidates_4_wards_max = max(support_candidates_4, key=lambda x: wards_placed[x])
            final_roles[4] = support_candidates_4_wards_max
            unassigned_candidates.extend([x for x in support_candidates_4 if x != support_candidates_4_wards_max])
        elif len(support_candidates_4) == 1:
            final_roles[4] = support_candidates_4[0]

        # ! check if all roles are assigned
        # `lanes` is a defaultdict, so the lanes[1]..lanes[5] lookups above
        # inserted those keys: this loop visits every lane value seen on the
        # team plus 1-5, in insertion order.
        unassigned_candidates = list(set(unassigned_candidates))
        for role in lanes:
            if role not in final_roles:
                if len(unassigned_candidates) == 1:
                    # Exactly one leftover hero: it takes the missing role.
                    final_roles[role] = unassigned_candidates[0]
                    unassigned_candidates.pop()
                    # print("Assigned {} to role {}".format(final_roles[role], role))
                elif len(unassigned_candidates) > 1:
                    # These two lists only feed the commented-out debug prints below.
                    unassigned_candidates_gpm_wards = [(x, players[x]["gpm"], players[x]["wards"]) for x in unassigned_candidates]
                    final_roles_gpm_wards = [(x, players[x]["gpm"], players[x]["wards"]) for x in final_roles.values()]
                    # Several leftovers: roles 1 and 3 take the highest-GPM
                    # hero, role 5 the hero with most wards. Any other missing
                    # role is left untouched by this branch.
                    if role == 1: 
                        max_gpm_hero = max(unassigned_candidates, key=lambda x: players[x]["gpm"])
                        final_roles[role] = max_gpm_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_gpm_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_gpm_hero, players[max_gpm_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)

                    elif role == 5: 
                        max_wards_hero = max(unassigned_candidates, key=lambda x: wards_placed[x])
                        final_roles[role] = max_wards_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_wards_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_wards_hero, players[max_wards_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)

                    elif role == 3:
                        max_gpm_hero = max(unassigned_candidates, key=lambda x: players[x]["gpm"])
                        final_roles[role] = max_gpm_hero
                        unassigned_roles_counter[role] += 1
                        unassigned_candidates.pop(unassigned_candidates.index(max_gpm_hero))
                        # print("Role {} not assigned. Assigning {}, which has {}. {}".format(role, max_gpm_hero, players[max_gpm_hero], unassigned_candidates_gpm_wards))
                        # print(final_roles_gpm_wards)
                else:
                    # A role is missing but the leftover pool is empty.
                    raise Exception("No more heroes to assign")
       

        # ! Assign pos 1, 2, 3
        # Second pass: if the pass above shrank the pool to a single hero,
        # hand it to the first role that is still missing (and count it).
        for role in lanes:
            if role not in final_roles:
                if len(unassigned_candidates) == 1:
                    final_roles[role] = unassigned_candidates[0]
                    unassigned_candidates.pop()
                    unassigned_roles_counter[role] += 1

                    # print("Assigned {} to role {}".format(final_roles[role], role))
        
        #! fixing 
        # Last resort: fill role ids 1-5 that are still missing from the
        # leftover pool, in descending order of last hits.
        # NOTE(review): indexing [0] raises IndexError if fewer leftover
        # heroes remain than roles are missing.
        hero_id_last_hits_tuple_list_unassigned_roles = [(x, players[x]["last_hits"]) for x in unassigned_candidates]
        hero_id_last_hits_tuple_list_unassigned_roles.sort(key=lambda x: x[1], reverse=True)
        for role_id in range(1,6):
            if role_id not in final_roles:
                final_roles[role_id] = hero_id_last_hits_tuple_list_unassigned_roles[0][0]
                unassigned_roles_counter[role_id] += 1
                hero_id_last_hits_tuple_list_unassigned_roles.pop(0)


        # Sanity check; dump the intermediate state before failing.
        if len(final_roles) != 5:
            print("final roles: ", final_roles)
            print("support candidates 4: ", support_candidates_4)
            print("support candidates 5: ", support_candidates_5)
            print("wards placed: ", wards_placed)
            raise Exception("Not all roles assigned")


        # !Add features
        for p in team:
            features = []
            hid = p["hero_id"]

            # Indicator vector of the other four heroes on the team, indexed
            # by hero id.
            # NOTE(review): assumes every hero id is < 136 — confirm against
            # the hero table in use.
            teammates = np.zeros((136))
            for team_hid in hids:
                if team_hid != hid:
                    teammates[team_hid] = 1.
            
            features.append(teammates)
            
            # One length-5 one-hot per statistic, marking this hero's position
            # in the team's descending ordering for that statistic.
            for rank in ranks: 
                features.append(get_rank(ranks[rank].index((hid, p[rank]))))


            #! Add labels
            
            # Invert role -> hero into hero -> role to look up this hero's label.
            # NOTE(review): raises KeyError if this hero ended up without a role.
            flipped_final_roles = {v: k for k, v in final_roles.items()}
            position = flipped_final_roles[hid]                
            
            xs[hid] = np.concatenate(features)
            ys[hid] = position
    
    return xs, ys, unassigned_roles_counter



# !Main loop
# Accumulate, per hero id, every feature vector and role label extracted from
# the archive, then persist both mappings as a single pickle.
trainingset = defaultdict(list)
labels = defaultdict(list)
skips = 0  # matches rejected by extract_data_point
un_counter = Counter()  # how often each role was filled by a fallback rule
for fname in tqdm.tqdm(filenames):
    leaver = False  # NOTE(review): never read anywhere in this cell
    fp = zfp.extractfile(fname)
    fp.seek(0)
    # NOTE(security): unpickling executes arbitrary code embedded in the
    # payload; only run this against archives from a trusted source.
    match = pickle.loads(fp.read())
    xs, ys, ur = extract_data_point(match)
    # extract_data_point signals a rejected match by returning the string
    # "invalid_match" in the first slot instead of a dict.
    if xs == "invalid_match":
        skips += 1
        continue
    un_counter += ur
    
    for hid, x in xs.items():
        trainingset[hid].append(x)
        labels[hid].append(ys[hid])

with open(output_path, 'wb') as fp: 
    pickle.dump({"xs": trainingset, "ys": labels}, fp)

# Summary: fallback-role counts (most frequent first) and number of skipped matches.
print("=" * 20)
print(un_counter.most_common())
print("skips", skips)  

In [2]:
# Reload the dataset pickle so the modelling cells can run without rebuilding it.
output_path = "../data/dataset_positions.pkl" 

# NOTE(security): pickle.load runs arbitrary code from the file; only load
# datasets produced locally or by a trusted source.
with open(output_path, 'rb') as fp:
    data = pickle.load(fp)

# Both entries are keyed by hero id: "xs" holds lists of feature vectors and
# "ys" the matching lists of role labels, as written by the dataset-building cell.
xs = data["xs"]
ys = data["ys"]

In [None]:
# Load the hero table; each entry is read below for "id" and "localized_name".
with open("../../data/heroes.json", "r") as fp:
  heroes = json.load(fp)

# Hand-curated hero names per position, each resolved to hero ids through the
# hero table. A hero may appear in more than one list. Every print shows
# (names listed, ids resolved); unequal numbers mean at least one name did not
# match a "localized_name" exactly once.

# Position 1 (safelane).
safelane = ["Anti-Mage", "Arc Warden", "Bloodseeker", "Chaos Knight", "Clinkz", "Drow Ranger", "Faceless Void", 
"Gyrocopter", "Juggernaut", "Lifestealer", "Luna", "Medusa", "Monkey King", "Morphling", "Naga Siren", "Phantom Assassin", 
"Phantom Lancer", "Riki", "Slark", "Spectre", "Sven", "Terrorblade", "Tiny", "Troll Warlord", "Ursa", "Weaver", "Wraith King"]
safelane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in safelane]
print(len(safelane), len(safelane_name_to_id))

# Position 2 (midlane).
midlane = ["Alchemist", "Arc Warden", "Batrider", "Broodmother", "Death Prophet", "Ember Spirit", 
"Huskar", "Invoker", "Kunkka", "Leshrac", "Lina", "Lone Druid", "Meepo", "Necrophos", "Outworld Destroyer", 
"Puck", "Pugna", "Queen of Pain", "Razor", "Shadow Fiend", "Sniper", "Storm Spirit", 
"Templar Assassin", "Tinker", "Viper", "Visage", "Void Spirit", "Zeus"]
midlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in midlane]
print(len(midlane), len(midlane_name_to_id))

# Position 3 (offlane).
offlane = ["Axe", "Beastmaster", "Bloodseeker", "Brewmaster", "Bristleback", 
"Centaur Warrunner", "Chaos Knight", "Dark Seer", "Dawnbreaker", "Death Prophet", "Doom", 
"Dragon Knight", "Earthshaker", "Elder Titan", "Enigma", "Legion Commander", "Lycan", 
"Mars", "Nature's Prophet", "Necrophos", "Night Stalker", "Pangolier", 
"Razor", "Sand King", "Slardar", "Spirit Breaker", "Tidehunter", "Timbersaw", "Underlord", "Venomancer", "Viper"]

offlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in offlane]
print(len(offlane),len(offlane_name_to_id))

# Position 4 (soft support).
soft_support = ["Bounty Hunter", "Chen", "Clockwerk", "Dark Willow", "Earth Spirit", "Earthshaker", "Enigma", 
"Grimstroke", "Hoodwink", "Keeper of the Light", "Mirana", "Nyx Assassin", "Phoenix", "Pudge", "Rubick", 
"Shadow Demon", "Shadow Shaman", "Silencer", "Skywrath Mage", "Snapfire", "Spirit Breaker", "Techies", 
"Treant Protector", "Tusk", "Venomancer", "Weaver", "Windranger"]

soft_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in soft_support]
print(len(soft_support), len(soft_support_name_to_id))

# Position 5 (hard support).
hard_support = ["Abaddon", "Ancient Apparition", "Bane", "Chen", "Crystal Maiden", "Dark Willow", "Dazzle", "Disruptor", 
"Enchantress", "Grimstroke", "Io", "Jakiro", "Keeper of the Light", "Lich", "Lion", "Ogre Magi", "Omniknight", "Oracle", "Shadow Demon", 
"Shadow Shaman", "Silencer", "Snapfire", "Treant Protector", "Vengeful Spirit", "Undying", "Warlock", "Winter Wyvern", "Witch Doctor"]
hard_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in hard_support]
print(len(hard_support), len(hard_support_name_to_id))


In [None]:
# Per-hero weight vector over the five positions. Hero ids that are never
# assigned below fall back to an all-zero vector via the defaultdict factory.
weights_dict = defaultdict(lambda: np.zeros(5))

# Baseline: every hero in the hero table gets weight 0.1 for every position.
for h in heroes:
    weights_dict[h["id"]] = np.array([.1, .1, .1, .1, .1])

# Index k in each weight vector follows this order:
# 0 safelane, 1 midlane, 2 offlane, 3 soft support, 4 hard support.
lane_assist = [safelane_name_to_id, midlane_name_to_id, offlane_name_to_id, soft_support_name_to_id, hard_support_name_to_id]

# Raise the weight to 1.0 for every position a hero is listed under.
for k, lane in enumerate(lane_assist):
    for hid in lane:
        weights_dict[hid][k] = 1.


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Train one position classifier per hero. `clfs` and `scores` are filled in
# the iteration order of `xs`, so index i refers to the i-th hero visited,
# not to hero id i.
clfs = []
scores = [] 

for hid in tqdm.tqdm(xs):
    x = np.vstack(xs[hid])
    # Stored labels are role ids 1-5; shift to 0-4 so they line up with the
    # indices of the per-hero weight vector.
    y = np.hstack(ys[hid]) - 1

    class_weight = weights_dict[hid]
    # scikit-learn expects class_weight as a {label: weight} mapping. The
    # earlier `{{e, v} for ...}` built a set of sets (TypeError: unhashable
    # type) and the raw array was then passed to the estimator. Only labels
    # present in `y` are included, because some scikit-learn versions reject
    # weights for classes that do not occur in the data.
    class_weight_dict = {int(c): float(class_weight[int(c)]) for c in np.unique(y)}

    clf = LogisticRegression(multi_class="ovr", max_iter=200, n_jobs=-1, solver="lbfgs", class_weight=class_weight_dict)
    # clf = RandomForestClassifier()
    clf.fit(x, y)

    # print(hid, "score is: ", clf.score(x,y))
    clfs.append(clf)
    # Accuracy on the training data itself (no held-out split here).
    scores.append(clf.score(x,y))


In [None]:
# Mean training accuracy over all per-hero classifiers.
print(np.array(scores).mean())

# Inspect coefficient row 4, feature columns 136-140, of the classifier at
# list position 31. Columns 136-140 are the first rank one-hot that
# extract_data_point appends after the 136 teammate indicators.
# NOTE(review): `clfs` is ordered by iteration over `xs`, so position 31 is
# not necessarily hero id 31 — confirm which hero is intended.
clfs[31].coef_[4, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

In [None]:
# Same inspection as the previous cell (this cell is a verbatim repeat):
# mean training accuracy, then coefficient row 4 / columns 136-140 of the
# classifier at list position 31.
print(np.array(scores).mean())

clfs[31].coef_[4, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

In [None]:
import pandas as pd

# Label distribution for the hero currently held in `y` (left over from the
# per-hero training loop).
unique, counts = np.unique(y, return_counts=True)
print(unique, counts)

df = pd.DataFrame(y)
# `y` was shifted to zero-based labels (role id - 1) in the training loop, so
# the ticks run 0-4; the previous 1-5 ticks left class 0 unlabelled and added
# a tick for a class 5 that cannot occur.
df.plot.hist(xticks=[0, 1, 2, 3, 4])


In [None]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split
from sklearn.metrics import confusion_matrix

# Baseline on the hero currently held in the globals `x` / `y`: hold out 20%
# of the samples and fit a shallow (depth-2) random forest on the rest.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

# Held-out accuracy.
print(np.mean(y_pred == y_test))

# Rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
# Same 80/20 split as the random-forest cell (identical random_state), this
# time with a default logistic regression. `train_test_split` and
# `confusion_matrix` are expected to be in scope from an earlier cell.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = LogisticRegression(random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

# Held-out accuracy.
print(np.mean(y_pred == y_test))

# Rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split


# for hero, train a logistic regression model and add to VotingClassifier
clf = LogisticRegression(multi_class='multinomial', random_state=1)

# VotingClassifier needs a non-empty list of (name, estimator) pairs; the list
# was previously left empty, so fit() failed and `clf` was never used. The
# model is registered under the name 'lr' so it can be reached through
# `eclf1.named_estimators_` after fitting.
estimators = [('lr', clf)]

# Hard voting: the ensemble predicts the majority class label.
eclf1 = VotingClassifier(estimators=estimators, voting='hard')
eclf1 = eclf1.fit(x_train, y_train)

In [None]:
# Predicted labels of the voting ensemble on the held-out split.
print(eclf1.predict(x_test))

In [None]:
# Sanity check that attribute-style and key-style access to the fitted
# sub-estimators give identical predictions. This expects the ensemble to
# contain an estimator registered under the name 'lr'.
np.array_equal(eclf1.named_estimators_.lr.predict(x), eclf1.named_estimators_['lr'].predict(x))