In [1]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter

In [26]:
output_path = "../data/dataset_positions_test.pkl" 

with open(output_path, 'rb') as fp:
    data = pickle.load(fp)

xs = data["xs"]
ys = data["ys"]

In [27]:
with open("../../data/heroes.json", "r") as fp:
  heroes = json.load(fp)

safelane = ["Anti-Mage", "Arc Warden", "Bloodseeker", "Chaos Knight", "Clinkz", "Drow Ranger", "Faceless Void", 
"Gyrocopter", "Juggernaut", "Lifestealer", "Luna", "Medusa", "Monkey King", "Morphling", "Naga Siren", "Phantom Assassin", 
"Phantom Lancer", "Riki", "Slark", "Spectre", "Sven", "Terrorblade", "Tiny", "Troll Warlord", "Ursa", "Weaver", "Wraith King"]
safelane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in safelane]
print(len(safelane), len(safelane_name_to_id))

midlane = ["Alchemist", "Arc Warden", "Batrider", "Broodmother", "Death Prophet", "Ember Spirit", 
"Huskar", "Invoker", "Kunkka", "Leshrac", "Lina", "Lone Druid", "Meepo", "Necrophos", "Outworld Destroyer", 
"Puck", "Pugna", "Queen of Pain", "Razor", "Shadow Fiend", "Sniper", "Storm Spirit", 
"Templar Assassin", "Tinker", "Viper", "Visage", "Void Spirit", "Zeus"]
midlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in midlane]
print(len(midlane), len(midlane_name_to_id))

offlane = ["Axe", "Beastmaster", "Bloodseeker", "Brewmaster", "Bristleback", 
"Centaur Warrunner", "Chaos Knight", "Dark Seer", "Dawnbreaker", "Death Prophet", "Doom", 
"Dragon Knight", "Earthshaker", "Elder Titan", "Enigma", "Legion Commander", "Lycan", 
"Mars", "Nature's Prophet", "Necrophos", "Night Stalker", "Pangolier", 
"Razor", "Sand King", "Slardar", "Spirit Breaker", "Tidehunter", "Timbersaw", "Underlord", "Venomancer", "Viper"]

offlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in offlane]
print(len(offlane),len(offlane_name_to_id))

soft_support = ["Bounty Hunter", "Chen", "Clockwerk", "Dark Willow", "Earth Spirit", "Earthshaker", "Enigma", 
"Grimstroke", "Hoodwink", "Keeper of the Light", "Mirana", "Nyx Assassin", "Phoenix", "Pudge", "Rubick", 
"Shadow Demon", "Shadow Shaman", "Silencer", "Skywrath Mage", "Snapfire", "Spirit Breaker", "Techies", 
"Treant Protector", "Tusk", "Venomancer", "Weaver", "Windranger"]

soft_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in soft_support]
print(len(soft_support), len(soft_support_name_to_id))

hard_support = ["Abaddon", "Ancient Apparition", "Bane", "Chen", "Crystal Maiden", "Dark Willow", "Dazzle", "Disruptor", 
"Enchantress", "Grimstroke", "Io", "Jakiro", "Keeper of the Light", "Lich", "Lion", "Ogre Magi", "Omniknight", "Oracle", "Shadow Demon", 
"Shadow Shaman", "Silencer", "Snapfire", "Treant Protector", "Vengeful Spirit", "Undying", "Warlock", "Winter Wyvern", "Witch Doctor"]
hard_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in hard_support]
print(len(hard_support), len(hard_support_name_to_id))


27 27
28 28
31 31
27 27
28 28


In [28]:
weights_dict = defaultdict(lambda: np.zeros(5))

for h in heroes:
    weights_dict[h["id"]] = np.array([.1, .1, .1, .1, .1])

lane_assist = [safelane_name_to_id, midlane_name_to_id, offlane_name_to_id, soft_support_name_to_id, hard_support_name_to_id]

for k, lane in enumerate(lane_assist):
    for hid in lane:
        weights_dict[hid][k] = 1.


In [29]:
{e:v for e, v in enumerate(weights_dict[4])}


{0: 1.0, 1: 0.1, 2: 1.0, 3: 0.1, 4: 0.1}

In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

clfs = []
scores = []
scores_dict = {}

for hid in tqdm.tqdm(xs):
    x = np.vstack(xs[hid])
    y = np.hstack(ys[hid]) - 1
    
    class_weight = {e:v for e, v in enumerate(weights_dict[hid])}
    clf = LogisticRegression(multi_class="ovr", max_iter=300, n_jobs=-1, solver="lbfgs", class_weight=class_weight)
    clf.fit(x, y)

    score = clf.score(x,y)
    clfs.append(clf)
    scores.append(score)
    scores_dict[hid] = score


100%|██████████| 121/121 [01:41<00:00,  1.19it/s]


In [31]:
from sklearn.metrics import confusion_matrix
hid = 96
confusion_matrix(clfs[hid].predict(np.vstack(xs[hid])), np.hstack(ys[hid])-1)

counts, unique = np.unique(np.hstack(ys[hid])-1, return_counts=True)
print(counts, unique)

[0 1 2 3 4] [910  56 881 150 156]


[0 1 2 3 4] [1708  114 1632  365  417]

In [32]:
a = list(scores_dict.items())
a.sort(key=lambda x: x[1], reverse=False)

hid_to_name = {x["id"]:x["localized_name"] for x in heroes}

for k, v in a:
    print(hid_to_name[k], k, v)



Nature's Prophet 53 0.34572490706319703
Clinkz 56 0.35523012552301253
Dragon Knight 49 0.3641740599915505
Techies 105 0.36475998850244323
Medusa 94 0.373432518597237
Drow Ranger 6 0.3809309646953002
Timbersaw 98 0.3885025498377376
Axe 2 0.39919545669663986
Lycan 77 0.4010095344924285
Sand King 16 0.4018172640080767
Troll Warlord 95 0.4028002154011847
Riki 32 0.40296610169491526
Dark Seer 55 0.4051724137931034
Legion Commander 104 0.40845643041062474
Tiny 19 0.408699351270871
Monkey King 114 0.40924966879441166
Morphling 10 0.4097459233978005
Pangolier 120 0.4118873826903024
Enigma 33 0.4121889650198341
Underlord 108 0.4126436781609195
Lone Druid 80 0.4126778783958603
Centaur Warrunner 96 0.4143056200650255
Bounty Hunter 62 0.4150506512301013
Naga Siren 89 0.4155759162303665
Doom 69 0.41827991113932084
Night Stalker 60 0.42704626334519574
Tidehunter 29 0.4274079320113314
Bristleback 99 0.42811791383219955
Mars 129 0.42821700297717497
Bloodseeker 4 0.4286979627989371
Gyrocopter 72 0.4297

In [None]:
# save model

In [26]:
print(np.array(scores).mean())

clfs[1].coef_[0, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

0.5073834651239812


array([ 0.67401485,  0.64140675,  0.5257871 , -0.0839084 , -1.75650057])

In [None]:
print(np.array(scores).mean())

clfs[31].coef_[4, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

In [None]:
unique, counts = np.unique(y, return_counts=True)
print(unique, counts)
import pandas as pd

df = pd.DataFrame(y)
df.plot.hist(xticks=[1, 2, 3, 4, 5])


In [None]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split
from sklearn.metrics import confusion_matrix

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print(np.mean(y_pred == y_test))

print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = LogisticRegression(random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print(np.mean(y_pred == y_test))

print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split


# for hero, train a logistic regression model and add to VotingClassifier
estimators = []


clf = LogisticRegression(multi_class='multinomial', random_state=1)

eclf1 = VotingClassifier(estimators=estimators, voting='hard')
eclf1 = eclf1.fit(x_train, y_train)