In [94]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter

In [95]:
output_path = "../data/dataset_positions_winners.pkl" 

with open(output_path, 'rb') as fp:
    data = pickle.load(fp)

xs = data["xs"]
ys = data["ys"]

In [96]:
with open("../../data/heroes.json", "r") as fp:
  heroes = json.load(fp)

safelane = ["Anti-Mage", "Arc Warden", "Bloodseeker", "Chaos Knight", "Clinkz", "Drow Ranger", "Faceless Void", 
"Gyrocopter", "Juggernaut", "Lifestealer", "Luna", "Medusa", "Monkey King", "Morphling", "Naga Siren", "Phantom Assassin", 
"Phantom Lancer", "Riki", "Slark", "Spectre", "Sven", "Terrorblade", "Tiny", "Troll Warlord", "Ursa", "Weaver", "Wraith King"]
safelane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in safelane]
print(len(safelane), len(safelane_name_to_id))

midlane = ["Alchemist", "Arc Warden", "Batrider", "Broodmother", "Death Prophet", "Ember Spirit", 
"Huskar", "Invoker", "Kunkka", "Leshrac", "Lina", "Lone Druid", "Meepo", "Necrophos", "Outworld Destroyer", 
"Puck", "Pugna", "Queen of Pain", "Razor", "Shadow Fiend", "Sniper", "Storm Spirit", 
"Templar Assassin", "Tinker", "Viper", "Visage", "Void Spirit", "Zeus"]
midlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in midlane]
print(len(midlane), len(midlane_name_to_id))

offlane = ["Axe", "Beastmaster", "Bloodseeker", "Brewmaster", "Bristleback", 
"Centaur Warrunner", "Chaos Knight", "Dark Seer", "Dawnbreaker", "Death Prophet", "Doom", 
"Dragon Knight", "Earthshaker", "Elder Titan", "Enigma", "Legion Commander", "Lycan", 
"Mars", "Nature's Prophet", "Necrophos", "Night Stalker", "Pangolier", 
"Razor", "Sand King", "Slardar", "Spirit Breaker", "Tidehunter", "Timbersaw", "Underlord", "Venomancer", "Viper"]

offlane_name_to_id = [x["id"] for x in heroes if x["localized_name"] in offlane]
print(len(offlane),len(offlane_name_to_id))

soft_support = ["Bounty Hunter", "Chen", "Clockwerk", "Dark Willow", "Earth Spirit", "Earthshaker", "Enigma", 
"Grimstroke", "Hoodwink", "Keeper of the Light", "Mirana", "Nyx Assassin", "Phoenix", "Pudge", "Rubick", 
"Shadow Demon", "Shadow Shaman", "Silencer", "Skywrath Mage", "Snapfire", "Spirit Breaker", "Techies", 
"Treant Protector", "Tusk", "Venomancer", "Weaver", "Windranger"]

soft_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in soft_support]
print(len(soft_support), len(soft_support_name_to_id))

hard_support = ["Abaddon", "Ancient Apparition", "Bane", "Chen", "Crystal Maiden", "Dark Willow", "Dazzle", "Disruptor", 
"Enchantress", "Grimstroke", "Io", "Jakiro", "Keeper of the Light", "Lich", "Lion", "Ogre Magi", "Omniknight", "Oracle", "Shadow Demon", 
"Shadow Shaman", "Silencer", "Snapfire", "Treant Protector", "Vengeful Spirit", "Undying", "Warlock", "Winter Wyvern", "Witch Doctor"]
hard_support_name_to_id = [x["id"] for x in heroes if x["localized_name"] in hard_support]
print(len(hard_support), len(hard_support_name_to_id))


27 27
28 28
31 31
27 27
28 28


In [97]:
weights_dict = defaultdict(lambda: np.zeros(5))

for h in heroes:
    weights_dict[h["id"]] = np.array([.1, .1, .1, .1, .1])

lane_assist = [safelane_name_to_id, midlane_name_to_id, offlane_name_to_id, soft_support_name_to_id, hard_support_name_to_id]

for k, lane in enumerate(lane_assist):
    for hid in lane:
        weights_dict[hid][k] = 1.

{e:v for e, v in enumerate(weights_dict[4])}


In [120]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier


def get_dummy_classifier():
    return DummyClassifier(strategy="constant", constant=0)

clfs = {}
scores = {}
scores_dict = {}

n_features = np.vstack(xs[1]).shape[1]
for hid in tqdm.tqdm(xs):
    try:
        x = np.vstack(xs[hid])
        y = np.hstack(ys[hid])
        
        class_weight = {e:v for e, v in enumerate(weights_dict[hid])}
        unique = np.unique(np.hstack(ys[hid]))

        for class_role in [1, 2, 3, 4, 5]:
            if not class_role in unique.tolist():
                print('added classrole', class_role, "for hero", hid)
                xs[hid].append(np.zeros((n_features)))
                ys[hid].append(class_role)

        x = np.vstack(xs[hid])
        y = np.hstack(ys[hid]) - 1

        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

        clf = LogisticRegression(multi_class="ovr", max_iter=300, n_jobs=-1, solver="lbfgs", class_weight=class_weight)
        # clf = DecisionTreeClassifier(class_weight=class_weight)
        clf.fit(x_train, y_train)

        score = clf.score(x_test, y_test)
        clfs[hid] = clf
        scores[hid] = score
        scores_dict[hid] = score


    except:
        print("creashed on hid: ", hid)
        raise


100%|██████████| 121/121 [00:57<00:00,  2.09it/s]


In [121]:
from sklearn.metrics import confusion_matrix
hid = 129
confusion_matrix(clfs[hid].predict(np.vstack(xs[hid])), np.hstack(ys[hid])-1)

counts, unique = np.unique(np.hstack(ys[hid])-1, return_counts=True)
print(counts, unique)

[0 1 2 3 4] [  30  118 1970  277   59]


[0 1 2 3 4] [1708  114 1632  365  417]

In [122]:
from sklearn.metrics import classification_report
print(classification_report(clfs[hid].predict(np.vstack(xs[hid])), np.hstack(ys[hid])-1))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       1.00      0.83      0.91      2378
           3       0.18      0.67      0.28        75
           4       0.02      1.00      0.03         1

    accuracy                           0.82      2454
   macro avg       0.24      0.50      0.24      2454
weighted avg       0.97      0.82      0.89      2454



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [123]:
a = list(scores_dict.items())
a.sort(key=lambda x: x[1], reverse=False)

hid_to_name = {x["id"]:x["localized_name"] for x in heroes}
s_sum = []
for k, v in a:
    s_sum.append(v)
    print(hid_to_name[k], k, v)

np.mean(s_sum), np.median(s_sum)

Alchemist 73 0.43243243243243246
Lone Druid 80 0.49056603773584906
Keeper of the Light 90 0.5137614678899083
Nature's Prophet 53 0.53125
Omniknight 57 0.5333333333333333
Vengeful Spirit 20 0.5529801324503312
Techies 105 0.555956678700361
Enchantress 58 0.5566037735849056
Broodmother 61 0.56
Bloodseeker 4 0.573170731707317
Riki 32 0.5751295336787565
Silencer 75 0.5798045602605864
Undying 85 0.5844155844155844
Rubick 86 0.5876460767946577
Treant Protector 83 0.6060606060606061
Bounty Hunter 62 0.6202090592334495
Snapfire 128 0.6213592233009708
Venomancer 40 0.624031007751938
Chen 66 0.6296296296296297
Nyx Assassin 88 0.6305732484076433
Hoodwink 123 0.6318681318681318
Pugna 45 0.6363636363636364
Visage 92 0.6363636363636364
Dark Willow 119 0.6385224274406333
Skywrath Mage 101 0.638755980861244
Razor 15 0.6412213740458015
Clinkz 56 0.6413043478260869
Viper 47 0.6416382252559727
Shadow Demon 79 0.6481481481481481
Dragon Knight 49 0.6483516483516484
Pudge 14 0.6492462311557788
Ogre Magi 84 0

(0.7314542161210236, 0.7261306532663316)

In [118]:
# sum_teammates = clfs[hid].feature_importances_[:136].sum()
# sum_features = clfs[hid].feature_importances_[136:].sum()


feature_sum = dict((k, 0.0) for k in ["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"])

for hid in xs:
    for k, l in enumerate(["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"]):
        index = 136 + k * 5
        s = clfs[hid].feature_importances_[index:index+5].sum()
        feature_sum[l] += s



pprint(feature_sum.items())
# sum_teammates, sum_features


dict_items([('gold_per_min', 16.82834187691858), ('xp_per_min', 4.534913684123263), ('kills', 4.553598039104362), ('deaths', 5.753761465307338), ('assists', 5.626764202281147), ('last_hits', 9.401213883965836), ('hero_damage', 5.269952380693461), ('tower_damage', 4.650736035260402)])


In [119]:
print(np.array(scores).mean())

clfs[1].coef_[0, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

TypeError: unsupported operand type(s) for /: 'dict' and 'int'

In [None]:
print(np.array(scores).mean())

clfs[31].coef_[4, 136:136+5]

# clfs[31].feature_importances_[136:136+5]

In [None]:
unique, counts = np.unique(y, return_counts=True)
print(unique, counts)
import pandas as pd

df = pd.DataFrame(y)
df.plot.hist(xticks=[1, 2, 3, 4, 5])


In [None]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split
from sklearn.metrics import confusion_matrix

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print(np.mean(y_pred == y_test))

print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
clf = LogisticRegression(random_state=0)
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print(np.mean(y_pred == y_test))

print(confusion_matrix(y_test, y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score,cross_val_predict, train_test_split


# for hero, train a logistic regression model and add to VotingClassifier
estimators = []


clf = LogisticRegression(multi_class='multinomial', random_state=1)

eclf1 = VotingClassifier(estimators=estimators, voting='hard')
eclf1 = eclf1.fit(x_train, y_train)