In [11]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter

In [14]:
def load_data(path):
    with open(path, 'rb') as fp:
        data = pickle.load(fp)

    return data["xs"], data["ys"]

def load_weights_dict(path):
    with open(path, 'rb') as fp:
        weights_dict = pickle.load(fp)
    return weights_dict

def get_dummy_classifier():
    return DummyClassifier(strategy="constant", constant=0)

In [15]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier


output_path = "../data/dataset_positions_winners.pkl" 
weights_path = "../data/weights_dict.pkl"
xs, ys = load_data(output_path)
weights_dict = load_weights_dict(weights_path)

clfs = {}
scores = {}
scores_dict = {}
n_features = np.vstack(xs[1]).shape[1]
for hid in tqdm.tqdm(xs):
    try:
        x = np.vstack(xs[hid])
        y = np.hstack(ys[hid])
        
        class_weight = {e:v for e, v in enumerate(weights_dict[hid])}
        unique = np.unique(np.hstack(ys[hid]))

        for class_role in [1, 2, 3, 4, 5]:
            if not class_role in unique.tolist():
                print('added classrole', class_role, "for hero", hid)
                xs[hid].append(np.zeros((n_features)))
                ys[hid].append(class_role)

        x = np.vstack(xs[hid])
        y = np.hstack(ys[hid]) - 1

        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

        clf = LogisticRegression(multi_class="ovr", max_iter=300, n_jobs=-1, solver="lbfgs", class_weight=class_weight)
        clf.fit(x_train, y_train)

        score = clf.score(x_test, y_test)
        clfs[hid] = clf
        scores[hid] = score
        scores_dict[hid] = score


    except:
        print("creashed on hid: ", hid)
        raise


 74%|███████▎  | 89/121 [00:45<00:13,  2.42it/s]

added classrole 1 for hero 65


100%|██████████| 121/121 [00:51<00:00,  2.36it/s]


In [16]:
from sklearn.metrics import confusion_matrix
hid = 129
print(confusion_matrix(clfs[hid].predict(np.vstack(xs[hid])), np.hstack(ys[hid])-1))

counts, unique = np.unique(np.hstack(ys[hid])-1, return_counts=True)
print(counts, unique)

[[   0    0    0    0    0]
 [   0    0    0    0    0]
 [  30  118 1969  227   34]
 [   0    0    1   50   24]
 [   0    0    0    0    1]]
[0 1 2 3 4] [  30  118 1970  277   59]


[0 1 2 3 4] [1708  114 1632  365  417]

In [17]:
from sklearn.metrics import classification_report
print(classification_report(clfs[hid].predict(np.vstack(xs[hid])), np.hstack(ys[hid])-1))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       1.00      0.83      0.91      2378
           3       0.18      0.67      0.28        75
           4       0.02      1.00      0.03         1

    accuracy                           0.82      2454
   macro avg       0.24      0.50      0.24      2454
weighted avg       0.97      0.82      0.89      2454



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
a = list(scores_dict.items())
a.sort(key=lambda x: x[1], reverse=False)

hid_to_name = {x["id"]:x["localized_name"] for x in heroes}
s_sum = []
for k, v in a:
    s_sum.append(v)
    print(hid_to_name[k], k, v)

np.mean(s_sum), np.median(s_sum)

Alchemist 73 0.43243243243243246
Lone Druid 80 0.49056603773584906
Keeper of the Light 90 0.5137614678899083
Nature's Prophet 53 0.53125
Omniknight 57 0.5333333333333333
Vengeful Spirit 20 0.5529801324503312
Techies 105 0.555956678700361
Enchantress 58 0.5566037735849056
Broodmother 61 0.56
Bloodseeker 4 0.573170731707317
Riki 32 0.5751295336787565
Silencer 75 0.5798045602605864
Undying 85 0.5844155844155844
Rubick 86 0.5876460767946577
Treant Protector 83 0.6060606060606061
Bounty Hunter 62 0.6202090592334495
Snapfire 128 0.6213592233009708
Venomancer 40 0.624031007751938
Chen 66 0.6296296296296297
Nyx Assassin 88 0.6305732484076433
Hoodwink 123 0.6318681318681318
Pugna 45 0.6363636363636364
Visage 92 0.6363636363636364
Dark Willow 119 0.6385224274406333
Skywrath Mage 101 0.638755980861244
Razor 15 0.6412213740458015
Clinkz 56 0.6413043478260869
Viper 47 0.6416382252559727
Shadow Demon 79 0.6481481481481481
Dragon Knight 49 0.6483516483516484
Pudge 14 0.6492462311557788
Ogre Magi 84 0

(0.7314542161210236, 0.7261306532663316)

In [None]:
unique, counts = np.unique(y, return_counts=True)
print(unique, counts)
import pandas as pd

df = pd.DataFrame(y)
df.plot.hist(xticks=[1, 2, 3, 4, 5])
