In [None]:
import os
import json
import numpy as np
import pickle 
import tqdm
import tarfile
from scipy import sparse
from collections import defaultdict
from pprint import pprint
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier

def load_data(path):
    """Read a pickled dataset and return its samples and labels.

    Parameters
    ----------
    path : str or path-like
        Location of a pickle file whose top-level object supports
        ``["xs"]`` and ``["ys"]`` lookups.

    Returns
    -------
    tuple
        ``(xs, ys)`` exactly as stored under those two keys.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only point this at files
    you produced yourself or otherwise trust.
    """
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)

    samples, labels = payload["xs"], payload["ys"]
    return samples, labels

def load_weights_dict(path):
    """Unpickle and return the object stored at ``path``.

    The notebook indexes the result by hero id to obtain per-class
    weights. As with any pickle, only load files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def get_dummy_classifier():
    """Build a baseline classifier that always predicts class ``0``."""
    baseline = DummyClassifier(strategy="constant", constant=0)
    return baseline

# Pickled inputs, relative to the notebook's working directory.
output_path = "../data/dataset_positions_winners.pkl"
weights_path = "../data/weights_dict.pkl"
xs, ys = load_data(output_path)
weights_dict = load_weights_dict(weights_path)

# One seed for both the train/test split and the tree, so that scores and
# feature importances are reproducible under Restart & Run All.
RANDOM_STATE = 42
# Role labels every hero must have at least one sample of before fitting.
ROLES = (1, 2, 3, 4, 5)

clfs = {}          # hero id -> fitted DecisionTreeClassifier
scores = {}        # hero id -> accuracy on the held-out 20 %
scores_dict = {}   # same content as `scores`; kept because later cells read it

# Feature width, taken from the first hero available instead of a hard-coded
# hero id (which raised KeyError whenever that id was missing from the data).
# Assumes every hero shares the same feature layout.
_first_hid = next(iter(xs), None)
n_features = np.vstack(xs[_first_hid]).shape[1] if _first_hid is not None else 0

for hid in tqdm.tqdm(xs):
    try:
        # enumerate() starts at 0, which lines up with the zero-based labels
        # (role - 1) used for fitting below.
        class_weight = dict(enumerate(weights_dict[hid]))
        present_roles = set(np.unique(np.hstack(ys[hid])).tolist())

        # Pad with one all-zero sample per missing role so each of the five
        # classes occurs at least once in the hero's data.
        # NOTE: this appends to the loaded xs/ys in place, so later cells
        # that read xs[hid]/ys[hid] also see these synthetic rows.
        # NOTE(review): a single padded row can land in the test split,
        # leaving the role absent from training; confirm that is acceptable.
        for class_role in ROLES:
            if class_role not in present_roles:
                print('added classrole', class_role, "for hero", hid)
                xs[hid].append(np.zeros((n_features)))
                ys[hid].append(class_role)

        x = np.vstack(xs[hid])
        y = np.hstack(ys[hid]) - 1  # roles 1..5 -> class indices 0..4

        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.2, random_state=RANDOM_STATE
        )

        # Seeded: an unseeded tree can differ from run to run.
        clf = DecisionTreeClassifier(class_weight=class_weight, random_state=RANDOM_STATE)
        clf.fit(x_train, y_train)

        score = clf.score(x_test, y_test)
        clfs[hid] = clf
        scores[hid] = score
        scores_dict[hid] = score

    except Exception:
        # Report which hero failed, then let the original error propagate.
        print("crashed on hid: ", hid)
        raise


In [None]:
# sum_teammates = clfs[hid].feature_importances_[:136].sum()
# sum_features = clfs[hid].feature_importances_[136:].sum()

# Stat groups whose importances are totalled across all hero classifiers.
STAT_NAMES = ["gold_per_min", "xp_per_min", "kills", "deaths", "assists", "last_hits", "hero_damage", "tower_damage"]
# Column where the stat groups start. The commented-out lines above suggest
# the first 136 columns describe teammates -- TODO confirm against the
# feature builder.
STAT_OFFSET = 136
# Each stat occupies this many consecutive columns.
STAT_WIDTH = 5

feature_sum = dict.fromkeys(STAT_NAMES, 0.0)

for hid in xs:
    importances = clfs[hid].feature_importances_
    for position, stat_name in enumerate(STAT_NAMES):
        start = STAT_OFFSET + position * STAT_WIDTH
        feature_sum[stat_name] += importances[start:start + STAT_WIDTH].sum()

pprint(feature_sum.items())
# sum_teammates, sum_features


In [None]:
from sklearn.metrics import confusion_matrix

# Hero id inspected in this cell and in the classification report cell.
hid = 129

# NOTE: evaluated on every sample stored for this hero, including the rows
# the classifier was trained on, so these numbers are optimistic.
y_true = np.hstack(ys[hid]) - 1
y_pred = clfs[hid].predict(np.vstack(xs[hid]))

# confusion_matrix takes (y_true, y_pred): rows are true classes, columns are
# predictions. The arguments were previously swapped, which transposed it.
print(confusion_matrix(y_true, y_pred))

# np.unique(..., return_counts=True) returns (values, counts) in that order;
# the names now match what they hold.
unique, counts = np.unique(y_true, return_counts=True)
print(unique, counts)

In [None]:
from sklearn.metrics import classification_report

# classification_report takes (y_true, y_pred). Passing them the other way
# round swaps precision and recall and reports support from the predictions.
# Uses `hid` as set by the confusion matrix cell.
report_true = np.hstack(ys[hid]) - 1
report_pred = clfs[hid].predict(np.vstack(xs[hid]))
print(classification_report(report_true, report_pred))

In [None]:
# Hero scores ordered from worst to best held-out accuracy.
a = sorted(scores_dict.items(), key=lambda entry: entry[1])

# NOTE(review): `heroes` is not defined in the cells shown here; it is
# presumably a list of dicts with "id" and "localized_name" keys loaded
# elsewhere -- confirm it exists before this cell runs on a fresh kernel.
hid_to_name = {hero["id"]: hero["localized_name"] for hero in heroes}

s_sum = []
for hero_id, accuracy in a:
    s_sum.append(accuracy)
    print(hid_to_name[hero_id], hero_id, accuracy)

# Cell output: mean and median accuracy over all heroes.
np.mean(s_sum), np.median(s_sum)