In [15]:
import json
import random
from collections import defaultdict
from itertools import chain
from typing import Iterable

import keras
import numpy
import pandas
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, StratifiedKFold

In [16]:
# Evaluation setup shared by the model cells below: the metric name and a
# seeded, shuffled 10-fold stratified splitter.
scoring = 'accuracy'
cv = StratifiedKFold(shuffle=True, random_state=42, n_splits=10)

In [17]:
# Copy-pasted from the game.
# https://heroes.cdnvideo.ru/vk/v0326/locale/ru.json.gz
#
# Lookup table from a hero id to the hero's localized (Russian) display name.
# The keys are the ids written as strings, so a numeric id has to be passed
# through `str()` before the lookup. The keys are listed in string
# (lexicographic) order, not numeric order.

lib = {
  "0": "Карен",
  "1": "Аврора",
  "10": "Безликий",
  "11": "Чабба",
  "12": "Арахна",
  "13": "Орион",
  "14": "Фокс",
  "15": "Джинджер",
  "16": "Данте",
  "17": "Моджо",
  "18": "Судья",
  "19": "Темная Звезда",
  "2": "Галахад",
  "20": "Артемис",
  "21": "Маркус",
  "22": "Пеппи",
  "23": "Лиэн",
  "24": "Тесак",
  "25": "Исмаил",
  "26": "Лилит",
  "27": "Лютер",
  "28": "Цин Мао",
  "29": "Дориан",
  "3": "Кира",
  "30": "Корнелиус",
  "31": "Джет",
  "32": "Гелиос",
  "33": "Ларс",
  "34": "Криста",
  "35": "Йорген",
  "36": "Майя",
  "37": "Джу",
  "38": "Эльмир",
  "39": "Зири",
  "4": "Астарот",
  "40": "Небула",
  "41": "К'арх",
  "5": "Кай",
  "6": "Фобос",
  "7": "Тея",
  "8": "Сорвиголова",
  "9": "Хайди",
}

In [18]:
# Parse battle logs.

def parse_heroes(heroes: Iterable[dict], sign: int, result: dict):
    """
    Accumulates the signed stats of one team into `result`.

    For every hero, the `level`, `color` and `star` values are multiplied by
    `sign` and added to the entries named `"<hero name> Level"`,
    `"<hero name> Color"` and `"<hero name> Star"`. The hero name is looked up
    in `lib` by the stringified hero `id`.

    `result` is modified in place and must supply a numeric default for
    missing keys (e.g. `defaultdict(int)`). Nothing is returned.
    """
    attributes = ('Level', 'Color', 'Star')
    for entry in heroes:
        name = lib[str(entry['id'])]
        for attribute in attributes:
            column = f'''{name} {attribute}'''
            result[column] += sign * entry[attribute.lower()]


def parse_battle(line: str) -> dict:
    """
    Converts one JSON-encoded battle log line into a flat feature row.

    The attacking team is read from `attackers` (falling back to `player` when
    that key is missing or empty) and counted with a positive sign. The
    defending team is read from `defenders` (falling back to `enemies`) and
    counted with a negative sign. The returned plain dict holds the battle
    outcome under `Win`, followed by the accumulated per-hero stats.
    """
    record = json.loads(line)
    totals = defaultdict(int)

    sides = (
        ('attackers', 'player', +1),
        ('defenders', 'enemies', -1),
    )
    for primary_key, fallback_key, sign in sides:
        parse_heroes(record.get(primary_key) or record[fallback_key], sign, totals)

    row = {'Win': record['win']}
    row.update(totals)
    return row

In [19]:
# Load the logs into a data frame.

def invert_column(series: pandas.Series):
    """
    Mirrors a single column so that the frame describes the "opposite" battle.

    The `Win` column is logically flipped; every other column is negated.
    """
    if series.name == 'Win':
        return series.eq(False)
    return -series

# Read both battle logs line by line. Each file is opened in a `with` block so
# that its handle is closed as soon as the file has been consumed, instead of
# being left open for the lifetime of the kernel.
lines = []
for log_path in ('battles.jsonl', 'battles-lilia.jsonl'):
    with open(log_path) as log_file:
        lines.extend(log_file)

numpy.random.seed(42)  # seeds the global NumPy RNG before the shuffle below
# Blank lines are skipped: they carry no battle and would make `json.loads` fail.
battles = pandas.DataFrame([parse_battle(line) for line in lines if line.strip()]).fillna(value=0)
battles = battles.drop_duplicates()  # because the files may contain duplicate battles
battles = pandas.concat((battles, battles.apply(invert_column)))  # add inverted battles
battles = battles.sample(frac=1)  # shuffle
battles.head()

Unnamed: 0,Win,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,Артемис Level,Артемис Star,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
44,False,-0.0,-0.0,-0.0,1.0,1.0,1.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
42,False,0.0,0.0,0.0,-2.0,-12.0,-1.0,-5.0,-50.0,-2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
106,True,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,7.0,58.0,2.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
123,False,7.0,58.0,3.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
56,True,-6.0,-48.0,-2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Summary statistics of the feature columns. `battles` holds every battle
# together with its sign-inverted copy, so each column is expected to show a
# zero mean and mirrored min/max values.
battles.describe()

Unnamed: 0,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,Артемис Level,Артемис Star,Астарот Color,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
count,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0,...,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0,360.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,2.496795,19.676492,0.886293,3.627502,28.868446,1.766289,3.444749,28.801114,1.266672,5.141995,...,0.316668,1.34558,11.283138,0.408816,1.672321,13.446065,0.708091,0.941166,7.647892,0.415574
min,-9.0,-70.0,-3.0,-9.0,-71.0,-6.0,-8.0,-69.0,-3.0,-9.0,...,-2.0,-8.0,-71.0,-3.0,-8.0,-70.0,-3.0,-7.0,-60.0,-3.0
25%,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,-6.0,...,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0
50%,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,...,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,70.0,3.0,9.0,71.0,6.0,8.0,69.0,3.0,9.0,...,2.0,8.0,71.0,3.0,8.0,70.0,3.0,7.0,60.0,3.0


In [21]:
# Separate the target (battle outcome) from the feature columns.
y = battles['Win']
x = battles.drop(columns=['Win'])

In [22]:
# Logistic regression baseline: grid-search the inverse regularisation strength
# `C` over 1000 log-spaced values, then cross-validate the best estimator.
numpy.random.seed(42)

param_grid = {'C': numpy.logspace(-6, 2, num=1000)}
# No intercept is fitted -- presumably because the mirrored data set is
# symmetric around zero; confirm before changing.
classifier = LogisticRegression(max_iter=1000, fit_intercept=False)

grid_search = GridSearchCV(classifier, param_grid, cv=cv, scoring=scoring, n_jobs=4)
grid_search.fit(x, y)
best_model = grid_search.best_estimator_

print(f'Best score: {grid_search.best_score_}')
print(f'Best params: {grid_search.best_params_}')
print(f'Classes: {best_model.classes_}')

scores = cross_val_score(best_model, x, y, scoring=scoring, cv=cv)
print(f'CV score: {scores.mean()} (std: {scores.std()})')

Best score: 0.9722222222222222
Best params: {'C': 80.15006961565398}
Classes: [False  True]
CV score: 0.9694444444444443 (std: 0.026205503144601686)


In [23]:
# Rank the features by their (signed) logistic-regression coefficient and show
# the 20 largest ones.
coefficients = grid_search.best_estimator_.coef_[0]
importance = pandas.DataFrame({'Feature': x.columns, 'Importance': coefficients})
(
    importance
    .set_index('Feature')
    .sort_values('Importance', ascending=False)
    .head(n=20)
)

Unnamed: 0_level_0,Importance
Feature,Unnamed: 1_level_1
Зири Color,12.216805
Астарот Star,11.194745
Криста Color,9.584731
Фобос Star,9.208786
Судья Color,6.571109
Судья Star,6.435076
Фобос Color,5.956577
Джинджер Color,5.847264
Арахна Color,5.005208
Аврора Color,4.895972


In [24]:
# Put the model's predictions for the data it was fitted on next to the
# original rows: predicted class, probability of the `True` class, then the
# battle features themselves.
best_model = grid_search.best_estimator_
predicted = pandas.Series(best_model.predict(x), index=battles.index, name='Predicted')
probability = pandas.Series(best_model.predict_proba(x)[:, 1], index=battles.index, name='Probability')

result = pandas.concat((predicted, probability, battles), axis=1)
# Render the probability with two decimals (this turns the column into strings).
result['Probability'] = result['Probability'].apply('{:.2f}'.format)
result.head()

Unnamed: 0,Predicted,Probability,Win,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
44,False,0.0,False,-0.0,-0.0,-0.0,1.0,1.0,1.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
42,False,0.0,False,0.0,0.0,0.0,-2.0,-12.0,-1.0,-5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
106,True,0.8,True,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,7.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
123,False,0.02,False,7.0,58.0,3.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
56,True,0.89,True,-6.0,-48.0,-2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Experimental NN model.
# I want it to catch an interaction between different heroes (e.g. Йорген is good in conjunction with Исмаил).

def create_model() -> keras.Model:
    """
    Builds and compiles the experimental neural network.

    Architecture: one hidden sigmoid layer of 1000 units, fed with one input
    per column of the global feature frame `x`, followed by 10% dropout and a
    single sigmoid output unit. The model is compiled with binary
    cross-entropy loss, the RMSprop optimizer and the accuracy metric.

    Takes no arguments so that it can be used as a `build_fn` factory.
    """
    model = Sequential()
    model.add(Dense(1000, input_dim=x.columns.size, activation='sigmoid'))
    model.add(Dropout(rate=0.1))
    # Only the first layer declares the input size. The output layer takes its
    # input shape from the previous layer, so an `input_dim` here would be
    # ignored and would wrongly suggest it is fed by the raw features.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

# Cross-validate the network on the same folds as the logistic regression.
numpy.random.seed(42)

keras_classifier = KerasClassifier(build_fn=create_model, verbose=0, epochs=100)
scores = cross_val_score(estimator=keras_classifier, X=x, y=y, cv=cv)

print(f'Score: {scores.mean()} (std: {scores.std()})')

Score: 0.9722222222222221 (std: 0.03513641844631534)
