In [93]:
import json
import random
from collections import defaultdict
from typing import Iterable

import keras
import numpy
import pandas
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, StratifiedKFold

In [49]:
# Copy-pasted from the game.
lib = {
  "LIB_HERO_NAME_0": "Карен",
  "LIB_HERO_NAME_1": "Аврора",
  "LIB_HERO_NAME_10": "Безликий",
  "LIB_HERO_NAME_11": "Чабба",
  "LIB_HERO_NAME_12": "Арахна",
  "LIB_HERO_NAME_13": "Орион",
  "LIB_HERO_NAME_14": "Фокс",
  "LIB_HERO_NAME_15": "Джинджер",
  "LIB_HERO_NAME_16": "Данте",
  "LIB_HERO_NAME_17": "Моджо",
  "LIB_HERO_NAME_18": "Судья",
  "LIB_HERO_NAME_19": "Темная Звезда",
  "LIB_HERO_NAME_2": "Галахад",
  "LIB_HERO_NAME_20": "Артемис",
  "LIB_HERO_NAME_21": "Маркус",
  "LIB_HERO_NAME_22": "Пеппи",
  "LIB_HERO_NAME_23": "Лиэн",
  "LIB_HERO_NAME_24": "Тесак",
  "LIB_HERO_NAME_25": "Исмаил",
  "LIB_HERO_NAME_26": "Лилит",
  "LIB_HERO_NAME_27": "Лютер",
  "LIB_HERO_NAME_28": "Цин Мао",
  "LIB_HERO_NAME_29": "Дориан",
  "LIB_HERO_NAME_3": "Кира",
  "LIB_HERO_NAME_30": "Корнелиус",
  "LIB_HERO_NAME_31": "Джет",
  "LIB_HERO_NAME_32": "Гелиос",
  "LIB_HERO_NAME_33": "Ларс",
  "LIB_HERO_NAME_34": "Криста",
  "LIB_HERO_NAME_35": "Йорген",
  "LIB_HERO_NAME_36": "Майя",
  "LIB_HERO_NAME_37": "Джу",
  "LIB_HERO_NAME_38": "Эльмир",
  "LIB_HERO_NAME_39": "Зири",
  "LIB_HERO_NAME_4": "Астарот",
  "LIB_HERO_NAME_40": "Небула",
  "LIB_HERO_NAME_5": "Кай",
  "LIB_HERO_NAME_6": "Фобос",
  "LIB_HERO_NAME_7": "Тея",
  "LIB_HERO_NAME_8": "Сорвиголова",
  "LIB_HERO_NAME_9": "Хайди",
}

In [50]:
def parse_battle(line: str) -> dict:
    battle = json.loads(line)
    result = defaultdict(int)

    for battle_key, sign in (('player', +1), ('enemies', -1)):
        for hero in battle[battle_key]:
            for hero_key in ('Level', 'Color', 'Star'):
                result[f'''{lib[f"LIB_HERO_NAME_{hero['id']}"]} {hero_key}'''] += sign * hero[hero_key.lower()]
        
    return {'Win': battle['win'], **result}

In [51]:
def invert_column(series: pandas.Series):
    """
    Inverts the column to make an "opposite" battle.
    """
    return series == False if series.name == 'Win' else -series

random.seed(42)
battles = pandas.DataFrame([parse_battle(line) for line in open('battles.jsonl')]).fillna(value=0)
battles = pandas.concat((battles, battles.apply(invert_column)))
battles.head()

Unnamed: 0,Win,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,Артемис Level,Артемис Star,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
0,False,0.0,0.0,0.0,4.0,35.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,False,0.0,0.0,0.0,-2.0,-10.0,-1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,False,0.0,0.0,0.0,4.0,36.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,-45.0,-2.0
3,False,0.0,0.0,0.0,-1.0,-10.0,-1.0,-4.0,-46.0,-3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,False,0.0,0.0,0.0,-1.0,-4.0,-2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
battles.describe()

Unnamed: 0,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,Артемис Level,Артемис Star,Астарот Color,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
count,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,...,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0,264.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,2.536427,20.156611,0.893577,3.506648,28.072963,1.72435,3.539027,29.945705,1.281634,5.096036,...,0.213606,1.113348,9.17303,0.314419,1.448744,11.523162,0.640817,0.967141,8.110454,0.409024
min,-8.0,-61.0,-3.0,-8.0,-70.0,-6.0,-8.0,-69.0,-3.0,-8.0,...,-2.0,-7.0,-60.0,-2.0,-8.0,-61.0,-3.0,-7.0,-60.0,-3.0
25%,-0.0,-0.0,-0.0,-1.0,-5.0,-1.0,-0.0,-0.0,-0.0,-6.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
50%,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,1.0,5.0,1.0,0.0,0.0,0.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,8.0,61.0,3.0,8.0,70.0,6.0,8.0,69.0,3.0,8.0,...,2.0,7.0,60.0,2.0,8.0,61.0,3.0,7.0,60.0,3.0


In [53]:
x = battles.drop(['Win'], axis=1)
y = battles['Win']

In [None]:
# Logistic regression model. Works well.

classifier = LogisticRegression(max_iter=1000, fit_intercept=False)
param_grid = {
    'C': numpy.logspace(-6, 2, num=100),
}

In [140]:
numpy.random.seed(42)
grid_search = GridSearchCV(
    classifier, param_grid,
    cv=KFold(n_splits=10, shuffle=True, random_state=42),
    scoring='accuracy',
).fit(x, y)

print(f'Score: {grid_search.best_score_}')
print(f'Params: {grid_search.best_params_}')
print(f'Classes: {grid_search.best_estimator_.classes_}')

Score: 0.9545454545454546
Params: {'C': 47.50810162102803}
Classes: [False  True]


In [128]:
pandas.DataFrame({'Feature': x.columns, 'Importance': grid_search.best_estimator_.coef_[0]}) \
    .set_index('Feature') \
    .sort_values('Importance', ascending=False) \
    .head(n=20)

Unnamed: 0_level_0,Importance
Feature,Unnamed: 1_level_1
Астарот Star,9.141981
Судья Color,6.412196
Фобос Color,5.433219
Сорвиголова Color,5.095252
Сорвиголова Star,5.035639
Джинджер Color,4.967311
Арахна Color,4.957612
Исмаил Star,4.5464
Галахад Star,4.299569
Аврора Color,3.809593


In [126]:
result = pandas.concat((
    pandas.Series(grid_search.best_estimator_.predict(x), index=battles.index, name='Predicted'),
    pandas.Series(grid_search.best_estimator_.predict_proba(x)[:, 1], index=battles.index, name='Probability'),
    battles,
), axis=1)
result['Probability'] = result['Probability'].apply('{:.2f}'.format)
result.head()

Unnamed: 0,Predicted,Probability,Win,Аврора Color,Аврора Level,Аврора Star,Арахна Color,Арахна Level,Арахна Star,Артемис Color,...,Хайди Star,Цин Мао Color,Цин Мао Level,Цин Мао Star,Чабба Color,Чабба Level,Чабба Star,Эльмир Color,Эльмир Level,Эльмир Star
0,False,0.0,False,0.0,0.0,0.0,4.0,35.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,False,0.0,False,0.0,0.0,0.0,-2.0,-10.0,-1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,False,0.0,False,0.0,0.0,0.0,4.0,36.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,-45.0,-2.0
3,False,0.01,False,0.0,0.0,0.0,-1.0,-10.0,-1.0,-4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,False,0.0,False,0.0,0.0,0.0,-1.0,-4.0,-2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [145]:
# Experimental NN model.
# I want it to catch an interaction between different heroes (e.g. Йорген is good in conjunction with Исмаил).

def create_model() -> keras.Model:
    model = Sequential()
    model.add(Dense(100, input_dim=x.columns.size, activation='sigmoid'))
    model.add(Dropout(rate=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

numpy.random.seed(42)
classifier_2 = KerasClassifier(build_fn=create_model, epochs=100, verbose=2, validation_split=0.2)
classifier_2.fit(x, y)

Train on 211 samples, validate on 53 samples
Epoch 1/100
 - 11s - loss: 0.7911 - acc: 0.5829 - val_loss: 0.8567 - val_acc: 0.3774
Epoch 2/100
 - 0s - loss: 0.6335 - acc: 0.6588 - val_loss: 0.7787 - val_acc: 0.4151
Epoch 3/100
 - 0s - loss: 0.5452 - acc: 0.7251 - val_loss: 0.7107 - val_acc: 0.4906
Epoch 4/100
 - 0s - loss: 0.4997 - acc: 0.7583 - val_loss: 0.6534 - val_acc: 0.5472
Epoch 5/100
 - 0s - loss: 0.4607 - acc: 0.7820 - val_loss: 0.6109 - val_acc: 0.6038
Epoch 6/100
 - 0s - loss: 0.4181 - acc: 0.8199 - val_loss: 0.5784 - val_acc: 0.6604
Epoch 7/100
 - 0s - loss: 0.3932 - acc: 0.8436 - val_loss: 0.5528 - val_acc: 0.6981
Epoch 8/100
 - 0s - loss: 0.3996 - acc: 0.8626 - val_loss: 0.5298 - val_acc: 0.7358
Epoch 9/100
 - 0s - loss: 0.3562 - acc: 0.8673 - val_loss: 0.5061 - val_acc: 0.7547
Epoch 10/100
 - 0s - loss: 0.3349 - acc: 0.9100 - val_loss: 0.4864 - val_acc: 0.7925
Epoch 11/100
 - 0s - loss: 0.3310 - acc: 0.8863 - val_loss: 0.4666 - val_acc: 0.8113
Epoch 12/100
 - 0s - loss: 0

Epoch 97/100
 - 0s - loss: 0.0565 - acc: 0.9905 - val_loss: 0.1022 - val_acc: 0.9811
Epoch 98/100
 - 0s - loss: 0.0700 - acc: 0.9905 - val_loss: 0.1014 - val_acc: 0.9811
Epoch 99/100
 - 0s - loss: 0.0677 - acc: 0.9858 - val_loss: 0.1003 - val_acc: 0.9811
Epoch 100/100
 - 0s - loss: 0.0567 - acc: 0.9858 - val_loss: 0.0988 - val_acc: 0.9811


<keras.callbacks.History at 0x2022a5fd0>