# Predicting Victories in Video Games

## Table of Contents

  1  Preliminary Analysis

      1.1  Tabular Data Schema
      1.2  Games by Version
      1.3  Games by Map
      1.4  Games by Mode
      1.5  Games by Turn NO
      1.6  Games by Player
      1.7  Splitting into Training and Validation Sets
      1.8  Metadata
      1.9  Test Data Distribution
      1.10  Gadgets Features
        1.10.1  Relative Changes Between 0.9.2 and 0.10.1
      1.11  Weapons Features
      1.12  Units

  2  Features Engineering
      
      2.1  Infrastructure
            2.1.1  One Hot Encoding
            2.1.2  Transformers, Scalers and Training and Evaluting Infrasture
            2.1.3  Featured Metrics and Engineering
            2.1.4  Augmented Tabular Data by Adding Truncated Logs
      2.2  Analysis
            2.2.1  Correlation Between Features and Result
                2.2.1.1  Devastator
                2.2.1.2  Domination
            2.2.2  Unit Type, HP of Units, Armors and Units' AP
            2.2.3  Time, Turn NO and To Move
            2.2.4  Gadgets IDs and Counts
            2.2.5  Weapons IDs and Counts
            2.2.6  Generators
            2.2.7  Control Points
  
  3  Devastator and Domination Transformations
  
  4  Devastator Model
  
      4.1  Logistic Regression
      4.2  Extra Trees and Gradient Boosting
      4.3  DNNs
  
  5  Domination Model
  
  6  Old Single Model Without Gadgets and Weapons
  
      6.1  Plain One-Hot-Encoded Features
      6.2  Transforming Unit Type, HP, AP and Armor
      6.3  Aggregating Generators
      6.4  Aggregating Control Points
      6.5  Adding Turn NO Information
      6.6  Fine Tuning
      6.7  Gradient Boosting
      6.8  Training on Augmented Data
      6.9  Where Classifiers Perform Worse or Better
  
  7  Predicting Result Based on Gameplay History


In [None]:
!pip install pandas



In [None]:
# initialization cell
import pandas as pd
import numpy as np
import sklearn
from IPython.display import display, HTML
import plotly.express as px
import plotly.graph_objects as go
#import nb_black
import jsondiff as jd
from jsondiff import diff
import json

In [None]:
# initialization cell
#%load_ext nb_black

In [None]:
# initialization cell
pd.options.display.max_columns = 200
pd.options.display.max_rows = 410
pd.options.display.precision = 3

### Preliminary Analysis
Tabular Data Schema

In [39]:
# initialization cell
path = 'https://github.com/VedantPatel124/Predicting-Victories-in-Video-Games/blob/main/training_data_tabular_format.csv'
original_train_tab = pd.read_csv(path)
all_train_tab = original_train_tab.copy()

tab_data_trans_lst = []


def prepare_tab_data(tab_data):
    for transformation in tab_data_trans_lst:
        tab_data = transformation(tab_data)
    return tab_data


def convert_to_categories(tab_data):
    tab_data = tab_data.convert_dtypes().set_index('id')
    colnames = tab_data.columns[
        ((tab_data.dtypes == "string") & (~tab_data.columns.str.endswith("player_id")))
        | tab_data.columns.str.startswith("control_points")
        | tab_data.columns.str.endswith(".ai")
    ]
    for colname in colnames:
        tab_data[colname] = tab_data[colname].astype("category")
    return tab_data


all_train_tab = convert_to_categories(all_train_tab)
tab_data_trans_lst.append(convert_to_categories)

ParserError: ignored

In [None]:
all_train_tab

Unnamed: 0,"<html lang=""en"" data-color-mode=""auto"" data-light-theme=""light"" data-dark-theme=""dark"" >"
0,<head>
1,"<meta charset=""utf-8"">"
2,"<link rel=""dns-prefetch"" href=""https://github...."
3,"<link rel=""dns-prefetch"" href=""https://avatars..."
4,"<link rel=""dns-prefetch"" href=""https://github-..."
...,...
1041,</clipboard-copy>
1042,</div>
1043,</template>
1044,</body>


In [None]:
all_train_tab.dtypes

In [None]:
all_train_tab[[f"teams.0.{u_no}.unit_type" for u_no in units_nos]]

In [None]:
# initialization cell
control_points_cols = all_train_tab.columns[
    all_train_tab.columns.str.startswith("control_points")
]
generators_cols = all_train_tab.columns[
    all_train_tab.columns.str.startswith("generators")
]
teams_cols = all_train_tab.columns[all_train_tab.columns.str.startswith("teams")]


general_cols = all_train_tab.columns[
    ~(
        all_train_tab.columns.str.startswith("control_points")
        | all_train_tab.columns.str.startswith("generators")
        | all_train_tab.columns.str.startswith("teams")
    )
]
general_cols

In [None]:
control_points_cols

In [None]:
# initialization cell
control_points_nos = [0, 1, 2]


generators_cols

In [None]:
# initialization cell
generators_nos = [0, 1, 2, 3, 4]

teams_cols

In [None]:
# initialization cell
units_nos = [0, 1, 2, 3]

unit_cols = all_train_tab.columns[all_train_tab.columns.str.startswith("teams.0.0.")]
unit_cols

In [None]:
# initialization cell
unit_weapons_nos = [0, 1]
unit_gadgets_nos = [0, 1, 2, 3, 4]

In [None]:
# initialization cell
def zero_stats_for_dead_units(tab_data):
    for team in [0, 1]:
        for u_no in units_nos:
            unit = f"teams.{team}.{u_no}"
            dead_col = f"{unit}.dead"
            index = tab_data[tab_data[dead_col] == 1].index
            cols_to_be_reset_to_nan = [f"{unit}.unit_type", f"{unit}.armor.id"]
            cols_to_be_reset_to_zero = [f"{unit}.current_ap", f"{unit}.armor.hp"]
            for g_no in unit_gadgets_nos:
                cols_to_be_reset_to_nan.append(f"{unit}.gadgets_stats.{g_no}.id")
                cols_to_be_reset_to_zero.append(f"{unit}.gadgets_stats.{g_no}.cnt")
            for w_no in unit_weapons_nos:
                cols_to_be_reset_to_nan.append(f"{unit}.weapons_stats.{w_no}.id")
                cols_to_be_reset_to_zero.append(f"{unit}.weapons_stats.{w_no}.cnt")
            tab_data.loc[index, cols_to_be_reset_to_nan] = np.nan
            tab_data.loc[index, cols_to_be_reset_to_zero] = 0
    return tab_data


all_train_tab = zero_stats_for_dead_units(all_train_tab)
tab_data_trans_lst.append(zero_stats_for_dead_units)

In [None]:
all_train_tab

In [None]:
general_train_tab = all_train_tab[general_cols]
general_train_tab

In [None]:
general_train_tab.describe(include="all")

In [None]:
# initialization cell
def drop_player_ai(tab_data):
    tab_data.drop(columns=["players.0.ai", "players.1.ai"], inplace=True)
    return tab_data


all_train_tab = drop_player_ai(all_train_tab)
tab_data_trans_lst.append(drop_player_ai)

In [None]:
# initialization cell
def count_in_data(tab_data, grouping_attrs, attrs_to_count=[], attrs_to_be_renamed={}):
    df = tab_data[grouping_attrs + attrs_to_count]
    dfgb = df.groupby(grouping_attrs)
    counts_attr = "counts"
    if counts_attr in attrs_to_be_renamed:
        counts_attr = attrs_to_be_renamed[counts_attr]
    dct = {counts_attr: dfgb.size()}
    for attr in attrs_to_count:
        old_attr = attr
        if attr in attrs_to_be_renamed:
            attr = attrs_to_be_renamed[attr]
        dct[attr] = dfgb[old_attr].sum()
    return pd.DataFrame(dct)

### Games by Version

In [None]:
versions_counts = count_in_data(general_train_tab, ["version"])
versions_counts

In [None]:
# initialization
versions = ["0.9.2", "0.10.1", "0.12.0", "0.12.1", "0.13.3"]

###Games by Map

In [None]:
count_in_data(general_train_tab, ["map_name"])

###Games by Mode

In [None]:
count_in_data(general_train_tab, ["mode"])

###Games by Turn NO

In [None]:
games_by_turn_no = count_in_data(general_train_tab, ["turn_no"])
games_by_turn_no

In [None]:
# initialization cell
end_turns_nos = np.array([0, 3, 7, 9, 12, 17, 999999999999])

ranges_labels = [
    f"{int(start)}-{end - 1}" if start + 1 < end else f"{start}"
    for start, end in zip(end_turns_nos, end_turns_nos[1:-1])
] + [f">{end_turns_nos[-2] - 1}"]


def turn_no_to_range_idx(turn_no):
    for idx, end in enumerate(end_turns_nos[1:]):
        if turn_no < end:
            return idx


def get_range_label(rng_idx):
    return ranges_labels[rng_idx]


def turn_no_to_range_label(turn_no):
    return get_range_label(turn_no_to_range_idx(turn_no))


def add_turns_range_column(tab_data):
    tab_data["turns_range"] = np.vectorize(turn_no_to_range_label)(tab_data.turn_no)
    return tab_data

In [None]:
games_by_turn_no["turns_range"] = np.vectorize(turn_no_to_range_idx)(
    games_by_turn_no.index.values
)
games_by_turns_ranges = count_in_data(games_by_turn_no, ["turns_range"], ["counts"])

games_by_turns_ranges.set_index(
    pd.Index(
        np.vectorize(get_range_label)(games_by_turns_ranges.index.values),
    ),
    inplace=True,
)
games_by_turns_ranges

In [None]:
# initialization cell
all_train_tab = add_turns_range_column(all_train_tab)
tab_data_trans_lst.append(add_turns_range_column)


def convert_ints_to_floats(tab_data):
    for colname in tab_data.columns:
        if str(tab_data[colname].dtype) == "Int64":
            tab_data[colname] = tab_data[colname].astype("float")
    return tab_data


all_train_tab = convert_ints_to_floats(all_train_tab)
tab_data_trans_lst.append(convert_ints_to_floats)

### Games by Player

In [None]:
games_by_player0 = count_in_data(
    general_train_tab, ["players.0.player_id"], [], {"players.0.player_id": "player_id"}
)
games_by_player0["is_first"] = games_by_player0.counts
games_by_player1 = count_in_data(
    general_train_tab, ["players.1.player_id"], [], {"players.1.player_id": "player_id"}
)
games_by_player1["is_first"] = 0
games_by_player0 + games_by_player1

#The predictor should not use ids of players

In [None]:
# initialization cell
def drop_player_id(tab_data):
    tab_data.drop(columns=["players.0.player_id", "players.1.player_id"], inplace=True)
    return tab_data


all_train_tab = drop_player_id(all_train_tab)
tab_data_trans_lst.append(drop_player_id)

### Splitting into Training and Validation Sets

In [None]:
from sklearn.model_selection import train_test_split

# train_tab, val_tab = train_test_split(
#     all_train_tab, test_size=5000, random_state=42, stratify=all_train_tab.map_name
# )

train_tab, val_tab = train_test_split(
    all_train_tab, test_size=5000, random_state=42, stratify=all_train_tab.map_name
)

In [None]:
count_in_data(val_tab, ["map_name"])

In [None]:
count_in_data(val_tab, ["turns_range"]).reindex(ranges_labels)

In [None]:
count_in_data(val_tab, ["version"]).reindex(versions)

In [None]:
(
    count_in_data(val_tab, ["version"]) / count_in_data(all_train_tab, ["version"])
).reindex(versions)

In [None]:
train_tab.index.astype(int).to_series().to_csv("train_ids.csv", index=False)
val_tab.index.astype(int).to_series().to_csv("val_ids.csv", index=False)

In [None]:
# initialization cell
train_ids = pd.Index(pd.read_csv("train_ids.csv", squeeze=True))
val_ids = pd.Index(pd.read_csv("val_ids.csv", squeeze=True))
train_tab = all_train_tab.loc[train_ids, :]
val_tab = all_train_tab.loc[val_ids, :]

In [None]:
display(val_tab)

### Metadata

In [None]:
metadata = {}
for v in versions:
    dct = json.loads(open(f"metadata/{v}/metadata.json", "r").read())
    for k, lst in dct.items():
        d = {}
        for obj in lst:
            d[obj["Id"]] = obj
        dct[k] = d
    metadata[v] = dct
    with open(f"metadata/{v}/reformatted_metadata.json", "w") as f:
        f.write(json.dumps(dct, indent=3))

In [None]:
#print(json.dumps(metadata["0.10.1"], indent=3))

In [None]:
mdd = diff(metadata["0.9.2"], metadata["0.13.3"], marshal=True)
print(json.dumps(mdd, indent=3))

### Test Data Distribution

In [None]:
# initialization cell
original_test_tab = pd.read_csv("test_data/data_tabular_format.csv")
test_tab = original_test_tab.copy()
test_tab = prepare_tab_data(test_tab)

In [None]:
test_tab

In [None]:
count_in_data(test_tab, ["version"])

In [None]:
count_in_data(all_train_tab, ["version"])

In [None]:
(count_in_data(test_tab, ["version"]) / len(test_tab)) / (
    count_in_data(all_train_tab, ["version"]) / len(all_train_tab)
)

In [None]:
count_in_data(test_tab, ["map_name"])

In [None]:
count_in_data(test_tab, ["mode"])

###Gadgets Features

In [None]:
# initialization cell
def move_cols_left(df, colnames):
    for colname in reversed(colnames):
        col = df.pop(colname)
        df.insert(0, colname, col)


def compare_names_between_versions(dct_names):
    names_intersection = set.intersection(*dct_names.values())
    extra_names = {v: dct_names[v] - names_intersection for v in versions}
    return extra_names


def compare_colnames_between_versions(dct):
    return compare_names_between_versions(
        {v: set(dct[v].columns.values) for v in versions}
    )


def compare_indexes_between_versions(dct):
    return compare_names_between_versions(
        {v: set(dct[v].index.values) for v in versions}
    )

In [None]:
# initialization cell
gadgets = {}
for v in versions:
    df = (
        pd.read_csv(f"metadata/{v}/gadgets.csv").set_index(["Id"]).drop("Unnamed: 0", 1)
    )
    move_cols_left(df, ["TYPE", "Manufacturer", "Name"])
    gadgets[v] = df

In [None]:
gadgets["0.10.1"]

### Relative Changes Between 0.9.2 and 0.10.1

In [None]:
(
    gadgets["0.10.1"].select_dtypes(include=np.number)
    - gadgets["0.9.2"].select_dtypes(include=np.number)
) / gadgets["0.9.2"].select_dtypes(include=np.number)

In [None]:
compare_colnames_between_versions(gadgets)

#All the schemas of the gadgets tables are the same

In [None]:
compare_indexes_between_versions(gadgets)

#All the game versions have the same gadgets.

In [None]:
# initialization cell
gadgets_ids = gadgets[versions[0]].index.values
gadgets_ids

###Weapons Features

In [None]:
# initialization cell
weapons = {}
for v in versions:
    df = (
        pd.read_csv(f"metadata/{v}/weapons.csv")
        .sort_values("WEAPONCLASS")
        .set_index(["Id"])
        .drop("Unnamed: 0", 1)
    )
    move_cols_left(df, ["WEAPONCLASS", "Manufacturer", "Name"])
    weapons[v] = df

In [None]:
weapons["0.10.1"]

In [None]:
compare_colnames_between_versions(weapons)

#All the schemas of the weapons tables are the same

In [None]:
compare_indexes_between_versions(weapons)

#All the game versions have the same weapons.

In [None]:
# initialization cell
weapons_ids = weapons[versions[0]].index.values
weapons_ids

### Units

In [None]:
# initialization cell
units = {}
for v in versions:
    df = (
        pd.read_csv(f"metadata/{v}/units.csv")
        .set_index(["Id"])
        .sort_values("Id")
        .drop("Unnamed: 0", 1)
    )
    move_cols_left(df, ["Name", "Description"])
    units[v] = df

In [None]:
for v in versions:
    display(HTML(f"<h3>{v}</h3>"))
    display(units[v])

In [None]:
compare_colnames_between_versions(units)

#All the units tables have the same schema.

In [None]:
compare_indexes_between_versions(units)

#All the game versions have the same units.

In [None]:
# initialization cell
units_ids = units[versions[0]].index.values
units_ids

## Features Engineering
### Infrastructure
One Hot Encoding

In [None]:
# initialization cell
def drop_zero_std_features(tab, inplace=False):
    features_to_be_removed = tab.columns[tab.std() == 0]
    if inplace:
        tab.drop(columns=features_to_be_removed, inplace=True)
    else:
        return tab.drop(columns=features_to_be_removed)


default_drop_before = ["version"]


def get_dummies(tab, drop_before=default_drop_before):
    return pd.get_dummies(tab.drop(columns=drop_before), dtype="float64")


def cut_or_fill_columns(tab, colnames, inplace=False):
    colnames_to_be_removed = set(tab) - set(colnames)
    new_tab = tab.drop(columns=colnames_to_be_removed, inplace=inplace)
    if not inplace:
        tab = new_tab
    lacking_colnames = set(colnames) - set(tab.columns)
    for colname in lacking_colnames:
        tab[colname] = 0


def onehot_enc_tab(tab, onehot_enc_train_tab, drop_before=default_drop_before):
    tab = get_dummies(tab, drop_before)
    cut_or_fill_columns(tab, onehot_enc_train_tab.columns, inplace=True)
    return tab


def onehot_enc(train_tab, *tabs, drop_before=default_drop_before):
    train_tab = get_dummies(train_tab, drop_before)
    drop_zero_std_features(train_tab, inplace=True)
    new_tabs = [train_tab]
    for tab in tabs:
        new_tabs.append(onehot_enc_tab(tab, train_tab, drop_before))
    return tuple(new_tabs)


onehot_enc_train_tab, onehot_enc_val_tab, onehot_enc_test_tab = onehot_enc(
    train_tab, val_tab, test_tab
)

In [None]:
# initialization cell
def rows_subset(index_condition, train_tab, *tabs):
    train_tab = drop_zero_std_features(train_tab[index_condition(train_tab)])
    splitted_tabs_lst = [train_tab]
    for tab in tabs:
        tab = tab[index_condition(tab)].copy()
        cut_or_fill_columns(tab, train_tab.columns)
        splitted_tabs_lst.append(tab)
    return tuple(splitted_tabs_lst)


devastator_cond = lambda df: df["mode_Devastator"] == 1
domination_cond = lambda df: df["mode_Domination"] == 1

(
    devastator_onehot_enc_train_tab,
    devastator_onehot_enc_val_tab,
    devastator_onehot_enc_test_tab,
) = rows_subset(
    devastator_cond, onehot_enc_train_tab, onehot_enc_val_tab, onehot_enc_test_tab
)

(
    domination_onehot_enc_train_tab,
    domination_onehot_enc_val_tab,
    domination_onehot_enc_test_tab,
) = rows_subset(
    domination_cond, onehot_enc_train_tab, onehot_enc_val_tab, onehot_enc_test_tab
)

In [None]:
onehot_enc_train_tab

In [None]:
devastator_onehot_enc_train_tab

In [None]:
domination_onehot_enc_train_tab

In [None]:
onehot_enc_train_tab.dtypes

In [None]:
round(0.1232131, 5)

### Transformers, Scalers and Training and Evaluting Infrasture

In [None]:
# initialization cell
import itertools
from functools import reduce


from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score


class DummyScaler:
    def transform(self, x, **kwargs):
        return x.to_numpy()

    def fit_transform(self, *args, **kwargs):
        return self.transform(*args, **kwargs)


class TabTransformer:
    def __init__(
        self, transform_fun=lambda df: df.copy(), scale=True, ret_pd=None, keep_res=None
    ):
        self.transform_fun = transform_fun
        self.scale = scale
        self.ret_pd = False if ret_pd is None else ret_pd
        self.keep_res = False if keep_res is None else keep_res
        self.x_scaler = None
        self.features = None

    def __call__(self, x, **kwargs):
        return self.transform(x, **kwargs)

    def fit(self, x, transform=False, **kwargs):
        x = self.transform_fun(x)
        drop_zero_std_features(x, inplace=True)
        self.features = set(x.columns)
        x.fillna(0, inplace=True)
        if self.scale:
            self.x_scaler = MinMaxScaler(copy=False)
        else:
            self.x_scaler = DummyScaler()
        if transform:
            return self.transform(x, fitted_tab=True, **kwargs)
        else:
            self.x_scaler.fit(x)
            return self

    def fit_transform(self, x, **kwargs):
        return self.fit(x, transform=True, **kwargs)

    def transform(self, x, ret_pd=None, keep_res=None, fitted_tab=False):
        if ret_pd is None:
            ret_pd = self.ret_pd
        if keep_res is None:
            keep_res = self.keep_res
        #         print(f"ret_pd {ret_pd}")
        #         print(f"keep_res {keep_res}")
        assert self.x_scaler is not None and self.features is not None
        if not fitted_tab:
            x = self.transform_fun(x)
        features_to_be_removed = set(x.columns) - self.features
        x.drop(columns=features_to_be_removed, inplace=True)
        columns = x.columns
        if not fitted_tab:
            x.fillna(0, inplace=True)
            sx = self.x_scaler.transform(x)
        else:
            sx = self.x_scaler.fit_transform(x)
        if not keep_res and "winner" in x:
            columns = columns.difference(pd.Index(["winner"]), sort=False)
            sx = sx[:, 1:]
        if ret_pd:
            return pd.DataFrame(sx, columns=columns)
        else:
            return sx


import types


def compose(transform_fun, of=None):
    if isinstance(of, types.FunctionType):
        return lambda df: transform_fun(of(df))
    else:
        return transform_fun(of)


def functor(transform_fun, *args, **kwargs):
    return lambda of=None: compose(lambda df: transform_fun(df, *args, **kwargs), of)


def scale(scaleTransformer, ret_pd=True, keep_res=False):
    return functor(scaleTransformer, ret_pd=ret_pd, keep_res=keep_res)


def predict_ans(cls, x):
    return cls.predict_proba(x)[:, 1]


def get_score(cls, x, y, prefix):
    dct = {f"{prefix}_R^2": round(cls.score(x, y), 5)}
    if len(pd.unique(y)) > 1:
        dct[f"{prefix}_roc_auc"] = round(roc_auc_score(y, predict_ans(cls, x)), 5)
    return dct


def display_score(train_score=None, val_score=None, display_engine="HTML"):
    if display_engine == "HTML":
        if train_score is not None:
            display(HTML(f"{train_score}"))
        if val_score is not None:
            display(HTML(f"{val_score}"))
    else:
        if train_score is not None:
            print(f"{train_score}")
        if val_score is not None:
            print(f"{val_score}")


import datetime


def save_ans(ans, ans_prefix="ans", val_score=None):
    cur_dt = datetime.datetime.today()
    str_dt = cur_dt.strftime("%y-%m-%d_%H-%M-%S")
    ans_file_name = f"answers/{ans_prefix}_{str_dt}"
    with open(ans_file_name, "w") as f:
        for p in ans:
            print(p, file=f)
    if val_score is None:
        val_score_msg = ""
    else:
        val_score_file = f"answers/{ans_prefix}_{str_dt}_val_score"
        with open(val_score_file, "w") as f:
            print(val_score, file=f)
        val_score_msg = f" and validation score saved as '{val_score_file}'"
    print(f"The answer saved as '{ans_file_name}'{val_score_msg}.")


def predict_and_save_ans(cls, x_test, xy_val=None, ans_prefix="ans"):
    if xy_val is None:
        ans_val = None
    else:
        x_val, y_val = xy_val
        val_score = get_score(cls, x_val, y_val, "val")
    ans = predict_ans(cls, x_test)
    save_ans(ans, ans_prefix, val_score)


from sklearn.model_selection import train_test_split


def fit_and_score(
    cls=None,
    transform_fun=None,
    transformer=None,
    x_train=None,
    y_train=None,
    x_val=None,
    y_val=None,
    train_tab=None,
    val_tab=None,
    val_from_train=0.1,
    shuffle=False,
    scale=True,
    cls_type="extra_trees",
    ret_only_val_R2=False,
    fast_training=False,
    cls_params={},
    mode="all",
    display_engine="print",
    ret=True,
):
    if cls is None:
        if cls_type == "extra_trees":
            default_trees_params = dict(
                n_estimators=50, random_state=3, bootstrap=True, min_samples_leaf=1.5e-3
            )
            if fast_training:
                default_trees_params["n_estimators"] = 10
            for param, value in cls_params.items():
                default_trees_params[param] = value
            cls = ExtraTreesClassifier(**default_trees_params)
        if cls_type == "log_regr":
            default_logreg_params = dict(
                solver="saga",
                max_iter=200,
                tol=1e-3,
                random_state=3,
                n_jobs=3,
            )
            if fast_training:
                default_logreg_params["max_iter"] = 50
            #                 default_logreg_params["n_jobs"] = 1
            for param, value in cls_params.items():
                default_logreg_params[param] = value
            cls = LogisticRegression(**default_logreg_params)
    if transform_fun is not None:
        if train_tab is None:
            if mode == "all":
                train_tab = onehot_enc_train_tab
            if mode == "Devastator":
                train_tab = devastator_onehot_enc_train_tab
            if mode == "Domination":
                train_tab = domination_onehot_enc_train_tab
        if val_tab is None and val_from_train is not None:
            train_tab, val_tab = train_test_split(
                train_tab,
                shuffle=shuffle,
                test_size=val_from_train,
                random_state=13,
            )
        assert train_tab is not None
        transformer = TabTransformer(transform_fun, scale=scale)
        transformer.fit(train_tab)
    if x_train is None:
        if transformer is None:
            x_train = train_tab.drop(columns=["winner"])
        else:
            x_train = transformer(train_tab, keep_res=False)
    if x_val is None:
        if val_tab is None:
            if mode == "all":
                val_tab = onehot_enc_val_tab
            if mode == "Devastator":
                val_tab = devastator_onehot_enc_val_tab
            if mode == "Domination":
                val_tab = domination_onehot_enc_val_tab
        assert val_tab is not None
        if transformer is None:
            x_val = val_tab.drop(columns=["winner"])
        else:
            x_val = transformer(val_tab, keep_res=False)
    if y_train is None:
        y_train = train_tab.winner.astype(int)
    if y_val is None:
        y_val = val_tab.winner.astype(int)
    cls.fit(x_train, y_train)
    train_score = get_score(cls, x_train, y_train, "train")
    val_score = get_score(cls, x_val, y_val, "val")

    if ret_only_val_R2:
        return val_score["val_R^2"]
        del cls
        del transformer
    else:
        display_score(train_score, val_score, display_engine=display_engine)
        if ret:
            if transform_fun is not None:
                return cls, transformer
            else:
                return cls


def fit_and_score_xy(cls):
    return fit_and_score(
        cls=cls, x_train=x_train, y_train=y_train, x_val=x_val, y_val=y_val
    )


def score_xy(cls):
    display_score(
        get_score(cls, x_train, y_train, "train"), get_score(cls, x_val, y_val, "val")
    )


def sorted_scores(scores):
    return sorted(scores, key=lambda t: t[0], reverse=True)


def grid_search_transform(
    transform_fun,
    grid,
    transform_kwargs={},
    best=10,
    max_fails=20,
    scores=None,
    *args,
    **kwargs,
):
    fails = 0
    if scores is None:
        scores = []
    max_score = 0
    best_valuation = None
    n = len(grid)
    print(f"0/{n}", end="")
    for i, params_values in enumerate(grid):
        if fails > max_fails:
            print(f"\nTerminating: more than {max_fails} fails")
            break
        try:
            score = fit_and_score(
                transform_fun=transform_fun(**transform_kwargs, **params_values),
                *args,
                **kwargs,
                shuffle=True,
                fast_training=True,
                val_from_train=0.3,
                ret_only_val_R2=True,
            )
        except:
            score = -1
            fails += 1
        scores.append((score, params_values))
        if score > max_score:
            max_score = score
            best_valuation = params_values
        print(
            f"\r{i+1}/{n}, max_score: {max_score}, valuation: {best_valuation}",
            end="",
        )
    print("")
    scores = sorted_scores(scores)
    if best is None:
        return scores
    else:
        return scores[:best]


def make_grid(*args, **kwargs):
    if len(args) > 0:
        dct = args[0]
    else:
        dct = kwargs
    grid = []
    params_and_values = list(dct.items())

    def create_valuation(current, rest):
        if rest == []:
            grid.append(current.copy())
        else:
            param, values = rest[0]
            for value in values:
                current[param] = value
                create_valuation(current, rest[1:])

    create_valuation({}, params_and_values)

    return grid


def grid_from_values_sets(params, values_sets):
    grid = []
    for values in values_sets:
        assert len(values) == len(params)
        valuation = {}
        for param, value in zip(params, values):
            valuation[param] = value
        grid.append(valuation)
    return grid


def scores_by_turn_no_and_mode(cls, onehot_enc_val_tab, transformer, mode):
    val_xy_by_turn_no = {}
    for turn_no in range(1, 26):
        tab = onehot_enc_val_tab[
            (onehot_enc_val_tab.turn_no == turn_no)
            & (onehot_enc_val_tab[f"mode_{mode}"] == 1)
        ]
        if len(tab) > 0:
            y = tab.winner
            x = transformer(tab, keep_res=False)
            val_xy_by_turn_no[turn_no] = (x, y)
    scores_by_turn_no = {
        turn_no: get_score(cls, x, y, "val")
        for turn_no, (x, y) in val_xy_by_turn_no.items()
    }
    return scores_by_turn_no

### Featured Metrics and Engineering

In [None]:
# initialization cell
# https://medium.com/analytics-vidhya/how-to-measure-the-non-linear-correlation-between-multiple-variables-804d896760b8
def entropy(*X):
    entropy = sum(
        -p * np.log(p) if p > 0 else 0
        for p in (
            np.mean(
                reduce(
                    np.logical_and,
                    (predictions == c for predictions, c in zip(X, classes)),
                )
            )
            for classes in itertools.product(*[set(x) for x in X])
        )
    )
    return entropy


def attrs_exprs_and_substitutions_generators(df, attrs_exprs):
    attrs_exprs_and_substitutions_generators_lst = []
    for attr_expr in attrs_exprs:
        substitutions_gen = {}
        if attr_expr.find("{cp_no}") != -1:
            substitutions_gen["cp_no"] = control_points_nos
        if attr_expr.find("{team}") != -1:
            substitutions_gen["team"] = [0, 1]
        if attr_expr.find("{gn_no}") != -1:
            substitutions_gen["gn_no"] = generators_nos
        if attr_expr.find("{u_no}") != -1:
            substitutions_gen["u_no"] = units_nos
        if attr_expr.find("{g_no}") != -1:
            substitutions_gen["g_no"] = unit_gadgets_nos
        if attr_expr.find("{w_no}") != -1:
            substitutions_gen["w_no"] = unit_weapons_nos
        if attr_expr.find("{u_id}") != -1:
            substitutions_gen["u_id"] = units_ids
        if attr_expr.find("{g_id}") != -1:
            substitutions_gen["g_id"] = gadgets_ids
        if attr_expr.find("{w_id}") != -1:
            substitutions_gen["w_id"] = weapons_ids
        attrs_exprs_and_substitutions_generators_lst.append(
            (attr_expr, substitutions_gen)
        )

    return attrs_exprs_and_substitutions_generators_lst


def gen_all_substitutions(substitutions_gen, res_buff, past_substitutions=[]):
    if substitutions_gen == {}:
        res_buff.append(past_substitutions)
    else:
        name, val_list = substitutions_gen.popitem()
        for val in val_list:
            past_substitutions[name] = val
            gen_all_substitutions(substitutions_gen, res_buff, past_substitutions)
        substitutions_gen[name] = val_list


def gen_all_substitutions_and_put_in_exprs(
    substitutions_gen, exprs, substituted_exprs_buffs, substitutions={}
):
    if substitutions_gen == {}:
        substituted_exprs = []
        for expr in exprs:
            substituted_exprs.append(expr.format(**substitutions))
        substituted_exprs_buffs.append(substituted_exprs)
    else:
        name, val_list = substitutions_gen.popitem()
        for val in val_list:
            substitutions[name] = val
            gen_all_substitutions_and_put_in_exprs(
                substitutions_gen, exprs, substituted_exprs_buffs, substitutions
            )
        substitutions_gen[name] = val_list


# Selects attributes generated based on expressions and returns them:
# as a new pd.DataFrame or only as a colnames list
#
# substitusions:
# gadgets_nos:        {g_no}
# weapons_nos:        {w_no}
# units_nos:          {u_no}
# generators_nos:     {gn_no}
# control_points_nos: {cp_no}
def get_selected_attrs(df, attrs_exprs=[], ret="pd.DataFrame", copy=True):
    if len(attrs_exprs) > 0:
        attrs_exprs_and_substitutions_generators_lst = (
            attrs_exprs_and_substitutions_generators(df, attrs_exprs)
        )
        colnames = []
        for (
            attr_expr,
            substitutions_gen,
        ) in attrs_exprs_and_substitutions_generators_lst:
            substituted_attrs_buffs = []
            gen_all_substitutions_and_put_in_exprs(
                substitutions_gen, [attr_expr], substituted_attrs_buffs
            )
            for attrs_buff in substituted_attrs_buffs:
                for attr in attrs_buff:
                    if attr in df.columns:
                        colnames.append(attr)
        if ret == "colnames":
            return colnames
        assert ret == "pd.DataFrame"
        df = df[colnames]
    return df.copy() if copy else df


def select_attrs(*args, **kwargs):
    return functor(get_selected_attrs, *args, **kwargs)


# Generates new columns as multiplications of powers of attributes
# described in function arguments.
# Returns the generated columns in a new pd.DataFrame
#
# substitusions:
# gadgets_nos:        {g_no}
# weapons_nos:        {w_no}
# units_nos:          {u_no}
# generators_nos:     {gn_no}
# control_points_nos: {cp_no}
def multiply_attrs_powers(df, attrs_exprs, exps_tuples):
    for exps in exps_tuples:
        assert len(exps) == len(attrs_exprs)

    attrs_exprs_and_substitutions_generators_lst = (
        attrs_exprs_and_substitutions_generators(df, attrs_exprs)
    )
    #     print(attrs_exprs_and_substitutions_generators_lst)

    new_df = pd.DataFrame(index=df.index)

    for buff_idx, exps in enumerate(exps_tuples):
        current_attrs_exprs = []
        current_substitutions_gen = {}
        current_exps = []
        for (attr_expr, substitutions_gen), exp in zip(
            attrs_exprs_and_substitutions_generators_lst, exps
        ):
            if exp != 0:
                current_attrs_exprs.append(attr_expr)
                current_substitutions_gen.update(substitutions_gen)
                current_exps.append(exp)
        substituted_attrs_buffs = []
        #         print(current_attrs_exprs)
        gen_all_substitutions_and_put_in_exprs(
            current_substitutions_gen, current_attrs_exprs, substituted_attrs_buffs
        )
        #         print(substituted_attrs_buffs)
        for substituted_attrs in substituted_attrs_buffs:
            colname = ""
            col = 1
            to_continue = False
            for attr in substituted_attrs:
                if not attr in df.columns:
                    to_continue = True
                    break
            if to_continue:
                continue
            for attr, exp in zip(substituted_attrs, current_exps):
                if len(colname) > 0:
                    colname += " * "
                colname += f"{attr}"
                if exp != 1:
                    colname += f"^{exp}"
                col *= np.power(df[attr], exp)
            new_df[colname] = col
    return new_df


def get_colsmask(df, prefixes=[], infixes=[], suffixes=[], equals=[]):
    if len(equals) > 0:
        colsmask = np.full(len(df.columns), False)
        for i, colname in enumerate(df.columns):
            if colname in equals:
                colsmask[i] = True
    else:
        prefixes_mask = np.full(len(df.columns), False)
        if len(prefixes) > 0:
            for prefix in prefixes:
                prefixes_mask |= df.columns.str.startswith(prefix)
        else:
            prefixes_mask[:] = True

        infixes_mask = np.full(len(df.columns), False)
        if len(infixes) > 0:
            for infix in infixes:
                infixes_mask |= df.columns.str.contains(infix)
        else:
            infixes_mask[:] = True

        suffixes_mask = np.full(len(df.columns), False)
        if len(suffixes) > 0:
            for suffix in suffixes:
                suffixes_mask |= df.columns.str.endswith(suffix)
        else:
            suffixes_mask[:] = True

        colsmask = prefixes_mask & infixes_mask & suffixes_mask
    return colsmask


def get_colnames(df, all_except=False, **kwargs):
    colsmask = get_colsmask(df, **kwargs)
    if all_except:
        return df.columns[~colsmask]
    else:
        return df.columns[colsmask]


def get_columns(df, *args, copy=False, **kwargs):
    if len(args) > 0:
        df = df[args[0]]
    else:
        df = df[get_colnames(df, *args, **kwargs)]
    return df.copy() if copy else df


def drop_old_and_add_new(df, old_colnames, new_columns):
    return df.drop(columns=old_colnames).join(new_columns)


def get_replaced_by_transformed(df, colnames, transform_fun, **kwargs):
    new_columns = transform_fun(df, **kwargs)
    return drop_old_and_add_new(df, colnames, new_columns)


def select_columns(*args, **kwargs):
    return functor(get_columns, *args, **kwargs)


# initialization cell
# Aggregate all attributes by team.
# Returns a new pd.DataFrame.
def aggregate_attrs_by_team(
    df, other_columns=True, contains=[], single_exp=1, sum_exp=1
):
    mask = df.columns.str.startswith("teams")
    if len(contains) > 0:
        contain_mask = np.full_like(mask, False)
        for name in contains:
            contain_mask |= df.columns.str.contains(name)
        mask &= contain_mask
    columns = df.columns[mask]

    if other_columns:
        aggr_df = df[df.columns.difference(columns, sort=False)].copy()
    else:
        aggr_df = pd.DataFrame(index=df.index)
    aggr_colnames = []

    for team in [0, 1]:
        for u_no in units_nos:
            prefix = f"teams.{team}.{u_no}"
            for colname in columns[columns.str.startswith(prefix)]:
                aggr_colname = colname.replace(prefix, f"teams.{team}.units")
                if single_exp != 1:
                    aggr_colname = f"({aggr_colname})^{single_exp}"
                if sum_exp != 1:
                    aggr_colname = f"(sum of {aggr_colname})^{sum_exp}"
                if not aggr_colname in aggr_df.columns:
                    aggr_df[aggr_colname] = 0
                aggr_colnames.append(aggr_colname)
                aggr_df[aggr_colname] += np.power(df[colname], single_exp)
    for aggr_colname in aggr_colnames:
        aggr_df[aggr_colname] = np.power(aggr_df[aggr_colname], sum_exp)
    return aggr_df


def aggr_by_team(*args, **kwargs):
    return functor(aggregate_attrs_by_team, *args, **kwargs)

### Augmented Tabular Data by Adding Truncated Logs

In [None]:
def convert_trunc_log_to_tab(iloc, original_tab, prefix_dir, save=False, ret_tab=True):
    game_id = original_tab["id"][iloc]
    game_log = json.loads(open(f"{prefix_dir}/logs_short/{game_id}.json", "r").read())
    turns = game_log["Turns"]
    n = len(turns)

    def get_turn_no(t):
        for turn_no, next_turn in enumerate(turns):
            if t < next_turn["t"]:
                return turn_no
        return len(turns)

    tab_data = pd.concat(
        [original_tab.iloc[iloc : iloc + 1, :]] * (len(turns) + 1), ignore_index=True
    )
    tab_data.loc[0, "turn_no"] = 0
    tab_data.loc[0, "t"] = 0
    tab_data.loc[0, "to_move"] = 0
    for i, turn in enumerate(turns[1:]):
        tab_data.loc[i + 1, "turn_no"] = i + 1
        tab_data.loc[i + 1, "t"] = turn["t"]
        tab_data.loc[i + 1, "to_move"] = turn["pid"]

    colnames_to_be_reset = tab_data.columns[
        tab_data.columns.str.contains("gadget")
        | tab_data.columns.str.contains("weapon")
        | tab_data.columns.str.contains("armor")
    ]
    colnames_to_be_reset_to_nan = colnames_to_be_reset[
        colnames_to_be_reset.str.endswith("id")
    ]
    colnames_to_be_reset_to_zero = colnames_to_be_reset[
        colnames_to_be_reset.str.endswith("cnt")
        | colnames_to_be_reset.str.endswith("hp")
    ]
    tab_data.loc[: n - 1, colnames_to_be_reset_to_nan] = np.nan
    tab_data.loc[: n - 1, colnames_to_be_reset_to_zero] = 0

    id_type = {}
    generators = {}
    units = {}

    players_generators_cnt = {0: 0, 1: 0}
    players_units_cnt = {0: 0, 1: 0}

    for entry in game_log["RegisteredObjects"]:
        roid = entry["id"]
        owner = entry["ownerId"]
        if entry["name"] == "GeneratorMain":
            id_type[roid] = "generator"
            no = players_generators_cnt[owner]
            players_generators_cnt[owner] += 1
            generators[roid] = f"generators.{owner}.{no}"
            continue
        classId = entry.get("classId")
        if classId is not None and classId.startswith("unt"):
            id_type[roid] = "unit"
            no = players_units_cnt[owner]
            players_units_cnt[owner] += 1
            units[roid] = f"teams.{owner}.{no}"

    for unit in units.values():
        tab_data.loc[: n - 1, f"{unit}.dead"] = 0

    for entry in game_log["States"]:
        roid = entry["id"]
        typ = id_type.get(roid)
        if typ is None:
            continue
        t = entry["t"]
        turn_no = get_turn_no(t)
        if typ == "generator":
            hp = entry["hp"]["current"]
            dead = 1 if "dead" in entry and entry["dead"] else 0
            tab_data.loc[turn_no : n - 1, f"{generators[roid]}.hp"] = hp
            tab_data.loc[turn_no : n - 1, f"{generators[roid]}.dead"] = dead
            continue
        if typ == "unit":
            ap = hp = None
            if "ap" in entry:
                tab_data.loc[turn_no : n - 1, f"{units[roid]}.current_ap"] = entry[
                    "ap"
                ]["current"]
            if "hp" in entry:
                tab_data.loc[turn_no : n - 1, f"{units[roid]}.current_hp"] = entry[
                    "hp"
                ]["current"]
            if "dead" in entry and entry["dead"]:
                tab_data.loc[turn_no : n - 1, f"{units[roid]}.dead"] = 1
            if "eq" in entry:
                if "gadgetIds" in entry["eq"]:
                    for g_no, g_id in zip(unit_gadgets_nos, entry["eq"]["gadgetIds"]):
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.gadgets_stats.{g_no}.id"
                        ] = g_id
                if "weaponIds" in entry["eq"]:
                    for w_no, w_id in zip(unit_weapons_nos, entry["eq"]["weaponIds"]):
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.weapons_stats.{w_no}.id"
                        ] = w_id
            if "eqs" in entry:
                if "gcnt" in entry["eqs"]:
                    for g_no, g_cnt in zip(unit_gadgets_nos, entry["eqs"]["gcnt"]):
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.gadgets_stats.{g_no}.cnt"
                        ] = g_cnt
                if "wcnt" in entry["eqs"]:
                    for w_no, w_cnt in zip(unit_weapons_nos, entry["eqs"]["wcnt"]):
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.weapons_stats.{w_no}.cnt"
                        ] = w_cnt
            if "st" in entry:
                for status in entry["st"]:
                    if status["n"] == "ArmoredStatus":
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.armor.id"
                        ] = status["g"]
                        tab_data.loc[
                            turn_no : n - 1, f"{units[roid]}.armor.hp"
                        ] = status["hp"]["current"]

    if "RegionsState" in game_log:
        for entry in game_log["RegionsState"]:
            t = entry["t"]
            turn_no = get_turn_no(t)
            captured = 1 if entry["c"] else 0
            owner = entry["pid"]
            no = entry["pn"] - 1
            tab_data.loc[turn_no : n - 1, f"control_points.{no}.captured"] = captured
            tab_data.loc[turn_no : n - 1, f"control_points.{no}.owner"] = owner

    if save:
        tab_data.to_csv(f"{prefix_dir}/turns_tabular/{game_id}.csv", index=False)
    if ret_tab:
        return tab_data

In [None]:
original_train_tab[
    (original_train_tab["mode"] == "Domination")
    & (original_train_tab.turn_no >= 25)
    & (original_train_tab["control_points.0.captured"] == 1)
    & (original_train_tab["control_points.1.captured"] == 1)
]

In [None]:
iloc = 37742  # Devastator
# iloc = 5736  # Domination

In [None]:
convert_trunc_log_to_tab(37742, original_train_tab, "train_data", save=True)

In [None]:
turns_tab = pd.read_csv("train_data/turns_tabular/96541.csv")
turns_tab

In [None]:
for iloc in original_train_tab.index:
    convert_trunc_log_to_tab(
        iloc, original_train_tab, "train_data", save=True, ret_tab=False
    )
for iloc in original_test_tab.index:
    convert_trunc_log_to_tab(
        iloc, original_test_tab, "test_data", save=True, ret_tab=False
    )

In [None]:
games_tabs = []
prefix_dir = "train_data"
original_tab = original_train_tab

for iloc in original_tab.index:
    game_id = original_tab["id"][iloc]
    games_tabs.append(pd.read_csv(f"{prefix_dir}/turns_tabular/{game_id}.csv"))
    
all_games_turns_tab = pd.concat(games_tabs, ignore_index=True)
all_games_turns_tab.to_csv(
    f"{prefix_dir}/all_turns_data_tabular_format.csv", index=False
)

In [None]:
game_tab_data = prepare_tab_data(game_tab_data)
game_tab_data

### Analysis
Correlation Between Features and Result

In [None]:
# initialization cell
scaleTransformer = TabTransformer()
scaleTransformer.fit(onehot_enc_train_tab)
sdf = scaleTransformer(onehot_enc_train_tab, ret_pd=True, keep_res=True)

In [None]:
# initialization cell
corrs = sdf.corrwith(sdf.winner)
corrs.sort_values(inplace=True, ascending=False)
corrs[corrs >= 0.05]

### Devastator

In [None]:
# initialization cell
scaleTransformer = TabTransformer()
scaleTransformer.fit(devastator_onehot_enc_train_tab)
dvst_df = scaleTransformer(devastator_onehot_enc_train_tab, ret_pd=True, keep_res=True)

In [None]:
# initialization cell
dvst_corrs = dvst_df.corrwith(dvst_df.winner)
dvst_corrs.sort_values(inplace=True, ascending=False)
dvst_corrs[dvst_corrs >= 0.05]

### Domination

In [None]:
# initialization cell
scaleTransformer = TabTransformer()
scaleTransformer.fit(domination_onehot_enc_train_tab)
dom_df = scaleTransformer(domination_onehot_enc_train_tab, ret_pd=True, keep_res=True)
dom_df

In [None]:
# initialization cell
dom_corrs = dom_df.corrwith(dom_df.winner)
dom_corrs.sort_values(inplace=True, ascending=False)
dom_corrs[dom_corrs >= 0.05]

### Unit Type, HP of Units, Armors and Units' AP

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(max_depth=6, min_samples_leaf=1e-3)

fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.dead",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ],
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.dead",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ],
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.dead",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ],
    ),
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ],
    ),
)

In [None]:
# initialization cell
def transform_ap_hp_armor(df, hp_exp=0.3, arm_hp_exp=0.3, ap_exp=1.8):
    return multiply_attrs_powers(
        df,
        [
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
        ],
        [
            (hp_exp, 0, 0),
            (0, arm_hp_exp, 0),
            (0, 0, ap_exp),
        ],
    )


def ap_hp_armor(*args, **kwargs):
    return functor(transform_ap_hp_armor, *args, **kwargs)

In [None]:
%%time
basic_range = [0.1, 0.5, 1, 1.5, 2]
grid_search_transform(
    ap_hp_armor,
    make_grid(
        hp_exp=basic_range, arm_hp_exp=basic_range, ap_exp=basic_range
    ),
    cls_type="log_regr",
)

In [None]:
%%time
hp_range = [0.3, 0.4, 0.5, 0.6, 0.7]
ap_range = [1.8, 2.0, 2.2, 2.5, 3]
grid_search_transform(
    ap_hp_armor,
    make_grid(
        hp_exp=hp_range, arm_hp_exp=hp_range, ap_exp=ap_range
    ),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def transform_unit_type_ap_hp_armor(df, hp_exp=0.5, arm_hp_exp=1.8, ap_exp=0.6):
    return multiply_attrs_powers(
        df,
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
        ],
        [
            (1, hp_exp, 0, 0),
            (1, 0, arm_hp_exp, 0),
            (1, hp_exp, 0, ap_exp),
            (1, 0, arm_hp_exp, ap_exp),
        ],
    )


def unit_type_ap_hp_armor(*args, **kwargs):
    return functor(transform_unit_type_ap_hp_armor, *args, **kwargs)

In [None]:
cls, trans = fit_and_score(
    transform_fun=unit_type_ap_hp_armor(hp_exp=0.5, arm_hp_exp=1.8, ap_exp=0.6),
    cls_type="log_regr",
)

In [None]:
%%time
basic_range = [0.1, 0.5, 1, 1.5, 2]
grid_search_transform(
    unit_type_ap_hp_armor,
    make_grid(
        hp_exp=basic_range, arm_hp_exp=basic_range, ap_exp=basic_range
    ),
    cls_type="log_regr",
)

In [None]:
%%time
grid_search_transform(
    unit_type_ap_hp_armor,
    make_grid(
        hp_exp=[0.3, 0.4, 0.5, 0.6, 0.7], arm_hp_exp=[1.6, 1.8, 2.0, 2.2, 2.4], ap_exp=[0.6, 0.8, 1.0, 1.2, 1.4]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=unit_type_ap_hp_armor(hp_exp=0.5, arm_hp_exp=1.8, ap_exp=0.6),
    cls_type="log_regr",
)

In [None]:
aggr_by_team()(
    select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ]
    )(onehot_enc_train_tab)
)

In [None]:
# initialization cell
def grid_aggr_team(single_exp, sum_exp):
    return aggr_by_team(single_exp, sum_exp)(
        select_attrs(
            [
                "teams.{team}.{u_no}.unit_type_{u_id}",
                "teams.{team}.{u_no}.current_hp",
                "teams.{team}.{u_no}.current_ap",
                "teams.{team}.{u_no}.armor.hp",
            ]
        )
    )

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]
grid_search_transform(
    grid_aggr_team,
    make_grid(
        single_exp=exp_grid, sum_exp=exp_grid
    ),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def grid_aggr_team_hp_ap_armor(single_exp, sum_exp):
    return aggr_by_team(single_exp=single_exp, sum_exp=sum_exp)(
        ap_hp_armor(hp_exp=0.3, arm_hp_exp=0.3, ap_exp=1.8)
    )

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2]
grid_search_transform(
    grid_aggr_team_hp_ap_armor,
    make_grid(
        single_exp=exp_grid, sum_exp=exp_grid
    ),
    cls_type="log_regr",
)

In [None]:
aggr_by_team(
    select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
        ],
        ret="colnames",
    )(onehot_enc_train_tab)
)(
    select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.current_ap",
            "teams.{team}.{u_no}.armor.hp",
        ]
    )(onehot_enc_train_tab)
)

In [None]:
# initialization cell
def grid_aggr_transform_unit_type_ap_hp_armor(single_exp, sum_exp):
    return aggr_by_team(single_exp=single_exp, sum_exp=sum_exp)(
        unit_type_ap_hp_armor(hp_exp=0.5, arm_hp_exp=1.8, ap_exp=0.6)
    )

In [None]:
%%time


exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2]
grid_search_transform(
    grid_aggr_transform_unit_type_ap_hp_armor,
    make_grid(
        single_exp=exp_grid, sum_exp=exp_grid
    ),
    cls_type="log_regr",
)

In [None]:
%%time
single_exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2]
sum_exp_grid = []

grid_search_transform(
    grid_aggr_transform_unit_type_ap_hp_armor,
    make_grid(
        single_exp=single_exp_grid, sum_exp=sum_exp_grid
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_team(single_exp=1.5, sum_exp=1)(unit_type_ap_hp_armor()),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_team(single_exp=1.2, sum_exp=1)(unit_type_ap_hp_armor()),
    cls_type="log_regr",
)

### Time, Turn NO and To Move

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
        ]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "turn_no",
        ]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "turns_range_3-6",
            #             "turns_range_7-8",
            #             "turns_range_9-11",
            #             "turns_range_12-16",
            #             "turns_range_>16",
        ]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "turn_no",
            "to_move",
        ]
    ),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "turn_no",
            "to_move",
        ]
    )
)

In [None]:
# initialization cell
def transform_unit_type_hp_ap_armor_turn(
    df,
    hp_exp=0.5,
    arm_hp_exp=1.8,
    ap_exp=0.6,
    turn_no_exp=0.05,
    t_exp=1,
    other_columns=False,
):
    new_columns = multiply_attrs_powers(
        df,
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "turn_no",
            "t",
        ],
        [
            (1, hp_exp, 0, 0, turn_no_exp, 0),
            (1, 0, arm_hp_exp, 0, turn_no_exp, 0),
            (1, hp_exp, 0, ap_exp, turn_no_exp, 0),
            (1, 0, arm_hp_exp, ap_exp, turn_no_exp, 0),
            (0, 0, 0, 0, 0, t_exp),
        ],
    )
    if other_columns:
        old_colnames = df.columns[
            get_colsmask(df, prefixes=["team"], suffixes=["hp", "ap"])
            | get_colsmask(df, prefixes=["team"], infixes=["unit_type_", "armor.id"])
            | get_colsmask(df, infixes=["turns_range"])
            | get_colsmask(df, equals=["t"])
        ]
        return drop_old_and_add_new(
            df, old_colnames=old_colnames, new_columns=new_columns
        )
    else:
        return new_columns


def unit_type_hp_ap_armor_turn(*args, **kwargs):
    return functor(transform_unit_type_hp_ap_armor_turn, *args, **kwargs)

In [None]:
cls, trans = fit_and_score(
    transform_fun=unit_type_ap_hp_armor(hp_exp=0.5, arm_hp_exp=1.8, ap_exp=0.6),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=unit_type_hp_ap_armor_turn(turn_no_exp=0.05),
    cls_type="log_regr",
)

In [None]:
%%time
scores1=[]
grid_search_transform(
    unit_type_hp_ap_armor_turn,
    make_grid(
        turn_no_exp=np.arange(-2, 2, 0.02)
    ),
    cls_type="log_regr",
    scores=scores1
)

In [None]:
%%time
grid_search_transform(
    unit_type_hp_ap_armor_turn,
    make_grid(
        turn_no_exp=np.arange(0.03, 0.06, 0.002), t_exp=np.arange(-1, 1, 0.1)
    ),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def grid_aggr_team_unit_type_hp_ap_armor_turn(
    search="aggr", turn_kwargs={}, aggr_kwargs={}, **kwargs
):
    if search == "aggr":
        return aggr_by_team(**aggr_kwargs, **kwargs)(
            unit_type_hp_ap_armor_turn(**turn_kwargs)
        )
    if search == "turn":
        return aggr_by_team(**aggr_kwargs)(
            unit_type_hp_ap_armor_turn(**turn_kwargs, **kwargs)
        )
    raise Exception("search has to be in ['aggr', 'turn']")

In [None]:
grid_search_transform(
    grid_aggr_team_unit_type_hp_ap_armor_turn,
    make_grid(turn_no_exp=np.arange(0.1, 2, 0.2), t_exp=np.arange(0.1, 2, 0.2)),
    transform_kwargs=dict(search="turn"),
    cls_type="log_regr",
)

In [None]:
grid_search_transform(
    grid_aggr_team_unit_type_hp_ap_armor_turn,
    make_grid(
        turn_no_exp=np.arange(0.55, 0.85, 0.03), t_exp=np.arange(0.15, 0.45, 0.03)
    ),
    transform_kwargs=dict(search="turn"),
    cls_type="log_regr",
)

In [None]:
%%time

exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]

grid_search_transform(
    grid_aggr_team_unit_type_hp_ap_armor_turn,
    make_grid(single_exp=exp_grid, sum_exp=exp_grid),
    transform_kwargs=dict(search="aggr", turn_kwargs=dict(turn_no_exp=0.73, t_exp=0.21)),
    cls_type="log_regr",
)

In [None]:
%%time

single_exp_grid = [0.9, 1, 1.1]
sum_exp_grid = np.arange(0.8, 1.2, 0.02)

grid_search_transform(
    grid_aggr_team_unit_type_hp_ap_armor_turn,
    make_grid(single_exp=single_exp_grid, sum_exp=sum_exp_grid),
    transform_kwargs=dict(search="aggr", turn_kwargs=dict(turn_no_exp=0.73, t_exp=0.21)),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def aggr_units_and_time(other_columns=True):
    return aggr_by_team(
        single_exp=1,
        sum_exp=1,
        contains=["current_ap", "current_hp", "armor.hp", "unit_type", "dead"],
    )(
        unit_type_hp_ap_armor_turn(
            turn_no_exp=0.73, t_exp=0.21, other_columns=other_columns
        )(select_columns(all_except=True, infixes=["turns_range", "to_move"]))
    )

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_units_and_time(other_columns=False),
    cls_type="log_regr",
)

In [None]:
cls, trans = fit_and_score(transform_fun=aggr_units_and_time(other_columns=False))

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.1,
    min_impurity_decrease=0.3,
)

_, transformer1 = fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
)

In [None]:
x_train1 = transformer(onehot_enc_train_tab)
y_train1 = onehot_enc_train_tab.winner
x_train1, x_val1, y_train1, y_val1 = train_test_split(x_train1, y_train1)
x_test1 = transformer(onehot_enc_val_tab)
y_test1 = onehot_enc_val_tab.winner

In [None]:
from hypopt import GridSearch

param_grid = [
    {
        "max_depth": [3, 4, 5, 6, 8, 10, 12, 15, 20],
        "min_samples_split": [1e-5, 1e-4, 1e-3, 1e-2],
        "min_samples_leaf": [1e-5, 1e-4, 1e-3, 1e-2],
        "min_impurity_decrease": [0, 1e-5, 1e-4, 1e-3, 1e-2],
        "ccp_alpha": [0, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.5],
        "max_features": ["sqrt", "log2"] + [0.2, 0.5, 0.8, 1],
    }
]

opt1 = GridSearch(model=DecisionTreeClassifier(), param_grid=param_grid)
opt1.fit(x_train1, y_train1, x_val1, y_val1)
opt1.score(x_val1, y_val1)
print(opt1.get_param_scores()[:3])

In [None]:
from hypopt import GridSearch

param_grid = [
    {
        "max_depth": [11, 12, 13, 14],
        "min_samples_split": np.power(10, np.linspace(-4.5, -3.5, 4)),
        "min_samples_leaf": np.power(10, np.linspace(-4.5, -3.5, 4)),
        "min_impurity_decrease": np.power(10, np.linspace(-4.5, -3.5, 4)),
        "ccp_alpha": np.power(10, np.linspace(-7, -4.3, 4)),
        "max_features": [1],
    }
]

opt1 = GridSearch(model=DecisionTreeClassifier(), param_grid=param_grid)
opt1.fit(x_train1, y_train1, x_val1, y_val1)
opt1.score(x_val1, y_val1)
print(opt1.get_param_scores()[:3])

In [None]:
np.power(10, np.linspace(-4.5, -3.5, 4))

In [None]:
from hypopt import GridSearch

param_grid = [
    {
        "max_depth": [13, 14, 15, 16],
        "min_samples_split": np.power(10, np.linspace(-4.5, -3.5, 4)),
        "min_samples_leaf": np.power(10, np.linspace(-4.5, -3.5, 4)),
        "min_impurity_decrease": np.power(10, np.linspace(-4.1, -3.8, 5)),
        "ccp_alpha": np.power(10, np.linspace(-6.7, -5.3, 8)),
        "max_features": ["sqrt", "log2"] + list(np.exp(np.linspace(-2, 0, 4))),
    }
]

opt1 = GridSearch(model=DecisionTreeClassifier(), param_grid=param_grid)
opt1.fit(x_train1, y_train1, x_val1, y_val1)
opt1.score(x_val1, y_val1)
print(opt1.get_param_scores()[:3])

In [None]:
np.exp(np.linspace(-2, 0, 10))

In [None]:
np.power(10, np.linspace(-7, -4.3, 10))

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=12,
    min_samples_leaf=0.01,
    min_samples_split=0.01,
    min_impurity_decrease=0.01,
)

_ = fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.1,
    min_impurity_decrease=0.1,
)

_ = fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
        ]
    ),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.1,
    min_impurity_decrease=0.9,
)

_, transformer = fit_and_score(
    cls=gb_clf,
    transform_fun=aggr_units_and_time(other_columns=False),
)

In [None]:
x_train = transformer(onehot_enc_train_tab)
y_train = onehot_enc_train_tab.winner
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train)
x_test = transformer(onehot_enc_val_tab)
y_test = onehot_enc_val_tab.winner

In [None]:
from hypopt import GridSearch

param_grid = [
    {
        "max_depth": [3, 4, 5, 6, 7, 8, 10, 12],
        "min_samples_split": [2, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4],
        "min_samples_leaf": [1, 0.001, 0.005,0.01, 0.05, 0.1, 0.2, 0.3, 0.4],
        "min_impurity_decrease": [0.0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.2],
        "ccp_alpha": np.power(10, np.linspace(-6.7, -5.3, 8)),
        "max_features": ["sqrt", "log2"] + list(np.exp(np.linspace(-2, 0, 4)))
    }
]

opt = GridSearch(model=GradientBoostingClassifier(), param_grid=param_grid)
opt.fit(x_train, y_train, x_val, y_val)
opt.score(x_val, y_val)
print(opt.get_param_scores()[:3])

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=,
    min_samples_leaf=,
    min_samples_split=,
    min_impurity_decrease=,
)

_, transformer = fit_and_score(
    cls=gb_clf,
    transform_fun=aggr_units_and_time(other_columns=False),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.2,
    min_impurity_decrease=0.9,
)

fit_and_score(
    cls=gb_clf,
    transform_fun=aggr_units_and_time(other_columns=False),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.1,
    min_impurity_decrease=0.95,
)

fit_and_score(
    cls=gb_clf,
    transform_fun=aggr_units_and_time(other_columns=False),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.01,
    min_impurity_decrease=0.2,
)

fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=1e-3,
    min_samples_split=0.1,
    min_impurity_decrease=0.95,
)

fit_and_score(
    cls=gb_clf,
    transform_fun=select_attrs(
        [
            "teams.{team}.{u_no}.unit_type_{u_id}",
            "teams.{team}.{u_no}.current_hp",
            "teams.{team}.{u_no}.armor.hp",
            "teams.{team}.{u_no}.current_ap",
            "t",
            "turn_no",
        ]
    ),
)

In [None]:
adf = aggr_units_and_time()(onehot_enc_train_tab)
adf

In [None]:
adf_corrs = adf.corrwith(adf.winner)
adf_corrs.sort_values(inplace=True, ascending=False)

### Gadgets IDs and Counts
The goal is to represent each feature of a gadget by computing statistics based on the feature values of all player's units.

In [None]:
select_columns(infixes=["gadgets"])(onehot_enc_train_tab)

In [None]:
corrs[corrs.index.str.contains("gadget")][:100]

In [None]:
# initialization cell
def aggregate_attrs_by_gadget_id(df, other_columns=True, single_exp=1, sum_exp=1):
    columns = df.columns[
        df.columns.str.contains("gadgets_stats.") & df.columns.str.contains(".id_")
    ]
    if other_columns:
        aggr_df = df[df.columns.difference(columns, sort=False)].copy()
    else:
        aggr_df = pd.DataFrame(index=df.index)
    aggr_colnames = []
    for colname in columns:
        pos1 = colname.find("gadgets_stats.") + len("gadgets_stats.")
        pos_id = colname.find("id_")
        aggr_colname = colname[:pos1] + colname[pos_id:]
        if single_exp != 1:
            aggr_colname = f"({aggr_colname})^{single_exp}"
        if sum_exp != 1:
            aggr_colname = f"(sum of {aggr_colname})^{sum_exp}"
        if not aggr_colname in aggr_df.columns:
            aggr_df[aggr_colname] = 0
        aggr_colnames.append(aggr_colname)
        aggr_df[aggr_colname] += np.power(df[colname], single_exp)
    for aggr_colname in aggr_colnames:
        aggr_df[aggr_colname] = np.power(aggr_df[aggr_colname], sum_exp)
    return aggr_df


def aggr_by_gadget_id(*args, **kwargs):
    return functor(aggregate_attrs_by_gadget_id, *args, **kwargs)

In [None]:
# initialization cell
gdf = aggr_by_team()(
    aggr_by_gadget_id(other_columns=False)(
        select_columns(prefixes=["teams.1"])(onehot_enc_train_tab)
    )
)
gdf.columns = gdf.columns.str[len("teams.1.units.gadgets_stats.") :]
scaleTransformer = TabTransformer()
scaleTransformer.fit(gdf)
gdf = scaleTransformer(gdf, ret_pd=True)
gdf

In [None]:
# initialization cell
gadgets_corrs = gdf.corrwith(sdf.winner)
gadgets_corrs.sort_values(inplace=True, ascending=False)
gadgets_corrs

In [None]:
# initialization cell
def get_important_gadgets(
    df, impt, other_columns=False, ret_colnames=False, with_cnts=True, copy=False
):
    all_gadgets_mask = get_colsmask(df, infixes=["gadget"])
    if with_cnts:
        gadgets_cnts_mask = all_gadgets_mask & get_colsmask(df, suffixes=["cnt"])
    else:
        gadgets_cnts_mask = np.full_like(all_gadgets_mask, False)

    max_impt = max(np.abs(gadgets_corrs))
    if impt - 0.00001 > max_impt:
        raise Exception(f"gadgets: max impt is {max_impt}")
    impt_gadget_ids = gadgets_corrs[np.abs(gadgets_corrs) >= impt].index
    gadget_colsmask = get_colsmask(df, infixes=["gadget"], suffixes=impt_gadget_ids)

    gadget_colsmask |= gadgets_cnts_mask
    if other_columns:
        colsmask = (~all_gadgets_mask) | gadget_colsmask
    else:
        colsmask = gadget_colsmask

    columns = df.columns[colsmask]

    if ret_colnames:
        return columns
    else:
        df = df[columns]
        if copy:
            return df.copy()
        else:
            return df


def select_important_gadgets(*args, **kwargs):
    return functor(get_important_gadgets, *args, **kwargs)

In [None]:
select_important_gadgets(0.2, with_cnts=False)(sdf)

In [None]:
aggr_by_team()(select_important_gadgets(0.05, with_cnts=True)(sdf))

In [None]:
aggr_by_gadget_id()(aggr_by_team()(select_important_gadgets(-1, with_cnts=True)(sdf)))

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_gadgets(0.05, with_cnts=False, copy=True),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_gadgets(0.05, with_cnts=True, copy=True),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_gadgets(0.05, with_cnts=True, copy=True),
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_team()(
        select_important_gadgets(0.05, with_cnts=True, copy=True)
    ),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_team()(
        select_important_gadgets(0.05, with_cnts=True, copy=True)
    ),
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_gadget_id()(
        aggr_by_team()(select_important_gadgets(0.05, with_cnts=True, copy=True))
    ),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_gadget_id()(
        aggr_by_team()(select_important_gadgets(0.05, with_cnts=True, copy=True))
    ),
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_gadget_id()(
        aggr_by_team()(select_important_gadgets(-1, with_cnts=True, copy=True))
    ),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
# initialization cell
def transform_gadget_id_cnt(
    df, other_columns=False, single_g_id=1, cnt_exp=0, cnt_exp2=0
):
    new_columns = multiply_attrs_powers(
        df,
        [
            "teams.{team}.{u_no}.gadgets_stats.{g_no}.id_{g_id}",
            "teams.{team}.{u_no}.gadgets_stats.{g_no}.cnt",
        ],
        [(single_g_id, 0), (1, cnt_exp), (0, cnt_exp2)],
    )
    if other_columns:
        old_colnames = df.columns[get_colsmask(df, infixes=["gadgets_stats"])]
        return drop_old_and_add_new(
            df, old_colnames=old_colnames, new_columns=new_columns
        )
    else:
        return new_columns


def gadget_id_cnt(*args, **kwargs):
    return functor(transform_gadget_id_cnt, *args, **kwargs)

In [None]:
aggr_by_gadget_id()(
    aggr_by_team()(
        gadget_id_cnt(cnt_exp=1, cnt_exp2=1)(
            select_important_gadgets(0.068, with_cnts=True)(onehot_enc_train_tab)
        )
    )
)

In [None]:
aggr_by_gadget_id()(
    aggr_by_team()(
        gadget_id_cnt(cnt_exp=0, cnt_exp2=0.02)(
            select_important_gadgets(0.05, with_cnts=True)(onehot_enc_train_tab)
        )
    )
)

In [None]:
# initialization cell
def grid_gadget_id_cnt(impt, gadgets_kwargs={}, **kwargs):
    return gadget_id_cnt(**gadgets_kwargs, **kwargs)(
        select_important_gadgets(impt, with_cnts=True)
    )


def grid_aggr_by_team_gadget_id_cnt(
    impt, search="aggr", gadgets_kwargs={}, aggr_kwargs={}, **kwargs
):
    if search == "aggr":
        return aggr_by_team(**aggr_kwargs, **kwargs)(
            gadget_id_cnt(**gadgets_kwargs)(
                select_important_gadgets(impt, with_cnts=True)
            )
        )
    if search == "gadgets":
        return aggr_by_team(**aggr_kwargs)(
            gadget_id_cnt(**gadgets_kwargs, **kwargs)(
                select_important_gadgets(impt, with_cnts=True)
            )
        )
    raise Exception("search has to be in ['aggr', 'gadgets']")


def grid_aggr_by_g_id_and_team_gadget_id_cnt(
    impt,
    search="g_id_aggr",
    g_id_kwargs={},
    team_aggr_kwargs={},
    gadgets_kwargs={},
    **kwargs
):
    if search == "g_id_aggr":
        return aggr_by_gadget_id(**g_id_kwargs, **kwargs)(
            aggr_by_team(**team_aggr_kwargs)(
                gadget_id_cnt(**gadgets_kwargs)(
                    select_important_gadgets(impt, with_cnts=True)
                )
            )
        )
    if search == "team_aggr":
        return aggr_by_gadget_id(**g_id_kwargs)(
            aggr_by_team(**team_aggr_kwargs, **kwargs)(
                gadget_id_cnt(**gadgets_kwargs)(
                    select_important_gadgets(impt, with_cnts=True)
                )
            )
        )
    if search == "gadgets":
        return aggr_by_gadget_id(**g_id_kwargs)(
            aggr_by_team(**team_aggr_kwargs)(
                gadget_id_cnt(**gadgets_kwargs, **kwargs)(
                    select_important_gadgets(impt, with_cnts=True)
                )
            )
        )
    raise Exception("search has to be in ['g_id_aggr', 'team_aggr', 'gadgets']")

In [None]:
%%time
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(cnt_exp=np.arange(0, 0.1, 0.02), cnt_exp2=list(np.arange(0, 0.5, 0.02))),
    transform_kwargs=dict(impt=0.068, search="gadgets"),
    cls_type="log_regr",
)

In [None]:
%%time
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(cnt_exp2=list(np.arange(0.1, 0.14, 0.0005))),
    transform_kwargs=dict(impt=0.05, search="gadgets", gadgets_kwargs=dict(cnt_exp=0)),
    cls_type="log_regr",
)

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]

        
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(single_exp=exp_grid, sum_exp=exp_grid),
    transform_kwargs=dict(impt=0.05, search = "team_aggr", gadgets_kwargs=dict(cnt_exp=0, cnt_exp2=0.109)),
    cls_type="log_regr",
)

In [None]:
%%time
single_exp_grid = [1.8, 1.9, 2, 2.1, 2.2]
sum_exp_grid = [0.6, 0.7, 0.75, 0.8, 0.9]

        
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(single_exp=single_exp_grid, sum_exp=sum_exp_grid),
    transform_kwargs=dict(impt=0.05, search = "team_aggr", gadgets_kwargs=dict(cnt_exp=0, cnt_exp2=0.109)),
    cls_type="log_regr",
)

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]

        
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(single_exp=exp_grid, sum_exp=exp_grid),
    transform_kwargs=dict(impt=0.05, team_aggr_kwargs=dict(single_exp=2, sum_exp=0.7), gadgets_kwargs=dict(cnt_exp=0, cnt_exp2=0.109)),
    cls_type="log_regr",
)

In [None]:
%%time
single_exp_grid = np.arange(0.15, 0.45, 0.02)
sum_exp_grid = np.arange(0.85, 1.15, 0.02)
        
grid_search_transform(
    grid_aggr_by_g_id_and_team_gadget_id_cnt,
    make_grid(single_exp=single_exp_grid, sum_exp=sum_exp_grid),
    transform_kwargs=dict(impt=0.05, team_aggr_kwargs=dict(single_exp=2, sum_exp=0.7), gadgets_kwargs=dict(cnt_exp=0, cnt_exp2=0.109)),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def gadget_transform(impt, other_columns=True):
    return aggr_by_gadget_id(single_exp=0.7, sum_exp=1.2, other_columns=other_columns)(
        aggr_by_team(
            contains=["gadget"],
            other_columns=other_columns,
            single_exp=0.9,
            sum_exp=1.04,
        )(
            gadget_id_cnt(cnt_exp=0, cnt_exp2=0.009, other_columns=other_columns)(
                select_important_gadgets(
                    impt=impt, with_cnts=True, other_columns=other_columns
                )
            )
        )
    )

In [None]:
gadget_transform(0.11)(onehot_enc_train_tab)

In [None]:
%%time
gadget_transform(-1, other_columns=False)(onehot_enc_train_tab)

In [None]:
%%time
gadget_transform(0.025, other_columns=False)(onehot_enc_train_tab)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(-1, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(0.01, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(0.015, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(0.018, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(0.025, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=gadget_transform(0.05, other_columns=False),
    cls_type="log_regr",
)

### Weapons IDs and Counts
The goal is to represent each feature of a weapon by computing statistics based on the feature values of all player's units.

In [None]:
select_columns(infixes=["weapons"])(onehot_enc_train_tab)

In [None]:
corrs[corrs.index.str.contains("weapons")][:100]

In [None]:
# initialization cell
def aggregate_attrs_by_weapon_id(df, other_columns=True, single_exp=1, sum_exp=1):
    columns = df.columns[
        df.columns.str.contains("weapons_stats.") & df.columns.str.contains(".id_")
    ]
    if other_columns:
        aggr_df = df[df.columns.difference(columns, sort=False)].copy()
    else:
        aggr_df = pd.DataFrame(index=df.index)
    aggr_colnames = []
    for colname in columns:
        pos1 = colname.find("weapons_stats.") + len("weapons_stats.")
        pos_id = colname.find("id_")
        aggr_colname = colname[:pos1] + colname[pos_id:]
        if single_exp != 1:
            aggr_colname = f"({aggr_colname})^{single_exp}"
        if sum_exp != 1:
            aggr_colname = f"(sum of {aggr_colname})^{sum_exp}"
        if not aggr_colname in aggr_df.columns:
            aggr_df[aggr_colname] = 0
        aggr_colnames.append(aggr_colname)
        aggr_df[aggr_colname] += np.power(df[colname], single_exp)
    for aggr_colname in aggr_colnames:
        aggr_df[aggr_colname] = np.power(aggr_df[aggr_colname], sum_exp)
    return aggr_df


def aggr_by_weapon_id(*args, **kwargs):
    return functor(aggregate_attrs_by_weapon_id, *args, **kwargs)


In [None]:
# initialization cell
wdf = aggr_by_team()(
    aggr_by_weapon_id(other_columns=False)(
        select_columns(prefixes=["teams.1"])(onehot_enc_train_tab)
    )
)
wdf.columns = wdf.columns.str[len("teams.1.units.weapons_stats.") :]
scaleTransformer = TabTransformer()
scaleTransformer.fit(wdf)
wdf = scaleTransformer(wdf, ret_pd=True)
wdf

In [None]:
# initialization cell
weapons_corrs = wdf.corrwith(sdf.winner)
weapons_corrs.sort_values(inplace=True, ascending=False)
weapons_corrs

In [None]:
np.abs(weapons_corrs) > 0.5

In [None]:
# initialization cell


def get_important_weapons(
    df, impt, other_columns=False, ret_colnames=False, with_cnts=True, copy=False
):
    all_weapons_mask = get_colsmask(df, infixes=["weapon"])
    if with_cnts:
        weapons_cnts_mask = all_weapons_mask & get_colsmask(df, suffixes=["cnt"])
    else:
        weapons_cnts_mask = np.full_like(all_weapons_mask, False)

    max_impt = max(np.abs(weapons_corrs))
    if impt - 0.00001 > max_impt:
        raise Exception(f"weapons: max impt is {max_impt}")
    impt_weapon_ids = weapons_corrs[np.abs(weapons_corrs) >= impt].index
    weapon_colsmask = get_colsmask(df, infixes=["weapon"], suffixes=impt_weapon_ids)

    weapon_colsmask |= weapons_cnts_mask
    if other_columns:
        colsmask = (~all_weapons_mask) | weapon_colsmask
    else:
        colsmask = weapon_colsmask

    columns = df.columns[colsmask]

    if ret_colnames:
        return columns
    else:
        df = df[columns]
        if copy:
            return df.copy()
        else:
            return df


def select_important_weapons(*args, **kwargs):
    return functor(get_important_weapons, *args, **kwargs)

In [None]:
select_important_weapons(impt=0.11, with_cnts=True)(sdf)

In [None]:
aggr_by_team()(select_important_weapons(0.05, with_cnts=True)(sdf))

In [None]:
aggr_by_weapon_id()(select_important_weapons(0.05, with_cnts=True)(sdf))

In [None]:
aggr_by_weapon_id()(aggr_by_team()(select_important_weapons(-1, with_cnts=True)(sdf)))

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_weapons(0.05, with_cnts=False, copy=True),
    cls_type="log_regr",
    fast_training=True,
)
 

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_weapons(-1, with_cnts=False, copy=True),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_weapons(-1, with_cnts=False, copy=True),
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_important_weapons(0.05, with_cnts=True, copy=True),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_team()(select_important_weapons(0.05, with_cnts=True)),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_weapon_id()(
        aggr_by_team()(select_important_weapons(-1, with_cnts=True))
    ),
    cls_type="log_regr",
    fast_training=True,
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_by_weapon_id()(
        aggr_by_team()(select_important_weapons(-1, with_cnts=True))
    ),
    fast_training=True,
)

In [None]:
# initialization cell


def transform_weapon_id_cnt(
    df, other_columns=False, single_w_id=1, cnt_exp=0, cnt_exp2=1
):
    new_columns = multiply_attrs_powers(
        df,
        [
            "teams.{team}.{u_no}.weapons_stats.{w_no}.id_{w_id}",
            "teams.{team}.{u_no}.weapons_stats.{w_no}.cnt",
        ],
        [(single_w_id, 0), (1, cnt_exp), (0, cnt_exp2)],
    )
    if other_columns:
        old_colnames = df.columns[get_colsmask(df, infixes=["weapons_stats"])]
        return drop_old_and_add_new(
            df, old_colnames=old_colnames, new_columns=new_columns
        )
    else:
        return new_columns


def weapon_id_cnt(*args, **kwargs):
    return functor(transform_weapon_id_cnt, *args, **kwargs)

In [None]:
aggr_by_weapon_id()(aggr_by_team()(weapon_id_cnt(cnt_exp=1)(
    select_important_weapons(0.085, with_cnts=True)(onehot_enc_train_tab)
)))

In [None]:
aggr_by_weapon_id()(aggr_by_team()(weapon_id_cnt(cnt_exp=0)(
    select_important_weapons(0.051, with_cnts=True)(onehot_enc_train_tab)
)))

In [None]:
# initialization cell
def grid_weapon_id_cnt(impt, weapons_kwargs={}, **kwargs):
    return weapon_id_cnt(**weapons_kwargs, **kwargs)(
        select_important_weapons(impt, with_cnts=True)
    )


def grid_aggr_by_team_weapon_id_cnt(
    impt, search="aggr", weapons_kwargs={}, aggr_kwargs={}, **kwargs
):
    if search == "aggr":
        return aggr_by_team(**aggr_kwargs, **kwargs)(
            weapon_id_cnt(**weapons_kwargs)(
                select_important_weapons(impt, with_cnts=True)
            )
        )
    if search == "weapons":
        return aggr_by_team(**aggr_kwargs)(
            weapon_id_cnt(**weapons_kwargs, **kwargs)(
                select_important_weapons(impt, with_cnts=True)
            )
        )
    raise Exception("search has to be in ['aggr', 'weapons']")


def grid_aggr_by_w_id_and_team_weapon_id_cnt(
    impt,
    search="w_id_aggr",
    w_id_kwargs={},
    team_aggr_kwargs={},
    weapons_kwargs={},
    **kwargs
):
    if search == "w_id_aggr":
        return aggr_by_weapon_id(**w_id_kwargs, **kwargs)(
            aggr_by_team(**team_aggr_kwargs)(
                weapon_id_cnt(**weapons_kwargs)(
                    select_important_weapons(impt, with_cnts=True)
                )
            )
        )
    if search == "team_aggr":
        return aggr_by_weapon_id(**w_id_kwargs)(
            aggr_by_team(**team_aggr_kwargs, **kwargs)(
                weapon_id_cnt(**weapons_kwargs)(
                    select_important_weapons(impt, with_cnts=True)
                )
            )
        )
    if search == "weapons":
        return aggr_by_weapon_id(**w_id_kwargs)(
            aggr_by_team(**team_aggr_kwargs)(
                weapon_id_cnt(**weapons_kwargs, **kwargs)(
                    select_important_weapons(impt, with_cnts=True)
                )
            )
        )
    raise Exception("search has to be in ['w_id_aggr', 'team_aggr', 'weapons']")


impt_grid = list(np.arange(20, 300, 20)) + list(np.arange(300, 951, 50)) + [-1]

In [None]:
%%time
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(cnt_exp=np.arange(0, 0.1, 0.02), cnt_exp2=list(np.arange(0, 0.5, 0.02))),
    transform_kwargs=dict(impt=0.085, search="weapons"),
    cls_type="log_regr",
)

In [None]:
%%time
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(cnt_exp2=list(np.arange(0, 0.05, 0.001))),
    transform_kwargs=dict(impt=0.085, search="weapons", weapons_kwargs=dict(cnt_exp=0)),
    cls_type="log_regr",
)

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]

        
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(single_exp=exp_grid, sum_exp=exp_grid),
    transform_kwargs=dict(impt=0.051, search="team_aggr", weapons_kwargs=dict(cnt_exp=0, cnt_exp2=0.009)),
    cls_type="log_regr",
)

In [None]:
%%time
single_exp_grid = np.arange(0.01, 0.3, 0.02)
sum_exp_grid = np.arange(0.9, 1.1, 0.02)
        
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(single_exp=single_exp_grid, sum_exp=sum_exp_grid),
    transform_kwargs=dict(impt=0.051, search="team_aggr", weapons_kwargs=dict(cnt_exp=0, cnt_exp2=0.009)),
    cls_type="log_regr",
)

In [None]:
%%time
exp_grid = [0.1, 0.3, 0.5, 0.7, 1, 1.2, 1.5, 1.7, 2, 2.5]

        
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(single_exp=exp_grid, sum_exp=exp_grid),
    transform_kwargs=dict(impt=0.051,
                          team_aggr_kwargs=dict(single_exp=0.9, sum_exp=1.04),
                          weapons_kwargs=dict(cnt_exp=0, cnt_exp2=0.009)),
    cls_type="log_regr",
)

In [None]:
%%time
single_exp_grid = np.arange(0.03, 0.7, 0.02)
sum_exp_grid = np.arange(1, 1.4, 0.02)

        
grid_search_transform(
    grid_aggr_by_w_id_and_team_weapon_id_cnt,
    make_grid(single_exp=single_exp_grid, sum_exp=sum_exp_grid),
    transform_kwargs=dict(impt=0.051,
                          team_aggr_kwargs=dict(single_exp=0.9, sum_exp=1.04),
                          weapons_kwargs=dict(cnt_exp=0, cnt_exp2=0.009)),
    cls_type="log_regr",
)

In [None]:
# initialization cell
def weapon_transform(impt, other_columns=True):
    return aggr_by_weapon_id(single_exp=0.7, sum_exp=1.2, other_columns=other_columns)(
        aggr_by_team(
            contains=["weapon"],
            other_columns=other_columns,
            single_exp=0.9,
            sum_exp=1.04,
        )(
            weapon_id_cnt(cnt_exp=0, cnt_exp2=0.009, other_columns=other_columns)(
                select_important_weapons(
                    impt=impt, with_cnts=True, other_columns=other_columns
                )
            )
        )
    )

In [None]:
%%time
weapon_transform(0.035, other_columns=False)(onehot_enc_train_tab)

In [None]:
%%time
weapon_transform(-1, other_columns=False)(onehot_enc_train_tab)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=weapon_transform(-1, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=weapon_transform(0.01, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=weapon_transform(0.02, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=weapon_transform(0.03, other_columns=False),
    cls_type="log_regr",
)

In [None]:
%%time
cls, trans = fit_and_score(
    transform_fun=weapon_transform(0.035, other_columns=False),
    cls_type="log_regr",
)

### Generators

In [None]:
# initialization cell
generators_dead_columns = get_colnames(sdf, prefixes=["generators"], suffixes=["dead"])
generators_hp_columns = get_colnames(sdf, prefixes=["generators"], suffixes=["hp"])

In [None]:
# initialization cell


def aggregate_team_generators_single_features(
    df, columns="all", single_exp=1, sum_exp=1, replace=True
):
    if type(columns) is str and columns == "all":
        columns = df.columns[df.columns.str.startswith("generators")]
    if replace:
        aggr_df = df[df.columns.difference(columns, sort=False)].copy()
    else:
        aggr_df = df.copy()
    aggr_colnames = []
    for team in [0, 1]:
        for gr_no in units_nos:
            prefix = f"generators.{team}.{gr_no}."
            for colname in columns[columns.str.startswith(prefix)]:
                aggr_colname = colname.replace(prefix, f"generators.{team}.")
                if single_exp != 1:
                    aggr_colname = f"({aggr_colname})^{single_exp}"
                if sum_exp != 1:
                    aggr_colname = f"(sum of {aggr_colname})^{sum_exp}"
                if not aggr_colname in aggr_df.columns:
                    aggr_df[aggr_colname] = 0
                aggr_colnames.append(aggr_colname)
                aggr_df[aggr_colname] += np.power(df[colname], single_exp)
    for aggr_colname in aggr_colnames:
        aggr_df[aggr_colname] = np.power(aggr_df[aggr_colname], sum_exp)
    return aggr_df


def aggregate_team_generators(
    df,
    single_dead_exp=1,
    single_hp_exp=1,
    sum_dead_exp=1,
    sum_hp_exp=1,
):
    df1 = aggregate_team_generators_single_features(
        df, generators_dead_columns, single_dead_exp, sum_dead_exp, replace=False
    )
    return aggregate_team_generators_single_features(
        df1, generators_hp_columns, single_hp_exp, sum_hp_exp, replace=False
    )


def aggr_team_generators(*args, **kwargs):
    return functor(aggregate_team_generators, *args, **kwargs)

In [None]:
aggr_team_generators()(select_columns(prefixes=["generators"])(onehot_enc_train_tab))

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(prefixes=["generators"], copy=True),
    cls_type="log_regr",
    mode="Devastator",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(prefixes=["generators"], copy=True),
    mode="Devastator",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_team_generators()(select_columns(prefixes=["generators"])),
    cls_type="log_regr",
    mode="Devastator",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_team_generators()(select_columns(prefixes=["generators"])),
    mode="Devastator",
)

In [None]:
# initialization cell
def grid_aggr_team_generators(*args, **kwargs):
    return aggr_team_generators(*args, **kwargs)(
        select_columns(prefixes=["generators"])
    )

In [None]:
%%time
gn_exp_grid = [0.1, 0.5, 1, 1.5, 2]

grid_search_transform(
    grid_aggr_team_generators,
    make_grid(single_dead_exp=gn_exp_grid,
              single_hp_exp=gn_exp_grid,
              sum_dead_exp=gn_exp_grid,
              sum_hp_exp=gn_exp_grid),
    mode="Devastator",
    cls_type="log_regr",
)

In [None]:
%%time

grid_search_transform(
    grid_aggr_team_generators,
    make_grid(single_dead_exp=[1.75, 2, 2.5],
              single_hp_exp=[0.5, 0.75, 1],
              sum_dead_exp=[0.75, 1, 1.25],
              sum_hp_exp=[1.25, 1.5, 1.75]),
    mode="Devastator",
    cls_type="log_regr",
)

In [None]:
# initialization cell
def aggr_generators(other_columns=True):
    fun = aggr_team_generators(
        single_dead_exp=1.75, single_hp_exp=0.75, sum_dead_exp=1, sum_hp_exp=1.25
    )
    if other_columns:
        return fun
    else:
        return fun(select_columns(prefixes=["generators"]))

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_generators(other_columns=False),
    cls_type="log_regr",
    mode="Devastator",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_generators(other_columns=False),
    mode="Devastator",
)

### Control Points

In [None]:
# initialization cell
cp_owner_columns = get_colnames(sdf, prefixes=["control_points"], infixes=["owner"])
cp_captured_columns = get_colnames(
    sdf, prefixes=["control_points"], infixes=["captured"]
)
cp_not_owned_columns = get_colnames(
    sdf, prefixes=["control_points"], infixes=["owner_-1"]
)
cp_owner_columns = cp_owner_columns.difference(cp_not_owned_columns)

In [None]:
select_columns(cp_owner_columns.union(cp_captured_columns))(sdf)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(
        cp_owner_columns.union(cp_captured_columns).union(cp_not_owned_columns),
        copy=True,
    ),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(
        cp_owner_columns.union(cp_captured_columns).union(cp_not_owned_columns),
        copy=True,
    ),
    mode="Domination",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(
        cp_owner_columns.union(cp_captured_columns),
        copy=True,
    ),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(
        cp_owner_columns.union(cp_captured_columns),
        copy=True,
    ),
    mode="Domination",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=select_columns(
        cp_owner_columns.union(cp_captured_columns).union(cp_not_owned_columns),
        copy=True,
    ),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
# initialization cell

def aggregate_team_control_points_single_features(
    df,
    ftr,
    single_exp=1,
    sum_exp=1,
    other_columns=True,
):
    columns = df.columns[
        df.columns.str.contains("control_points.") & df.columns.str.contains(ftr)
    ]
    if other_columns:
        aggr_df = df[df.columns.difference(columns, sort=False)].copy()
    else:
        aggr_df = pd.DataFrame(index=df.index)

    aggr_colnames = []
    for team in [0, 1]:
        for cp_no in control_points_nos:
            prefix = f"control_points.{cp_no}.{ftr}_{team}"
            for colname in columns[columns.str.startswith(prefix)]:
                aggr_colname = colname.replace(prefix, f"control_points.{ftr}_{team}")
                if single_exp != 1:
                    aggr_colname = f"({aggr_colname})^{single_exp}"
                if sum_exp != 1:
                    aggr_colname = f"(sum of {aggr_colname})^{sum_exp}"
                if not aggr_colname in aggr_df.columns:
                    aggr_df[aggr_colname] = 0
                aggr_colnames.append(aggr_colname)
                aggr_df[aggr_colname] += np.power(df[colname], single_exp)
    for aggr_colname in aggr_colnames:
        aggr_df[aggr_colname] = np.power(aggr_df[aggr_colname], sum_exp)
    return aggr_df


def aggregate_team_control_points(
    df, single_own_exp=1, single_cpt_exp=1, sum_own_exp=1, sum_cpt_exp=1
):
    df1 = aggregate_team_control_points_single_features(
        df, "owner", single_own_exp, sum_own_exp
    )
    return aggregate_team_control_points_single_features(
        df1, "captured", single_cpt_exp, sum_cpt_exp
    )


def aggr_team_control_points(*args, **kwargs):
    return functor(aggregate_team_control_points, *args, **kwargs)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_team_control_points()(
        select_columns(cp_owner_columns.union(cp_captured_columns))
    ),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
aggr_team_control_points()(
    select_columns(cp_owner_columns.union(cp_captured_columns))(onehot_enc_train_tab)
)

In [None]:
# initialization cell
def grid_aggr_team_control_points(*args, **kwargs):
    return aggr_team_control_points(*args, **kwargs)(
        select_columns(cp_owner_columns.union(cp_captured_columns))
    )

In [None]:
cls, trans = fit_and_score(
    transform_fun=grid_aggr_team_control_points(),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
%%time
cp_exp_grid = [0.1, 0.5, 1, 1.5, 2]

grid_search_transform(
    grid_aggr_team_control_points,
    make_grid(single_own_exp=cp_exp_grid,
              single_cpt_exp=cp_exp_grid,
              sum_own_exp=cp_exp_grid,
              sum_cpt_exp=cp_exp_grid),
    mode="Domination",
    cls_type="log_regr",
)

In [None]:
%%time

grid_search_transform(
    grid_aggr_team_control_points,
    make_grid(single_own_exp=np.arange(0.01, 0.5, 0.04),
              single_cpt_exp=np.arange(0.01, 0.5, 0.04)),
    transform_kwargs=dict(sum_own_exp=1, sum_cpt_exp=1),
    mode="Domination",
    cls_type="log_regr",
)

In [None]:
# initialization cell
def aggr_control_points(other_columns=True):
    params = dict(
        single_own_exp=0.01, single_cpt_exp=0.01, sum_own_exp=1, sum_cpt_exp=1
    )
    fun = aggr_team_control_points(**params)(
        select_columns(all_except=True, infixes=["owner_-1"])
    )
    if other_columns:
        return fun
    else:
        return aggr_team_control_points(**params)(
            select_columns(all_except=True, infixes=["owner_-1"])(
                select_columns(prefixes=["control_points"])
            )
        )

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_control_points(other_columns=False),
    cls_type="log_regr",
    mode="Domination",
)

In [None]:
cls, trans = fit_and_score(
    transform_fun=aggr_control_points(other_columns=False),
    mode="Domination",
)

### Devastator and Domination Transformation

In [None]:
# initialization cell
def general_transformation(x, weapons_impt=-1, gadgets_impt=-1):
    return weapon_transform(weapons_impt)(
        gadget_transform(gadgets_impt)(
            aggr_units_and_time()(
                select_columns(all_except=True, infixes=["map_name"])(x)
            )
        )
    )

In [None]:
# initialization cell
general_transformer = TabTransformer(
    transform_fun=general_transformation, scale=False, ret_pd=True, keep_res=True
)
general_train_tab = general_transformer.fit_transform(onehot_enc_train_tab)
general_val_tab = general_transformer(onehot_enc_val_tab)
general_test_tab = general_transformer(onehot_enc_test_tab)

In [None]:
general_train_tab

In [None]:
fit_and_score(
    transform_fun=lambda df: df.copy(),
    train_tab=general_train_tab,
    val_tab=general_val_tab,
    cls_type="log_regr",
)

In [None]:
cls, transformer = fit_and_score(
    transform_fun=lambda df: df.copy(),
    train_tab=general_train_tab,
    val_tab=general_val_tab,
)

In [None]:
general_x_train = transformer(general_train_tab)
general_y_train = general_train_tab.winner.astype(int)

In [None]:
general_x_val = transformer(general_val_tab)
general_y_val = general_val_tab.winner.astype(int)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

general_gb_clf = GradientBoostingClassifier(
    n_estimators=1000,
    n_iter_no_change=10,
    learning_rate=0.01,
    tol=1e-5,
    random_state=3,
    verbose=3,
    warm_start=True,
    subsample=0.5,
    validation_fraction=0.2,
    max_depth=3,
)
general_gb_clf.fit(general_x_train, general_y_train)

In [None]:
get_score(general_gb_clf, general_x_val, general_y_val, "val")

### Devastator Model

In [None]:
# initialization cell
devastator_transformation = lambda x: aggr_generators()(
    select_columns(all_except=True, infixes=["control_points"])(
        select_columns(all_except=True, equals=["turn_no"])(x)
    )
)
devastator_transformer_from_general = TabTransformer(
    transform_fun=devastator_transformation, keep_res=True, ret_pd=True
)
devastator_train_tab = devastator_transformer_from_general.fit_transform(
    general_train_tab[general_train_tab.mode_Devastator == 1]
)
devastator_val_tab = devastator_transformer_from_general(
    general_val_tab[general_val_tab.mode_Devastator == 1]
)
devastator_test_tab = devastator_transformer_from_general(
    general_test_tab[general_test_tab.mode_Devastator == 1]
)

In [None]:
devastator_val_tab

In [None]:
devastator_x_train = devastator_train_tab.drop(columns=["winner"])
devastator_y_train = devastator_train_tab.winner
devastator_x_val = devastator_val_tab.drop(columns=["winner"])
devastator_y_val = devastator_val_tab.winner
devastator_x_test = devastator_test_tab.drop(columns=["winner"])
devastator_y_test = devastator_test_tab.winner

## Logistic Regression

In [None]:
devastator_log_regr = LogisticRegression(
    solver="saga", max_iter=1000, tol=1e-5, random_state=3, n_jobs=3
)

fit_and_score(
    cls=devastator_log_regr,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

In [None]:
scores_by_turn_no_and_mode(
    devastator_log_regr,
    general_val_tab,
    devastator_transformer_from_general,
    "Devastator",
)

In [None]:
from sklearn.linear_model import LogisticRegressionCV

devastator_log_regr_cv = LogisticRegressionCV(max_iter=1000, n_jobs=3)

fit_and_score(
    cls=devastator_log_regr_cv,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

In [None]:
devastator_log_regr = LogisticRegression(
    solver="saga", max_iter=1000, tol=1e-5, random_state=3, n_jobs=3
)

fit_and_score(
    cls=devastator_log_regr,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

In [None]:
from sklearn.linear_model import LogisticRegressionCV

devastator_log_regr_cv = LogisticRegressionCV(max_iter=1000, n_jobs=3)

fit_and_score(
    cls=devastator_log_regr_cv,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

## Extra Trees and Gradient Boosting

In [None]:
devastator_trees = ExtraTreesClassifier(
    n_estimators=100, random_state=3, bootstrap=True, min_samples_leaf=1.5e-3
)

fit_and_score(
    cls=devastator_trees,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

In [None]:
devastator_trees = ExtraTreesClassifier(
    n_estimators=1000,
    ccp_alpha=7.943282347242822e-07,
    max_depth=14,
    max_features=1.0,
    min_impurity_decrease=0.0001467799267622069,
    min_samples_leaf=0.00031622776601683794,
    min_samples_split=6.812920690579608e-05,
)

fit_and_score(
    cls=devastator_trees,
    x_train=devastator_x_train,
    y_train=devastator_y_train,
    x_val=devastator_x_val,
    y_val=devastator_y_val,
    ret=False,
)

In [None]:
scores_by_turn_no_and_mode(
    devastator_trees, general_val_tab, devastator_transformer_from_general, "Devastator"
)

In [None]:
devastator_transformer_from_general_leq_5 = TabTransformer(
    transform_fun=devastator_transformation, keep_res=True, ret_pd=True
)
devastator_train_tab_leq_5 = devastator_transformer_from_general_leq_5.fit_transform(
    general_train_tab[
        (general_train_tab.mode_Devastator == 1) & (general_train_tab.turn_no <= 5)
    ]
)
devastator_val_tab_leq_5 = devastator_transformer_from_general(
    general_val_tab[
        (general_val_tab.mode_Devastator == 1) & (general_val_tab.turn_no <= 5)
    ]
)
devastator_test_tab_leq_5 = devastator_transformer_from_general(
    general_test_tab[
        (general_test_tab.mode_Devastator == 1) & (general_test_tab.turn_no <= 5)
    ]
)

In [None]:
gb_clf = GradientBoostingClassifier(n_estimators=100)

fit_and_score(
    cls=gb_clf,
    train_tab=devastator_train_tab_leq_5,
    val_tab=devastator_val_tab_leq_5,
    ret=False,
)

In [None]:
scores_by_turn_no_and_mode(
    gb_clf,
    general_val_tab[general_val_tab.turn_no <= 5],
    devastator_transformer_from_general_leq_5,
    "Devastator",
)

In [None]:
logregr_clf = fit_and_score(
    train_tab=devastator_train_tab_leq_5,
    val_tab=devastator_val_tab_leq_5,
    cls_type="log_regr",
)

In [None]:
scores_by_turn_no_and_mode(
    logregr_clf,
    general_val_tab[general_val_tab.turn_no <= 5],
    devastator_transformer_from_general_leq_5,
    "Devastator",
)

In [None]:
devastator_leq_5_x_train = devastator_train_tab_leq_5.drop(columns=["winner"])
devastator_leq_5_y_train = devastator_train_tab_leq_5.winner
devastator_leq_5_x_val_test = devastator_val_tab_leq_5.drop(columns=["winner"])
devastator_leq_5_y_val_test = devastator_val_tab_leq_5.winner
devastator_leq_5_x_test = devastator_test_tab_leq_5.drop(columns=["winner"])
devastator_leq_5_y_test = devastator_test_tab_leq_5.winner

devastator_leq_5_x_train, devastator_leq_5_x_val, devastator_leq_5_y_train, devastator_leq_5_y_val = train_test_split(devastator_leq_5_x_train, devastator_leq_5_y_train)

In [None]:
from hypopt import GridSearch

param_grid = [
    {
        "max_depth": [3, 4, 5, 6, 8],
        "min_samples_split": [0.005, 0.01, 0.05, 0.1, 0.2],
        "min_samples_leaf": [0.005,0.01, 0.05, 0.1, 0.2],
        "min_impurity_decrease": [0.0, 0.001, 0.01, 0.05, 0.1, 0.2],
        "ccp_alpha": np.power(10, np.linspace(-6.7, -5.3, 3)),
        "max_features": ["sqrt", "log2"] + list(np.exp(np.linspace(-2, 0, 3)))
    }
]

opt = GridSearch(model=GradientBoostingClassifier(), param_grid=param_grid)
opt.fit(devastator_leq_5_x_train, devastator_leq_5_y_train, devastator_leq_5_x_val, devastator_leq_5_y_val)
opt.score(devastator_leq_5_x_val, devastator_leq_5_y_val)
print(opt.get_param_scores()[:3])

## DNNs

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.optimizers import SGD, Adam

model = Sequential()
model.add(
    Dense(
        100,
        input_dim=devastator_x_train.shape[1],
        activation="sigmoid",
        kernel_initializer="he_normal",
    )
)
model.add(Dropout(0.5))
model.add(Dense(100, activation="sigmoid", kernel_initializer="he_normal"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="sigmoid", kernel_initializer="he_normal"))
model.add(Dropout(0.5))
model.add(Dense(1, activation="relu"))

opt = Adam(learning_rate=0.001, beta_1=0.9)

model.compile(loss="mse", optimizer=opt, metrics=["accuracy"])
model.fit(
    devastator_x_train,
    devastator_y_train,
    epochs=30,
    batch_size=64,
    verbose=2,
    validation_split=0.01,
)
# preds_Devastator = model.predict(X_Devastator_test)

In [None]:
#!pip install tensorflow

In [None]:
# print(len(X_Dominator.columns))

model = Sequential()
model.add(Dense(100, input_dim=X_Dominator.shape[1], activation='sigmoid', kernel_initializer='he_normal'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='sigmoid', kernel_initializer='he_normal'))
model.add(Dropout(0.5))
model.add(Dense(20, activation='sigmoid', kernel_initializer='he_normal'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))

opt = Adam(learning_rate=0.002, beta_1=0.9)

model.compile(loss='mse', optimizer=opt, metrics=['accuracy'])
model.fit(X_Dominator, y_Dominator, epochs=30, batch_size=64, verbose=2, validation_split=0.01)
preds_Dominator = model.predict(X_Dominator_test)

## Domination Model

In [None]:
# initialization cell
domination_transformation = lambda x: aggr_control_points()(
    select_columns(all_except=True, infixes=["generators"])(
        select_columns(all_except=True, equals=["turn_no"])(x)
    )
)

domination_transformer_from_general = TabTransformer(
    transform_fun=domination_transformation, keep_res=True, ret_pd=True
)
domination_train_tab = domination_transformer_from_general.fit_transform(
    general_train_tab[general_train_tab.mode_Domination == 1]
)
domination_val_tab = domination_transformer_from_general(general_val_tab)
domination_test_tab = domination_transformer_from_general(general_test_tab)

In [None]:
domination_train_tab

In [None]:
domination_x_train = domination_train_tab.drop(columns=["winner"])
domination_y_train = domination_train_tab.winner
domination_x_val = domination_val_tab.drop(columns=["winner"])
domination_y_val = domination_val_tab.winner
domination_x_test = domination_test_tab.drop(columns=["winner"])
domination_y_test = domination_test_tab.winner

In [None]:
domination_trees = ExtraTreesClassifier(
    n_estimators=200,
    ccp_alpha=7.943282347242822e-07,
    max_depth=12,
    max_features=1.0,
    min_impurity_decrease=0.0001467799267622069,
    min_samples_leaf=0.00031622776601683794,
    min_samples_split=6.812920690579608e-05,
)

fit_and_score(
    cls=domination_trees,
    x_train=domination_x_train,
    y_train=domination_y_train,
    x_val=domination_x_val,
    y_val=domination_y_val,
    ret=False,
)

In [None]:
domination_log_regr = fit_and_score(
    x_train=domination_x_train,
    y_train=domination_y_train,
    x_val=domination_x_val,
    y_val=domination_y_val,
    cls_type="log_regr",
)

In [None]:
scores_by_turn_no_and_mode(
    domination_trees, general_val_tab, domination_transformer_from_general, "Domination"
)

In [None]:
scores_by_turn_no_and_mode(
    domination_log_regr,
    general_val_tab,
    domination_transformer_from_general,
    "Domination",
)

In [None]:
domination_transformer_from_general_leq_4 = TabTransformer(
    transform_fun=domination_transformation, keep_res=True, ret_pd=True
)
domination_train_tab_leq_4 = domination_transformer_from_general_leq_4.fit_transform(
    general_train_tab[
        (general_train_tab.mode_Domination == 1) & (general_train_tab.turn_no <= 4)
    ]
)
domination_val_tab_leq_4 = domination_transformer_from_general(
    general_val_tab[
        (general_val_tab.mode_Domination == 1) & (general_val_tab.turn_no <= 4)
    ]
)
domination_test_tab_leq_4 = domination_transformer_from_general(
    general_test_tab[
        (general_test_tab.mode_Domination == 1) & (general_test_tab.turn_no <= 4)
    ]
)

In [None]:
cls = domination_log_regr = fit_and_score(
    train_tab=domination_train_tab_leq_4,
    val_tab=domination_val_tab_leq_4,
    cls_type="log_regr",
)

In [None]:
scores_by_turn_no_and_mode(
    cls,
    general_val_tab[(general_val_tab.turn_no <= 4)],
    domination_transformer_from_general_leq_4,
    "Domination",
)

In [None]:
domination_transformer_from_general_ge_4 = TabTransformer(
    transform_fun=lambda x: domination_transformation, keep_res=True, ret_pd=True
)
domination_train_tab_ge_4 = domination_transformer_from_general_ge_4.fit_transform(
    general_train_tab[
        (general_train_tab.mode_Domination == 1) & (general_train_tab.turn_no > 10)
    ]
)
domination_val_tab_ge_4 = domination_transformer_from_general(
    general_val_tab[
        (general_val_tab.mode_Domination == 1) & (general_val_tab.turn_no > 10)
    ]
)
domination_test_tab_ge_4 = domination_transformer_from_general(
    general_test_tab[
        (general_test_tab.mode_Domination == 1) & (general_test_tab.turn_no > 10)
    ]
)

In [None]:
cls = domination_log_regr = fit_and_score(
    train_tab=domination_train_tab_ge_4,
    val_tab=domination_val_tab_ge_4,
    cls_type="log_regr",
)

In [None]:
cls = domination_log_regr = fit_and_score(
    train_tab=domination_train_tab_ge_4,
    val_tab=domination_val_tab_ge_4,
)

In [None]:
scores_by_turn_no_and_mode(
    cls,
    general_val_tab,
    domination_transformer_from_general_ge_4,
    "Domination",
)

# Old Single Model Without Gadgets and Weapons

In [None]:
df = onehot_enc_train_tab
colnames2 = df.columns[
    (
        ~(
            get_colsmask(
                df, infixes=["gadget", "weapon", "armor.id", "turns_range", "map_name"]
            )
            | get_colsmask(df, prefixes=["team"], suffixes=["dead"])
            | get_colsmask(df, equals=["t"])
        )
    )
    | get_colsmask(df, equals=["turn_no", "turns_range_3-4", "to_move"])
]

In [None]:
list(colnames2.values)

## Plain One-Hot-Encoded Features

In [None]:
%%time
exp_grid = [0.1, 0.5, 1, 1.5, 2]
grid_search_transform(
    aggr_by_unit_type,
    make_grid(
        single_exp=exp_grid, sum_exp=exp_grid
    ),
    cls_type="log_regr",
)

In [None]:
%%time
grid_search_transform(
    aggr_by_team,
    make_grid(
        single_exp=[0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5], sum_exp=[0.5, 0.75, 1, 1.25]
    ),
    cls_type="log_regr",
)

In [None]:
cls, transformer = fit_and_score(
    transform_fun=select_columns(colnames2, copy=True),
)

In [None]:
cls, transformer = fit_and_score(
    transform_fun=select_columns(colnames2, copy=True),
    cls_type="log_regr",
)

In [None]:
cls, transformer = fit_and_score(
    transform_fun=aggr_by_unit_type()(select_columns(colnames2)),
    cls_type="log_regr",
)

## Transforming Unit Type, HP, AP and Armor

In [None]:
cls, transformer = fit_and_score(
    transform_fun=replace_unit_type_ap_hp_armor()(select_columns(colnames2)),
    cls_type="log_regr",
)

In [None]:
cls, transformer = fit_and_score(
    transform_fun=aggr_by_unit_type()(
        replace_unit_type_ap_hp_armor()(select_columns(colnames2))
    ),
    cls_type="log_regr",
)

## Aggregating Generators

In [None]:
cls, transformer = fit_and_score(
    transform_fun=aggr_team_generators()(select_columns(colnames2)),
    cls_type="log_regr",
)

In [None]:
cls3, transformer3 = fit_and_score(
    transform_fun=aggr_team_generators(
        aggr_by_unit_type(
            replace_unit_type_ap_hp_armor(
                transform_fun=transform_unit_type_ap_hp_armor2
            )
        )
    ),
    cls_type="log_regr",
    train_tab=onehot_enc_train_tab2,
    val_tab=onehot_enc_val_tab2,
)

## Aggregation Control Point

In [None]:
cls3, transformer3 = fit_and_score(
    transform_fun=aggr_control_points_by_team(),
    cls_type="log_regr",
    train_tab=onehot_enc_train_tab2,
    val_tab=onehot_enc_val_tab2,
)

In [None]:
cls4, transformer4 = fit_and_score(
    transform_fun=aggr_control_points_by_team(
        aggr_team_generators(
            aggr_by_unit_type(
                replace_unit_type_ap_hp_armor(
                    transform_fun=transform_unit_type_ap_hp_armor2
                )
            )
        )
    ),
    cls_type="log_regr",
    train_tab=onehot_enc_train_tab2,
    val_tab=onehot_enc_val_tab2,
)

In [None]:
cls4, transformer4 = fit_and_score(
    transform_fun=aggr_by_unit_type(
        aggr_control_points_by_team(aggregate_team_generators)
    ),
    train_tab=onehot_enc_train_tab2,
    val_tab=onehot_enc_val_tab2,
)

In [None]:
cls4, transformer4 = fit_and_score(
    transform_fun=aggr_by_unit_type(
        aggr_control_points_by_team(aggregate_team_generators)
    ),
    cls_type="log_regr",
    train_tab=onehot_enc_train_tab2,
    val_tab=onehot_enc_val_tab2,
)

## Adding Turn NO Information

In [None]:
# initialization cell
colnames3 = list(
    train_tab.columns[
        train_tab.columns.str.contains("gadget")
        | train_tab.columns.str.contains("weapon")
        | train_tab.columns.str.contains("armor.id")
    ]
) + ["version", "map_name", "turn_no"]

onehot_enc_train_tab3 = get_dummies(train_tab, drop_before=colnames3)
onehot_enc_val_tab3 = onehot_enc(val_tab, onehot_enc_train_tab3, colnames3)
onehot_enc_test_tab3 = onehot_enc(test_tab, onehot_enc_train_tab3, colnames3)

## Fine Tuning

In [None]:
transformer = transformer3
x_train = transformer(onehot_enc_train_tab2)
y_train = onehot_enc_train_tab2.winner
x_val = transformer(onehot_enc_val_tab2)
y_val = onehot_enc_val_tab2.winner
x_test = transformer(onehot_enc_test_tab2)

In [None]:
transformer(onehot_enc_val_tab2, ret_pd=True)

In [None]:
trees = ExtraTreesClassifier(
    n_estimators=100,
    random_state=3,
    bootstrap=True,
    max_depth=12,
    min_samples_leaf=50,
)
fit_and_score_xy(trees)

## Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

gb_clf = GradientBoostingClassifier(
    n_estimators=1000,
    n_iter_no_change=10,
    learning_rate=0.1,
    tol=1e-5,
    random_state=3,
    verbose=3,
    warm_start=True,
    subsample=0.5,
    validation_fraction=0.2,
    max_depth=5,
)
gb_clf.fit(x_train, y_train)

In [None]:
score_xy(gb_clf)

In [None]:
predict_and_save_ans(gb_clf, x_test, (x_val, y_val), "ans_gb_clf")

In [None]:
import xgboost as xgb
from hypopt import GridSearch

## Training on Augmented Data

In [None]:
all_augmented_train_data = pd.read_csv("train_data/all_turns_data_tabular_format.csv")

In [None]:
augmented_train_tab = prepare_tab_data(all_augmented_train_data).loc[train_ids, :]

In [None]:
augmented_train_tab

In [None]:
onehot_enc_augmented_train_tab = onehot_enc(
    augmented_train_tab, onehot_enc_train_tab2, colnames
)

In [None]:
onehot_enc_augmented_train_tab

In [None]:
x_train = transformer(onehot_enc_augmented_train_tab)
y_train = onehot_enc_augmented_train_tab.winner

In [None]:
gb_clf_aug = GradientBoostingClassifier(
    n_estimators=1000,
    n_iter_no_change=10,
    learning_rate=0.05,
    tol=1e-5,
    random_state=3,
    verbose=3,
    warm_start=True,
    subsample=0.01,
    validation_fraction=0.1,
    #     max_depth=8,
)
gb_clf_aug.fit(x_train, y_train)

In [None]:
score_xy(gb_clf_aug)

In [None]:
predict_and_save_ans(gb_clf_aug, x_test, (x_val, y_val), "ans_gb_clf_aug")

## Where Classifiers Perform Worse or Better

In [None]:
scores_by_turn_no_and_mode(trees, onehot_enc_val_tab2, transformer, "Devastator")

In [None]:
scores_by_turn_no_and_mode(trees, onehot_enc_val_tab2, transformer, "Domination")

## Predicting Result Based on Gameplay History

In [None]:
# initialization cell
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from collections import OrderedDict

from pytorch_fit import fit
from pytorch_fit.metrics import Accuracy
from pytorch_fit.visuals import plot_history
from pytorch_fit.stopping import EarlyStopping

In [None]:
n = 3
max_len = int(max(test_tab.turn_no.max(), all_train_tab.turn_no.max())) + 1
x_train = np.zeros((n, max_len, 1))
y_train = np.full((n, max_len, 1), 2)

prefix_dir = "train_data"
for (
    i,
    game_id,
) in zip(range(n), train_ids):
    game_tab = pd.read_csv(f"{prefix_dir}/turns_tabular/{game_id}.csv")
    winner = game_tab.winner
    l = len(winner)
    y_train[i, :l, 0] = (winner - 0.5) * 2
    game_tab = transformer(onehot_enc(game_tab, onehot_enc_train_tab2))
    probas = predict_ans(trees, game_tab)
    x_train[i, :l, 0] = (probas - 0.5) * 2

print(x_train)
print(y_train)

In [None]:
# initialization cell
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
np.apply_along_axis(lambda x: x.argmax(), 1, y_train)

In [None]:
y_train[2, :].argmax()

In [None]:
y_train[1, :].view(bool)

In [None]:
def loss(y_pred, y_true, max_len=None, only_last=False):
    y_true = y_true.copy()
    if max_len is not None:
        y_true[:, max_len:,:] = 2
    np.apply_along_axis(lambda x: x.argmax(), 1, y_train)
    if only_last:
        y_true
#     c = list(y_true.shape[0:-1]) + list(y_true.shape[-1:]) + [1]
#     r = y_true.reshape(c)
#     j = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).to(device)
#     J = torch.sum(torch.square(j - r) * y_pred)
#     return J

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = 1
        self.lstm = nn.LSTM(
            self.input_size, self.hidden_size, self.num_layers, batch_first=True
        )

    def forward(self, x):
        batch_size = x.shape[0]
        h_0 = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size)).to(
            device
        )
        c_0 = Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size)).to(
            device
        )
        out, (hn, cn) = self.lstm(x, (h_0, c_0))
        return out


def LstmNet1():
    return nn.Sequential(
        OrderedDict(
            [
                ("lstm1", LSTM(input_size=1, hidden_size=16)),
                ("lstm2", LSTM(input_size=16, hidden_size=1)),
                ("softmax", nn.Softmax(dim=1)),
            ]
        )
    ).to(device)

In [None]:
lstm1 = LstmNet1()
lstm1.forward(torch.Tensor(x_train).to(device))