In [27]:
import sys

import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm import tqdm

In [28]:
df = pd.read_csv(
    "./data/hot-encoded-data.csv",
    dtype={
        "mapname": "category",
        "matchid": "int64",
        "roundnumber": "int64",
        "objectivelocation": "category",
        "winrole": "category",
    },
    index_col=0
)
df.shape

(292960, 40)

In [29]:
df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,OP_SWAT-CASTLE,OP_GSG9-JAGER,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,0,0,1,0,1,...,0,0,0,0,0,1,0,0,0,0
4,BORDER,251521,1,ARMORY_LOCKERS-ARCHIVES,Defender,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0


In [30]:
features = df.drop(columns=["matchid", "roundnumber", "winrole"]).to_numpy()
labels = df["winrole"].to_numpy()

print(features.shape)
print(labels.shape)

(292960, 37)
(292960,)


In [31]:
le = preprocessing.LabelEncoder()
features[:, 0] = le.fit_transform(features[:, 0])
features[:, 1] = le.fit_transform(features[:, 1])
features

array([[5, 4, 0, ..., 0, 0, 0],
       [5, 10, 0, ..., 0, 0, 0],
       [5, 10, 0, ..., 0, 0, 0],
       ...,
       [9, 55, 1, ..., 0, 0, 0],
       [9, 55, 0, ..., 0, 0, 0],
       [9, 32, 1, ..., 0, 0, 0]], dtype=object)

In [32]:
labels = le.fit_transform(labels)
labels

array([0, 1, 0, ..., 1, 1, 0])

In [33]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(205072, 37)
(87888, 37)
(205072,)
(87888,)


In [34]:
clf = MultinomialNB()
clf.fit(X_train, y_train)

print("Accuracy:", round(100 * clf.score(X_train, y_train), 2), "%")

Accuracy: 53.31 %


Only use map and objectivelocation

In [35]:
clf = MultinomialNB()
clf.fit(X_train[:, :2], y_train)

print("Accuracy:", round(100 * clf.score(X_train[:, :2], y_train), 2), "%")

Accuracy: 52.41 %


Test the methodology

In [36]:
df_tmp = pd.DataFrame({
    "map": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    "higherskill": [1, 1, 0, 0, 1, 1, 1, 1, 1, 1],
    "win": [1, 1, 1, 0, 1, 0, 1, 1, 1, 1]
})
df_tmp

Unnamed: 0,map,higherskill,win
0,1,1,1
1,1,1,1
2,1,0,1
3,1,0,0
4,1,1,1
5,1,1,0
6,1,1,1
7,1,1,1
8,1,1,1
9,1,1,1


In [37]:

X_train, X_test, y_train, y_test = train_test_split(df_tmp[["map", "higherskill"]], df_tmp["win"], test_size=.2)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

clf = MultinomialNB()
clf.fit(X_train, y_train)

print("Accuracy:", round(100 * clf.score(X_train, y_train), 2), "%")

(8, 2)
(2, 2)
(8,)
(2,)
Accuracy: 75.0 %


In [38]:
df.drop(columns=["matchid", "roundnumber"]).value_counts().reset_index(drop=True)

0         13
1         10
2         10
3          9
4          9
          ..
289169     1
289170     1
289171     1
289172     1
289173     1
Length: 289174, dtype: int64

Reduce features

In [44]:
df = df.loc[
    ~(df["OP_GIGN-RESERVE"] == 1)
    & ~(df["OP_GSG9-RESERVE"] == 1)
    & ~(df["OP_SAS-RESERVE"] == 1)
    & ~(df["OP_SPETSNAZ-RESERVE"] == 1)
    & ~(df["OP_SWAT-RESERVE"] == 1)
]
df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,OP_SWAT-CASTLE,OP_GSG9-JAGER,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,0,0,1,0,1,...,0,0,0,0,0,1,0,0,0,0
5,BORDER,251521,2,ARMORY_LOCKERS-ARCHIVES,Attacker,1,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0


In [45]:
df = df.drop(columns=["OP_SWAT-RESERVE", "OP_SPETSNAZ-RESERVE", "OP_GIGN-RESERVE", "OP_GSG9-RESERVE", "OP_SAS-RESERVE"])
df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,OP_SWAT-CASTLE,OP_GSG9-JAGER,...,OP_SWAT-ASH,OP_SAS-THATCHER,OP_SAS-SMOKE,OP_SWAT-THERMITE,OP_SPETSNAZ-GLAZ,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,0,1,0,0,1,...,1,0,1,0,1,0,0,0,0,0
1,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,0,1,1,0,0,...,1,0,1,0,1,0,0,0,0,0
2,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,0,0,0,0,1,...,1,1,0,0,0,0,0,0,0,1
3,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,0,0,1,0,1,...,1,0,0,0,1,0,0,0,0,1
5,BORDER,251521,2,ARMORY_LOCKERS-ARCHIVES,Attacker,1,0,1,0,1,...,1,1,0,0,0,0,0,0,0,0


In [49]:
op_list = df.columns[5:].to_list()
op_list

['OP_JTF2-BUCK',
 'OP_GIGN-ROOK',
 'OP_G.E.O.-MIRA',
 'OP_SWAT-CASTLE',
 'OP_GSG9-JAGER',
 'OP_NAVYSEAL-BLACKBEARD',
 'OP_SWAT-PULSE',
 'OP_GIGN-MONTAGNE',
 'OP_GIGN-TWITCH',
 'OP_BOPE-CAVEIRA',
 'OP_SAS-MUTE',
 'OP_SAT-HIBANA',
 'OP_GIGN-DOC',
 'OP_JTF2-FROST',
 'OP_GSG9-IQ',
 'OP_NAVYSEAL-VALKYRIE',
 'OP_SPETSNAZ-FUZE',
 'OP_SAS-SLEDGE',
 'OP_G.E.O.-JACKAL',
 'OP_GSG9-BANDIT',
 'OP_SWAT-ASH',
 'OP_SAS-THATCHER',
 'OP_SAS-SMOKE',
 'OP_SWAT-THERMITE',
 'OP_SPETSNAZ-GLAZ',
 'OP_GSG9-BLITZ',
 'OP_BOPE-CAPITAO',
 'OP_SPETSNAZ-KAPKAN',
 'OP_SPETSNAZ-TACHANKA',
 'OP_SAT-ECHO']

In [74]:
def categorize_operators(row):

    def_roamer = [
        "OP_GSG9-JAGER",
        "OP_SWAT-PULSE",
        "OP_BOPE-CAVEIRA",
        "OP_GIGN-DOC", # meh
    ]

    def_trapper = [
        "OP_JTF2-FROST",
        "OP_SPETSNAZ-KAPKAN",
    ]

    def_entry_denial = [
        "OP_SWAT-CASTLE",
        "OP_SAS-MUTE",
        "OP_NAVYSEAL-VALKYRIE", #meh
        "OP_GSG9-BANDIT",
        "OP_SAS-SMOKE",
        "OP_SAT-ECHO",
    ]

    def_anchor = [
        "OP_GIGN-ROOK",
        "OP_G.E.O.-MIRA",
        "OP_SPETSNAZ-TACHANKA",
    ]

    att_breacher = [
        "OP_JTF2-BUCK",
        "OP_SAT-HIBANA",
        "OP_SAS-SLEDGE",
        "OP_SAS-THATCHER",
        "OP_SWAT-THERMITE",
    ]

    att_intel = [
        "OP_GIGN-MONTAGNE",
        "OP_GIGN-TWITCH",
        "OP_GSG9-IQ",
        "OP_G.E.O.-JACKAL",
    ]

    att_general = [
        "OP_SPETSNAZ-FUZE",
        "OP_NAVYSEAL-BLACKBEARD",
        "OP_SWAT-ASH",
        "OP_SPETSNAZ-GLAZ",
        "OP_GSG9-BLITZ",
        "OP_BOPE-CAPITAO",
    ]

    att_breacher_count = 0
    att_intel_count = 0
    att_general_count = 0

    def_anchor_count = 0
    def_entry_denial_count = 0
    def_roamer_count = 0
    def_trapper_count = 0

    for op in op_list:

        if row[op]:

            if op in att_breacher:
                att_breacher_count = att_breacher_count + 1

            if op in att_intel:
                att_intel_count = att_intel_count + 1

            if op in att_general:
                att_general_count = att_general_count + 1

            if op in def_anchor:
                def_anchor_count = def_anchor_count + 1

            if op in def_entry_denial:
                def_entry_denial_count = def_entry_denial_count + 1

            if op in def_roamer:
                def_roamer_count = def_roamer_count + 1

            if op in def_trapper:
                def_trapper_count = def_trapper_count + 1

    row["ATT_BREACHER_COUNT"] = att_breacher_count
    row["ATT_INTEL_COUNT"] = att_intel_count
    row["ATT_GENERAL_COUNT"] = att_general_count

    row["DEF_ANCHOUR_COUNT"] = def_anchor_count
    row["DEF_ENTRY_DENIAL_COUNT"] = def_entry_denial_count
    row["DEF_ROAMER_COUNT"] = def_roamer_count
    row["DEF_TRAPPER_COUNT"] = def_trapper_count

    return row

In [83]:
def categorize_operators_fast(df):

    df["ATT_BREACHER_COUNT"]       = df["OP_JTF2-BUCK"] + df["OP_SAT-HIBANA"] + df["OP_SAS-SLEDGE"] + df["OP_SAS-THATCHER"] + df["OP_SWAT-THERMITE"]
    df["ATT_INTEL_COUNT"]          = df["OP_GIGN-MONTAGNE"] + df["OP_GIGN-TWITCH"] + df["OP_GSG9-IQ"] + df["OP_G.E.O.-JACKAL"]
    df["ATT_GENERAL_COUNT"]        = df["OP_SPETSNAZ-FUZE"] + df["OP_NAVYSEAL-BLACKBEARD"] + df["OP_SWAT-ASH"] + df["OP_SPETSNAZ-GLAZ"] + df["OP_GSG9-BLITZ"] + df["OP_BOPE-CAPITAO"]

    df["DEF_ANCHOUR_COUNT"]        = df["OP_GIGN-ROOK"] + df["OP_G.E.O.-MIRA"] + df["OP_SPETSNAZ-TACHANKA"]
    df["DEF_ENTRY_DENIAL_COUNT"]   = df["OP_SWAT-CASTLE"] + df["OP_SAS-MUTE"] + df["OP_NAVYSEAL-VALKYRIE"] + df["OP_GSG9-BANDIT"] + df["OP_SAS-SMOKE"] + df["OP_SAT-ECHO"]
    df["DEF_ROAMER_COUNT"]         = df["OP_GSG9-JAGER"] + df["OP_SWAT-PULSE"] + df["OP_BOPE-CAVEIRA"] + df["OP_GIGN-DOC"]
    df["DEF_TRAPPER_COUNT"]        = df["OP_JTF2-FROST"] + df["OP_SPETSNAZ-KAPKAN"]

    return df

In [88]:
%%timeit
df_test = df.head(100).apply(categorize_operators, axis=1)
(df_test["ATT_BREACHER_COUNT"] + df_test["ATT_INTEL_COUNT"] + df_test["ATT_GENERAL_COUNT"]+ df_test["DEF_ANCHOUR_COUNT"] + df_test["DEF_ENTRY_DENIAL_COUNT"]+ df_test["DEF_ROAMER_COUNT"] + df_test["DEF_TRAPPER_COUNT"]).value_counts()

329 ms ± 24.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [94]:
df_test = categorize_operators_fast(df)
(df_test["ATT_BREACHER_COUNT"] + df_test["ATT_INTEL_COUNT"] + df_test["ATT_GENERAL_COUNT"]+ df_test["DEF_ANCHOUR_COUNT"] + df_test["DEF_ENTRY_DENIAL_COUNT"]+ df_test["DEF_ROAMER_COUNT"] + df_test["DEF_TRAPPER_COUNT"]).value_counts()

10    266341
8          2
9          1
6          1
7          1
dtype: int64

In [97]:
df_reduced = df_test[["mapname", "winrole", "ATT_BREACHER_COUNT", "ATT_INTEL_COUNT", "ATT_GENERAL_COUNT", "DEF_ANCHOUR_COUNT", "DEF_ENTRY_DENIAL_COUNT", "DEF_ROAMER_COUNT", "DEF_TRAPPER_COUNT"]]
df_reduced

Unnamed: 0,mapname,winrole,ATT_BREACHER_COUNT,ATT_INTEL_COUNT,ATT_GENERAL_COUNT,DEF_ANCHOUR_COUNT,DEF_ENTRY_DENIAL_COUNT,DEF_ROAMER_COUNT,DEF_TRAPPER_COUNT
0,COASTLINE,Attacker,1,1,3,1,3,1,0
1,COASTLINE,Defender,1,1,3,2,3,0,0
2,COASTLINE,Attacker,3,1,1,0,2,3,0
3,COASTLINE,Defender,1,2,2,1,3,1,0
5,BORDER,Attacker,3,1,1,1,2,1,1
...,...,...,...,...,...,...,...,...,...
292955,HOUSE,Defender,1,1,3,1,1,2,1
292956,HOUSE,Attacker,1,0,4,1,2,1,1
292957,HOUSE,Defender,2,1,2,1,2,1,1
292958,HOUSE,Defender,2,0,3,2,1,1,1


In [102]:
features = df_reduced.drop(columns=["winrole"]).to_numpy()
labels = df_reduced["winrole"]

print(features.shape)
print(labels.shape)

(266346, 8)
(266346,)


In [103]:
le = preprocessing.LabelEncoder()
features[:, 0] = le.fit_transform(features[:, 0])
features[:, 1] = le.fit_transform(features[:, 1])
features

array([[5, 1, 1, ..., 3, 1, 0],
       [5, 1, 1, ..., 3, 0, 0],
       [5, 3, 1, ..., 2, 3, 0],
       ...,
       [9, 2, 1, ..., 2, 1, 1],
       [9, 2, 0, ..., 1, 1, 1],
       [9, 2, 1, ..., 1, 2, 1]], dtype=object)

In [104]:
labels = le.fit_transform(labels)
labels

array([0, 1, 0, ..., 1, 1, 0])

In [106]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

clf = MultinomialNB()
clf.fit(X_train, y_train)

print("Accuracy:", round(100 * clf.score(X_train, y_train), 2), "%")

(186442, 8)
(79904, 8)
(186442,)
(79904,)
Accuracy: 52.33 %
