In [1]:
import sys

import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm import tqdm

In [2]:
df = pd.read_csv(
    "./data/data_reduced.csv",
    usecols={
        "dateid",
        "platform",
        "gamemode",
        "mapname",
        "matchid",
        "roundnumber",
        "objectivelocation",
        "winrole",
        "endroundreason",
        "roundduration",
        "clearancelevel",
        "skillrank",
        "role",
        "team",
        "haswon",
        "operator",
        "nbkills",
        "isdead"
    },
    dtype={
        "platform": "category",
        "gamemode": "category",
        "mapname": "category",
        "matchid": "int64",
        "roundnumber": "int64",
        "objectivelocation": "category",
        "winrole": "category",
        "endroundreason": "category",
        "roundduration": "int64",
        "clearancelevel": "int64",
        "skillrank": "category",
        "role": "category",
        "team": "int64",
        "haswon": "boolean",
        "operator": "category",
        "nbkills": "int64",
        "isdead": "boolean"
    },
    parse_dates=["dateid"]
)

Features:
- Map
- 5 Attacker Operators
- 5 Defender Operators

In [3]:
operator_list = df["operator"].unique().to_list()

In [4]:
df_playercount = df.groupby(["matchid", "roundnumber"])["operator"].count().reset_index(name="playercount")
df_playercount

Unnamed: 0,matchid,roundnumber,playercount
0,147541,1,10
1,147541,2,9
2,147541,3,10
3,147541,4,10
4,147541,5,10
...,...,...,...
402805,4294898609,5,10
402806,4294898609,6,10
402807,4294898609,7,10
402808,4294898609,8,10


In [5]:
df = pd.merge(left=df, right=df_playercount, on=["matchid", "roundnumber"], how="inner")
print(df.shape)
df.head()

(3889573, 19)


Unnamed: 0,dateid,platform,gamemode,mapname,matchid,roundnumber,objectivelocation,winrole,endroundreason,roundduration,clearancelevel,skillrank,role,team,haswon,operator,nbkills,isdead,playercount
0,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,147,Platinum,Attacker,1,True,JTF2-BUCK,1,True,10
1,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,40,Unranked,Defender,0,False,GIGN-ROOK,0,True,10
2,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,56,Unranked,Defender,0,False,SWAT-CASTLE,1,True,10
3,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,171,Gold,Attacker,1,True,SAT-HIBANA,1,False,10
4,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,165,Platinum,Attacker,1,True,G.E.O.-JACKAL,1,False,10


In [6]:
df = df.loc[df["playercount"] == 10]
df.shape

(2929600, 19)

Drop every column that isn't needed anymore

In [7]:
df = df.drop(columns=["dateid", "platform", "gamemode", "endroundreason", "roundduration", "clearancelevel", "team", "haswon", "nbkills", "isdead", "playercount", "skillrank"], errors="ignore")
df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator
0,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,JTF2-BUCK
1,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,GIGN-ROOK
2,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,SWAT-CASTLE
3,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,SAT-HIBANA
4,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,G.E.O.-JACKAL


Calculate the operators per round

In [8]:
for operator in operator_list:
    df[f"OP_{operator}"] = np.nan

df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,JTF2-BUCK,,,,...,,,,,,,,,,
1,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,GIGN-ROOK,,,,...,,,,,,,,,,
2,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,SWAT-CASTLE,,,,...,,,,,,,,,,
3,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,SAT-HIBANA,,,,...,,,,,,,,,,
4,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,G.E.O.-JACKAL,,,,...,,,,,,,,,,


In [None]:
frames = []
groupby_round = df.groupby(["matchid", "roundnumber"])
del df

for group, tbl in tqdm(groupby_round):

    # tbl["ops"] = str(tbl["operator"].values.to_list())

    for operator in operator_list:
        tbl[f"OP_{operator}"] = operator in tbl["operator"].values

    # tbl = tbl.drop_duplicates(subset=["matchid", "roundnumber"])
    # tbl.drop(columns=["role"])

    frames.append(tbl)

 21%|██▏       | 62514/292960 [08:00<35:57, 106.79it/s] 

In [35]:
# df_test = pd.concat(frames).reset_index(drop=True)
df_test = pd.concat(frames)
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Attacker,NAVYSEAL-BLACKBEARD,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Defender,SWAT-PULSE,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Attacker,SAT-HIBANA,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Attacker,GSG9-IQ,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Attacker,GIGN-TWITCH,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,HEREFORD_BASE,1530052161,8,BRIEFING_ROOM-ARMORY,Defender,Defender,G.E.O.-MIRA,False,False,True,...,False,False,False,False,False,False,False,False,False,False
96,HEREFORD_BASE,1530052161,8,BRIEFING_ROOM-ARMORY,Defender,Attacker,SAS-SLEDGE,False,False,True,...,False,False,False,False,False,False,False,False,False,False
97,HEREFORD_BASE,1530052161,8,BRIEFING_ROOM-ARMORY,Defender,Defender,GSG9-BANDIT,False,False,True,...,False,False,False,False,False,False,False,False,False,False
98,HEREFORD_BASE,1530052161,8,BRIEFING_ROOM-ARMORY,Defender,Defender,SAS-MUTE,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [14]:
df_test = df_test.drop_duplicates(subset=["matchid", "roundnumber"])
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
50,BARTLETT_U.,1529883301,1,CLASSROOM-LIBRARY,Attacker,Attacker,NAVYSEAL-BLACKBEARD,False,False,False,...,False,False,False,False,False,False,False,False,False,False
10,BARTLETT_U.,1529883301,2,READING_ROOM-LIBRARY,Defender,Defender,GIGN-ROOK,False,True,True,...,False,False,False,False,False,False,False,False,False,False
40,BARTLETT_U.,1529883301,3,CLASSROOM-LIBRARY,Defender,Defender,GSG9-JAGER,False,True,False,...,False,False,False,False,False,False,False,False,False,False
20,BARTLETT_U.,1529883301,4,KITCHEN-PIANO_ROOM,Attacker,Defender,G.E.O.-MIRA,False,True,True,...,False,False,False,False,False,False,False,False,False,False
60,BARTLETT_U.,1529883301,5,READING_ROOM-LIBRARY,Attacker,Defender,SWAT-PULSE,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12854,HOUSE,2108803141,7,KID'S_BEDROOM-WORKSHOP,Attacker,Attacker,BOPE-CAPITAO,False,False,False,...,False,False,True,True,False,False,False,False,False,False
12894,HOUSE,2108803141,8,TRAINING_ROOM-GARAGE,Attacker,Defender,SAS-MUTE,True,True,True,...,False,False,True,False,False,False,False,False,False,False
12864,HOUSE,2108803141,9,KID'S_BEDROOM-WORKSHOP,Attacker,Attacker,G.E.O.-JACKAL,False,False,False,...,False,False,False,True,False,False,False,False,False,False
12924,HEREFORD_BASE,2119229301,2,BRIEFING_ROOM-ARMORY,Defender,Defender,SWAT-PULSE,True,True,False,...,False,False,False,False,False,False,False,False,False,False


In [154]:
df_test.to_csv("./data/hot-encoded-data.csv")
print("congrats u did it!")

In [155]:
# df_ops_per_round = df.groupby(["matchid", "roundnumber"])["operator"].apply(list).reset_index()
# df_ops_per_round

In [156]:
# def one_hot_encode_ops(row):
#     # for operator in operator_list:
#     #     row[f"OP_{operator}"] = int(operator in row["operator"])
#
#
#     return row

In [157]:
# df_ops_per_round.head(10).apply(one_hot_encode_ops, axis=1)
# df_ops_per_round = df_ops_per_round.head(1000).apply(one_hot_encode_ops, axis=1)
# print(df_ops_per_round.shape)
# df_ops_per_round.head()

In [158]:
# for operator in operator_list:
#     df_ops_per_round[f"OP_{operator}"] = int(operator in df_ops_per_round["operator"])
#
# df_ops_per_round

In [159]:
# df_sample = df.loc[(df["mapname"] == "CHALET") | (df["mapname"] == "FAVELAS") | (df["mapname"] == "BARTLETT_U.") | (df["mapname"] == "HOUSE")]
# df_sample = df.loc[(df["mapname"] == "CHALET")]
# df_sample = df
# df_sample

In [160]:
# df_sample_2 = df_sample.groupby(["matchid", "roundnumber"])["operator"].apply(list).reset_index()
# df_sample_2

In [161]:
# df_sample_3 = pd.merge(left=df_sample_2, right=df_sample[["matchid", "roundnumber", "mapname", "objectivelocation", "winrole"]], on=["matchid", "roundnumber"], how="inner").drop_duplicates(subset=["matchid", "roundnumber"]).drop(columns=["matchid", "roundnumber"])
# df_sample_3

In [162]:
# df_sample_3["nboperators"] = df_sample_3["operator"].apply(len)
# df_sample_3

In [163]:
# df_sample_4 = df_sample_3.loc[df_sample_3["nboperators"] == 10]
# df_sample_4 = df_sample_4.drop(columns=["nboperators"]).reset_index(drop=True)
# df_sample_4

In [164]:
# df_sample_5 = pd.DataFrame(df_sample_4['operator'].tolist(), columns=[f"operator_{x}" for x in range(1, 11)])
# df_sample_5

In [165]:
# df_sample_6 = pd.concat([df_sample_4, df_sample_5], axis=1).drop(columns=["operator"])
# df_sample_6

In [166]:
# df_labels = df_sample_6["winrole"]
# df_labels

In [167]:
# df_sample_7 = df_sample_6.drop(columns=["winrole", "objectivelocation"])
# df_sample_7

In [30]:
df_labels = df_test["winrole"]
df_features = df_test.drop(columns=["matchid", "roundnumber", "winrole", "skillrank", "role", "operator"])
print(df_labels.shape)
print(df_features.shape)

(10000,)
(10000, 37)


In [31]:
df_features.head()

Unnamed: 0,mapname,objectivelocation,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,OP_SWAT-CASTLE,OP_GSG9-JAGER,OP_NAVYSEAL-BLACKBEARD,OP_SWAT-PULSE,OP_GIGN-MONTAGNE,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
24693,BANK,EXECUTIVE_LOUNGE-CEO_OFFICE,False,True,False,False,False,False,True,True,...,False,False,False,True,False,False,False,False,False,False
24683,BANK,LOCKERS-CCTV_ROOM,False,True,True,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
24723,BANK,TELLERS'_OFFICE-ARCHIVES,False,True,False,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
24673,BANK,EXECUTIVE_LOUNGE-CEO_OFFICE,True,True,False,False,True,False,False,False,...,False,False,True,False,False,True,False,False,False,False
24663,BANK,LOCKERS-CCTV_ROOM,False,True,True,False,False,False,False,True,...,False,False,False,True,False,False,False,False,False,False


In [35]:
features = df_features.to_numpy()
features[:, 2:] = features[:, 2:].astype(int)
features

array([['BANK', 'EXECUTIVE_LOUNGE-CEO_OFFICE', 0, ..., 0, 0, 0],
       ['BANK', 'LOCKERS-CCTV_ROOM', 0, ..., 0, 0, 0],
       ['BANK', "TELLERS'_OFFICE-ARCHIVES", 0, ..., 0, 0, 0],
       ...,
       ['HEREFORD_BASE', 'DUMMY_DEPOT-STORAGE', 0, ..., 0, 0, 0],
       ['HEREFORD_BASE', 'DUMMY_DEPOT-STORAGE', 0, ..., 0, 0, 0],
       ['HEREFORD_BASE', 'TV_ROOM-KITCHEN', 0, ..., 0, 0, 0]],
      dtype=object)

In [36]:
labels = df_labels.to_numpy()
labels

array(['Defender', 'Defender', 'Defender', ..., 'Attacker', 'Attacker',
       'Attacker'], dtype=object)

In [37]:
le = preprocessing.LabelEncoder()
# for col in range(features.shape[1]):
features[:, 0] = le.fit_transform(features[:, 0])
features[:, 1] = le.fit_transform(features[:, 1])
features

array([[0, 28, 0, ..., 0, 0, 0],
       [0, 43, 0, ..., 0, 0, 0],
       [0, 54, 0, ..., 0, 0, 0],
       ...,
       [8, 27, 0, ..., 0, 0, 0],
       [8, 27, 0, ..., 0, 0, 0],
       [8, 56, 0, ..., 0, 0, 0]], dtype=object)

In [38]:
labels = le.fit_transform(labels)
labels

array([1, 1, 1, ..., 0, 0, 0])

In [52]:
# X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=.3)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

clf = MultinomialNB()
clf.fit(X_train, y_train)

print("Accuracy:", round(100 * clf.score(X_train, y_train), 2), "%")

(7000, 37)
(3000, 37)
(7000,)
(3000,)
Accuracy: 53.19 %


Feature Reduktion

In [53]:
clf = MultinomialNB()
clf.fit(X_train[:, 0:1], y_train)

print("Accuracy:", round(100 * clf.score(X_test[:, 0:1], y_test), 2), "%")

Accuracy: 51.53 %


In [66]:
df_tmp = pd.DataFrame({
    "map": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    "higherskill": [1, 1, 0, 0, 1, 1, 1, 1, 1, 1],
    "win": [1, 1, 1, 0, 1, 0, 1, 1, 1, 1]
})
df_tmp

Unnamed: 0,map,higherskill,win
0,1,1,1
1,1,1,1
2,1,0,1
3,1,0,0
4,1,1,1
5,1,1,0
6,1,1,1
7,1,1,1
8,1,1,1
9,1,1,1


In [72]:
X_train, X_test, y_train, y_test = train_test_split(df_tmp[["map", "higherskill"]], df_tmp["win"], test_size=.2)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

clf = MultinomialNB()
clf.fit(X_train, y_train)

print("Accuracy:", round(100 * clf.score(X_train, y_train), 2), "%")

(8, 2)
(2, 2)
(8,)
(2,)
Accuracy: 75.0 %


In [78]:
df_test[["mapname", "objectivelocation"]].value_counts()

mapname           objectivelocation                      
CLUB_HOUSE        CHURCH-ARSENAL_ROOM                        374
HOUSE             TRAINING_ROOM-GARAGE                       349
HEREFORD_BASE     BRIEFING_ROOM-ARMORY                       344
OREGON            LAUDRY_ROOM-SUPPLY_ROOM                    338
KANAL             SERVER_ROOM-CONTROL_ROOM                   338
BORDER            ARMORY_LOCKERS-ARCHIVES                    328
KAFE_DOSTOYEVSKY  FIREPLACE_HALL-MINING_ROOM                 311
BANK              LOCKERS-CCTV_ROOM                          310
CHALET            WINE_CELLAR-SNOWMOBILE_GARAGE              305
CONSULATE         GARAGE-CAFETERIA                           293
YACHT             SERVER_ROOM-ENGINE_STORAGE                 274
HOUSE             KID'S_BEDROOM-WORKSHOP                     265
FAVELAS           2F_FOOTBALL_BEDROOM-2F_FOOTBALL_OFFICE     261
SKYSCRAPER        2F_KARAOKE-2F_TEA_ROOM                     259
PLANE             MEETING_ROOM-E

In [80]:
df_test.loc[(df_test["mapname"] == "CLUB_HOUSE") & (df_test["objectivelocation"] == "CHURCH-ARSENAL_ROOM")].reset_index(drop=True)["winrole"].value_counts()

Defender    205
Attacker    169
Name: winrole, dtype: int64