In [1]:
import sys

import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tqdm import tqdm

In [2]:
df = pd.read_csv(
    "./data/data_reduced.csv",
    usecols={
        "dateid",
        "platform",
        "gamemode",
        "mapname",
        "matchid",
        "roundnumber",
        "objectivelocation",
        "winrole",
        "endroundreason",
        "roundduration",
        "clearancelevel",
        "skillrank",
        "role",
        "team",
        "haswon",
        "operator",
        "nbkills",
        "isdead"
    },
    dtype={
        "platform": "category",
        "gamemode": "category",
        "mapname": "category",
        "matchid": "int64",
        "roundnumber": "int64",
        "objectivelocation": "category",
        "winrole": "category",
        "endroundreason": "category",
        "roundduration": "int64",
        "clearancelevel": "int64",
        "skillrank": "category",
        "role": "category",
        "team": "int64",
        "haswon": "boolean",
        "operator": "category",
        "nbkills": "int64",
        "isdead": "boolean"
    },
    parse_dates=["dateid"]
)

Features:
- Map
- 5 Attacker Operators
- 5 Defender Operators

In [3]:
operator_list = df["operator"].unique().to_list()

In [4]:
df_playercount = df.groupby(["matchid", "roundnumber"])["operator"].count().reset_index(name="playercount")
df_playercount

Unnamed: 0,matchid,roundnumber,playercount
0,147541,1,10
1,147541,2,9
2,147541,3,10
3,147541,4,10
4,147541,5,10
...,...,...,...
402805,4294898609,5,10
402806,4294898609,6,10
402807,4294898609,7,10
402808,4294898609,8,10


In [5]:
df = pd.merge(left=df, right=df_playercount, on=["matchid", "roundnumber"], how="inner")
print(df.shape)
df.head()

(3889573, 19)


Unnamed: 0,dateid,platform,gamemode,mapname,matchid,roundnumber,objectivelocation,winrole,endroundreason,roundduration,clearancelevel,skillrank,role,team,haswon,operator,nbkills,isdead,playercount
0,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,147,Platinum,Attacker,1,True,JTF2-BUCK,1,True,10
1,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,40,Unranked,Defender,0,False,GIGN-ROOK,0,True,10
2,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,56,Unranked,Defender,0,False,SWAT-CASTLE,1,True,10
3,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,171,Gold,Attacker,1,True,SAT-HIBANA,1,False,10
4,2017-02-12,PC,BOMB,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,DefendersEliminated,246,165,Platinum,Attacker,1,True,G.E.O.-JACKAL,1,False,10


In [6]:
df = df.loc[df["playercount"] == 10]
df.shape

(2929600, 19)

Drop every column that isn't needed anymore

In [7]:
df = df.drop(columns=["dateid", "platform", "gamemode", "endroundreason", "roundduration", "clearancelevel", "team", "haswon", "nbkills", "isdead", "playercount", "skillrank"], errors="ignore")
df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator
0,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,JTF2-BUCK
1,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,GIGN-ROOK
2,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,SWAT-CASTLE
3,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,SAT-HIBANA
4,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,G.E.O.-JACKAL


Calculate the operators per round

In [8]:
for operator in operator_list:
    df[f"OP_{operator}"] = np.nan

df.head()

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,JTF2-BUCK,,,,...,,,,,,,,,,
1,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,GIGN-ROOK,,,,...,,,,,,,,,,
2,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Defender,SWAT-CASTLE,,,,...,,,,,,,,,,
3,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,SAT-HIBANA,,,,...,,,,,,,,,,
4,BARTLETT_U.,1529883301,6,ROWING_MUSEUM-TROPHY_ROOM,Attacker,Attacker,G.E.O.-JACKAL,,,,...,,,,,,,,,,


In [9]:
frames = []
groupby_round = df.groupby(["matchid", "roundnumber"])
del df

for group, tbl in tqdm(groupby_round):

    # tbl["ops"] = str(tbl["operator"].values.to_list())

    for operator in operator_list:
        tbl[f"OP_{operator}"] = operator in tbl["operator"].values

    # tbl = tbl.drop_duplicates(subset=["matchid", "roundnumber"])
    # tbl.drop(columns=["role"])

    frames.append(tbl)

100%|██████████| 292960/292960 [38:20<00:00, 127.33it/s] 


In [10]:
print(sys.getsizeof(frames) / (1024 ** 2), "MB")

2.2702102661132812 MB


In [12]:
# df_test = pd.concat(frames).reset_index(drop=True)
df_test = pd.concat(frames[:10_000])
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
2675174,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-BANDIT,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2675175,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-JAGER,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2675176,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Attacker,NAVYSEAL-BLACKBEARD,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2675177,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,SAS-MUTE,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2675178,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GIGN-ROOK,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640198,CLUB_HOUSE,144273241,5,BAR-STOCK_ROOM,Defender,Attacker,NAVYSEAL-BLACKBEARD,False,True,False,...,False,True,False,False,False,False,False,False,False,False
640199,CLUB_HOUSE,144273241,5,BAR-STOCK_ROOM,Defender,Defender,GIGN-ROOK,False,True,False,...,False,True,False,False,False,False,False,False,False,False
640200,CLUB_HOUSE,144273241,5,BAR-STOCK_ROOM,Defender,Defender,GSG9-BANDIT,False,True,False,...,False,True,False,False,False,False,False,False,False,False
640201,CLUB_HOUSE,144273241,5,BAR-STOCK_ROOM,Defender,Attacker,GIGN-MONTAGNE,False,True,False,...,False,True,False,False,False,False,False,False,False,False


In [14]:
df_test = pd.DataFrame(np.vstack(frames), columns=frames[0].columns)
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-BANDIT,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-JAGER,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Attacker,NAVYSEAL-BLACKBEARD,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,SAS-MUTE,False,True,False,...,False,False,False,False,False,False,False,False,False,False
4,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GIGN-ROOK,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2929595,HOUSE,4294898609,9,KID'S_BEDROOM-WORKSHOP,Attacker,Attacker,GIGN-TWITCH,True,True,False,...,False,False,False,True,False,False,False,False,False,False
2929596,HOUSE,4294898609,9,KID'S_BEDROOM-WORKSHOP,Attacker,Defender,GIGN-ROOK,True,True,False,...,False,False,False,True,False,False,False,False,False,False
2929597,HOUSE,4294898609,9,KID'S_BEDROOM-WORKSHOP,Attacker,Defender,SWAT-PULSE,True,True,False,...,False,False,False,True,False,False,False,False,False,False
2929598,HOUSE,4294898609,9,KID'S_BEDROOM-WORKSHOP,Attacker,Attacker,SPETSNAZ-FUZE,True,True,False,...,False,False,False,True,False,False,False,False,False,False


In [15]:
df_test = df_test.drop_duplicates(subset=["matchid", "roundnumber"])
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-BANDIT,False,True,False,...,False,False,False,False,False,False,False,False,False,False
10,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,Attacker,SPETSNAZ-GLAZ,False,True,True,...,False,False,False,False,False,False,False,False,False,False
20,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,Attacker,SAS-SLEDGE,False,False,False,...,False,False,False,False,False,True,False,False,False,False
30,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,Defender,G.E.O.-MIRA,False,False,True,...,False,False,False,False,False,True,False,False,False,False
40,BORDER,251521,1,ARMORY_LOCKERS-ARCHIVES,Defender,Defender,GSG9-JAGER,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2929550,HOUSE,4294898609,5,KID'S_BEDROOM-WORKSHOP,Defender,Attacker,SAS-SLEDGE,False,True,False,...,False,False,False,True,False,False,False,False,False,False
2929560,HOUSE,4294898609,6,LIVING_ROOM-TRAINING_ROOM,Attacker,Defender,NAVYSEAL-VALKYRIE,False,True,False,...,False,False,True,False,False,False,False,False,False,False
2929570,HOUSE,4294898609,7,TRAINING_ROOM-GARAGE,Defender,Attacker,SAS-THATCHER,True,True,False,...,False,False,False,True,False,False,False,False,False,False
2929580,HOUSE,4294898609,8,TRAINING_ROOM-GARAGE,Defender,Attacker,SWAT-THERMITE,False,True,True,...,False,False,True,False,False,False,False,False,False,False


In [16]:
df_test = df_test.reset_index(drop=True)
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,role,operator,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,Defender,GSG9-BANDIT,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,Attacker,SPETSNAZ-GLAZ,False,True,True,...,False,False,False,False,False,False,False,False,False,False
2,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,Attacker,SAS-SLEDGE,False,False,False,...,False,False,False,False,False,True,False,False,False,False
3,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,Defender,G.E.O.-MIRA,False,False,True,...,False,False,False,False,False,True,False,False,False,False
4,BORDER,251521,1,ARMORY_LOCKERS-ARCHIVES,Defender,Defender,GSG9-JAGER,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292955,HOUSE,4294898609,5,KID'S_BEDROOM-WORKSHOP,Defender,Attacker,SAS-SLEDGE,False,True,False,...,False,False,False,True,False,False,False,False,False,False
292956,HOUSE,4294898609,6,LIVING_ROOM-TRAINING_ROOM,Attacker,Defender,NAVYSEAL-VALKYRIE,False,True,False,...,False,False,True,False,False,False,False,False,False,False
292957,HOUSE,4294898609,7,TRAINING_ROOM-GARAGE,Defender,Attacker,SAS-THATCHER,True,True,False,...,False,False,False,True,False,False,False,False,False,False
292958,HOUSE,4294898609,8,TRAINING_ROOM-GARAGE,Defender,Attacker,SWAT-THERMITE,False,True,True,...,False,False,True,False,False,False,False,False,False,False


In [20]:
for operator in operator_list:
    df_test[f"OP_{operator}"] = df_test[f"OP_{operator}"].astype("int64")

df_test = df_test.drop(columns=["role", "operator"], errors="ignore")
df_test

Unnamed: 0,mapname,matchid,roundnumber,objectivelocation,winrole,OP_JTF2-BUCK,OP_GIGN-ROOK,OP_G.E.O.-MIRA,OP_SWAT-CASTLE,OP_GSG9-JAGER,...,OP_SPETSNAZ-RESERVE,OP_GSG9-BLITZ,OP_BOPE-CAPITAO,OP_SPETSNAZ-KAPKAN,OP_SPETSNAZ-TACHANKA,OP_SAT-ECHO,OP_SAS-RESERVE,OP_GIGN-RESERVE,OP_GSG9-RESERVE,OP_SWAT-RESERVE
0,COASTLINE,147541,1,1F_KITCHEN-1F_SERVICE_ENTRANCE,Attacker,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,COASTLINE,147541,3,2F_PENTHOUSE-2F_THEATER,Defender,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,COASTLINE,147541,4,2F_PENTHOUSE-2F_THEATER,Attacker,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,COASTLINE,147541,5,1F_KITCHEN-1F_SERVICE_ENTRANCE,Defender,0,0,1,0,1,...,0,0,0,0,0,1,0,0,0,0
4,BORDER,251521,1,ARMORY_LOCKERS-ARCHIVES,Defender,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292955,HOUSE,4294898609,5,KID'S_BEDROOM-WORKSHOP,Defender,0,1,0,0,1,...,0,0,0,1,0,0,0,0,0,0
292956,HOUSE,4294898609,6,LIVING_ROOM-TRAINING_ROOM,Attacker,0,1,0,0,1,...,0,0,1,0,0,0,0,0,0,0
292957,HOUSE,4294898609,7,TRAINING_ROOM-GARAGE,Defender,1,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
292958,HOUSE,4294898609,8,TRAINING_ROOM-GARAGE,Defender,0,1,1,0,1,...,0,0,1,0,0,0,0,0,0,0


In [21]:
df_test.to_csv("./data/hot-encoded-data.csv")
print("congrats u did it!")

congrats u did it!


In [155]:
# df_ops_per_round = df.groupby(["matchid", "roundnumber"])["operator"].apply(list).reset_index()
# df_ops_per_round

In [156]:
# def one_hot_encode_ops(row):
#     # for operator in operator_list:
#     #     row[f"OP_{operator}"] = int(operator in row["operator"])
#
#
#     return row

In [157]:
# df_ops_per_round.head(10).apply(one_hot_encode_ops, axis=1)
# df_ops_per_round = df_ops_per_round.head(1000).apply(one_hot_encode_ops, axis=1)
# print(df_ops_per_round.shape)
# df_ops_per_round.head()

In [158]:
# for operator in operator_list:
#     df_ops_per_round[f"OP_{operator}"] = int(operator in df_ops_per_round["operator"])
#
# df_ops_per_round

In [159]:
# df_sample = df.loc[(df["mapname"] == "CHALET") | (df["mapname"] == "FAVELAS") | (df["mapname"] == "BARTLETT_U.") | (df["mapname"] == "HOUSE")]
# df_sample = df.loc[(df["mapname"] == "CHALET")]
# df_sample = df
# df_sample

In [160]:
# df_sample_2 = df_sample.groupby(["matchid", "roundnumber"])["operator"].apply(list).reset_index()
# df_sample_2

In [161]:
# df_sample_3 = pd.merge(left=df_sample_2, right=df_sample[["matchid", "roundnumber", "mapname", "objectivelocation", "winrole"]], on=["matchid", "roundnumber"], how="inner").drop_duplicates(subset=["matchid", "roundnumber"]).drop(columns=["matchid", "roundnumber"])
# df_sample_3

In [162]:
# df_sample_3["nboperators"] = df_sample_3["operator"].apply(len)
# df_sample_3

In [163]:
# df_sample_4 = df_sample_3.loc[df_sample_3["nboperators"] == 10]
# df_sample_4 = df_sample_4.drop(columns=["nboperators"]).reset_index(drop=True)
# df_sample_4

In [164]:
# df_sample_5 = pd.DataFrame(df_sample_4['operator'].tolist(), columns=[f"operator_{x}" for x in range(1, 11)])
# df_sample_5

In [165]:
# df_sample_6 = pd.concat([df_sample_4, df_sample_5], axis=1).drop(columns=["operator"])
# df_sample_6

In [166]:
# df_labels = df_sample_6["winrole"]
# df_labels

In [None]:
# df_sample_7 = df_sample_6.drop(columns=["winrole", "objectivelocation"])
# df_sample_7