# Załadowanie paczek

In [1]:
import pandas as pd
import numpy as np
import json
import multiprocessing
from awpy.data import PLACE_DIST_MATRIX, NAV
from awpy.analytics.nav import find_closest_area
from sqlalchemy import create_engine
from tqdm import tqdm
from AnalyticModule import AnalyticModule
from multiprocessing import Pool

In [2]:
n_cores = multiprocessing.cpu_count()
n_cores

20

# Read parquet

In [3]:
def get_cols(cols, team_specific):
    team_cols = [f"{team}Player_{i}_{col}"
                     for team in ['ct', 't']
                         for i in range(1, 6)
                             for col in cols ]
    spec_cols = [f"{team}Player_{i}_{col}"
                    for team, col in team_specific
                        for i in range(1, 6)
                ]
    return team_cols + spec_cols


cols = [
    "hp", "armor", "hasHelmet", "DecoyGrenade", 
    "Flashbang", "HEGrenade", "SmokeGrenade", "fireGrenades", 
    "isBlinded"
]
add_col = ["spotters", "activeWeapon", "mainWeapon", "secondaryWeapon", "isAlive",  "ID"]
team_spec = [("ct", "hasDefuse"), ("t", "hasBomb"), ("ct", "isDefusing"), ("t", "isPlanting")]
columns = get_cols(cols + add_col, team_spec)
info_cols = ['demoId', 'matchName', 'roundNum','seconds','ctAlivePlayers', 'ctEqVal', 'tAlivePlayers', 'tEqVal', 'bombsite', 'mapName', 'winningSide']

In [4]:
states = pd.read_parquet("data/ESTA_frames.parquet", columns=columns + info_cols)
states = states.reset_index(drop=True)
states

Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_spotters,...,matchName,roundNum,seconds,ctAlivePlayers,ctEqVal,tAlivePlayers,tEqVal,bombsite,mapName,winningSide
0,100,100,False,0,0,0,0,0,False,[],...,heroic-vs-big-iem-katowice-2021,1,0.007874,5,4400,5,4250,,de_nuke,T
1,100,100,False,0,0,0,0,0,False,[],...,heroic-vs-big-iem-katowice-2021,1,0.511811,5,4400,5,4250,,de_nuke,T
2,100,100,False,0,0,0,0,0,False,[],...,heroic-vs-big-iem-katowice-2021,1,1.015748,5,4400,5,4250,,de_nuke,T
3,100,100,False,0,0,0,0,0,False,[],...,heroic-vs-big-iem-katowice-2021,1,1.519685,5,4400,5,4250,,de_nuke,T
4,100,100,False,0,0,0,0,0,False,[],...,heroic-vs-big-iem-katowice-2021,1,2.023622,5,4400,5,4250,,de_nuke,T
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,[],...,spirit-vs-copenhagen-flames-pgl-major-antwerp-...,26,81.464567,3,26800,1,24300,,de_vertigo,CT
7464294,59,94,True,0,2,0,1,0,False,[],...,spirit-vs-copenhagen-flames-pgl-major-antwerp-...,26,81.968504,3,26800,1,24300,,de_vertigo,CT
7464295,59,94,True,0,2,0,1,0,False,[],...,spirit-vs-copenhagen-flames-pgl-major-antwerp-...,26,82.472441,3,26800,1,24300,,de_vertigo,CT
7464296,59,94,True,0,2,0,1,0,False,[],...,spirit-vs-copenhagen-flames-pgl-major-antwerp-...,26,82.976378,2,26800,1,24300,,de_vertigo,CT


In [5]:
states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7464298 entries, 0 to 7464297
Columns: 181 entries, ctPlayer_1_hp to winningSide
dtypes: bool(50), float64(1), int64(85), object(45)
memory usage: 7.6+ GB


## PLACE_DIST

In [6]:
PLACE_DIST_MATRIX.keys()

dict_keys(['de_ancient', 'de_cache', 'de_cbble', 'de_dust2', 'de_inferno', 'de_mirage', 'de_nuke', 'de_overpass', 'de_train', 'de_vertigo'])

In [7]:
PLACE_DIST_MATRIX['de_mirage'].keys()

dict_keys(['CTSpawn', 'BombsiteA', 'TRamp', 'TicketBooth', 'TopofMid', '', 'Shop', 'Apartments', 'BombsiteB', 'PalaceAlley', 'TSpawn', 'Catwalk', 'House', 'SnipersNest', 'Jungle', 'Tunnel', 'BackAlley', 'Middle', 'PalaceInterior', 'TunnelStairs', 'Ladder', 'Stairs', 'SideAlley', 'Scaffolding', 'Truck', 'Connector', 'Balcony'])

# Modyfikacje

## Aktualny wynik

In [8]:
rounds = pd.read_parquet("data/ESTA_rounds.parquet", columns=["roundNum", "ctScore", "tScore", "demoId"])
rounds.drop_duplicates(inplace=True)
rounds

Unnamed: 0,roundNum,ctScore,tScore,demoId
0,1,0,0,00330e8b-ba01-4d3f-a6c1-66881b879dac
1,2,0,1,00330e8b-ba01-4d3f-a6c1-66881b879dac
2,3,1,1,00330e8b-ba01-4d3f-a6c1-66881b879dac
3,4,1,2,00330e8b-ba01-4d3f-a6c1-66881b879dac
4,5,2,2,00330e8b-ba01-4d3f-a6c1-66881b879dac
...,...,...,...,...
21,22,12,9,ff910f54-0bed-4fdf-afa8-b481be09a788
22,23,12,10,ff910f54-0bed-4fdf-afa8-b481be09a788
23,24,13,10,ff910f54-0bed-4fdf-afa8-b481be09a788
24,25,14,10,ff910f54-0bed-4fdf-afa8-b481be09a788


In [9]:
states = states.merge(rounds, left_on=['demoId', 'roundNum'], right_on=['demoId', 'roundNum'])
states

Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_spotters,...,seconds,ctAlivePlayers,ctEqVal,tAlivePlayers,tEqVal,bombsite,mapName,winningSide,ctScore,tScore
0,100,100,False,0,0,0,0,0,False,[],...,0.007874,5,4400,5,4250,,de_nuke,T,0,0
1,100,100,False,0,0,0,0,0,False,[],...,0.511811,5,4400,5,4250,,de_nuke,T,0,0
2,100,100,False,0,0,0,0,0,False,[],...,1.015748,5,4400,5,4250,,de_nuke,T,0,0
3,100,100,False,0,0,0,0,0,False,[],...,1.519685,5,4400,5,4250,,de_nuke,T,0,0
4,100,100,False,0,0,0,0,0,False,[],...,2.023622,5,4400,5,4250,,de_nuke,T,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,[],...,81.464567,3,26800,1,24300,,de_vertigo,CT,15,10
7464294,59,94,True,0,2,0,1,0,False,[],...,81.968504,3,26800,1,24300,,de_vertigo,CT,15,10
7464295,59,94,True,0,2,0,1,0,False,[],...,82.472441,3,26800,1,24300,,de_vertigo,CT,15,10
7464296,59,94,True,0,2,0,1,0,False,[],...,82.976378,2,26800,1,24300,,de_vertigo,CT,15,10


In [10]:
states.loc[(states.ctScore.isna()) | (states.tScore.isna()), ['ctScore','tScore']]

Unnamed: 0,ctScore,tScore


## Kille

In [11]:
kills = pd.read_parquet("data/ESTA_kills.parquet", columns=["roundNum", "attackerID", "demoId", "kills"])
kills = kills.loc[~kills.attackerID.isna()]
kills = kills.reset_index(drop=True)
kills["roundNum"] = kills["roundNum"] + 1
kills

Unnamed: 0,roundNum,attackerID,demoId,kills
0,2,7.656120e+16,00330e8b-ba01-4d3f-a6c1-66881b879dac,1
1,2,7.656120e+16,00330e8b-ba01-4d3f-a6c1-66881b879dac,1
2,2,7.656120e+16,00330e8b-ba01-4d3f-a6c1-66881b879dac,1
3,2,7.656120e+16,00330e8b-ba01-4d3f-a6c1-66881b879dac,1
4,2,7.656120e+16,00330e8b-ba01-4d3f-a6c1-66881b879dac,4
...,...,...,...,...
389605,27,7.656120e+16,ff910f54-0bed-4fdf-afa8-b481be09a788,25
389606,27,7.656120e+16,ff910f54-0bed-4fdf-afa8-b481be09a788,20
389607,27,7.656120e+16,ff910f54-0bed-4fdf-afa8-b481be09a788,14
389608,27,7.656120e+16,ff910f54-0bed-4fdf-afa8-b481be09a788,15


In [12]:
for team in tqdm(['ct', 't']):
    for i in range(1, 6):
        states = states.merge(kills, how='left', left_on=['demoId', 'roundNum', f'{team}Player_{i}_ID'], right_on=['demoId', 'roundNum', 'attackerID'],
                             suffixes=(None, f"_{team}_{i}"))
states

100%|██████████████████████████████████████████████████████████████████| 2/2 [02:43<00:00, 81.51s/it]


Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_spotters,...,attackerID_t_1,kills_t_1,attackerID_t_2,kills_t_2,attackerID_t_3,kills_t_3,attackerID_t_4,kills_t_4,attackerID_t_5,kills_t_5
0,100,100,False,0,0,0,0,0,False,[],...,,,,,,,,,,
1,100,100,False,0,0,0,0,0,False,[],...,,,,,,,,,,
2,100,100,False,0,0,0,0,0,False,[],...,,,,,,,,,,
3,100,100,False,0,0,0,0,0,False,[],...,,,,,,,,,,
4,100,100,False,0,0,0,0,0,False,[],...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,[],...,,,,,,,7.656120e+16,20.0,,
7464294,59,94,True,0,2,0,1,0,False,[],...,,,,,,,7.656120e+16,20.0,,
7464295,59,94,True,0,2,0,1,0,False,[],...,,,,,,,7.656120e+16,20.0,,
7464296,59,94,True,0,2,0,1,0,False,[],...,,,,,,,7.656120e+16,20.0,,


In [13]:
states.loc[~states.kills.isna(), 'kills']

35164       3.0
35165       3.0
35166       3.0
35167       3.0
35168       3.0
           ... 
7408874    19.0
7408875    19.0
7408876    19.0
7408877    19.0
7408878    19.0
Name: kills, Length: 617611, dtype: float64

In [14]:
states.rename(columns={'kills': 'kills_ct_1'}, inplace=True)
states.loc[states.roundNum == 1, [f'kills_{team}_{i}' for team in ['ct', 't'] for i in range(1, 6)]] = 0
states = states.drop(columns=states.columns[states.columns.str.startswith('attackerID')])
states

Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_spotters,...,kills_ct_1,kills_ct_2,kills_ct_3,kills_ct_4,kills_ct_5,kills_t_1,kills_t_2,kills_t_3,kills_t_4,kills_t_5
0,100,100,False,0,0,0,0,0,False,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,100,100,False,0,0,0,0,0,False,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,100,100,False,0,0,0,0,0,False,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,100,100,False,0,0,0,0,0,False,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,100,100,False,0,0,0,0,0,False,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,[],...,,,,,,,,,20.0,
7464294,59,94,True,0,2,0,1,0,False,[],...,,,,,,,,,20.0,
7464295,59,94,True,0,2,0,1,0,False,[],...,,,,,,,,,20.0,
7464296,59,94,True,0,2,0,1,0,False,[],...,,,,,,,,,20.0,


In [15]:
for team in ['ct', 't']: 
    for i in range(1, 6):
        states[f'kills_{team}_{i}'] = states[f'kills_{team}_{i}'].fillna(method='ffill')

## Ostatnie pozycje

In [16]:
# generowanie ostatnich pozycji
# from functions_multiprocessing import get_last_positions, parallelize_dataframe
# last_place_df = parallelize_dataframe(states, get_last_positions, n_cores)
# last_place_df.to_parquet("data/last_place_df.parquet")

last_place_df = pd.read_parquet("data/last_place_df.parquet").reset_index(drop=True)
states = states.join(last_place_df)
states.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7464298 entries, 0 to 7464297
Columns: 203 entries, ctPlayer_1_hp to t_Player_5_lastPlaceName
dtypes: bool(50), float64(11), int64(87), object(55)
memory usage: 9.2+ GB


## Aggregacje

In [17]:
aggregation = AnalyticModule._get_summed_columns(states,
    ["hp", "armor", "hasHelmet", "DecoyGrenade", "Flashbang", "HEGrenade", "SmokeGrenade", "fireGrenades", "isBlinded"],
    [("ct", "hasDefuse"), ("t", "hasBomb"), ("ct", "isDefusing"), ("t", "isPlanting")])
aggregation = aggregation.reset_index(drop=True)
aggregation

Unnamed: 0,ct_hp,ct_armor,ct_hasHelmet,ct_DecoyGrenade,ct_Flashbang,ct_HEGrenade,ct_SmokeGrenade,ct_fireGrenades,ct_isBlinded,t_hp,...,t_DecoyGrenade,t_Flashbang,t_HEGrenade,t_SmokeGrenade,t_fireGrenades,t_isBlinded,ct_hasDefuse,t_hasBomb,ct_isDefusing,t_isPlanting
0,500,400,0,0,2,0,0,0,0,500,...,0,1,0,1,1,0,1,1,0,0
1,500,400,0,0,2,0,0,0,0,500,...,0,1,0,1,1,0,1,1,0,0
2,500,400,0,0,2,0,0,0,0,500,...,0,1,0,1,1,0,1,1,0,0
3,500,400,0,0,2,0,0,0,0,500,...,0,1,0,1,1,0,1,1,0,0
4,500,400,0,0,2,0,0,0,0,500,...,0,1,0,1,1,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,233,290,3,0,2,0,1,0,0,74,...,0,1,0,0,0,0,2,0,0,0
7464294,233,290,3,0,2,0,1,0,0,74,...,0,1,0,0,0,0,2,0,0,0
7464295,206,286,3,0,2,0,1,0,0,74,...,0,1,0,0,0,0,2,0,0,0
7464296,159,194,2,0,2,0,1,0,0,74,...,0,1,0,0,0,0,1,0,0,0


## Odległości do teameatów

In [18]:
from functions_multiprocessing import parallelize_dataframe, get_dist

dist_frame = parallelize_dataframe(states, get_dist, n_cores)
dist_frame

Unnamed: 0,ct_meanDist,ct_minDist,t_meanDist,t_minDist
0,622.183397,622.183397,955.517356,955.517356
1,622.183397,622.183397,955.517356,955.517356
2,622.183397,622.183397,955.517356,955.517356
3,622.183397,622.183397,955.517356,955.517356
4,2162.980651,622.183397,955.517356,955.517356
...,...,...,...,...
7464293,2054.600765,1486.920862,9000.000000,9000.000000
7464294,2054.600765,1486.920862,9000.000000,9000.000000
7464295,2054.600765,1486.920862,9000.000000,9000.000000
7464296,2961.579056,2961.579056,9000.000000,9000.000000


## Zgrupowanie graczy

In [19]:
from functions_multiprocessing import get_grouped_players

groupped_frame = parallelize_dataframe(states, get_grouped_players, n_cores)
groupped_frame

Unnamed: 0,ct_grouppedPlayers,t_grouppedPlayers
0,5,5
1,5,5
2,5,5
3,5,5
4,4,5
...,...,...
7464293,1,1
7464294,1,1
7464295,1,1
7464296,1,1


## Odległości do bombsite'u

In [20]:
from functions_multiprocessing import get_bombsite_dist

bombsite_df = parallelize_dataframe(states, get_bombsite_dist, n_cores)
bombsite_df

Unnamed: 0,ctPlayer_1_distToA,ctPlayer_1_distToB,ctPlayer_2_distToA,ctPlayer_2_distToB,ctPlayer_3_distToA,ctPlayer_3_distToB,ctPlayer_4_distToA,ctPlayer_4_distToB,ctPlayer_5_distToA,ctPlayer_5_distToB,tPlayer_1_distToA,tPlayer_1_distToB,tPlayer_2_distToA,tPlayer_2_distToB,tPlayer_3_distToA,tPlayer_3_distToB,tPlayer_4_distToA,tPlayer_4_distToB,tPlayer_5_distToA,tPlayer_5_distToB
0,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642
1,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642
2,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642
3,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642
4,2871.942478,5304.952813,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,5037.461605,6578.789286,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,1720.696771,5401.651956,1486.920862,3393.093312,,,,,711.154792,4485.758094,,,,,,,,,711.154792,4485.758094
7464294,1720.696771,5401.651956,1486.920862,3393.093312,,,,,711.154792,4485.758094,,,,,,,,,711.154792,4485.758094
7464295,1720.696771,5401.651956,1486.920862,3393.093312,,,,,711.154792,4485.758094,,,,,,,,,711.154792,4485.758094
7464296,1720.696771,5401.651956,1486.920862,3393.093312,,,,,,,,,,,,,,,711.154792,4485.758094


## Widziani gracze

In [21]:
from functions_multiprocessing import get_spotted_players

spotted_df = parallelize_dataframe(states, get_spotted_players, n_cores)
spotted_df

Unnamed: 0,ct_spottedPlayers,t_spottedPlayers
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
7464293,0,0
7464294,0,0
7464295,0,0
7464296,0,1


## Przejęcie mapy

In [22]:
from functions_multiprocessing import conquer_map
conquer = conquer_map(states)
conquer

7464298it [00:11, 649713.59it/s]


Unnamed: 0,ct_conquerMap,t_conquerMap
0,0.033333,0.033333
1,0.033333,0.033333
2,0.033333,0.033333
3,0.033333,0.033333
4,0.066667,0.033333
...,...,...
7464293,0.478261,0.260870
7464294,0.478261,0.260870
7464295,0.478261,0.260870
7464296,0.478261,0.304348


# Łączenie i generowanie

In [23]:
states.drop(columns=states.columns[states.columns.str.endswith(("_ID", "_lastPlaceName", "_spotters"))], inplace=True)
states.columns.values

array(['ctPlayer_1_hp', 'ctPlayer_1_armor', 'ctPlayer_1_hasHelmet',
       'ctPlayer_1_DecoyGrenade', 'ctPlayer_1_Flashbang',
       'ctPlayer_1_HEGrenade', 'ctPlayer_1_SmokeGrenade',
       'ctPlayer_1_fireGrenades', 'ctPlayer_1_isBlinded',
       'ctPlayer_1_activeWeapon', 'ctPlayer_1_mainWeapon',
       'ctPlayer_1_secondaryWeapon', 'ctPlayer_1_isAlive',
       'ctPlayer_2_hp', 'ctPlayer_2_armor', 'ctPlayer_2_hasHelmet',
       'ctPlayer_2_DecoyGrenade', 'ctPlayer_2_Flashbang',
       'ctPlayer_2_HEGrenade', 'ctPlayer_2_SmokeGrenade',
       'ctPlayer_2_fireGrenades', 'ctPlayer_2_isBlinded',
       'ctPlayer_2_activeWeapon', 'ctPlayer_2_mainWeapon',
       'ctPlayer_2_secondaryWeapon', 'ctPlayer_2_isAlive',
       'ctPlayer_3_hp', 'ctPlayer_3_armor', 'ctPlayer_3_hasHelmet',
       'ctPlayer_3_DecoyGrenade', 'ctPlayer_3_Flashbang',
       'ctPlayer_3_HEGrenade', 'ctPlayer_3_SmokeGrenade',
       'ctPlayer_3_fireGrenades', 'ctPlayer_3_isBlinded',
       'ctPlayer_3_activeWeapon', 'ctP

In [24]:
states = pd.concat([states, aggregation, dist_frame, groupped_frame, bombsite_df, spotted_df, conquer], axis=1)
states

Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_activeWeapon,...,tPlayer_3_distToA,tPlayer_3_distToB,tPlayer_4_distToA,tPlayer_4_distToB,tPlayer_5_distToA,tPlayer_5_distToB,ct_spottedPlayers,t_spottedPlayers,ct_conquerMap,t_conquerMap
0,100,100,False,0,0,0,0,0,False,Knife,...,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,0,0,0.033333,0.033333
1,100,100,False,0,0,0,0,0,False,Knife,...,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,0,0,0.033333,0.033333
2,100,100,False,0,0,0,0,0,False,Knife,...,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,0,0,0.033333,0.033333
3,100,100,False,0,0,0,0,0,False,Knife,...,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,0,0,0.033333,0.033333
4,100,100,False,0,0,0,0,0,False,Knife,...,4405.452895,7888.040642,4405.452895,7888.040642,4405.452895,7888.040642,0,0,0.066667,0.033333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,AK-47,...,,,,,711.154792,4485.758094,0,0,0.478261,0.260870
7464294,59,94,True,0,2,0,1,0,False,AK-47,...,,,,,711.154792,4485.758094,0,0,0.478261,0.260870
7464295,59,94,True,0,2,0,1,0,False,AK-47,...,,,,,711.154792,4485.758094,0,0,0.478261,0.260870
7464296,59,94,True,0,2,0,1,0,False,AK-47,...,,,,,711.154792,4485.758094,0,1,0.478261,0.304348


In [25]:
del aggregation, dist_frame, groupped_frame, bombsite_df, spotted_df, conquer

In [26]:
def prepare_features(unprep_df):
    pistols = {'Glock-18', 'USP-S', 'P2000', 'P250', 'Dual Berettas'}
    enhanced_pistols = {'CZ75 Auto', 'Five-SeveN', 'Tec-9', 'R8 Revolver'}
    deagle = 'Desert Eagle'
    shotguns = {'MAG-7', 'XM1014', 'Nova', 'Sawed-Off'}
    machine_guns = {'M249', 'Negev'}
    smgs = {'MP9', 'MP7', 'MP5-SD', 'MAC-10', 'UMP-45', 'PP-Bizon', 'P90'}
    weaker_rifles = {'Galil AR', 'SSG 08', 'FAMAS'}
    lunet_rifles = {'SG 553', 'AUG'}
    sniper_rifle = {'G3SG1', 'SCAR-20', 'AWP'}
    assault_rifle = {'M4A1', 'M4A4', 'AK-47'}
    others = {'Zeus x27', 'Knife', 'C4', 'Molotov', 'Incendiary Grenade', 
              'Smoke Grenade', 'Flashbang', 'Decoy Grenade', 'HE Grenade'}
    others.update(shotguns, machine_guns)
    
    def convert_weapons(col):
        for team in ['ct', 't']:
            unprep_df[f"{team}_{col}_Pistol"] = 0
            unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
            unprep_df[f"{team}_{col}_Deagle"] = 0
            unprep_df[f"{team}_{col}_SMG"] = 0
            unprep_df[f"{team}_{col}_WeakAssaultRifle"] = 0
            unprep_df[f"{team}_{col}_LunetRifle"] = 0
            unprep_df[f"{team}_{col}_SniperRifle"] = 0
            unprep_df[f"{team}_{col}_AssaultRifle"] = 0
            for i in range(1, 6):
                unprep_df[f"{team}_{col}_Pistol"] += unprep_df[f"{team}Player_{i}_{col}"].isin(pistols).astype(int)
                unprep_df[f"{team}_{col}_EnhancedPistols"] = unprep_df[f"{team}Player_{i}_{col}"].isin(enhanced_pistols).astype(int)
                unprep_df[f"{team}_{col}_Deagle"] += (unprep_df[f"{team}Player_{i}_{col}"] == deagle).astype(int)
                unprep_df[f"{team}_{col}_SMG"] += unprep_df[f"{team}Player_{i}_{col}"].isin(smgs).astype(int)
                unprep_df[f"{team}_{col}_WeakAssaultRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(weaker_rifles).astype(int)
                unprep_df[f"{team}_{col}_LunetRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(lunet_rifles).astype(int)
                unprep_df[f"{team}_{col}_SniperRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(sniper_rifle).astype(int)
                unprep_df[f"{team}_{col}_AssaultRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(assault_rifle).astype(int)
                                
    def convert_active_weapons(col):
        for team in ['ct', 't']:
            unprep_df[f"{team}_{col}_Pistol"] = 0
            unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
            unprep_df[f"{team}_{col}_Deagle"] = 0
            unprep_df[f"{team}_{col}_SMG"] = 0
            unprep_df[f"{team}_{col}_WeakAssaultRifle"] = 0
            unprep_df[f"{team}_{col}_LunetRifle"] = 0
            unprep_df[f"{team}_{col}_SniperRifle"] = 0
            unprep_df[f"{team}_{col}_AssaultRifle"] = 0
            unprep_df[f"{team}_{col}_Others"] = 0
            for i in range(1, 6):
                unprep_df[f"{team}_{col}_Pistol"] += unprep_df[f"{team}Player_{i}_{col}"].isin(pistols).astype(int)
                unprep_df[f"{team}_{col}_EnhancedPistols"] = unprep_df[f"{team}Player_{i}_{col}"].isin(enhanced_pistols).astype(int)
                unprep_df[f"{team}_{col}_Deagle"] += (unprep_df[f"{team}Player_{i}_{col}"] == deagle).astype(int)
                unprep_df[f"{team}_{col}_SMG"] += unprep_df[f"{team}Player_{i}_{col}"].isin(smgs).astype(int)
                unprep_df[f"{team}_{col}_WeakAssaultRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(weaker_rifles).astype(int)
                unprep_df[f"{team}_{col}_LunetRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(lunet_rifles).astype(int)
                unprep_df[f"{team}_{col}_SniperRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(sniper_rifle).astype(int)
                unprep_df[f"{team}_{col}_AssaultRifle"] += unprep_df[f"{team}Player_{i}_{col}"].isin(assault_rifle).astype(int)
                unprep_df[f"{team}_{col}_Others"] += unprep_df[f"{team}Player_{i}_{col}"].isin(others).astype(int)
                
    def convert_sec_weapons(col):
        for team in ['ct', 't']:
            unprep_df[f"{team}_{col}_Pistol"] = 0
            unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
            unprep_df[f"{team}_{col}_Deagle"] = 0
            for i in range(1, 6):
                unprep_df[f"{team}_{col}_Pistol"] += unprep_df[f"{team}Player_{i}_{col}"].isin(pistols).astype(int)
                unprep_df[f"{team}_{col}_EnhancedPistols"] = unprep_df[f"{team}Player_{i}_{col}"].isin(enhanced_pistols).astype(int)
                unprep_df[f"{team}_{col}_Deagle"] += (unprep_df[f"{team}Player_{i}_{col}"] == deagle).astype(int)


    convert_active_weapons('activeWeapon')
    convert_weapons('mainWeapon')
    convert_sec_weapons('secondaryWeapon')
    
    unprep_df['winningSideCT'] = np.where(unprep_df['winningSide'] == 'CT', 1, 0)
    unprep_df['bombsite'] = np.where(unprep_df['bombsite'] == '', 'not_planted', unprep_df['bombsite'])
    unprep_df['ctMinDistToA'] = unprep_df[["ctPlayer_1_distToA", "ctPlayer_2_distToA", "ctPlayer_3_distToA", "ctPlayer_4_distToA", "ctPlayer_5_distToA"]].min(axis=1)
    unprep_df['ctMinDistToB'] = unprep_df[["ctPlayer_1_distToB", "ctPlayer_2_distToB", "ctPlayer_3_distToB", "ctPlayer_4_distToB", "ctPlayer_5_distToB"]].min(axis=1)
    unprep_df['tMinDistToA'] = unprep_df[["tPlayer_1_distToA", "tPlayer_2_distToA", "tPlayer_3_distToA", "tPlayer_4_distToA", "tPlayer_5_distToA"]].min(axis=1)
    unprep_df['tMinDistToB'] = unprep_df[["tPlayer_1_distToB", "tPlayer_2_distToB", "tPlayer_3_distToB", "tPlayer_4_distToB", "tPlayer_5_distToB"]].min(axis=1)
    unprep_df['ctMeanDistToA'] = unprep_df[["ctPlayer_1_distToA", "ctPlayer_2_distToA", "ctPlayer_3_distToA", "ctPlayer_4_distToA", "ctPlayer_5_distToA"]].mean(axis=1)
    unprep_df['ctMeanDistToB'] = unprep_df[["ctPlayer_1_distToB", "ctPlayer_2_distToB", "ctPlayer_3_distToB", "ctPlayer_4_distToB", "ctPlayer_5_distToB"]].mean(axis=1)
    unprep_df['tMeanDistToA'] = unprep_df[["tPlayer_1_distToA", "tPlayer_2_distToA", "tPlayer_3_distToA", "tPlayer_4_distToA", "tPlayer_5_distToA"]].mean(axis=1)
    unprep_df['tMeanDistToB'] = unprep_df[["tPlayer_1_distToB", "tPlayer_2_distToB", "tPlayer_3_distToB", "tPlayer_4_distToB", "tPlayer_5_distToB"]].mean(axis=1)
    unprep_df['ctMeanKills'] = (unprep_df['kills_ct_1'] * unprep_df['ctPlayer_1_isAlive'] + unprep_df['kills_ct_2'] * unprep_df['ctPlayer_2_isAlive'] + 
            unprep_df['kills_ct_3'] * unprep_df['ctPlayer_3_isAlive'] + unprep_df['kills_ct_4'] * unprep_df['ctPlayer_4_isAlive'] + 
            unprep_df['kills_ct_5'] * unprep_df['ctPlayer_5_isAlive']) / unprep_df[[f'ctPlayer_{i}_isAlive' for i in range(1, 6)]].sum(axis=1)
    unprep_df['tMeanKills'] = (unprep_df['kills_t_1'] * unprep_df['tPlayer_1_isAlive'] + unprep_df['kills_t_2'] * unprep_df['tPlayer_2_isAlive'] + 
            unprep_df['kills_t_3'] * unprep_df['tPlayer_3_isAlive'] + unprep_df['kills_t_4'] * unprep_df['tPlayer_4_isAlive'] + 
            unprep_df['kills_t_5'] * unprep_df['tPlayer_5_isAlive']) / unprep_df[[f'tPlayer_{i}_isAlive' for i in range(1, 6)]].sum(axis=1)
    unprep_df['tMeanKills'] = unprep_df['tMeanKills'].fillna(-1)
    unprep_df = pd.concat([unprep_df, pd.get_dummies(unprep_df['mapName']), pd.get_dummies(unprep_df['bombsite'], prefix='bombsite')], axis=1)
    unprep_df['tMinDistToA'] = unprep_df['tMinDistToA'].fillna(9000)
    unprep_df['tMinDistToB'] = unprep_df['tMinDistToB'].fillna(9000)
    unprep_df['tMeanDistToA'] = unprep_df['tMeanDistToA'].fillna(9000)
    unprep_df['tMeanDistToB'] = unprep_df['tMeanDistToB'].fillna(9000)
    unprep_df['tMinDistToA'] = unprep_df['tMinDistToA'].replace(np.inf, 9000)
    unprep_df['tMinDistToB'] = unprep_df['tMinDistToB'].replace(np.inf, 9000)
    unprep_df['tMeanDistToA'] = unprep_df['tMeanDistToA'].replace(np.inf, 9000)
    unprep_df['tMeanDistToB'] = unprep_df['tMeanDistToB'].replace(np.inf, 9000)
    unprep_df['ctMinDistToA'] = unprep_df['ctMinDistToA'].fillna(9000)
    unprep_df['ctMinDistToB'] = unprep_df['ctMinDistToB'].fillna(9000)
    unprep_df['ctMeanDistToA'] = unprep_df['ctMeanDistToA'].fillna(9000)
    unprep_df['ctMeanDistToB'] = unprep_df['ctMeanDistToB'].fillna(9000)
    unprep_df['ctMinDistToA'] = unprep_df['ctMinDistToA'].replace(np.inf, 9000)
    unprep_df['ctMinDistToB'] = unprep_df['ctMinDistToB'].replace(np.inf, 9000)
    unprep_df['ctMeanDistToA'] = unprep_df['ctMeanDistToA'].replace(np.inf, 9000)
    unprep_df['ctMeanDistToB'] = unprep_df['ctMeanDistToB'].replace(np.inf, 9000)
    return unprep_df

In [27]:
states = prepare_features(states)
states

  unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
  unprep_df[f"{team}_{col}_Deagle"] = 0
  unprep_df[f"{team}_{col}_SMG"] = 0
  unprep_df[f"{team}_{col}_WeakAssaultRifle"] = 0
  unprep_df[f"{team}_{col}_LunetRifle"] = 0
  unprep_df[f"{team}_{col}_SniperRifle"] = 0
  unprep_df[f"{team}_{col}_AssaultRifle"] = 0
  unprep_df[f"{team}_{col}_Pistol"] = 0
  unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
  unprep_df[f"{team}_{col}_Deagle"] = 0
  unprep_df[f"{team}_{col}_Pistol"] = 0
  unprep_df[f"{team}_{col}_EnhancedPistols"] = 0
  unprep_df[f"{team}_{col}_Deagle"] = 0
  unprep_df['winningSideCT'] = np.where(unprep_df['winningSide'] == 'CT', 1, 0)


Unnamed: 0,ctPlayer_1_hp,ctPlayer_1_armor,ctPlayer_1_hasHelmet,ctPlayer_1_DecoyGrenade,ctPlayer_1_Flashbang,ctPlayer_1_HEGrenade,ctPlayer_1_SmokeGrenade,ctPlayer_1_fireGrenades,ctPlayer_1_isBlinded,ctPlayer_1_activeWeapon,...,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train,de_vertigo,bombsite_A,bombsite_B,bombsite_not_planted
0,100,100,False,0,0,0,0,0,False,Knife,...,0,0,0,1,0,0,0,0,0,1
1,100,100,False,0,0,0,0,0,False,Knife,...,0,0,0,1,0,0,0,0,0,1
2,100,100,False,0,0,0,0,0,False,Knife,...,0,0,0,1,0,0,0,0,0,1
3,100,100,False,0,0,0,0,0,False,Knife,...,0,0,0,1,0,0,0,0,0,1
4,100,100,False,0,0,0,0,0,False,Knife,...,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464293,59,94,True,0,2,0,1,0,False,AK-47,...,0,0,0,0,0,0,1,0,0,1
7464294,59,94,True,0,2,0,1,0,False,AK-47,...,0,0,0,0,0,0,1,0,0,1
7464295,59,94,True,0,2,0,1,0,False,AK-47,...,0,0,0,0,0,0,1,0,0,1
7464296,59,94,True,0,2,0,1,0,False,AK-47,...,0,0,0,0,0,0,1,0,0,1


In [28]:
states['ctMeanKills']

0          0.0
1          0.0
2          0.0
3          0.0
4          0.0
          ... 
7464293    0.0
7464294    0.0
7464295    0.0
7464296    0.0
7464297    0.0
Name: ctMeanKills, Length: 7464298, dtype: float64

In [29]:
states.loc[states['tMeanKills'].isna(), 'tMeanKills']

Series([], Name: tMeanKills, dtype: float64)

In [30]:
states.columns.tolist()

['ctPlayer_1_hp',
 'ctPlayer_1_armor',
 'ctPlayer_1_hasHelmet',
 'ctPlayer_1_DecoyGrenade',
 'ctPlayer_1_Flashbang',
 'ctPlayer_1_HEGrenade',
 'ctPlayer_1_SmokeGrenade',
 'ctPlayer_1_fireGrenades',
 'ctPlayer_1_isBlinded',
 'ctPlayer_1_activeWeapon',
 'ctPlayer_1_mainWeapon',
 'ctPlayer_1_secondaryWeapon',
 'ctPlayer_1_isAlive',
 'ctPlayer_2_hp',
 'ctPlayer_2_armor',
 'ctPlayer_2_hasHelmet',
 'ctPlayer_2_DecoyGrenade',
 'ctPlayer_2_Flashbang',
 'ctPlayer_2_HEGrenade',
 'ctPlayer_2_SmokeGrenade',
 'ctPlayer_2_fireGrenades',
 'ctPlayer_2_isBlinded',
 'ctPlayer_2_activeWeapon',
 'ctPlayer_2_mainWeapon',
 'ctPlayer_2_secondaryWeapon',
 'ctPlayer_2_isAlive',
 'ctPlayer_3_hp',
 'ctPlayer_3_armor',
 'ctPlayer_3_hasHelmet',
 'ctPlayer_3_DecoyGrenade',
 'ctPlayer_3_Flashbang',
 'ctPlayer_3_HEGrenade',
 'ctPlayer_3_SmokeGrenade',
 'ctPlayer_3_fireGrenades',
 'ctPlayer_3_isBlinded',
 'ctPlayer_3_activeWeapon',
 'ctPlayer_3_mainWeapon',
 'ctPlayer_3_secondaryWeapon',
 'ctPlayer_3_isAlive',
 'ctPla

In [31]:
states.drop(columns=states.columns[states.columns.str.endswith(("_activeWeapon", "_mainWeapon", "_secondaryWeapon", "_distToA", "_distToB",
                                                               "_isAlive")) | states.columns.str.startswith("kills", "mapName_")],
            inplace=True)
states.columns.tolist()

['ctPlayer_1_hp',
 'ctPlayer_1_armor',
 'ctPlayer_1_hasHelmet',
 'ctPlayer_1_DecoyGrenade',
 'ctPlayer_1_Flashbang',
 'ctPlayer_1_HEGrenade',
 'ctPlayer_1_SmokeGrenade',
 'ctPlayer_1_fireGrenades',
 'ctPlayer_1_isBlinded',
 'ctPlayer_2_hp',
 'ctPlayer_2_armor',
 'ctPlayer_2_hasHelmet',
 'ctPlayer_2_DecoyGrenade',
 'ctPlayer_2_Flashbang',
 'ctPlayer_2_HEGrenade',
 'ctPlayer_2_SmokeGrenade',
 'ctPlayer_2_fireGrenades',
 'ctPlayer_2_isBlinded',
 'ctPlayer_3_hp',
 'ctPlayer_3_armor',
 'ctPlayer_3_hasHelmet',
 'ctPlayer_3_DecoyGrenade',
 'ctPlayer_3_Flashbang',
 'ctPlayer_3_HEGrenade',
 'ctPlayer_3_SmokeGrenade',
 'ctPlayer_3_fireGrenades',
 'ctPlayer_3_isBlinded',
 'ctPlayer_4_hp',
 'ctPlayer_4_armor',
 'ctPlayer_4_hasHelmet',
 'ctPlayer_4_DecoyGrenade',
 'ctPlayer_4_Flashbang',
 'ctPlayer_4_HEGrenade',
 'ctPlayer_4_SmokeGrenade',
 'ctPlayer_4_fireGrenades',
 'ctPlayer_4_isBlinded',
 'ctPlayer_5_hp',
 'ctPlayer_5_armor',
 'ctPlayer_5_hasHelmet',
 'ctPlayer_5_DecoyGrenade',
 'ctPlayer_5_Fla

In [32]:
states.to_parquet("data/ESTA_states_prepared.parquet")