# Preprocess

In [1]:
import os
import pandas as pd
import numpy as np
import lzma
import json

import random

from tqdm import tqdm
from awpy.data import PLACE_DIST_MATRIX, NAV
from awpy.analytics.nav import find_closest_area

In [2]:
# Root of the ESTA `data` directory (expects `online/` and `lan/` sub-folders).
# Set the ESTA_DATA_DIR environment variable to point somewhere else; the
# original machine-specific location is kept as the fallback.
path = os.environ.get(
    "ESTA_DATA_DIR",
    r"C:\Users\Piotrek\Documents\Inzynierka\esta-v1.0\pnxenopoulos-esta-f684ccf\data",
)

# os.listdir returns entries in arbitrary order, so sort to make the order of
# the processed demos (and therefore of the output rows) reproducible.
online_files = sorted(
    os.path.join(path, "online", name)
    for name in os.listdir(os.path.join(path, "online"))
)
lan_files = sorted(
    os.path.join(path, "lan", name)
    for name in os.listdir(os.path.join(path, "lan"))
)
demo_files = online_files + lan_files
len(demo_files)

1558

In [3]:
def read_parsed_demo(filename):
    """Read an LZMA/xz-compressed JSON file and return the decoded object.

    Args:
        filename: Path of the compressed JSON file.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    with lzma.open(filename, "rb") as compressed:
        return json.load(compressed)

In [4]:
def _inventory_features(prefix, inventory):
    """Summarise one player's inventory as flat ``{column_name: value}`` pairs.

    Args:
        prefix: Column prefix for the player, e.g. ``"ctPlayer_1"``.
        inventory: List of weapon dicts (``weaponName``, ``weaponClass``,
            ``ammoInMagazine``, ``ammoInReserve``) or ``None``.

    Returns:
        Dict with the five grenade counters plus ``secondaryWeapon`` and
        ``mainWeapon``. Both weapon keys are always present.
    """
    features = {
        f"{prefix}_SmokeGrenade": 0,
        f"{prefix}_Flashbang": 0,
        f"{prefix}_DecoyGrenade": 0,
        f"{prefix}_fireGrenades": 0,
        f"{prefix}_HEGrenade": 0,
    }
    main_weapon = None
    secondary_weapon = None

    for weapon in inventory or ():
        weapon_class = weapon['weaponClass']
        if weapon_class == 'Pistols':
            secondary_weapon = weapon['weaponName']
        elif weapon_class == 'Grenade':
            count = weapon['ammoInMagazine'] + weapon['ammoInReserve']
            if weapon['weaponName'] in {"Molotov", "Incendiary Grenade"}:
                # Both fire grenades share a single counter.
                features[f"{prefix}_fireGrenades"] = count
            else:
                # Column name is the weapon name with spaces removed,
                # e.g. "Smoke Grenade" -> "SmokeGrenade".
                features[f"{prefix}_{weapon['weaponName'].replace(' ', '')}"] = count
        else:
            # Any class other than pistol/grenade counts as the main weapon;
            # if several are present the last one wins.
            main_weapon = weapon['weaponName']

    # Always emit secondaryWeapon. Leaving the key out (player without a
    # pistol) produces NaN in the DataFrame, which a later forward fill would
    # replace with a stale weapon name from a previous frame.
    features[f"{prefix}_secondaryWeapon"] = '' if secondary_weapon is None else secondary_weapon
    # Without a main weapon, fall back to the pistol (or '' if there is none).
    features[f"{prefix}_mainWeapon"] = (
        features[f"{prefix}_secondaryWeapon"] if main_weapon is None else main_weapon
    )
    return features


def get_team_data(frame, team, mapping):
    """Flatten one team's part of a frame into a single-level dict.

    Args:
        frame: Frame dict; ``frame[team]`` must provide ``teamName``,
            ``teamEqVal``, ``alivePlayers``, ``totalUtility`` and ``players``.
        team: Key of the team inside ``frame`` (the callers in this notebook
            pass ``'ct'`` or ``'t'``); also used as the column prefix.
        mapping: Dict from a player's ``steamID`` to its slot label
            (e.g. ``'Player_1'``).

    Returns:
        Dict with team-level values (``{team}Name``, ``{team}EqVal``, ...) and,
        per player, ``{team}{slot}_ID``, every raw player attribute that is not
        in the skip list, grenade counts and main/secondary weapon names.

    Raises:
        KeyError: If a player's ``steamID`` is missing from ``mapping``.
    """
    # Raw player keys that are not copied verbatim; 'inventory' is expanded
    # into weapon/grenade columns instead.
    skipped_keys = {
        'inventory', 'steamID', 'name', 'team', 'side', 'flashGrenades',
        'smokeGrenades', 'heGrenades', 'fireGrenades', 'totalUtility',
    }

    team_frame = frame[team]
    team_data = {
        team + 'Name': team_frame['teamName'],
        team + 'EqVal': team_frame['teamEqVal'],
        team + 'AlivePlayers': team_frame['alivePlayers'],
        team + 'TotalUtility': team_frame['totalUtility'],
    }

    for player in team_frame['players']:
        prefix = f"{team}{mapping[player['steamID']]}"
        team_data[f"{prefix}_ID"] = player['steamID']
        for key_player, value in player.items():
            if key_player == 'inventory':
                team_data.update(_inventory_features(prefix, value))
            elif key_player not in skipped_keys:
                team_data[f"{prefix}_{key_player}"] = value
    return team_data

def get_frame_data(frame, mapping):
    """Flatten a whole frame (both teams, frame-level fields, bomb) into one dict.

    Args:
        frame: Frame dict with ``'ct'`` and ``'t'`` team entries, the scalar
            fields ``bombPlanted``, ``bombsite``, ``tick``, ``seconds``,
            ``clockTime`` and a ``bomb`` dict.
        mapping: ``steamID`` -> slot label dict forwarded to ``get_team_data``.

    Returns:
        Dict holding the CT columns, then the T columns, then the frame-level
        fields, then every bomb attribute under a ``bomb_`` prefix.
    """
    frame_data = {}
    for side in ('ct', 't'):
        frame_data.update(get_team_data(frame, side, mapping))

    for field in ('bombPlanted', 'bombsite', 'tick', 'seconds', 'clockTime'):
        frame_data[field] = frame[field]

    bomb_data = frame['bomb']
    frame_data.update({f"bomb_{key}": bomb_data[key] for key in bomb_data})
    return frame_data

def create_mapping(round_):
    """Assign every player of a round a per-team slot label.

    Players are numbered in the order they appear in ``round_['ctSide']['players']``
    and ``round_['tSide']['players']``. Numbering restarts for each side, so a
    CT player and a T player can share the same label.

    Args:
        round_: Round dict with ``ctSide`` and ``tSide`` entries, each holding a
            ``players`` list of dicts that carry a ``steamID``.

    Returns:
        Dict mapping ``steamID`` to ``'Player_<n>'`` with ``n`` starting at 1.
    """
    slot_by_steam_id = {}
    for side_key in ('ctSide', 'tSide'):
        roster = round_[side_key]['players']
        for slot, member in enumerate(roster, start=1):
            slot_by_steam_id[member['steamID']] = f'Player_{slot}'
    return slot_by_steam_id

def get_match_data(data):
    """Convert one parsed demo into a DataFrame with one row per usable frame.

    A frame is kept only when both teams have a player list of exactly five
    entries, the clock is not ``"00:00"``, the T side reports ``alivePlayers >= 0``
    and the CT side reports ``alivePlayers >= 1``.

    Args:
        data: Parsed demo dict with ``gameRounds``, ``matchName`` and ``mapName``.
            Each round provides ``ctSide``/``tSide`` rosters, ``frames``,
            ``roundNum`` and ``winningSide``.

    Returns:
        DataFrame of flattened frames (see ``get_frame_data``) plus ``roundNum``,
        ``winningSide``, ``matchName`` and ``mapName`` columns, with missing
        values forward-filled.

    Raises:
        ValueError: If no round lists players for both sides, so no
            steamID -> slot mapping can be built.
    """
    rounds = data['gameRounds']

    # Build the steamID -> slot mapping from the first round that has a roster
    # for BOTH sides (some rounds carry `players = None`).
    reference_round = next(
        (
            candidate for candidate in rounds
            if candidate['ctSide']['players'] is not None
            and candidate['tSide']['players'] is not None
        ),
        None,
    )
    if reference_round is None:
        raise ValueError("no round with player lists for both sides; cannot build player mapping")
    mapping = create_mapping(reference_round)

    data_list = []
    for round_ in rounds:
        # `or []` tolerates rounds whose frame list is missing (None).
        for frame in round_['frames'] or []:
            ct_players = frame["ct"]["players"]
            t_players = frame["t"]["players"]
            # Short-circuit `and` so later checks are skipped once one fails.
            # NOTE(review): T may have zero alive players while CT needs at
            # least one - presumably to keep post-plant frames; confirm intent.
            usable = (
                ct_players is not None
                and t_players is not None
                and frame["clockTime"] != "00:00"
                and frame["t"]["alivePlayers"] >= 0
                and frame["ct"]["alivePlayers"] >= 1
                and len(ct_players) == 5
                and len(t_players) == 5
            )
            if not usable:
                continue
            converted_vector = get_frame_data(frame, mapping)
            converted_vector['roundNum'] = round_['roundNum']
            converted_vector['winningSide'] = round_['winningSide']
            data_list.append(converted_vector)

    res = pd.DataFrame(data_list)
    res['matchName'] = data['matchName']
    res['mapName'] = data['mapName']
    # `fillna(method='ffill')` is deprecated; `ffill()` is the equivalent call.
    # NOTE: the fill runs over the whole match, so a gap at the start of a
    # round is filled from the last kept frame of the previous round.
    res = res.ffill()
    return res

In [6]:
def process_demo(filename):
    """Load one compressed demo file and return its per-frame DataFrame.

    Args:
        filename: Path handed to ``read_parsed_demo``.

    Returns:
        The DataFrame produced by ``get_match_data`` for that demo.
    """
    return get_match_data(read_parsed_demo(filename))

# Parse every demo into its own DataFrame. Going through `process_demo`
# avoids duplicating its body here and keeps the raw parsed JSON local to the
# call, so it can be released as soon as each match has been converted.
all_matches = []
for f in tqdm(demo_files):
    match_df = process_demo(f)
    all_matches.append(match_df)
len(all_matches)

100%|████████████████████████████████████████████████████████████████████████████| 1558/1558 [1:10:26<00:00,  2.71s/it]


1558

In [8]:
# Stack the per-match frames into one table.
states = pd.concat(all_matches)
# Create the output folder first: `to_parquet` fails if "data/" does not
# exist, which would discard the result of the long parsing loop.
os.makedirs("data", exist_ok=True)
states.to_parquet("data/states_ESTA_all.parquet")

In [9]:
# Drop the list of per-match DataFrames; their contents were already
# concatenated into `states` and written to parquet.
del all_matches

# Read parquet

In [10]:
# Load the full state table back from the parquet file written in the
# preprocessing section (presumably so this section can be run on its own
# without re-parsing the demos - confirm).
states = pd.read_parquet("data/states_ESTA_all.parquet")

In [11]:
# Show every column name available in the full state table.
states.columns.tolist()

['ctName',
 'ctEqVal',
 'ctAlivePlayers',
 'ctTotalUtility',
 'ctPlayer_1_ID',
 'ctPlayer_1_x',
 'ctPlayer_1_y',
 'ctPlayer_1_z',
 'ctPlayer_1_velocityX',
 'ctPlayer_1_velocityY',
 'ctPlayer_1_velocityZ',
 'ctPlayer_1_viewX',
 'ctPlayer_1_viewY',
 'ctPlayer_1_hp',
 'ctPlayer_1_armor',
 'ctPlayer_1_activeWeapon',
 'ctPlayer_1_isAlive',
 'ctPlayer_1_isBlinded',
 'ctPlayer_1_isAirborne',
 'ctPlayer_1_isDucking',
 'ctPlayer_1_isDuckingInProgress',
 'ctPlayer_1_isUnDuckingInProgress',
 'ctPlayer_1_isDefusing',
 'ctPlayer_1_isPlanting',
 'ctPlayer_1_isReloading',
 'ctPlayer_1_isInBombZone',
 'ctPlayer_1_isInBuyZone',
 'ctPlayer_1_isStanding',
 'ctPlayer_1_isScoped',
 'ctPlayer_1_isWalking',
 'ctPlayer_1_isUnknown',
 'ctPlayer_1_SmokeGrenade',
 'ctPlayer_1_Flashbang',
 'ctPlayer_1_DecoyGrenade',
 'ctPlayer_1_fireGrenades',
 'ctPlayer_1_HEGrenade',
 'ctPlayer_1_secondaryWeapon',
 'ctPlayer_1_mainWeapon',
 'ctPlayer_1_spotters',
 'ctPlayer_1_equipmentValue',
 'ctPlayer_1_equipmentValueFreezetim

In [12]:
def get_cols(cols, team_specific):
    """Build per-player column names of the form ``{team}Player_{i}_{col}``.

    Args:
        cols: Attribute names generated for both teams (``ct`` and ``t``) and
            player slots 1-5.
        team_specific: Iterable of ``(team, col)`` pairs; each pair is generated
            only for the given team, again for slots 1-5.

    Returns:
        List with the shared columns first (ordered by team, then slot, then
        attribute), followed by the team-specific columns (ordered by pair,
        then slot).
    """
    names = []
    for side in ['ct', 't']:
        for slot in range(1, 6):
            for attribute in cols:
                names.append(f"{side}Player_{slot}_{attribute}")

    for side, attribute in team_specific:
        for slot in range(1, 6):
            names.append(f"{side}Player_{slot}_{attribute}")
    return names

def get_position_columns():
    """Return the x/y/z column names for all ten players and the bomb.

    Returns:
        List of 33 names: ``{team}Player_{i}_{axis}`` for ``ct`` then ``t``,
        slots 1-5, axes x/y/z, followed by ``bomb_x``, ``bomb_y``, ``bomb_z``.
    """
    axes = ["x", "y", "z"]
    names = []
    for side in ["ct", "t"]:
        for slot in range(1, 6):
            names.extend(f"{side}Player_{slot}_{axis}" for axis in axes)
    names.extend(f"bomb_{axis}" for axis in axes)
    return names

# Position columns (player and bomb coordinates) used for the final selection.
pos_cols = get_position_columns()
pos_cols

['ctPlayer_1_x',
 'ctPlayer_1_y',
 'ctPlayer_1_z',
 'ctPlayer_2_x',
 'ctPlayer_2_y',
 'ctPlayer_2_z',
 'ctPlayer_3_x',
 'ctPlayer_3_y',
 'ctPlayer_3_z',
 'ctPlayer_4_x',
 'ctPlayer_4_y',
 'ctPlayer_4_z',
 'ctPlayer_5_x',
 'ctPlayer_5_y',
 'ctPlayer_5_z',
 'tPlayer_1_x',
 'tPlayer_1_y',
 'tPlayer_1_z',
 'tPlayer_2_x',
 'tPlayer_2_y',
 'tPlayer_2_z',
 'tPlayer_3_x',
 'tPlayer_3_y',
 'tPlayer_3_z',
 'tPlayer_4_x',
 'tPlayer_4_y',
 'tPlayer_4_z',
 'tPlayer_5_x',
 'tPlayer_5_y',
 'tPlayer_5_z',
 'bomb_x',
 'bomb_y',
 'bomb_z']

In [13]:
# Per-player attributes generated for both teams.
cols = [
    "hp",
    "armor",
    "hasHelmet",
    "DecoyGrenade",
    "Flashbang",
    "HEGrenade",
    "SmokeGrenade",
    "fireGrenades",
    "isBlinded",
]
# Further per-player attributes, also generated for both teams.
add_col = [
    "spotters",
    "activeWeapon",
    "mainWeapon",
    "secondaryWeapon",
    "isAlive",
    "ID",
]
# Attributes generated for one team only, as (team, attribute) pairs.
team_spec = [
    ("ct", "hasDefuse"),
    ("t", "hasBomb"),
    ("ct", "isDefusing"),
    ("t", "isPlanting"),
]

columns = get_cols([*cols, *add_col], team_spec)
columns

['ctPlayer_1_hp',
 'ctPlayer_1_armor',
 'ctPlayer_1_hasHelmet',
 'ctPlayer_1_DecoyGrenade',
 'ctPlayer_1_Flashbang',
 'ctPlayer_1_HEGrenade',
 'ctPlayer_1_SmokeGrenade',
 'ctPlayer_1_fireGrenades',
 'ctPlayer_1_isBlinded',
 'ctPlayer_1_spotters',
 'ctPlayer_1_activeWeapon',
 'ctPlayer_1_mainWeapon',
 'ctPlayer_1_secondaryWeapon',
 'ctPlayer_1_isAlive',
 'ctPlayer_1_ID',
 'ctPlayer_2_hp',
 'ctPlayer_2_armor',
 'ctPlayer_2_hasHelmet',
 'ctPlayer_2_DecoyGrenade',
 'ctPlayer_2_Flashbang',
 'ctPlayer_2_HEGrenade',
 'ctPlayer_2_SmokeGrenade',
 'ctPlayer_2_fireGrenades',
 'ctPlayer_2_isBlinded',
 'ctPlayer_2_spotters',
 'ctPlayer_2_activeWeapon',
 'ctPlayer_2_mainWeapon',
 'ctPlayer_2_secondaryWeapon',
 'ctPlayer_2_isAlive',
 'ctPlayer_2_ID',
 'ctPlayer_3_hp',
 'ctPlayer_3_armor',
 'ctPlayer_3_hasHelmet',
 'ctPlayer_3_DecoyGrenade',
 'ctPlayer_3_Flashbang',
 'ctPlayer_3_HEGrenade',
 'ctPlayer_3_SmokeGrenade',
 'ctPlayer_3_fireGrenades',
 'ctPlayer_3_isBlinded',
 'ctPlayer_3_spotters',
 'ctPla

In [14]:
# Match/round/team-level columns kept next to the per-player columns.
info_col = [
    'matchName',
    'roundNum',
    'seconds',
    'ctAlivePlayers',
    'ctEqVal',
    'tAlivePlayers',
    'tEqVal',
    'bombsite',
    'mapName',
]
info_col

['matchName',
 'roundNum',
 'seconds',
 'ctAlivePlayers',
 'ctEqVal',
 'tAlivePlayers',
 'tEqVal',
 'bombsite',
 'mapName']

In [15]:
# Keep only the selected player, position and info columns, then save the
# reduced table.
new_states = states.loc[:, [*columns, *pos_cols, *info_col]]
new_states.to_parquet("data/states_ESTA.parquet")