# Importy

In [None]:
from awpy import DemoParser
from awpy.analytics.states import generate_vector_state
from tqdm.notebook import tqdm
import pandas as pd
import patoolib
import os
import shutil

# Funkcje do wyciągania feature'ów

In [None]:
def get_team_data(frame, team):
    """Flatten one team's state from a parsed frame into a single-level dict.

    Args:
        frame: Mapping for a single frame; ``frame[team]`` must provide
            ``teamName``, ``teamEqVal``, ``alivePlayers``, ``totalUtility``
            and ``players``.
        team: Key of the team inside ``frame`` (``'ct'`` or ``'t'`` in this
            notebook). It is also used as the prefix of every returned key.

    Returns:
        dict with the team-level values under ``<team>Name``, ``<team>EqVal``,
        ``<team>AlivePlayers`` and ``<team>TotalUtility``, followed by one
        ``<team>Player_<i>_<field>`` entry per field for the i-th element of
        ``players``. A ``players`` value of ``None`` yields no player entries.
    """
    # Fields copied verbatim from each player record, split around the one
    # derived field so the resulting key order stays stable.
    fields_before_spotters = (
        'steamID', 'name', 'hp', 'armor', 'activeWeapon', 'totalUtility',
        'isBlinded', 'isAlive', 'isReloading', 'isDucking', 'isAirborne',
    )
    fields_after_spotters = ('hasHelmet', 'hasDefuse')

    team_frame = frame[team]
    team_data = {
        team + 'Name': team_frame['teamName'],
        team + 'EqVal': team_frame['teamEqVal'],
        team + 'AlivePlayers': team_frame['alivePlayers'],
        team + 'TotalUtility': team_frame['totalUtility'],
    }

    # A missing (None) player list is treated as empty instead of raising.
    for i, player in enumerate(team_frame['players'] or []):
        prefix = f'{team}Player_{i}_'
        for field in fields_before_spotters:
            team_data[prefix + field] = player[field]
        # Stored as a count; a missing (None) spotters list counts as zero.
        # NOTE(review): 'spotters' presumably lists who sees this player, not
        # whom the player sees - confirm the 'spottedEnemies' name is intended.
        team_data[prefix + 'spottedEnemies'] = len(player['spotters'] or [])
        for field in fields_after_spotters:
            team_data[prefix + field] = player[field]
    return team_data

def get_frame_data(frame):
    """Build the flat feature dict for one frame.

    Combines the flattened data of the ``'ct'`` and ``'t'`` teams (in that
    order) and appends the frame's ``bombPlanted`` and ``bombsite`` values.
    """
    features = {}
    for side in ('ct', 't'):
        features.update(get_team_data(frame, side))
    features['bombPlanted'] = frame['bombPlanted']
    features['bombsite'] = frame['bombsite']
    return features

In [None]:
def get_match_data(data, filename):
    """Convert one parsed demo into a DataFrame with one row per frame.

    Args:
        data: Parsed demo mapping; reads ``gameRounds`` (each round providing
            ``frames``, ``roundNum`` and ``winningSide``) and ``mapName``.
        filename: Name of the archive the demo came from. The text before the
            first ``'_'``, with ``'-'`` replaced by spaces, becomes the
            ``match`` column.

    Returns:
        pandas.DataFrame holding the per-frame features plus ``roundNum``,
        ``winningSide``, ``map`` and ``match``. A round list or frame list
        that is ``None`` is treated as empty, so a demo without frames yields
        an empty frame instead of raising.
    """
    rows = []
    for round_ in data['gameRounds'] or []:
        for frame in round_['frames'] or []:
            row = get_frame_data(frame)
            # Tag every frame with the round it belongs to and its outcome.
            row['roundNum'] = round_['roundNum']
            row['winningSide'] = round_['winningSide']
            rows.append(row)
    res = pd.DataFrame(rows)
    res['map'] = data['mapName']
    res['match'] = filename.replace('-', ' ').split('_')[0]
    return res

# Konfiguracja folderów

In [None]:
# Directory holding the demo archives.
directory = "Demka"
# Scratch directory for the match currently being processed.
temp_directory = "csgo_tmp"

In [None]:
# Remove a scratch directory left over from a previous run, if there is one.
if os.path.exists(temp_directory):
    shutil.rmtree(temp_directory)

# Przejście po meczach

In [None]:
# Walk every archive in `directory`, parse each demo it contains and collect
# one DataFrame per demo in `all_matches`.
all_matches = []
for file in tqdm(os.listdir(directory), desc="Match", leave=True, position=0):
    try:
        # Each archive is unpacked into the scratch directory on its own.
        patoolib.extract_archive(os.path.join(directory, file), outdir=temp_directory)
        for demo in tqdm(os.listdir(temp_directory), desc="Map", leave=False):
            # Strip the real extension rather than assuming it is 4 characters.
            demo_id = os.path.splitext(demo)[0]
            demo_parser = DemoParser(
                demofile=os.path.join(temp_directory, demo),
                demo_id=demo_id,
                parse_rate=32,
                trade_time=5,
                buy_style="hltv",
            )
            parsed_demo = demo_parser.parse()
            match_df = get_match_data(parsed_demo, file)
            all_matches.append(match_df)
            # Delete the `<demo_id>.json` file from the working directory
            # (presumably written by the parser); the data is already in memory.
            os.remove(demo_id + ".json")
    finally:
        # Always clear the scratch directory, even when extraction or parsing
        # fails, so demos of one match never leak into the next one.
        shutil.rmtree(temp_directory, ignore_errors=True)

In [None]:
# Stack the per-demo frames into one table and save it as a gzip-compressed
# parquet file.
merged_df = pd.concat(all_matches)
merged_df.to_parquet('player_data.parquet.gzip', compression='gzip')

In [None]:
# Reload the saved parquet file and display the resulting frame.
df = pd.read_parquet('player_data.parquet.gzip')
df

# Check danych

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics, transposed so each described column becomes a row.
df.describe().T

In [None]:
# Rows where the 't' team's player slot 2 has no steamID.
df.loc[df.tPlayer_2_steamID.isna()]

Braki danych, bo najpewniej gracze wyszli z serwera po przegranym meczu (przynajmniej u mnie tak było).