# Załadowanie paczek

In [1]:
import json
import os

import numpy as np
import pandas as pd
from awpy.analytics.nav import find_closest_area
from awpy.data import PLACE_DIST_MATRIX, NAV
from sqlalchemy import create_engine
from tqdm import tqdm

# Read parquet

In [2]:
# Load the game-state table from the local parquet file; every later cell
# in this notebook reads from or extends this `states` frame.
states = pd.read_parquet("data/states.parquet")
states

Unnamed: 0,filename,mapName,ctBuyType,tBuyType,winningSide,matchID,roundNum,seconds,ctAlivePlayers,ctEqVal,...,tPlayer_4_isAlive,tPlayer_4_lastPlaceName,tPlayer_4_ID,tPlayer_5_spotters,tPlayer_5_activeWeapon,tPlayer_5_mainWeapon,tPlayer_5_secondaryWeapon,tPlayer_5_isAlive,tPlayer_5_lastPlaceName,tPlayer_5_ID
0,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.148438,5,3700,...,1,TSpawn,6,[],Knife,Glock-18,Glock-18,1,TSpawn,9
1,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.398438,5,3700,...,1,TSpawn,6,[],Knife,Glock-18,Glock-18,1,TSpawn,9
2,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.648438,5,3700,...,1,TSpawn,6,[],Knife,Glock-18,Glock-18,1,TSpawn,9
3,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.898438,5,3700,...,1,TSpawn,6,[],Knife,Glock-18,Glock-18,1,TSpawn,9
4,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,1.148438,5,3700,...,1,TSpawn,6,[],Knife,Glock-18,Glock-18,1,TSpawn,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186232,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,19.765625,1,30150,...,0,BombsiteA,180,[],,,,0,BombsiteA,178
3186233,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.015625,1,30150,...,0,BombsiteA,180,[],,,,0,BombsiteA,178
3186234,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.265625,1,30150,...,0,BombsiteA,180,[],,,,0,BombsiteA,178
3186235,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.515625,1,30150,...,0,BombsiteA,180,[],,,,0,BombsiteA,178


In [3]:
states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3186237 entries, 0 to 3186236
Columns: 138 entries, filename to tPlayer_5_ID
dtypes: float64(34), int64(48), object(56)
memory usage: 3.3+ GB


## PLACE_DIST

In [4]:
PLACE_DIST_MATRIX.keys()

dict_keys(['de_ancient', 'de_dust2', 'de_inferno', 'de_mirage', 'de_nuke', 'de_overpass', 'de_vertigo'])

In [5]:
PLACE_DIST_MATRIX['de_mirage'].keys()

dict_keys(['CTSpawn', 'BombsiteA', 'TRamp', 'TicketBooth', 'TopofMid', '', 'Shop', 'Apartments', 'BombsiteB', 'PalaceAlley', 'TSpawn', 'Catwalk', 'House', 'SnipersNest', 'Jungle', 'Tunnel', 'BackAlley', 'Middle', 'PalaceInterior', 'TunnelStairs', 'Ladder', 'Stairs', 'SideAlley', 'Scaffolding', 'Truck', 'Connector', 'Balcony'])

# Modyfikacje

In [6]:
# Database engine used by the SQL cells below.
# The connection URL (which embeds credentials) can be supplied through the
# CSGO_DB_URL environment variable so that real secrets never have to be
# committed with the notebook. The fallback is the original local
# development URL, kept so existing setups keep working unchanged.
db_url = os.environ.get(
    'CSGO_DB_URL',
    'mysql+mysqlconnector://root:password@localhost/CSGOAnalysis?allow_local_infile=1',
)
db_con = create_engine(db_url)
db_con

Engine(mysql+mysqlconnector://root:***@localhost/CSGOAnalysis?allow_local_infile=1)

## Odległości do teammate'ów

In [7]:
def get_dist(mapName, team, spots, areAlive, positions):
    """Return (mean, min) of the pairwise place distances between alive teammates.

    Parameters
    ----------
    mapName : key into PLACE_DIST_MATRIX / NAV.
    team : accepted for the caller's convenience; not used in the computation.
    spots : last known place name of each player.
    areAlive : truthy/falsy alive flag of each player.
    positions : [x, y, z] of each player, used only when the place name is
        not a key of PLACE_DIST_MATRIX[mapName].

    The three sequences are walked in lockstep with ``zip``, so iteration
    stops at the shortest one.

    Returns
    -------
    (mean, min) over the finite 'geodesic' / 'median_dist' values of every
    ordered pair of alive players, or (9000, 9000) when fewer than two
    players are alive or no finite distance exists.
    """
    fallback = (9000, 9000)

    alive_places = []
    for place, xyz, alive in zip(spots, positions, areAlive):
        if not alive:
            continue
        if place not in PLACE_DIST_MATRIX[mapName]:
            # Unknown place name: fall back to the name of the closest nav area.
            nearest_id = find_closest_area(mapName, xyz)['areaId']
            place = NAV[mapName][nearest_id]["areaName"]
        alive_places.append(place)

    if len(alive_places) < 2:
        return fallback

    # Every ordered pair (a, b) with a != b; both directions are included.
    pairwise = []
    for a, src in enumerate(alive_places):
        for b, dst in enumerate(alive_places):
            if a == b:
                continue
            pairwise.append(PLACE_DIST_MATRIX[mapName][src][dst]['geodesic']['median_dist'])

    finite = np.array(pairwise)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return fallback
    return np.mean(finite), np.min(finite)

# Teammate-distance features, one two-column frame per team.
#
# All five alive flags must be passed, in the same player order as the place
# names and positions: get_dist zips the three sequences together, so a
# missing flag shifts every flag onto the wrong player and silently drops
# the last player from the computation.
dist_parts = []
for team in tqdm(['ct', 't'], leave=True, position=0):
    place_cols = [states[f'{team}Player_{i}_lastPlaceName'] for i in range(1, 6)]
    alive_cols = [states[f'{team}Player_{i}_isAlive'] for i in range(1, 6)]
    # Flattened as p1_x, p1_y, p1_z, p2_x, ... so each player owns 3 consecutive values.
    coord_cols = [states[f'{team}Player_{i}_{axis}'] for i in range(1, 6) for axis in 'xyz']

    # Row layout: [0] mapName, [1:6] places, [6:11] alive flags, [11:26] coordinates.
    data = [
        get_dist(row[0], team,
                 list(row[1:6]),
                 list(row[6:11]),
                 [list(row[k:k + 3]) for k in range(11, 26, 3)])
        for row in zip(states['mapName'], *place_cols, *alive_cols, *coord_cols)
    ]
    part = pd.DataFrame(data, columns=[f'{team}_meanDist', f'{team}_minDist'])
    dist_parts.append(part)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [06:48<00:00, 204.23s/it]


In [8]:
dist_parts[0]

Unnamed: 0,ct_meanDist,ct_minDist
0,495.2346,495.2346
1,495.2346,495.2346
2,495.2346,495.2346
3,495.2346,495.2346
4,495.2346,495.2346
...,...,...
3186232,9000.0000,9000.0000
3186233,9000.0000,9000.0000
3186234,9000.0000,9000.0000
3186235,9000.0000,9000.0000


## Kille

In [9]:
# Running kill totals per attacker within a match.
# The inner query counts eliminations per (attackerID, matchID, roundNum),
# ignoring rows without an attacker; the outer window sum accumulates those
# counts per attacker and match in round order. Only rounds in which the
# attacker has at least one elimination produce a row.
with db_con.connect() as connection:
    kills = pd.read_sql("""
    SELECT k.attackerID, matchID, roundNum, sum(kills) over(partition by k.attackerID, matchID order by roundNum) AS growing_kills
    FROM (
        SELECT attackerID, matchID, roundNum, count(*) as kills
        FROM csgoanalysis.elimination
        WHERE attackerID is NOT NULL
        GROUP BY attackerID, matchID, roundNum
    ) k""", connection)
kills

Unnamed: 0,attackerID,matchID,roundNum,growing_kills
0,1,1,1,1.0
1,1,1,2,3.0
2,1,1,3,4.0
3,1,1,5,5.0
4,1,1,7,6.0
...,...,...,...,...
39196,232,316,1,1.0
39197,232,316,5,2.0
39198,232,316,10,3.0
39199,232,316,17,4.0


In [10]:
# Shift every running total to the following round: after the merge on
# roundNum below, a state row in round r carries the kills accumulated
# through round r - 1 rather than through round r itself.
# NOTE: this cell is not idempotent - running it again shifts by another round.
kills["roundNum"] = kills["roundNum"] + 1
kills

Unnamed: 0,attackerID,matchID,roundNum,growing_kills
0,1,1,2,1.0
1,1,1,3,3.0
2,1,1,4,4.0
3,1,1,6,5.0
4,1,1,8,6.0
...,...,...,...,...
39196,232,316,2,1.0
39197,232,316,6,2.0
39198,232,316,11,3.0
39199,232,316,18,4.0


In [11]:
# Attach the running kill total of each of the ten player slots to `states`.
# Columns coming from `kills` that clash with an existing column name get
# the suffix "_<team>_<i>"; left-hand columns are never renamed.
kill_keys = ['attackerID', 'matchID', 'roundNum']
for team in tqdm(['ct', 't']):
    for i in range(1, 6):
        player_keys = [f'{team}Player_{i}_ID', 'matchID', 'roundNum']
        states = states.merge(
            kills,
            how='left',
            left_on=player_keys,
            right_on=kill_keys,
            suffixes=(None, f"_{team}_{i}"),
        )
states

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:38<00:00, 49.44s/it]


Unnamed: 0,filename,mapName,ctBuyType,tBuyType,winningSide,matchID,roundNum,seconds,ctAlivePlayers,ctEqVal,...,attackerID_t_1,growing_kills_t_1,attackerID_t_2,growing_kills_t_2,attackerID_t_3,growing_kills_t_3,attackerID_t_4,growing_kills_t_4,attackerID_t_5,growing_kills_t_5
0,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.148438,5,3700,...,,,,,,,,,,
1,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.398438,5,3700,...,,,,,,,,,,
2,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.648438,5,3700,...,,,,,,,,,,
3,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.898438,5,3700,...,,,,,,,,,,
4,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,1.148438,5,3700,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186232,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,19.765625,1,30150,...,182.0,13.0,,,,,,,,
3186233,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.015625,1,30150,...,182.0,13.0,,,,,,,,
3186234,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.265625,1,30150,...,182.0,13.0,,,,,,,,
3186235,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.515625,1,30150,...,182.0,13.0,,,,,,,,


In [12]:
states.loc[~states.growing_kills.isna(), 'growing_kills']

294         2.0
295         2.0
296         2.0
297         2.0
298         2.0
           ... 
3186232    16.0
3186233    16.0
3186234    16.0
3186235    16.0
3186236    16.0
Name: growing_kills, Length: 1496006, dtype: float64

In [13]:
# The first merge (ct, player 1) had no clashing column names, so its column
# kept the bare name `growing_kills`; rename it to follow the
# growing_kills_<team>_<i> pattern of the other nine columns.
states = states.rename(columns={'growing_kills': 'growing_kills_ct_1'})

In [14]:
# In round 1 every player slot starts with a running total of zero.
first_round = states.roundNum == 1
kill_total_cols = [
    f'growing_kills_{team}_{i}'
    for team in ['ct', 't']
    for i in range(1, 6)
]
states.loc[first_round, kill_total_cols] = 0

In [15]:
# The attackerID* columns were only needed as merge keys; remove them.
is_attacker_col = states.columns.str.startswith('attackerID')
attacker_cols = states.columns[is_attacker_col]
states = states.drop(columns=attacker_cols)
states

Unnamed: 0,filename,mapName,ctBuyType,tBuyType,winningSide,matchID,roundNum,seconds,ctAlivePlayers,ctEqVal,...,growing_kills_ct_1,growing_kills_ct_2,growing_kills_ct_3,growing_kills_ct_4,growing_kills_ct_5,growing_kills_t_1,growing_kills_t_2,growing_kills_t_3,growing_kills_t_4,growing_kills_t_5
0,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.148438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.398438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.648438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.898438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,1.148438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186232,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,19.765625,1,30150,...,16.0,21.0,23.0,,,13.0,,,,
3186233,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.015625,1,30150,...,16.0,21.0,23.0,,,13.0,,,,
3186234,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.265625,1,30150,...,16.0,21.0,23.0,,,13.0,,,,
3186235,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.515625,1,30150,...,16.0,21.0,23.0,,,13.0,,,,


In [16]:
# Carry each running kill total forward over the rows that had no match in
# `kills`. Uses DataFrame.ffill() instead of the deprecated
# fillna(method='ffill'), and fills all ten columns in one call.
kill_total_cols = [
    f'growing_kills_{team}_{i}'
    for team in ['ct', 't']
    for i in range(1, 6)
]
states[kill_total_cols] = states[kill_total_cols].ffill()

## Aktualny wynik

In [17]:
# Read the tScore / ctScore columns of every (matchID, roundNum) row of the
# round table; they are merged onto `states` in the next cell.
with db_con.connect() as connection:
    rounds = pd.read_sql("""
    SELECT matchID, roundNum, tScore, ctScore
    FROM csgoanalysis.round""", connection)
rounds

Unnamed: 0,matchID,roundNum,tScore,ctScore
0,1,1,0,0
1,1,2,0,1
2,1,3,1,1
3,1,4,1,2
4,1,5,1,3
...,...,...,...,...
8798,333,20,7,12
8799,333,21,7,13
8800,333,22,7,14
8801,333,23,8,14


In [18]:
# Attach the score of each round to every state row.
# A left join keeps every row of `states` in its original order, and
# validate='many_to_one' raises if `rounds` has duplicate (matchID, roundNum)
# keys. Both matter because frames computed from `states` row by row are
# later concatenated back onto it positionally, so the row count and order
# must not change here. Rounds missing from `rounds` yield NaN scores
# instead of silently dropping the affected rows.
states = states.merge(
    rounds,
    how='left',
    on=['matchID', 'roundNum'],
    validate='many_to_one',
)
states

Unnamed: 0,filename,mapName,ctBuyType,tBuyType,winningSide,matchID,roundNum,seconds,ctAlivePlayers,ctEqVal,...,growing_kills_ct_3,growing_kills_ct_4,growing_kills_ct_5,growing_kills_t_1,growing_kills_t_2,growing_kills_t_3,growing_kills_t_4,growing_kills_t_5,tScore,ctScore
0,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.148438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.398438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.648438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.898438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,1.148438,5,3700,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186232,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,19.765625,1,30150,...,23.0,11.0,15.0,13.0,16.0,16.0,12.0,9.0,8,15
3186233,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.015625,1,30150,...,23.0,11.0,15.0,13.0,16.0,16.0,12.0,9.0,8,15
3186234,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.265625,1,30150,...,23.0,11.0,15.0,13.0,16.0,16.0,12.0,9.0,8,15
3186235,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.515625,1,30150,...,23.0,11.0,15.0,13.0,16.0,16.0,12.0,9.0,8,15


## Przejęcie mapy

In [19]:
def get_place(isAlive, mapName, lastPlace, position):
    """Resolve the place name a player currently occupies.

    Returns None for a dead player. For an alive player, returns `lastPlace`
    when it is a key of PLACE_DIST_MATRIX[mapName]; otherwise returns the
    "areaName" of the nav area closest to `position` ([x, y, z]).
    """
    if not isAlive:
        return None
    if lastPlace in PLACE_DIST_MATRIX[mapName]:
        return lastPlace
    nearest_id = find_closest_area(mapName, position)['areaId']
    return NAV[mapName][nearest_id]["areaName"]

In [20]:
# Map-control feature: for every state row, the share of named places each
# side currently "holds". The held sets (ct_areas / t_areas) persist from
# row to row and are only reset when roundNum differs from the previous
# row's roundNum (the shifted series supplies 0 for the very first row).
# NOTE(review): the reset keys on roundNum alone - two consecutive rows from
# different matches with the same roundNum would not trigger it; confirm
# that cannot occur in this data.
data = []
for mapName, roundNum, lastRoundNum, \
    ct_spot_1, ct_x_1, ct_y_1, ct_z_1, ct_alive_1, \
    ct_spot_2, ct_x_2, ct_y_2, ct_z_2, ct_alive_2, \
    ct_spot_3, ct_x_3, ct_y_3, ct_z_3, ct_alive_3, \
    ct_spot_4, ct_x_4, ct_y_4, ct_z_4, ct_alive_4, \
    ct_spot_5, ct_x_5, ct_y_5, ct_z_5, ct_alive_5, \
    t_spot_1, t_x_1, t_y_1, t_z_1, t_alive_1, \
    t_spot_2, t_x_2, t_y_2, t_z_2, t_alive_2, \
    t_spot_3, t_x_3, t_y_3, t_z_3, t_alive_3, \
    t_spot_4, t_x_4, t_y_4, t_z_4, t_alive_4, \
    t_spot_5, t_x_5, t_y_5, t_z_5, t_alive_5 in tqdm(zip(states['mapName'], states['roundNum'], states['roundNum'].shift(fill_value=0),
        states['ctPlayer_1_lastPlaceName'], states['ctPlayer_1_x'], states['ctPlayer_1_y'], states['ctPlayer_1_z'], states['ctPlayer_1_isAlive'],
        states['ctPlayer_2_lastPlaceName'], states['ctPlayer_2_x'], states['ctPlayer_2_y'], states['ctPlayer_2_z'], states['ctPlayer_2_isAlive'],
        states['ctPlayer_3_lastPlaceName'], states['ctPlayer_3_x'], states['ctPlayer_3_y'], states['ctPlayer_3_z'], states['ctPlayer_3_isAlive'],
        states['ctPlayer_4_lastPlaceName'], states['ctPlayer_4_x'], states['ctPlayer_4_y'], states['ctPlayer_4_z'], states['ctPlayer_4_isAlive'],
        states['ctPlayer_5_lastPlaceName'], states['ctPlayer_5_x'], states['ctPlayer_5_y'], states['ctPlayer_5_z'], states['ctPlayer_5_isAlive'],
        states['tPlayer_1_lastPlaceName'], states['tPlayer_1_x'], states['tPlayer_1_y'], states['tPlayer_1_z'], states['tPlayer_1_isAlive'],
        states['tPlayer_2_lastPlaceName'], states['tPlayer_2_x'], states['tPlayer_2_y'], states['tPlayer_2_z'], states['tPlayer_2_isAlive'],
        states['tPlayer_3_lastPlaceName'], states['tPlayer_3_x'], states['tPlayer_3_y'], states['tPlayer_3_z'], states['tPlayer_3_isAlive'],
        states['tPlayer_4_lastPlaceName'], states['tPlayer_4_x'], states['tPlayer_4_y'], states['tPlayer_4_z'], states['tPlayer_4_isAlive'],
        states['tPlayer_5_lastPlaceName'], states['tPlayer_5_x'], states['tPlayer_5_y'], states['tPlayer_5_z'], states['tPlayer_5_isAlive']), 
                                                     total=len(states.index)):
    # New round: both sides start with no held places.
    if lastRoundNum != roundNum:
        ct_areas = set()
        t_areas = set()
        
    # Places occupied right now by each side; get_place yields None for a
    # dead player, so None may be a member of either set.
    curr_ct_areas = {
        get_place(ct_alive_1, mapName, ct_spot_1, [ct_x_1, ct_y_1, ct_z_1]),
        get_place(ct_alive_2, mapName, ct_spot_2, [ct_x_2, ct_y_2, ct_z_2]),
        get_place(ct_alive_3, mapName, ct_spot_3, [ct_x_3, ct_y_3, ct_z_3]),
        get_place(ct_alive_4, mapName, ct_spot_4, [ct_x_4, ct_y_4, ct_z_4]),
        get_place(ct_alive_5, mapName, ct_spot_5, [ct_x_5, ct_y_5, ct_z_5])
    }
    curr_t_areas = {
        get_place(t_alive_1, mapName, t_spot_1, [t_x_1, t_y_1, t_z_1]),
        get_place(t_alive_2, mapName, t_spot_2, [t_x_2, t_y_2, t_z_2]),
        get_place(t_alive_3, mapName, t_spot_3, [t_x_3, t_y_3, t_z_3]),
        get_place(t_alive_4, mapName, t_spot_4, [t_x_4, t_y_4, t_z_4]),
        get_place(t_alive_5, mapName, t_spot_5, [t_x_5, t_y_5, t_z_5])
    }

    # A place occupied by CT only becomes CT-held; any place a CT stands in
    # stops being T-held, even when a T is standing there as well.
    for area in curr_ct_areas:
        if area not in curr_t_areas:
            ct_areas.add(area)
        if area in t_areas:
            t_areas.remove(area)

    # Mirror image for the T side.
    for area in curr_t_areas:
        if area not in curr_ct_areas:
            t_areas.add(area)
        if area in ct_areas:
            ct_areas.remove(area)
    
    # Drop the None placeholder that dead players may have introduced.
    ct_areas.discard(None)
    t_areas.discard(None)
    # Share of held places relative to every key of the map's place matrix.
    ct_percentage = len(ct_areas)/len(PLACE_DIST_MATRIX[mapName].keys())
    t_percentage = len(t_areas)/len(PLACE_DIST_MATRIX[mapName].keys()) 
    data.append((ct_percentage, t_percentage))
    
conquer = pd.DataFrame.from_records(data, columns=['ct_conquerMap', 't_conquerMap'])

100%|█████████████████████████████████████████████████████████████████████| 3186237/3186237 [05:15<00:00, 10095.70it/s]


In [21]:
conquer

Unnamed: 0,ct_conquerMap,t_conquerMap
0,0.037037,0.037037
1,0.037037,0.037037
2,0.037037,0.037037
3,0.037037,0.037037
4,0.037037,0.037037
...,...,...
3186232,0.333333,0.259259
3186233,0.333333,0.259259
3186234,0.333333,0.259259
3186235,0.333333,0.259259


## Zgrupowanie graczy

In [22]:
def get_groupped_players(mapName, spots, alive, positions):
    """Return the largest number of alive players sharing one place.

    `spots`, `alive` and `positions` are walked in lockstep with ``zip``.
    A place name that is not a key of PLACE_DIST_MATRIX[mapName] is replaced
    by the "areaName" of the nav area closest to the player's [x, y, z]
    position. Returns 0 when nobody is alive.
    """
    per_place = {}
    for place, is_alive, xyz in zip(spots, alive, positions):
        if not is_alive:
            continue
        if place not in PLACE_DIST_MATRIX[mapName]:
            nearest_id = find_closest_area(mapName, xyz)['areaId']
            place = NAV[mapName][nearest_id]["areaName"]
        per_place[place] = per_place.get(place, 0) + 1
    return max(per_place.values(), default=0)

In [23]:
# Largest same-place group of alive players, one single-column frame per team.
groupped_parts = []
for team in tqdm(['ct', 't'], leave=True, position=0):
    place_cols = [states[f'{team}Player_{i}_lastPlaceName'] for i in range(1, 6)]
    alive_cols = [states[f'{team}Player_{i}_isAlive'] for i in range(1, 6)]
    # Flattened as p1_x, p1_y, p1_z, p2_x, ... so each player owns 3 consecutive values.
    coord_cols = [states[f'{team}Player_{i}_{axis}'] for i in range(1, 6) for axis in 'xyz']

    # Row layout: [0] mapName, [1:6] places, [6:11] alive flags, [11:26] coordinates.
    data = [
        get_groupped_players(
            mapName=row[0],
            spots=list(row[1:6]),
            alive=list(row[6:11]),
            positions=[list(row[k:k + 3]) for k in range(11, 26, 3)],
        )
        for row in zip(states['mapName'], *place_cols, *alive_cols, *coord_cols)
    ]
    part = pd.DataFrame(data, columns=[f'{team}_grouppedPlayers'])
    groupped_parts.append(part)

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [04:59<00:00, 149.99s/it]


## Odległości do bombsite'u

In [24]:
def get_dist(isAlive, mapName, lastPlace, position):
    """Return a player's (distToA, distToB) place distances to the bombsites.

    Returns (None, None) for a dead player. For an alive player the place is
    `lastPlace` when it is a key of PLACE_DIST_MATRIX[mapName], otherwise the
    "areaName" of the nav area closest to `position` ([x, y, z]); the result
    holds the 'geodesic' / 'median_dist' values from that place to
    'BombsiteA' and 'BombsiteB'.

    Note: this definition rebinds the name `get_dist`, replacing the
    teammate-distance helper of the same name defined earlier in the notebook.
    """
    if not isAlive:
        return None, None

    place = lastPlace
    if place not in PLACE_DIST_MATRIX[mapName]:
        nearest_id = find_closest_area(mapName, position)['areaId']
        place = NAV[mapName][nearest_id]["areaName"]

    from_place = PLACE_DIST_MATRIX[mapName][place]
    to_a = from_place['BombsiteA']['geodesic']['median_dist']
    to_b = from_place['BombsiteB']['geodesic']['median_dist']
    return to_a, to_b

In [25]:
# Per-player bombsite distances: one two-column frame per player slot,
# appended in the order ct 1..5 followed by t 1..5.
all_parts = []
for team in tqdm(['ct', 't'], leave=True, position=0):
    for i in tqdm(range(1, 6), leave=True, position=1):
        player = f'{team}Player_{i}'
        player_rows = zip(
            states[f'{player}_isAlive'],
            states['mapName'],
            states[f'{player}_lastPlaceName'],
            states[f'{player}_x'],
            states[f'{player}_y'],
            states[f'{player}_z'],
        )
        data = [
            get_dist(isAlive, mapName, lastPlace, [x, y, z])
            for isAlive, mapName, lastPlace, x, y, z in player_rows
        ]
        part = pd.DataFrame.from_records(
            data,
            columns=[f'{player}_distToA', f'{player}_distToB'],
        )
        all_parts.append(part)

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]
  0%|                                                                                            | 0/5 [00:00<?, ?it/s][A
 20%|████████████████▊                                                                   | 1/5 [00:37<02:28, 37.11s/it][A
 40%|█████████████████████████████████▌                                                  | 2/5 [01:12<01:47, 35.94s/it][A
 60%|██████████████████████████████████████████████████▍                                 | 3/5 [01:48<01:12, 36.19s/it][A
 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:25<00:36, 36.39s/it][A
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:58<00:00, 35.67s/it][A
 50%|█████████████████████████████████████████▌                                         | 1/2 [02:58<02:58, 178.34s/it]
  0%|                 

## Widziani gracze

In [26]:
def get_spotted_players(*args):
    """Count the distinct entries across several JSON-encoded collections.

    Each positional argument is a JSON string; it is decoded with
    ``json.loads`` and its items are merged into a single set. Returns the
    size of that set (0 when called without arguments).
    """
    decoded = (json.loads(raw) for raw in args)
    return len(set().union(*decoded))

In [27]:
# Distinct entries across the five players' `spotters` lists, one
# single-column frame per team.
spotters_parts = []
for team in tqdm(['ct', 't'], leave=True, position=0):
    spotter_cols = [states[f'{team}Player_{i}_spotters'] for i in range(1, 6)]
    data = [get_spotted_players(*row) for row in zip(*spotter_cols)]
    part = pd.DataFrame(data, columns=[f'{team}_spottedPlayers'])
    spotters_parts.append(part)

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:29<00:00, 44.94s/it]


# Łączenie i generowanie

In [28]:
# Drop the raw per-player columns that the derived features replace, then
# append every derived frame column-wise. The concat aligns on the index, so
# each part must have the same row index as `states`.
raw_suffixes = ("_x", "_y", "_z", "_ID", "_lastPlaceName", "_spotters")
raw_cols = states.columns[states.columns.str.endswith(raw_suffixes)]
states.drop(columns=raw_cols, inplace=True)

feature_frames = [states, *dist_parts, *spotters_parts, *all_parts, *groupped_parts, conquer]
states = pd.concat(feature_frames, axis=1)
states.columns.values

array(['filename', 'mapName', 'ctBuyType', 'tBuyType', 'winningSide',
       'matchID', 'roundNum', 'seconds', 'ctAlivePlayers', 'ctEqVal',
       'tAlivePlayers', 'tEqVal', 'bombsite', 'ct_hp', 't_hp', 'ct_armor',
       't_armor', 'ct_hasHelmet', 't_hasHelmet', 'ct_DecoyGrenade',
       't_DecoyGrenade', 'ct_Flashbang', 't_Flashbang', 'ct_HEGrenade',
       't_HEGrenade', 'ct_SmokeGrenade', 't_SmokeGrenade',
       'ct_fireGrenades', 't_fireGrenades', 'ct_isBlinded', 't_isBlinded',
       'ct_hasDefuse', 't_hasBomb', 'ct_isDefusing', 't_isPlanting',
       'ctPlayer_1_activeWeapon', 'ctPlayer_1_mainWeapon',
       'ctPlayer_1_secondaryWeapon', 'ctPlayer_1_isAlive',
       'ctPlayer_2_activeWeapon', 'ctPlayer_2_mainWeapon',
       'ctPlayer_2_secondaryWeapon', 'ctPlayer_2_isAlive',
       'ctPlayer_3_activeWeapon', 'ctPlayer_3_mainWeapon',
       'ctPlayer_3_secondaryWeapon', 'ctPlayer_3_isAlive',
       'ctPlayer_4_activeWeapon', 'ctPlayer_4_mainWeapon',
       'ctPlayer_4_secondaryW

In [29]:
def prepare_features(unprep_df):
    """Derive team-level features from the per-player state columns.

    The passed frame is modified in place (new columns are added to it and
    ``bombsite`` is rewritten); the returned frame is a new object.

    Columns added:

    * ``{team}_{col}_{Category}`` - number of the team's five players whose
      ``{team}Player_{i}_{col}`` value belongs to the weapon category, for
      ``col`` in ``activeWeapon`` / ``mainWeapon`` / ``secondaryWeapon``.
    * ``winningSideCT`` - 1 where ``winningSide == 'CT'``, otherwise 0.
    * ``{team}MinDistTo{A,B}`` / ``{team}MeanDistTo{A,B}`` - row-wise min / mean
      of the five ``{team}Player_{i}_distTo{A,B}`` columns.
    * ``{team}MeanKills`` - sum of ``growing_kills_{team}_{i} * isAlive``
      divided by the sum of the team's ``isAlive`` flags.

    Parameters
    ----------
    unprep_df : pandas.DataFrame
        Must contain ``winningSide``, ``bombsite``, ``mapName`` and, for
        ``team`` in ``ct``/``t`` and ``i`` in 1..5, the columns
        ``{team}Player_{i}_activeWeapon``, ``..._mainWeapon``,
        ``..._secondaryWeapon``, ``..._isAlive``, ``..._distToA``,
        ``..._distToB`` and ``growing_kills_{team}_{i}``.

    Returns
    -------
    pandas.DataFrame
        The augmented frame plus one-hot columns for ``mapName`` and for
        ``bombsite`` (prefixed ``bombsite``). NaN values in the T-side
        distance columns are replaced by 9000 and NaN ``tMeanKills`` by -1.
    """
    teams = ('ct', 't')
    players = range(1, 6)

    pistols = {'Glock-18', 'USP-S', 'P2000', 'P250', 'Dual Berettas'}
    enhanced_pistols = {'CZ75 Auto', 'Five-SeveN', 'Tec-9', 'R8 Revolver'}
    deagle = 'Desert Eagle'
    shotguns = {'MAG-7', 'XM1014', 'Nova', 'Sawed-Off'}
    machine_guns = {'M249', 'Negev'}
    smgs = {'MP9', 'MP7', 'MP5-SD', 'MAC-10', 'UMP-45', 'PP-Bizon', 'P90'}
    weaker_rifles = {'Galil AR', 'SSG 08', 'FAMAS'}
    lunet_rifles = {'SG 553', 'AUG'}
    sniper_rifle = {'G3SG1', 'SCAR-20', 'AWP'}
    assault_rifle = {'M4A1', 'M4A4', 'AK-47'}
    others = {'Zeus x27', 'Knife', 'C4', 'Molotov', 'Incendiary Grenade',
              'Smoke Grenade', 'Flashbang', 'Decoy Grenade', 'HE Grenade'}
    # Shotguns and machine guns have no category of their own; they only
    # count towards "Others" (which is produced for activeWeapon only).
    others.update(shotguns, machine_guns)

    # Column-name suffix -> weapons in that category. Dict order defines the
    # order in which the output columns are created.
    sidearm_categories = {
        'Pistol': pistols,
        'EnhancedPistols': enhanced_pistols,
        'Deagle': {deagle},
    }
    primary_categories = {
        'SMG': smgs,
        'WeakAssaultRifle': weaker_rifles,
        'LunetRifle': lunet_rifles,
        'SniperRifle': sniper_rifle,
        'AssaultRifle': assault_rifle,
    }

    def _count_by_category(col, categories):
        """Write, per team and category, how many of the five players hold a matching weapon in ``col``."""
        for team in teams:
            held = [unprep_df[f"{team}Player_{i}_{col}"] for i in players]
            for suffix, weapons in categories.items():
                # Every category is summed over all five players. The previous
                # code assigned (=) instead of accumulating (+=) for
                # EnhancedPistols, so only player 5 was ever reflected.
                unprep_df[f"{team}_{col}_{suffix}"] = sum(
                    weapon.isin(weapons).astype(int) for weapon in held
                )

    _count_by_category('activeWeapon', {**sidearm_categories, **primary_categories, 'Others': others})
    _count_by_category('mainWeapon', {**sidearm_categories, **primary_categories})
    _count_by_category('secondaryWeapon', sidearm_categories)

    unprep_df['winningSideCT'] = np.where(unprep_df['winningSide'] == 'CT', 1, 0)
    unprep_df['bombsite'] = np.where(unprep_df['bombsite'] == '', 'not_planted', unprep_df['bombsite'])

    # Min columns first, then Mean columns, each as ct-A, ct-B, t-A, t-B.
    for label, reducer in (('Min', 'min'), ('Mean', 'mean')):
        for team in teams:
            for site in ('A', 'B'):
                dist_cols = [f"{team}Player_{i}_distTo{site}" for i in players]
                unprep_df[f"{team}{label}DistTo{site}"] = getattr(unprep_df[dist_cols], reducer)(axis=1)

    for team in teams:
        alive_cols = [f"{team}Player_{i}_isAlive" for i in players]
        kills_of_alive = sum(
            unprep_df[f"growing_kills_{team}_{i}"] * unprep_df[f"{team}Player_{i}_isAlive"]
            for i in players
        )
        # 0 / 0 (no isAlive flag set) yields NaN here.
        unprep_df[f"{team}MeanKills"] = kills_of_alive / unprep_df[alive_cols].sum(axis=1)

    # NOTE(review): only the T-side columns get NaN replacements (here and
    # below); the CT-side equivalents are left untouched - confirm this
    # asymmetry is intended.
    unprep_df['tMeanKills'] = unprep_df['tMeanKills'].fillna(-1)

    unprep_df = pd.concat(
        [unprep_df,
         pd.get_dummies(unprep_df['mapName']),
         pd.get_dummies(unprep_df['bombsite'], prefix='bombsite')],
        axis=1,
    )

    # Applied after the concat, so only the returned frame carries the
    # replacement value; the caller's original frame keeps its NaNs.
    for dist_col in ('tMinDistToA', 'tMinDistToB', 'tMeanDistToA', 'tMeanDistToB'):
        unprep_df[dist_col] = unprep_df[dist_col].fillna(9000)
    return unprep_df

In [30]:
# Run the feature preparation; `states` is rebound to the frame returned by
# prepare_features, then displayed.
states = prepare_features(states)
states

Unnamed: 0,filename,mapName,ctBuyType,tBuyType,winningSide,matchID,roundNum,seconds,ctAlivePlayers,ctEqVal,...,de_ancient,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_vertigo,bombsite_A,bombsite_B,bombsite_not_planted
0,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.148438,5,3700,...,0,0,0,1,0,0,0,0,0,1
1,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.398438,5,3700,...,0,0,0,1,0,0,0,0,0,1
2,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.648438,5,3700,...,0,0,0,1,0,0,0,0,0,1
3,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,0.898438,5,3700,...,0,0,0,1,0,0,0,0,0,1
4,BLAST-Premier-Spring-Final-2022-ence-vs-big-bo...,de_mirage,Full Eco,Full Eco,CT,1,1,1.148438,5,3700,...,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3186232,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,19.765625,1,30150,...,0,0,0,1,0,0,0,1,0,0
3186233,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.015625,1,30150,...,0,0,0,1,0,0,0,1,0,0
3186234,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.265625,1,30150,...,0,0,0,1,0,0,0,1,0,0
3186235,PGL-Major-Antwerp-2022-vitality-vs-outsiders-m...,de_mirage,Full Buy,Semi Buy,CT,333,24,20.515625,1,30150,...,0,0,0,1,0,0,0,1,0,0


In [31]:
# Inspect the ctMeanKills column produced by prepare_features.
# NOTE(review): prepare_features fills NaN only for tMeanKills, so this column
# may still contain NaN - confirm whether that is intended.
states['ctMeanKills']

0           0.0
1           0.0
2           0.0
3           0.0
4           0.0
           ... 
3186232    23.0
3186233    23.0
3186234    23.0
3186235    23.0
3186236    23.0
Name: ctMeanKills, Length: 3186237, dtype: float64

In [32]:
# Sanity check: select the rows where tMeanKills is still NaN; an empty
# result means none remain.
states.loc[states['tMeanKills'].isna(), 'tMeanKills']

Series([], Name: tMeanKills, dtype: float64)

In [33]:
# Drop the per-player columns (matched by suffix) and the growing_kills_*
# counters, which prepare_features has already folded into team-level features.
per_player_suffixes = ("_activeWeapon", "_mainWeapon", "_secondaryWeapon",
                       "_distToA", "_distToB", "_isAlive")
is_per_player = states.columns.str.endswith(per_player_suffixes)
is_kill_counter = states.columns.str.startswith("growing_kills")
states.drop(columns=states.columns[is_per_player | is_kill_counter], inplace=True)
states.columns

Index(['filename', 'mapName', 'ctBuyType', 'tBuyType', 'winningSide',
       'matchID', 'roundNum', 'seconds', 'ctAlivePlayers', 'ctEqVal',
       ...
       'de_ancient', 'de_dust2', 'de_inferno', 'de_mirage', 'de_nuke',
       'de_overpass', 'de_vertigo', 'bombsite_A', 'bombsite_B',
       'bombsite_not_planted'],
      dtype='object', length=108)

In [34]:
# Write the final feature table to a Parquet file.
states.to_parquet("data/states_another_mod.parquet")