In [1]:
import pandas as pd

damages_df = pd.read_csv('toy_dataset/Damages.csv', index_col=0)
player_frames_df = pd.read_csv('toy_dataset/PlayerFrames.csv', index_col=0)
bomb_events_df = pd.read_csv('toy_dataset/BombEvents.csv', index_col=0)
rounds_df = pd.read_csv('toy_dataset/Rounds.csv', index_col=0)
kills_df = pd.read_csv('toy_dataset/Kills.csv', index_col=0)


In [2]:
rounds_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,StartTick,FreezeTimeEnd,EndTick,EndOfficialTick,TScore,CTScore,WinningSide,WinningTeam,LosingTeam,RoundEndReason,CTStartEqVal,CTBuyType,TStartEqVal,TBuyType
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,13232,15794,27867,27867,0,0,CT,SWS Gaming,Santos e-Sports,BombDefused,4350,Pistol,4550,Pistol
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,2,28507,31070,46939,46945,0,1,T,Santos e-Sports,SWS Gaming,TargetBombed,20800,Half Buy,16150,Half Buy
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,3,47585,50138,64531,64534,1,1,T,Santos e-Sports,SWS Gaming,TargetBombed,16350,Half Buy,23800,Full Buy
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,4,65174,67724,81080,81090,2,1,T,Santos e-Sports,SWS Gaming,TargetBombed,6200,Half Buy,25300,Full Buy
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,5,81730,84283,98592,98601,3,1,T,Santos e-Sports,SWS Gaming,TerroristsWin,24150,Full Buy,25400,Full Buy


In [3]:
game_state_cols = ['MatchId', 'MapName', 'RoundNum', 'TicksSinceStart', 'CTStartEqVal', 'TStartEqVal', 'CTAlive', 'TAlive', 'CTTotalHP', 'TTotalHP', 'BombPlantedA', 'BombPlantedB', 'CTClosestA', 'TClosestA', 'CTClosestB', 'TClosestB']

## Remove events before and after the round

In [4]:
# TODO: Check that only the post-round time is getting cut (weird since the endtick and officialendtick are practically the same)

In [5]:
def keep_in_round_time(df):
    # df needs to have "MatchId", "MapName", "RoundNum" columns
    merged_df = df.merge(rounds_df[['MatchId', 'MapName', 'RoundNum', 'FreezeTimeEnd', 'EndTick']], how='left', on=['MatchId', 'MapName', 'RoundNum'], validate='many_to_one')
    merged_dropped_df = merged_df[(merged_df.Tick > merged_df.FreezeTimeEnd) & (merged_df.Tick <= merged_df.EndTick)]
    return merged_dropped_df.drop(['FreezeTimeEnd', 'EndTick'], axis=1)

In [6]:
# m_bomb_events_df = bomb_events_df.merge(rounds_df[['MatchId', 'MapName', 'RoundNum', 'FreezeTimeEnd', 'EndTick']], how='left', on=['MatchId', 'MapName', 'RoundNum'], validate='many_to_one')
# m_bomb_events_df[(m_bomb_events_df.Tick <= m_bomb_events_df.FreezeTimeEnd) | (m_bomb_events_df.Tick > m_bomb_events_df.EndTick)]

In [7]:
damages_df = keep_in_round_time(damages_df)
player_frames_df = keep_in_round_time(player_frames_df)
bomb_events_df = keep_in_round_time(bomb_events_df)
kills_df = keep_in_round_time(kills_df)

## Bomb Plants

In [8]:
bomb_events_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,Second,PlayerSteamId,PlayerName,PlayerTeam,PlayerX,PlayerY,PlayerZ,BombAction,BombSite
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,23419,59.570312,76561198169934019,★ ⑳ MaLLby,Santos e-Sports,-2549.361572,645.96875,480.03125,plant,A
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,27867,94.320312,76561197963277583,★ ㉑ matios,SWS Gaming,-2507.430908,645.319946,480.03125,defuse,A
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,2,41686,82.9375,76561198148561509,★ ㉑ cass1n,Santos e-Sports,-954.525574,-31.595411,96.03125,plant,B
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,3,59274,71.375,76561198169934019,★ ⑳ MaLLby,Santos e-Sports,-1166.488892,-79.727104,98.03125,plant,B
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,4,75832,63.34375,76561198128804839,★ ㉑ voltera,Santos e-Sports,-1166.488892,-73.471153,98.03125,plant,B


In [9]:
bomb_events_df['BombPlantedA'] = (bomb_events_df['BombSite'] == 'A') & (bomb_events_df['BombAction'] == 'plant')
bomb_events_df['BombPlantedB'] = (bomb_events_df['BombSite'] == 'B') & (bomb_events_df['BombAction'] == 'plant')
bomb_events_df['AttackerSide'] = bomb_events_df['BombAction'].map({'plant': 'T', 'defuse': 'CT'})
bomb_events_df['VictimSide'] = bomb_events_df['BombAction'].map({'plant': 'CT', 'defuse': 'T'})
bomb_events_df['AttackerSteamId'] = bomb_events_df['PlayerSteamId']

bomb_state_df = bomb_events_df.drop(columns=['Second', 'PlayerX', 'PlayerY', 'PlayerZ', 'BombSite', 'PlayerName', 'PlayerTeam', 'BombAction', 'PlayerSteamId'])
bomb_state_df.set_index(['MatchId', 'MapName', 'RoundNum', 'Tick'], inplace=True)
bomb_state_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,BombPlantedA,BombPlantedB,AttackerSide,VictimSide,AttackerSteamId
MatchId,MapName,RoundNum,Tick,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Liga-Gamers-Club-2021-Serie-A-April-Cup-santos-vs-sws-bo3,de_overpass,1,23419,True,False,T,CT,76561198169934019
Liga-Gamers-Club-2021-Serie-A-April-Cup-santos-vs-sws-bo3,de_overpass,1,27867,False,False,CT,T,76561197963277583
Liga-Gamers-Club-2021-Serie-A-April-Cup-santos-vs-sws-bo3,de_overpass,2,41686,False,True,T,CT,76561198148561509
Liga-Gamers-Club-2021-Serie-A-April-Cup-santos-vs-sws-bo3,de_overpass,3,59274,False,True,T,CT,76561198169934019
Liga-Gamers-Club-2021-Serie-A-April-Cup-santos-vs-sws-bo3,de_overpass,4,75832,False,True,T,CT,76561198128804839


## Kills and Damage

In [10]:
# Drop C4 kills and damages since those happen when the round ends
kills_df = kills_df[kills_df.Weapon != 'C4']
damages_df = damages_df[damages_df.Weapon != 'C4']

In [11]:
kills_df = kills_df[['MatchId', 'MapName', 'RoundNum', 'Tick', 'AttackerSteamId', 'AttackerSide', 'VictimSteamId', 'VictimSide']]
kills_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,AttackerSteamId,AttackerSide,VictimSteamId,VictimSide
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,20967,7.65612e+16,T,76561198286138224,CT
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21115,7.65612e+16,T,76561198135228658,CT
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21259,7.65612e+16,CT,76561198148561509,T
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21799,7.65612e+16,T,76561198061789000,CT
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,22791,7.65612e+16,T,76561198051070311,CT


In [12]:
damages_df = damages_df[['MatchId', 'MapName', 'RoundNum', 'Tick', 'HpDamageTaken', 'AttackerSteamId', 'AttackerSide', 'VictimSteamId', 'VictimSide']]
damages_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,HpDamageTaken,AttackerSteamId,AttackerSide,VictimSteamId,VictimSide
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,20943,21,7.65612e+16,T,76561198286138224,CT
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,20967,79,7.65612e+16,T,76561198286138224,CT
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21115,100,7.65612e+16,T,76561198135228658,CT
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21259,100,7.65612e+16,CT,76561198148561509,T
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,21759,11,7.65612e+16,T,76561198061789000,CT


In [13]:
# Combine simultaneous instances of damage from the same player to the same player
kills_damage_key = ['MatchId', 'MapName', 'RoundNum', 'Tick', 'AttackerSteamId', 'AttackerSide', 'VictimSteamId', 'VictimSide']
damages_df = damages_df.groupby(kills_damage_key, as_index=False).sum()
damages_df['HpDamageTaken'] = damages_df.HpDamageTaken.apply(lambda x: min([x, 100]))  # Bugged/laggy damages instances > 100 (shouldn't be possible)


In [14]:
# Check that all kills have a corresponding damage event
inner_merge_df = kills_df.merge(damages_df, on=kills_damage_key)

if len(inner_merge_df) != len(kills_df):
    raise ValueError(f'{len(inner_merge_df)} kills have corresponding damage event, {len(kills_df)} kills in total')

### Merging kills into damage

In [15]:
kills_df['IsKill'] = True
damages_df = pd.merge(damages_df, kills_df, how='left', on=kills_damage_key, validate='one_to_one')
damages_df['IsKill'].fillna(False, inplace=True)
damages_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,AttackerSteamId,AttackerSide,VictimSteamId,VictimSide,HpDamageTaken,IsKill
0,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,6431,7.65612e+16,CT,76561198131369187,T,16,False
1,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,6499,7.65612e+16,CT,76561198131369187,T,16,False
2,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,6522,7.65612e+16,CT,76561198131369187,T,20,False
3,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,6543,7.65612e+16,CT,76561198131369187,T,48,True
4,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,6646,7.65612e+16,T,76561198023592320,CT,12,False


In [16]:
# TODO: Make sure this accounts for kills that don't deal damage (suicide/disconnect/fall off on vertigo)

def count_alive_and_damage(round_df):
    round_df.sort_values('Tick', inplace=True)

    # Tally kills to get number of players alive
    round_df['TKilled'] = (round_df.IsKill) & (round_df.VictimSide == 'T')
    round_df['CTKilled'] = (round_df.IsKill) & (round_df.VictimSide == 'CT')
    round_df['TAlive'] = 5 - round_df.TKilled.cumsum()
    round_df['CTAlive'] = 5 - round_df.CTKilled.cumsum()
    
    # Tally damage to get total team HP
    round_df['TDamaged'] = (round_df.VictimSide == 'T') * round_df.HpDamageTaken
    round_df['CTDamaged'] = (round_df.VictimSide == 'CT') * round_df.HpDamageTaken
    round_df['TTotalHp'] = 500 - round_df.TDamaged.cumsum()
    round_df['CTTotalHp'] = 500 - round_df.CTDamaged.cumsum()

    round_df.drop(['IsKill', 'TKilled', 'CTKilled', 'TDamaged', 'CTDamaged', 'HpDamageTaken'], axis=1, inplace=True)
    return round_df

damages_kills_state_df = damages_df.groupby(['MatchId', 'MapName', 'RoundNum']).apply(count_alive_and_damage)
damages_kills_state_df.set_index(['MatchId', 'MapName', 'RoundNum', 'Tick'], inplace=True)

In [17]:
# Make sure data looks good
damages_kills_state_df.describe()

Unnamed: 0,AttackerSteamId,VictimSteamId,TAlive,CTAlive,TTotalHp,CTTotalHp
count,13665.0,13665.0,13665.0,13665.0,13665.0,13665.0
mean,7.65612e+16,7.65612e+16,3.487742,3.672155,294.964727,326.728357
std,96651960.0,107325000.0,1.39,1.325674,142.819717,138.162873
min,7.65612e+16,7.65612e+16,0.0,0.0,-35.0,-34.0
25%,7.65612e+16,7.65612e+16,2.0,3.0,185.0,223.0
50%,7.65612e+16,7.65612e+16,4.0,4.0,304.0,353.0
75%,7.65612e+16,7.65612e+16,5.0,5.0,408.0,448.0
max,7.65612e+16,7.65612e+16,5.0,5.0,500.0,500.0


In [18]:
'DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3', 'de_vertigo', 'RoundNum'

('DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3',
 'de_vertigo',
 'RoundNum')

## Player Positions

In [19]:
player_frames_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,Second,Side,TeamName,PlayerName,PlayerSteamId,X,...,IsFlashed,IsAirborne,IsDucking,IsScoped,IsWalking,EqValue,HasHelmet,HasDefuse,DistToBombsiteA,DistToBombsiteB
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,16046,1.96875,CT,SWS Gaming,★ ㉑ gafolo,76561198135228658,-1850.647949,...,False,True,False,False,False,850,False,False,28,36
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,16046,1.96875,CT,SWS Gaming,★ ㉑ matios,76561197963277583,-1960.10144,...,False,False,False,False,False,900,False,True,13,39
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,16046,1.96875,CT,SWS Gaming,★ ㉑ RICIOLI,76561198061789000,-2513.387451,...,False,False,False,False,False,850,False,False,9,59
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,16046,1.96875,CT,SWS Gaming,★ ㉑ BGalvao90,76561198286138224,-1902.988892,...,False,False,False,False,False,850,False,False,14,40
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,1,16046,1.96875,CT,SWS Gaming,★ ㉑ phx,76561198051070311,-2005.056274,...,False,False,False,False,False,900,False,False,13,41


In [20]:
player_frames_df = player_frames_df[['MatchId', 'MapName', 'PlayerSteamId', 'RoundNum', 'Tick', 'Side', 'HasDefuse', 'DistToBombsiteA', 'DistToBombsiteB']]
player_frames_df.head()

Unnamed: 0,MatchId,MapName,PlayerSteamId,RoundNum,Tick,Side,HasDefuse,DistToBombsiteA,DistToBombsiteB
0,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,76561198135228658,1,16046,CT,False,28,36
1,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,76561197963277583,1,16046,CT,True,13,39
2,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,76561198061789000,1,16046,CT,False,9,59
3,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,76561198286138224,1,16046,CT,False,14,40
4,Liga-Gamers-Club-2021-Serie-A-April-Cup-santos...,de_overpass,76561198051070311,1,16046,CT,False,13,41


In [21]:
player_frames_df.drop('PlayerSteamId', axis=1, inplace=True)
t_player_frames_df = player_frames_df[player_frames_df.Side == 'T'].drop(['Side', 'HasDefuse'], axis=1)  # Only Counter-Terrorists can buy defuse kits
ct_player_frames_df = player_frames_df[player_frames_df.Side == 'CT'].drop('Side', axis=1)

t_frames_df = t_player_frames_df.groupby(['MatchId', 'MapName', 'RoundNum', 'Tick']).aggregate({'DistToBombsiteA': 'min', 'DistToBombsiteB': 'min'})
ct_frames_df = ct_player_frames_df.groupby(['MatchId', 'MapName', 'RoundNum', 'Tick']).aggregate({'DistToBombsiteA': 'min', 'DistToBombsiteB': 'min'})
t_frames_df.columns = ['ClosestDistToA', 'ClosestDistToB']
ct_frames_df.columns = ['ClosestDistToA', 'ClosestDistToB']

def lag_distance(df):
    df.sort_values('Tick', inplace=True)
    df['ChangeClosestDistToA'] = df['ClosestDistToA'] - df['ClosestDistToA'].shift(1)
    df['ChangeClosestDistToB'] = df['ClosestDistToB'] - df['ClosestDistToB'].shift(1)
    df['ChangeClosestDistToA'][0] = 0
    df['ChangeClosestDistToB'][0] = 0
    return df

t_frames_df = t_frames_df.groupby(['MatchId', 'MapName', 'RoundNum']).apply(lag_distance)
ct_frames_df = ct_frames_df.groupby(['MatchId', 'MapName', 'RoundNum']).apply(lag_distance)

position_state_df = pd.merge(t_frames_df, ct_frames_df, 'outer', ['MatchId', 'MapName', 'RoundNum', 'Tick'], suffixes=('_T', '_CT'), validate='one_to_one')


In [22]:
position_state_df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ClosestDistToA_T,ClosestDistToB_T,ChangeClosestDistToA_T,ChangeClosestDistToB_T,ClosestDistToA_CT,ClosestDistToB_CT,ChangeClosestDistToA_CT,ChangeClosestDistToB_CT
MatchId,MapName,RoundNum,Tick,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,3740,51,42,0.0,0.0,19,15,0.0,0.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,3869,46,37,-5.0,-5.0,17,14,-2.0,-1.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,3998,43,34,-3.0,-3.0,10,12,-7.0,-2.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4127,39,30,-4.0,-4.0,8,17,-2.0,5.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4256,37,28,-2.0,-2.0,6,13,-2.0,-4.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4385,34,25,-3.0,-3.0,4,16,-2.0,3.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4514,31,22,-3.0,-3.0,2,8,-2.0,-8.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4643,31,21,0.0,-1.0,3,7,1.0,-1.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4772,28,19,-3.0,-2.0,3,5,0.0,-2.0
DreamHack-Masters-Spring-2021-astralis-vs-complexity-bo3,de_inferno,1,4901,27,23,-1.0,4.0,6,2,3.0,-3.0


In [23]:
position_state_df.describe()

Unnamed: 0,ClosestDistToA_T,ClosestDistToB_T,ChangeClosestDistToA_T,ChangeClosestDistToB_T,ClosestDistToA_CT,ClosestDistToB_CT,ChangeClosestDistToA_CT,ChangeClosestDistToB_CT
count,43787.0,43787.0,43787.0,43787.0,43787.0,43787.0,43787.0,43787.0
mean,17.918104,19.135794,-0.334894,-0.309795,9.383424,9.874118,-0.074999,-0.109896
std,12.100438,10.974001,2.234367,2.819939,7.68141,8.021169,2.283396,2.611839
min,0.0,0.0,-39.0,-40.0,0.0,0.0,-43.0,-39.0
25%,8.0,10.0,-1.0,-1.0,4.0,4.0,0.0,0.0
50%,16.0,19.0,0.0,0.0,7.0,7.0,0.0,0.0
75%,25.0,27.0,0.0,0.0,11.0,14.0,0.0,0.0
max,69.0,59.0,29.0,33.0,58.0,53.0,43.0,40.0


## Putting it all together

In [24]:
bomb_state_df['EventType'] = 'BombPlant'
damages_kills_state_df['EventType'] = 'DamageDealt'
position_state_df['EventType'] = 'PositionUpdate'

In [25]:
game_states_df = pd.concat([bomb_state_df, damages_kills_state_df, position_state_df], axis=0)
game_states_df = game_states_df.sort_index()
game_states_df.reset_index(inplace=True)

In [26]:
game_states_df.columns

Index(['MatchId', 'MapName', 'RoundNum', 'Tick', 'BombPlantedA',
       'BombPlantedB', 'AttackerSide', 'VictimSide', 'AttackerSteamId',
       'EventType', 'VictimSteamId', 'TAlive', 'CTAlive', 'TTotalHp',
       'CTTotalHp', 'ClosestDistToA_T', 'ClosestDistToB_T',
       'ChangeClosestDistToA_T', 'ChangeClosestDistToB_T', 'ClosestDistToA_CT',
       'ClosestDistToB_CT', 'ChangeClosestDistToA_CT',
       'ChangeClosestDistToB_CT'],
      dtype='object')

In [27]:
def fill_states(df):
    fill_cols = ['BombPlantedA', 'BombPlantedB', 'TAlive', 'CTAlive', 'TTotalHp', 'CTTotalHp', 'ClosestDistToA_T', 'ClosestDistToB_T', 'ChangeClosestDistToA_T', 'ChangeClosestDistToB_T', 'ClosestDistToA_CT', 'ClosestDistToB_CT', 'ChangeClosestDistToA_CT', 'ChangeClosestDistToB_CT']
    df[fill_cols] = df[fill_cols].fillna(method='ffill')
    dist_names = ['ClosestDistToA_T', 'ClosestDistToB_T', 'ClosestDistToA_CT', 'ClosestDistToB_CT']
    df[dist_names] = df[dist_names].fillna(method='bfill')
    change_names = ['ChangeClosestDistToA_T', 'ChangeClosestDistToB_T', 'ChangeClosestDistToA_CT', 'ChangeClosestDistToB_CT']
    df[change_names] = df[change_names].fillna(0)
    df['BombPlantedA'] = df['BombPlantedA'].fillna(False)
    df['BombPlantedB'] = df['BombPlantedB'].fillna(False)
    df['TAlive'] = df['TAlive'].fillna(5)
    df['CTAlive'] = df['CTAlive'].fillna(5)
    df['TTotalHp'] = df['TTotalHp'].fillna(5)
    df['CTTotalHp'] = df['CTTotalHp'].fillna(5)
    return df

game_states_df = game_states_df.groupby(['MatchId', 'MapName', 'RoundNum']).apply(fill_states)
game_states_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,Tick,BombPlantedA,BombPlantedB,AttackerSide,VictimSide,AttackerSteamId,EventType,...,TTotalHp,CTTotalHp,ClosestDistToA_T,ClosestDistToB_T,ChangeClosestDistToA_T,ChangeClosestDistToB_T,ClosestDistToA_CT,ClosestDistToB_CT,ChangeClosestDistToA_CT,ChangeClosestDistToB_CT
0,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,3740,False,False,,,,PositionUpdate,...,5.0,5.0,51.0,42.0,0.0,0.0,19.0,15.0,0.0,0.0
1,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,3869,False,False,,,,PositionUpdate,...,5.0,5.0,46.0,37.0,-5.0,-5.0,17.0,14.0,-2.0,-1.0
2,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,3998,False,False,,,,PositionUpdate,...,5.0,5.0,43.0,34.0,-3.0,-3.0,10.0,12.0,-7.0,-2.0
3,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,4127,False,False,,,,PositionUpdate,...,5.0,5.0,39.0,30.0,-4.0,-4.0,8.0,17.0,-2.0,5.0
4,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,4256,False,False,,,,PositionUpdate,...,5.0,5.0,37.0,28.0,-2.0,-2.0,6.0,13.0,-2.0,-4.0


In [28]:
game_states_df = game_states_df.merge(rounds_df[['MatchId', 'MapName', 'RoundNum', 'RoundEndReason', 'CTBuyType', 'TBuyType', 'FreezeTimeEnd', 'WinningSide']], how='left', on=['MatchId', 'MapName', 'RoundNum'], validate='many_to_one')
game_states_df.insert(3, 'RoundTick', game_states_df.Tick - game_states_df.FreezeTimeEnd)
game_states_df.drop(['Tick', 'FreezeTimeEnd'], axis=1, inplace=True)

In [29]:
game_states_df.head()

Unnamed: 0,MatchId,MapName,RoundNum,RoundTick,BombPlantedA,BombPlantedB,AttackerSide,VictimSide,AttackerSteamId,EventType,...,ChangeClosestDistToA_T,ChangeClosestDistToB_T,ClosestDistToA_CT,ClosestDistToB_CT,ChangeClosestDistToA_CT,ChangeClosestDistToB_CT,RoundEndReason,CTBuyType,TBuyType,WinningSide
0,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,30,False,False,,,,PositionUpdate,...,0.0,0.0,19.0,15.0,0.0,0.0,TerroristsWin,Pistol,Pistol,T
1,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,159,False,False,,,,PositionUpdate,...,-5.0,-5.0,17.0,14.0,-2.0,-1.0,TerroristsWin,Pistol,Pistol,T
2,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,288,False,False,,,,PositionUpdate,...,-3.0,-3.0,10.0,12.0,-7.0,-2.0,TerroristsWin,Pistol,Pistol,T
3,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,417,False,False,,,,PositionUpdate,...,-4.0,-4.0,8.0,17.0,-2.0,5.0,TerroristsWin,Pistol,Pistol,T
4,DreamHack-Masters-Spring-2021-astralis-vs-comp...,de_inferno,1,546,False,False,,,,PositionUpdate,...,-2.0,-2.0,6.0,13.0,-2.0,-4.0,TerroristsWin,Pistol,Pistol,T


In [30]:
game_states_df.shape[0]

57829

In [31]:
bad_rounds = game_states_df[(game_states_df.TTotalHp < 0) | (game_states_df.CTTotalHp < 0)][['MatchId', 'MapName', 'RoundNum']].drop_duplicates()
bad_rounds = bad_rounds.reindex(columns=['MatchId', 'MapName', 'RoundNum'])
#  | ((game_states_df.TTotalHp > 0) & (game_states_df.TAlive == 0)) | ((game_states_df.CTTotalHp > 0) & (game_states_df.CTAlive == 0))
# Going to leave this^^ out because it elimintes too mucn of the data (~8%) (there are a lot of rounds where an entire team is dead and they still have >0 HP, probably because a damage event was missed somewhere)
bad_game_states = game_states_df.merge(bad_rounds, how='left', validate='many_to_one', indicator=True)
game_states_df = bad_game_states[bad_game_states._merge != 'both'].drop('_merge', axis=1)
game_states_df.shape[0]

57004

In [32]:
game_states_df.to_csv('game_states.csv', encoding='utf-8', index=False)