<a href="https://colab.research.google.com/github/gandhimonil9823/CS_GO_Analysis/blob/main/preprocessing/aggregated_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os

### Mount data from Google Drive

In [None]:
# Mount Google Drive at /content/drive so the data folder can be accessed.
# google.colab is imported here; it is the only import outside the first cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Change dir and check contents

In [None]:
cd '/content/drive/Shared drives/CS_GO_Data'

/content/drive/Shared drives/CS_GO_Data


In [None]:
ls

'Application Alogrithm.gdoc'   de_train.png
 cs_cruise.png                 de_tulip.png
'data preprocessing.txt'       [0m[01;34mdont_use[0m/
 de_cache.png                  esea_master_dmg_demos.part1.csv
 de_cbble.png                  esea_master_dmg_demos.part2.csv
 de_coast.png                  esea_master_grenades_demos.part1.csv
 de_dust2.png                  esea_master_grenades_demos.part2.csv
 de_empire.png                 esea_master_kills_demos.part1.csv
 de_inferno.png                esea_master_kills_demos.part2.csv
 de_mikla.png                  esea_meta_demos.part1.csv
 de_mirage.png                 esea_meta_demos.part2.csv
 de_new_nuke.png               map_data.csv
 de_nuke.png                   [01;34mmaps[0m/
 de_overpass.png               [01;34mprocessed_data[0m/
 de_royal.png                  [01;34mprocessed_data_v2[0m/
 de_santorini.png              Updated_CS_GO_Analysis.ipynb
 de_season.png


### Loader functions

In [None]:
def combine_dataframes(df1,df2):
  """Stack two dataframes vertically and return the result as a new dataframe.

  Args:
    df1: dataframe whose rows come first.
    df2: dataframe whose rows are appended after those of df1.

  Returns:
    A new dataframe holding the rows of df1 followed by the rows of df2,
    re-indexed 0..n-1. Neither input is modified.
  """
  # DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
  # pd.concat with ignore_index=True is the supported equivalent of
  # append(...) followed by reset_index(drop=True).
  return pd.concat([df1, df2], ignore_index=True)

In [None]:
#load raw data
def _load_split_csv(stem, data_dir=''):
  """Read `<stem>.part1.csv` and `<stem>.part2.csv` and stack them into one dataframe.

  Args:
    stem: file name without the `.partN.csv` suffix.
    data_dir: folder holding the files; '' (default) resolves the names
      against the current working directory, as the original loaders did.
  """
  part1 = pd.read_csv(os.path.join(data_dir, f'{stem}.part1.csv'))
  part2 = pd.read_csv(os.path.join(data_dir, f'{stem}.part2.csv'))
  return combine_dataframes(part1, part2)

def load_raw_dmg(data_dir=''):
  """Load both parts of the raw damage data as a single dataframe."""
  return _load_split_csv('esea_master_dmg_demos', data_dir)

def load_raw_grenades(data_dir=''):
  """Load both parts of the raw grenade data as a single dataframe."""
  return _load_split_csv('esea_master_grenades_demos', data_dir)

def load_raw_kills(data_dir=''):
  """Load both parts of the raw kill data as a single dataframe."""
  return _load_split_csv('esea_master_kills_demos', data_dir)

def load_raw_meta(data_dir=''):
  """Load both parts of the raw meta data as a single dataframe."""
  return _load_split_csv('esea_meta_demos', data_dir)

def load_raw_map_data(data_dir=''):
  """Load the map data, which is stored in a single file."""
  return pd.read_csv(os.path.join(data_dir, 'map_data.csv'))

def load_raw_all(data_dir=''):
  """Load every raw dataset.

  Returns:
    A 5-tuple in the order (damage, grenades, kills, meta, map data).
  """
  return (
    load_raw_dmg(data_dir),
    load_raw_grenades(data_dir),
    load_raw_kills(data_dir),
    load_raw_meta(data_dir),
    load_raw_map_data(data_dir),
  )

### load all data

In [None]:
%%time
# Read every raw CSV once; the later cells work on these five frames.
# Unpacking order matches the tuple returned by load_raw_all().
raw_damage, raw_grenades, raw_kills, raw_meta, raw_map_data = load_raw_all()

CPU times: user 58.1 s, sys: 9.15 s, total: 1min 7s
Wall time: 1min 22s


In [None]:
# report (rows, columns) for every raw frame, one line per dataset
print('\n'.join(
  f'{label} {frame.shape}'
  for label, frame in (
    ('damage data:', raw_damage),
    ('grenades data:', raw_grenades),
    ('kills data:', raw_kills),
    ('meta data:', raw_meta),
    ('map data:', raw_map_data),
  )
))

damage data: (10538182, 23)
grenades data: (5246458, 23)
kills data: (2742646, 13)
meta data: (377629, 10)
map data: (7, 7)


### 1) Remove corrupted data:

We noticed that in some of the lines of the Kaggle dataset, the attacking side (att_side) and the victim side (vic_side) don't match the attacking team (att_team) and the victim team (vic_team). This inconsistency poisons not only that line, but also the round and the entire match as a whole. Therefore, every match that contains such a corrupt line should be removed from the dataset.

In [None]:
#identify and output a list of corrupt matches for the given damage dataframe
def find_inconsistent_games(df, known_bad_files=('esea_match_13787633.dem', 'esea_match_13784811.dem')):
  """Return the match files whose damage rows have contradictory side/team data.

  A row is inconsistent when exactly one of "attacker and victim are on the
  same side" and "attacker and victim are on the same team" holds.

  Args:
    df: damage dataframe with the columns 'file', 'att_side', 'vic_side',
      'att_team' and 'vic_team'.
    known_bad_files: additional match files that are always reported; the
      default lists the erroneous matches that were identified by hand.

  Returns:
    A sorted numpy array of unique match file names.
  """
  # use the dataframe that was passed in, not the notebook-level raw_damage
  same_side = df.att_side == df.vic_side
  same_team = df.att_team == df.vic_team

  #removal condition #1: team mismatch when the sides match
  mismatched_teams = same_side & ~same_team
  #removal condition #2: side mismatch when the teams match
  mismatched_sides = ~same_side & same_team

  strange_files = df.loc[mismatched_teams | mismatched_sides, 'file'].unique()

  #other erroneous matches that we identified
  strange_files = np.append(strange_files, list(known_bad_files))

  return np.unique(strange_files)


In [None]:
#removes the matches specified in match_list from the given list of dataframes
def remove_matches(inp_data, match_list):
  """Drop every row that belongs to one of the given matches.

  Args:
    inp_data: iterable of dataframes, each with a 'file' column.
    match_list: collection of match file names to remove.

  Returns:
    A list with one filtered dataframe per input, in the same order. The
    inputs are not modified and the surviving rows keep their index labels.
  """
  # filter on the match_list argument, not the notebook-level strange_files
  return [df[~df['file'].isin(match_list)] for df in inp_data]

In [None]:
# the four frames that carry a per-match 'file' column
data = [raw_damage, raw_grenades, raw_kills, raw_meta]
strange_files = find_inconsistent_games(raw_damage)

# raw_meta has one row per round, so the number of matches is the number of
# distinct files, not the number of rows
total_matches = raw_meta['file'].nunique()
print('Percentage of matches removed:',(len(strange_files)/total_matches)*100,'%')

data = remove_matches(data, strange_files)

Percentage of matches removed: 0.3868876595812292 %


In [None]:
# shapes after the corrupt matches were removed (map data is unchanged)
print('\n'.join(
  f'{label} {frame.shape}'
  for label, frame in (
    ('damage data:', data[0]),
    ('grenades data:', data[1]),
    ('kills data:', data[2]),
    ('meta data:', data[3]),
    ('map data:', raw_map_data),
  )
))

damage data: (9505465, 23)
grenades data: (4722314, 23)
kills data: (2477355, 13)
meta data: (340333, 10)
map data: (7, 7)


### 2) Anonymize the team names: 

A small but significant number of the matches contain the teams' actual names. Replacing these names with the more generic 'Team 1' and 'Team 2' tags maintains the anonymity of the data and removes the possibility of displaying inappropriate or confusing team names.

There are three sources of damage in this dataset. The source is represented by the variable att_side (attacker side):

1) att_side = <"Enemy team's name"> :
When an individual takes damage from the actions of an enemy team member.

2) att_side = <"Own team's name"> :
In cases of friendly fire or self-damage.

3) att_side = "World" :
When an individual takes damage from non-player objects present in the game. Examples include fall damage and collision with moving objects (if any). 

In [None]:
def remove_other_dmg_sources(df):
  """Keep only the damage rows that one side inflicted on the other side.

  Used to find a reliable "first enemy damage" row per match when building
  the list of non-standard team names.

  Args:
    df: damage dataframe with the columns 'att_side' and 'vic_side'.

  Returns:
    A new dataframe, re-indexed 0..n-1, without rows that have no attacking
    side and without friendly-fire/self-damage rows.
  """
  # rows without an attacking side: either the literal string 'None' or a
  # missing value (pandas >= 2.0 parses 'None' as NaN in read_csv by default,
  # so comparing against the string alone would let these rows through)
  has_attacker_side = df.att_side.notna() & (df.att_side != 'None')

  #remove friendly/self damage records
  crosses_sides = df.att_side != df.vic_side

  only_enemy_dmg = df[has_attacker_side & crosses_sides]

  #reset index
  return only_enemy_dmg.reset_index(drop=True)

In [None]:
def find_rename_mask(df):
  """Build the custom-name -> 'Team 1'/'Team 2' lookup for every match that needs one.

  Args:
    df: enemy-damage dataframe with the columns 'file', 'att_team',
      'vic_team', 'att_side' and 'vic_side', in chronological order per match.

  Returns:
    A dataframe with the columns 'file', 'Team 1' and 'Team 2', one row per
    match whose team names are not the default ones, sorted by file. The
    'Team 1'/'Team 2' columns hold the custom names to be replaced.
  """
  standard_names = ['Team 1', 'Team 2']

  #get the first enemy damage instance of each match. This row has all the data we are looking for.
  # drop_duplicates keeps whole rows; groupby('file').first() would instead
  # return the first non-null value of each column and could mix rows
  first_rows = (
    df.dropna(subset=['file'])
      .drop_duplicates(subset='file', keep='first')
      [['file', 'att_team', 'vic_team', 'att_side', 'vic_side']]
      .sort_values('file')
  )

  #only consider games where the team names aren't standard. Check both columns just in case
  is_custom = ~first_rows['att_team'].isin(standard_names) | ~first_rows['vic_team'].isin(standard_names)
  custom = first_rows[is_custom]

  # the CounterTerrorist role is always given to Team 1 at round 1
  # (computed column-wise: values written to rows yielded by iterrows() are
  # not stored back into the dataframe)
  attacker_is_ct = (custom['att_side'] == 'CounterTerrorist').to_numpy()
  att_team = custom['att_team'].to_numpy()
  vic_team = custom['vic_team'].to_numpy()

  #we only need file, Team 1, Team 2
  to_rename = pd.DataFrame({
    'file': custom['file'].to_numpy(),
    'Team 1': np.where(attacker_is_ct, att_team, vic_team),
    'Team 2': np.where(attacker_is_ct, vic_team, att_team),
  })

  return to_rename

In [None]:
def rename_teams(df,to_rename):
  """Replace custom team names in 'att_team' and 'vic_team' with 'Team 1'/'Team 2'.

  Args:
    df: dataframe with the columns 'file', 'att_team' and 'vic_team'.
      It is modified in place.
    to_rename: dataframe with the columns 'file' and 'Team 1'; 'Team 1' holds
      the custom name that becomes 'Team 1' in that match.

  Returns:
    The same dataframe object, for convenient reassignment.
  """
  for _, match in to_rename.iterrows():

    in_match = (df['file'] == match['file'])

    for column in ('att_team', 'vic_team'):
      # build both masks before writing so the second one is not affected by
      # the first assignment
      is_team_1 = in_match & (df[column] == match['Team 1'])
      # any other non-missing name becomes 'Team 2'; missing values (e.g.
      # grenade rows without a victim) stay missing instead of being relabelled
      is_team_2 = in_match & df[column].notna() & ~is_team_1

      df.loc[is_team_1, column] = 'Team 1'
      df.loc[is_team_2, column] = 'Team 2'

  return df

In [None]:
# Work on the filtered damage frame (data[0]) with other damage sources removed.
only_enemy_dmg = remove_other_dmg_sources(data[0])
# One row per match whose team names are not the default 'Team 1'/'Team 2'.
to_rename = find_rename_mask(only_enemy_dmg)
print('Number of matches that have custom team names:', len(to_rename))

Number of matches that have custom team names: 29


In [None]:
# damage, grenades and kills (positions 0-2) all carry att_team/vic_team columns
data[:3] = [rename_teams(frame, to_rename) for frame in data[:3]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [None]:
def rename_meta_teams(df,to_rename):
  """Replace custom team names in 'winner_team' with 'Team 1'/'Team 2'.

  Args:
    df: dataframe with the columns 'file' and 'winner_team'.
      It is modified in place.
    to_rename: dataframe with the columns 'file' and 'Team 1'; 'Team 1' holds
      the custom name that becomes 'Team 1' in that match.

  Returns:
    The same dataframe object, for convenient reassignment.
  """
  for _, match in to_rename.iterrows():

    in_match = (df['file'] == match['file'])

    # build both masks before writing so the second one is not affected by
    # the first assignment
    is_team_1 = in_match & (df['winner_team'] == match['Team 1'])
    # any other non-missing name becomes 'Team 2'; a missing winner stays
    # missing instead of being relabelled
    is_team_2 = in_match & df['winner_team'].notna() & ~is_team_1

    df.loc[is_team_1, 'winner_team'] = 'Team 1'
    df.loc[is_team_2, 'winner_team'] = 'Team 2'

  return df

In [None]:
# The meta frame (data[3]) is passed to rename_meta_teams rather than
# rename_teams, which was applied to data[0..2].
data[3] = rename_meta_teams(data[3],to_rename)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [None]:
# renaming must not drop rows: these shapes should equal the ones printed
# right after the corrupt matches were removed
print('\n'.join(
  f'{label} {frame.shape}'
  for label, frame in (
    ('damage data:', data[0]),
    ('grenades data:', data[1]),
    ('kills data:', data[2]),
    ('meta data:', data[3]),
    ('map data:', raw_map_data),
  )
))

damage data: (9505465, 23)
grenades data: (4722314, 23)
kills data: (2477355, 13)
meta data: (340333, 10)
map data: (7, 7)


### 3) Verify contents

In [None]:
data[0] #damage

Unnamed: 0,file,round,tick,seconds,att_team,vic_team,att_side,vic_side,hp_dmg,arm_dmg,is_bomb_planted,bomb_site,hitbox,wp,wp_type,att_id,att_rank,vic_id,vic_rank,att_pos_x,att_pos_y,vic_pos_x,vic_pos_y
0,esea_match_13770997.dem,1,14372,111.8476,Team 2,Team 1,,CounterTerrorist,1,0,False,,Generic,Unknown,Unkown,0,0,76561198055054795,0,0.0000,0.00000,0.0000,0.000000
1,esea_match_13770997.dem,1,15972,124.3761,Team 1,Team 2,CounterTerrorist,Terrorist,18,9,False,,Stomach,USP,Pistol,76561198048742997,0,76561198082200410,0,-1499.6900,63.33829,-669.5558,-79.769570
2,esea_match_13770997.dem,1,16058,125.0495,Team 1,Team 2,CounterTerrorist,Terrorist,100,0,False,,Head,USP,Pistol,76561198055054795,0,76561197961009213,0,-1066.8740,3.44563,-614.1868,-91.707770
3,esea_match_13770997.dem,1,16066,125.1121,Team 2,Team 1,Terrorist,CounterTerrorist,12,7,False,,RightArm,Glock,Pistol,76561198082200410,0,76561198055054795,0,-747.3146,-49.32681,-1065.5560,9.381622
4,esea_match_13770997.dem,1,16108,125.4410,Team 1,Team 2,CounterTerrorist,Terrorist,15,7,False,,Chest,USP,Pistol,76561198048742997,0,76561198082200410,0,-1501.8610,49.19798,-748.4188,-53.469220
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10538177,esea_match_13829173.dem,22,243361,1901.4210,Team 1,Team 2,Terrorist,CounterTerrorist,20,2,False,,Stomach,AK47,Rifle,76561197986109746,0,76561198047370175,0,-1132.0250,-191.14730,-1220.2790,-1279.492000
10538178,esea_match_13829173.dem,22,243373,1901.5150,Team 1,Team 2,Terrorist,CounterTerrorist,26,3,False,,Chest,AK47,Rifle,76561197986109746,0,76561198047370175,0,-1132.0250,-191.14730,-1218.2490,-1275.254000
10538179,esea_match_13829173.dem,22,243405,1901.7650,Team 2,Team 1,CounterTerrorist,Terrorist,100,15,False,,Head,AK47,Rifle,76561198047370175,0,76561197986109746,0,-1205.9710,-1271.50400,-1132.0250,-191.147300
10538180,esea_match_13829173.dem,22,243719,1904.2230,Team 2,Team 1,CounterTerrorist,Terrorist,27,4,False,,Chest,AK47,Rifle,76561198047370175,0,76561198103856560,0,-1217.5950,-1091.54300,-1373.8790,-931.702600


In [None]:
data[1] #grenades

Unnamed: 0,file,round,seconds,att_team,vic_team,att_id,vic_id,att_side,vic_side,hp_dmg,arm_dmg,is_bomb_planted,bomb_site,hitbox,nade,att_rank,vic_rank,att_pos_x,att_pos_y,nade_land_x,nade_land_y,vic_pos_x,vic_pos_y
0,esea_match_13770997.dem,1,153.1602,Team 1,Team 2,76561198165334141,,CounterTerrorist,,0,0,True,B,,Smoke,0,,-1618.1460,-66.00259,-949.8569,-340.3019,,
1,esea_match_13770997.dem,2,184.7945,Team 2,Team 1,76561198037331400,7.656120e+16,Terrorist,CounterTerrorist,70,0,False,,Generic,HE,0,0.0,-1719.9040,-2357.64700,-2774.6650,-1603.9430,-2741.25,-1523.163
2,esea_match_13770997.dem,2,186.8617,Team 1,Team 2,76561198055191021,,CounterTerrorist,,0,0,False,,,HE,0,,-1036.3520,492.16760,-466.8676,-356.9641,,
3,esea_match_13770997.dem,2,187.1122,Team 1,Team 2,76561198055054795,,CounterTerrorist,,0,0,False,,,HE,0,,-855.0770,438.69090,-459.0147,-543.8581,,
4,esea_match_13770997.dem,2,191.0587,Team 2,Team 2,76561198037331400,,Terrorist,,0,0,False,,,Molotov,0,,-2617.4900,-1832.40700,-2743.5610,-927.2995,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5246453,esea_match_13829173.dem,22,1883.3240,Team 2,,76561198318542436,,CounterTerrorist,,0,0,False,,,Flash,0,,-1137.7870,-757.97360,-1031.7970,-778.3727,,
5246454,esea_match_13829173.dem,22,1885.8760,Team 1,,76561197986109746,,Terrorist,,0,0,False,,,Flash,0,,-248.5423,-347.89890,-1126.4050,-728.1499,,
5246455,esea_match_13829173.dem,22,1888.1460,Team 1,,76561197986109746,,Terrorist,,0,0,False,,,Flash,0,,-356.5590,-339.70210,-682.2595,-1381.8450,,
5246456,esea_match_13829173.dem,22,1890.2750,Team 1,,76561198037412060,,Terrorist,,0,0,False,,,Molotov,0,,249.9704,-875.69920,-1126.4140,-587.8959,,


In [None]:
data[2] #kills

Unnamed: 0,file,round,tick,seconds,att_team,vic_team,att_side,vic_side,wp,wp_type,ct_alive,t_alive,is_bomb_planted
0,esea_match_13770997.dem,1,16058,30.74165,Team 1,Team 2,CounterTerrorist,Terrorist,USP,Pistol,5,4,False
1,esea_match_13770997.dem,1,16210,31.93185,Team 2,Team 1,Terrorist,CounterTerrorist,Glock,Pistol,4,4,False
2,esea_match_13770997.dem,1,16510,34.28094,Team 2,Team 1,Terrorist,CounterTerrorist,Glock,Pistol,3,4,False
3,esea_match_13770997.dem,1,17104,38.93212,Team 1,Team 2,CounterTerrorist,Terrorist,USP,Pistol,3,3,False
4,esea_match_13770997.dem,1,17338,40.76441,Team 2,Team 1,Terrorist,CounterTerrorist,Glock,Pistol,2,3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2742641,esea_match_13829173.dem,22,241483,30.63599,Team 2,Team 1,CounterTerrorist,Terrorist,AWP,Sniper,3,3,False
2742642,esea_match_13829173.dem,22,242773,40.73315,Team 1,Team 2,Terrorist,CounterTerrorist,AK47,Rifle,2,3,False
2742643,esea_match_13829173.dem,22,243361,45.33569,Team 1,Team 2,Terrorist,CounterTerrorist,AK47,Rifle,1,3,False
2742644,esea_match_13829173.dem,22,243405,45.68005,Team 2,Team 1,CounterTerrorist,Terrorist,AK47,Rifle,1,2,False


In [None]:
data[3] #metadata for each round

Unnamed: 0,file,map,round,start_seconds,end_seconds,winner_team,winner_side,round_type,ct_eq_val,t_eq_val
0,esea_match_13770997.dem,de_overpass,1,94.30782,160.9591,Team 2,Terrorist,PISTOL_ROUND,4300,4250
1,esea_match_13770997.dem,de_overpass,2,160.95910,279.3998,Team 2,Terrorist,ECO,6300,19400
2,esea_match_13770997.dem,de_overpass,3,279.39980,341.0084,Team 2,Terrorist,SEMI_ECO,7650,19250
3,esea_match_13770997.dem,de_overpass,4,341.00840,435.4259,Team 2,Terrorist,NORMAL,24900,23400
4,esea_match_13770997.dem,de_overpass,5,435.42590,484.2398,Team 1,CounterTerrorist,ECO,5400,20550
...,...,...,...,...,...,...,...,...,...,...
377624,esea_match_13829173.dem,de_mirage,18,1647.20600,1694.6550,Team 1,Terrorist,ECO,4500,21950
377625,esea_match_13829173.dem,de_mirage,19,1694.65500,1753.4380,Team 1,Terrorist,NORMAL,16400,26100
377626,esea_match_13829173.dem,de_mirage,20,1753.43800,1803.6580,Team 1,Terrorist,FORCE_BUY,15100,26000
377627,esea_match_13829173.dem,de_mirage,21,1803.65800,1856.0850,Team 1,Terrorist,NORMAL,17550,25500


In [None]:
raw_map_data  #map coordinates

Unnamed: 0.1,Unnamed: 0,EndX,EndY,ResX,ResY,StartX,StartY
0,de_cache,3752,3187,1024,1024,-2031,-2240
1,de_cbble,2282,3032,1024,1024,-3819,-3073
2,de_dust2,2127,3455,1024,1024,-2486,-1150
3,de_inferno,2797,3800,1024,1024,-1960,-1062
4,de_mirage,1912,1682,1024,1024,-3217,-3401
5,de_overpass,503,1740,1024,1024,-4820,-3591
6,de_train,2262,2447,1024,1024,-2436,-2469


### 4) Save preprocessed files to drive

In [None]:
%%time
# Write each frame to the processed_data_v2 folder on the shared drive.
# index = False keeps the dataframe index out of the written files.
data[0].to_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/damage_data.csv', index = False)
data[1].to_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/grenade_data.csv', index = False)
data[2].to_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/kill_data.csv', index = False)
data[3].to_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/meta_data.csv', index = False)
# map data is written out from the raw frame
raw_map_data.to_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/map_data.csv', index = False)

CPU times: user 3min 9s, sys: 5.34 s, total: 3min 14s
Wall time: 3min 42s


### 5) load a file and check the contents

In [None]:
# Read the saved meta data back from disk as a round-trip sanity check.
test_load = pd.read_csv('/content/drive/Shared drives/CS_GO_Data/processed_data_v2/meta_data.csv')

In [None]:
test_load

Unnamed: 0,file,map,round,start_seconds,end_seconds,winner_team,winner_side,round_type,ct_eq_val,t_eq_val
0,esea_match_13770997.dem,de_overpass,1,94.30782,160.9591,Team 2,Terrorist,PISTOL_ROUND,4300,4250
1,esea_match_13770997.dem,de_overpass,2,160.95910,279.3998,Team 2,Terrorist,ECO,6300,19400
2,esea_match_13770997.dem,de_overpass,3,279.39980,341.0084,Team 2,Terrorist,SEMI_ECO,7650,19250
3,esea_match_13770997.dem,de_overpass,4,341.00840,435.4259,Team 2,Terrorist,NORMAL,24900,23400
4,esea_match_13770997.dem,de_overpass,5,435.42590,484.2398,Team 1,CounterTerrorist,ECO,5400,20550
...,...,...,...,...,...,...,...,...,...,...
340328,esea_match_13829173.dem,de_mirage,18,1647.20600,1694.6550,Team 1,Terrorist,ECO,4500,21950
340329,esea_match_13829173.dem,de_mirage,19,1694.65500,1753.4380,Team 1,Terrorist,NORMAL,16400,26100
340330,esea_match_13829173.dem,de_mirage,20,1753.43800,1803.6580,Team 1,Terrorist,FORCE_BUY,15100,26000
340331,esea_match_13829173.dem,de_mirage,21,1803.65800,1856.0850,Team 1,Terrorist,NORMAL,17550,25500
