<a href="https://colab.research.google.com/github/gandhimonil9823/CS_GO_Analysis/blob/main/temp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import pandas as pd
import numpy as np
import os

### Mount data from Google Drive

In [48]:
# Mount Google Drive into the Colab runtime at /content/drive so the data folder is reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Change to the shared data directory and check its contents.

In [49]:
cd '/content/drive/Shared drives/CS_GO_Data'


/content/drive/Shared drives/CS_GO_Data


In [50]:
ls

'Application Alogrithm.gdoc'   de_train.png
 cs_cruise.png                 de_tulip.png
'data preprocessing.txt'       [0m[01;34mdont_use[0m/
 de_cache.png                  esea_master_dmg_demos.part1.csv
 de_cbble.png                  esea_master_dmg_demos.part2.csv
 de_coast.png                  esea_master_grenades_demos.part1.csv
 de_dust2.png                  esea_master_grenades_demos.part2.csv
 de_empire.png                 esea_master_kills_demos.part1.csv
 de_inferno.png                esea_master_kills_demos.part2.csv
 de_mikla.png                  esea_meta_demos.part1.csv
 de_mirage.png                 esea_meta_demos.part2.csv
 de_new_nuke.png              [01;34m'final_processed data'[0m/
 de_nuke.png                   map_data.csv
 de_overpass.png               [01;34mmaps[0m/
 de_royal.png                  [01;34mprocessed_data[0m/
 de_santorini.png              Updated_CS_GO_Analysis.ipynb
 de_season.png


### loaders

In [51]:
def combine_dataframes(df1,df2):
  """Stack two dataframes vertically and return a single dataframe.

  The rows of ``df2`` are placed after the rows of ``df1`` and the result
  gets a fresh 0..n-1 index. Neither input is modified.

  ``pd.concat`` is used instead of ``DataFrame.append`` because ``append``
  was deprecated in pandas 1.4 and removed in pandas 2.0.
  """
  return pd.concat([df1, df2], ignore_index=True)

In [52]:
#load raw data
def load_raw_dmg():
  """Read both parts of the damage CSV (relative to the working directory) and return them combined."""
  part_paths = ('esea_master_dmg_demos.part1.csv', 'esea_master_dmg_demos.part2.csv')
  first_part, second_part = (pd.read_csv(path) for path in part_paths)
  return combine_dataframes(first_part, second_part)

def load_raw_grenades():
  """Read both parts of the grenades CSV (relative to the working directory) and return them combined."""
  part_paths = ('esea_master_grenades_demos.part1.csv', 'esea_master_grenades_demos.part2.csv')
  first_part, second_part = (pd.read_csv(path) for path in part_paths)
  return combine_dataframes(first_part, second_part)
  
def load_raw_kills():
  """Read both parts of the kills CSV (relative to the working directory) and return them combined."""
  part_paths = ('esea_master_kills_demos.part1.csv', 'esea_master_kills_demos.part2.csv')
  first_part, second_part = (pd.read_csv(path) for path in part_paths)
  return combine_dataframes(first_part, second_part)
  
def load_raw_meta():
  """Read both parts of the meta CSV (relative to the working directory) and return them combined."""
  part_paths = ('esea_meta_demos.part1.csv', 'esea_meta_demos.part2.csv')
  first_part, second_part = (pd.read_csv(path) for path in part_paths)
  return combine_dataframes(first_part, second_part)

def load_raw_map_data():
  """Read map_data.csv (relative to the working directory) into a dataframe."""
  map_frame = pd.read_csv('map_data.csv')
  return map_frame

def load_raw_all():
  """Load every raw dataset.

  Returns a 5-tuple in the order: damage, grenades, kills, meta, map data.
  """
  damage = load_raw_dmg()
  grenades = load_raw_grenades()
  kills = load_raw_kills()
  meta = load_raw_meta()
  map_data = load_raw_map_data()
  return damage, grenades, kills, meta, map_data

### load all data

In [None]:
%%time
raw_damage, raw_grenades, raw_kills, raw_meta, raw_map_data = load_raw_all()

In [None]:
#print the shape of each raw dataframe, one labelled line per dataset
raw_frames = {
  'damage data:': raw_damage,
  'grenades data:': raw_grenades,
  'kills data:': raw_kills,
  'meta data:': raw_meta,
  'map data:': raw_map_data,
}
for label, frame in raw_frames.items():
  print(label, frame.shape)

### 1) Clean the raw data of corrupted data:

We noticed that in some rows of the Kaggle dataset, the attacking side (att_side) and the victim side (vic_side) don't match the attacking team (att_team) and the victim team (vic_team). This inconsistency poisons not only that row, but also the round and the entire match as a whole. Therefore, any match that contains such corrupt rows should be removed from the dataset entirely.

In [None]:
def find_inconsistent_games(df, known_bad_files=('esea_match_13787633.dem', 'esea_match_13784811.dem')):
  """Identify corrupt matches in a damage dataframe.

  A row is treated as corrupt when the side columns and the team columns
  disagree about whether attacker and victim belong together:
    1. sides match but teams differ, or
    2. sides differ but teams match.

  Parameters
  ----------
  df : DataFrame with 'file', 'att_side', 'vic_side', 'att_team' and
      'vic_team' columns. The function reads only this argument; it no
      longer depends on the global ``raw_damage``.
  known_bad_files : iterable of additional match file names that are always
      reported. Defaults to the two matches that were identified manually.

  Returns
  -------
  numpy array of the unique, sorted file names of the corrupt matches.
  """
  #removal condition #1: team mismatch when the sides match
  mismatched_teams = (df['att_side'] == df['vic_side']) & (df['att_team'] != df['vic_team'])
  #removal condition #2: side mismatch when the teams match
  mismatched_sides = (df['att_side'] != df['vic_side']) & (df['att_team'] == df['vic_team'])

  flagged_files = df.loc[mismatched_teams | mismatched_sides, 'file'].unique()

  #other erroneous matches that were identified manually
  strange_files = np.append(flagged_files, list(known_bad_files))

  return np.unique(strange_files)


In [None]:
def remove_matches(inp_data, match_list):
  """Remove the matches named in ``match_list`` from every dataframe in ``inp_data``.

  Parameters
  ----------
  inp_data : iterable of DataFrames, each with a 'file' column.
  match_list : list-like of match file names to drop. The filter uses this
      argument; it no longer depends on the global ``strange_files``.

  Returns
  -------
  list of new, filtered DataFrames in the same order as ``inp_data``. The
  inputs are not modified.
  """
  return [df[~df['file'].isin(match_list)] for df in inp_data]

In [None]:
data = [raw_damage, raw_grenades, raw_kills, raw_meta]
strange_files = find_inconsistent_games(raw_damage)

# Count distinct match files rather than rows: if raw_meta holds more than one
# row per match, len(raw_meta) would understate the removed percentage.
total_matches = raw_meta['file'].nunique()
print('Percentage of matches removed:',(len(strange_files)/total_matches)*100,'%')

data = remove_matches(data, strange_files)

In [None]:
# Shapes after removing the corrupt matches; the map data is not filtered.
filtered_labels = ('damage data:', 'grenades data:', 'kills data:', 'meta data:')
for label, frame in zip(filtered_labels, data):
  print(label, frame.shape)
print('map data:', raw_map_data.shape)

### 2) Anonymize the data: 

A small but significant number of the matches record the actual names of the teams. Replacing these team names with the more generic 'Team 1' and 'Team 2' tags maintains the anonymity of the data and removes the possibility of displaying inappropriate or confusing team names.

There are three sources of damage in this dataset. The source is represented by the variable att_side (attacker side):

1) `att_side = <"Enemy team's name">`:
When an individual takes damage from the actions of an enemy team member.

2) `att_side = <"Own team's name">`:
In cases of friendly fire or self-damage.

3) `att_side = "World"`:
When an individual takes damage from non-player objects present in the game. Examples include fall damage and collisions with moving objects (if any).

In [None]:
def remove_other_dmg_sources(df):
  """Return only the rows where damage was dealt by the opposing side.

  Two kinds of rows are dropped:
    * rows whose att_side is the string 'None', and
    * rows where att_side equals vic_side (friendly fire / self damage).

  The result has a fresh 0..n-1 index; the input is not modified.

  NOTE(review): the notebook text describes world damage as att_side = "World",
  while this filter matches 'None' -- confirm which value the data uses.
  """
  has_attacker_side = df['att_side'] != 'None'
  opposing_sides = df['att_side'] != df['vic_side']
  enemy_rows = df[has_attacker_side & opposing_sides]
  return enemy_rows.reset_index(drop=True)

In [None]:
def find_rename_mask(df):
  """Build the table of matches whose team names must be anonymised.

  Parameters
  ----------
  df : DataFrame of enemy-inflicted damage rows with 'file', 'att_team',
      'vic_team', 'att_side' and 'vic_side' columns.

  Returns
  -------
  DataFrame with columns 'file', 'Team 1', 'Team 2' and one row per match in
  which at least one team name is not the default 'Team 1' / 'Team 2'.
  'Team 1' holds the original name of the team on the CounterTerrorist side
  in the first damage row of the match, 'Team 2' the other team's name.

  Notes
  -----
  * The previous version assigned into ``row`` while looping over
    ``iterrows()``. pandas documents that such a row may be a copy, so the
    writes could be lost and both columns left as None. The columns are now
    computed directly.
  * ``groupby('file').first()`` takes the first non-null value per column.
    NOTE(review): this assumes the first rows of each match belong to round 1
    and carry no nulls in these columns -- confirm against the data.
  """
  default_names = ['Team 1', 'Team 2']

  #first enemy damage instance of each match, restricted to the relevant columns
  first_rows = df.groupby('file').first()[['att_team', 'vic_team', 'att_side', 'vic_side']]

  #only keep matches where at least one of the two team names is non-standard
  has_custom_name = ~first_rows['att_team'].isin(default_names) | ~first_rows['vic_team'].isin(default_names)
  custom = first_rows[has_custom_name].reset_index()

  #notebook convention: the CounterTerrorist role is given to Team 1 at round 1
  attacker_is_ct = custom['att_side'] == 'CounterTerrorist'

  return pd.DataFrame({
    'file': custom['file'],
    'Team 1': custom['att_team'].where(attacker_is_ct, custom['vic_team']),
    'Team 2': custom['vic_team'].where(attacker_is_ct, custom['att_team']),
  })

In [None]:
raw_meta

In [None]:
def rename_team_names(df,to_rename,columns=('att_team','vic_team')):
  """Replace custom team names with the generic 'Team 1' / 'Team 2' labels.

  Parameters
  ----------
  df : DataFrame with a 'file' column and the columns listed in ``columns``.
  to_rename : DataFrame with 'file', 'Team 1' and 'Team 2' columns, where the
      last two hold the original names of the teams in that match.
  columns : names of the team columns to rewrite (defaults to the attacker
      and victim team columns).

  Returns
  -------
  A new DataFrame; ``df`` itself is left untouched.

  Notes
  -----
  Only values that exactly equal a match's recorded Team 1 / Team 2 name are
  replaced. The previous version relabelled every other value in the affected
  matches -- including missing values -- as 'Team 2'. It also scanned the
  whole frame once per match; the lookup is now done in one pass.
  """
  renamed = df.copy()

  #per-row lookup of the original Team 1 / Team 2 names of the row's match
  lookup = to_rename.drop_duplicates('file').set_index('file')
  team1_name = renamed['file'].map(lookup['Team 1'])
  team2_name = renamed['file'].map(lookup['Team 2'])

  for col in columns:
    original = renamed[col]
    #compute both masks on the original values so one replacement cannot feed the other
    is_team1 = original == team1_name
    is_team2 = original == team2_name
    renamed[col] = original.mask(is_team1, 'Team 1').mask(is_team2, 'Team 2')

  return renamed

In [None]:
# Build the rename table from the filtered damage dataframe (data[0]), keeping
# only the rows that remove_other_dmg_sources leaves in.
only_enemy_dmg = remove_other_dmg_sources(data[0])
to_rename = find_rename_mask(only_enemy_dmg)
print('Number of matches that have custom team names:', len(to_rename))

In [None]:
to_rename

In [None]:
# Anonymise the first three frames of `data` (filled as damage, grenades, kills).
for frame_idx in range(3):
  data[frame_idx] = rename_team_names(data[frame_idx], to_rename)

In [None]:
data[0]

In [None]:
data[3]

In [None]:
def rename_meta_names(df,to_rename):
  """Replace custom names in the 'winner_team' column with 'Team 1' / 'Team 2'.

  Parameters
  ----------
  df : DataFrame with 'file' and 'winner_team' columns.
  to_rename : DataFrame with 'file', 'Team 1' and 'Team 2' columns, where the
      last two hold the original names of the teams in that match.

  Returns
  -------
  A new DataFrame; ``df`` itself is left untouched.

  Notes
  -----
  Only values that exactly equal a match's recorded Team 1 / Team 2 name are
  replaced. The previous version relabelled every other value in the affected
  matches -- including missing values -- as 'Team 2', and scanned the whole
  frame once per match.
  """
  renamed = df.copy()

  #per-row lookup of the original Team 1 / Team 2 names of the row's match
  lookup = to_rename.drop_duplicates('file').set_index('file')
  team1_name = renamed['file'].map(lookup['Team 1'])
  team2_name = renamed['file'].map(lookup['Team 2'])

  winner = renamed['winner_team']
  #compute both masks on the original values so one replacement cannot feed the other
  is_team1 = winner == team1_name
  is_team2 = winner == team2_name
  renamed['winner_team'] = winner.mask(is_team1, 'Team 1').mask(is_team2, 'Team 2')

  return renamed

In [None]:
# Apply the Team 1 / Team 2 renaming to the winner_team column of data[3] (the filtered meta frame).
data[3] = rename_meta_names(data[3],to_rename)


In [None]:
data[3]