# DATA CLEANING

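This notebook produces two DataFrames for EACH GAME. The first, `team_pf`, holds overall team performance, with metrics like total wins, attacking-half wins, defending-half wins, and pistol-round wins.
The second, `player_pf`, holds individual player performance, with metrics like kills, deaths, damage dealt, damage taken, headshot percentage, assists, and total combat score.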

## Imports

In [1]:
import json
import pandas as pd

## Data Loading

In [2]:
# Read the JSON game log and wrap the parsed content in a DataFrame in one step,
# so `raw_data` is only ever bound to the DataFrame.
with open('data/game_data_2022.json', 'r', encoding='utf-8') as f:
    raw_data = pd.DataFrame(json.load(f))

## Rounds Dictionary

In [3]:
# Build a mapping {round number -> {'start_index', 'end_index'}} holding the
# raw_data row labels at which each round started and ended.

# Keep only the rows that carry a round-start or a round-end payload.
has_round_event = raw_data['roundStarted'].notna() | raw_data['roundEnded'].notna()
filtered_df = raw_data[has_round_event]

rounds_dict = {}

for index, row in filtered_df.iterrows():
    started = row['roundStarted']
    ended = row['roundEnded']

    # A start event (re)opens the round; a later start for the same round
    # number overwrites the earlier entry and clears its end index.
    if pd.notna(started):
        round_number = started.get('roundNumber')
        if round_number:
            rounds_dict[round_number] = {'start_index': index, 'end_index': None}

    # An end event is recorded only for rounds whose start was already seen.
    if pd.notna(ended):
        round_number = ended.get('roundNumber')
        if round_number in rounds_dict:
            rounds_dict[round_number]['end_index'] = index

# Discard rounds that never received an end index.
# NOTE(review): the saved output below has no entry for round 12, so at least
# one round is being dropped by the start/end pairing - confirm this is intended.
rounds_dict = {
    number: bounds
    for number, bounds in rounds_dict.items()
    if bounds['end_index'] is not None
}

# rounds_dict now maps each complete round to its start and end row labels
print(rounds_dict)


{1: {'start_index': 35, 'end_index': 847}, 2: {'start_index': 860, 'end_index': 1046}, 3: {'start_index': 1059, 'end_index': 1271}, 4: {'start_index': 1284, 'end_index': 1571}, 5: {'start_index': 1584, 'end_index': 1871}, 6: {'start_index': 1884, 'end_index': 2183}, 7: {'start_index': 2196, 'end_index': 2407}, 8: {'start_index': 2420, 'end_index': 2715}, 9: {'start_index': 2728, 'end_index': 3061}, 10: {'start_index': 3074, 'end_index': 3421}, 11: {'start_index': 3434, 'end_index': 3900}, 13: {'start_index': 3917, 'end_index': 4492}, 14: {'start_index': 4505, 'end_index': 4780}, 15: {'start_index': 4793, 'end_index': 4977}, 16: {'start_index': 4990, 'end_index': 5207}, 17: {'start_index': 5220, 'end_index': 5476}, 18: {'start_index': 5489, 'end_index': 5767}, 19: {'start_index': 5780, 'end_index': 6337}}


## team_pf

In [4]:
# Build team_pf: one row per team with cumulative round-win counts taken from
# the first non-null gameDecided payload.
winlossdata = raw_data['gameDecided'].dropna().values[0]
completed_rounds = winlossdata['spikeMode']['completedRounds']

# Team 1 is the side that attacked in the first completed round; team 2 defended it.
first_round = completed_rounds[0]
opening_sides = first_round['spikeModeResult']
team_1_number = opening_sides['attackingTeam']['value']
team_2_number = opening_sides['defendingTeam']['value']

# Both teams start with every counter at zero (column order matters for the DataFrame).
metric_columns = ('Total Wins', 'Attacking Half Wins', 'Defending Half Wins', 'Pistol Round Wins')
team_1_metrics = {'Team': team_1_number, **dict.fromkeys(metric_columns, 0)}
team_2_metrics = {'Team': team_2_number, **dict.fromkeys(metric_columns, 0)}

for round_info in completed_rounds:
    round_number = round_info['roundNumber']
    winning_team = round_info['winningTeam']['value']
    round_sides = round_info['spikeModeResult']
    attacking_team = round_sides['attackingTeam']['value']
    defending_team = round_sides['defendingTeam']['value']

    # Pick the counters to update once: any winner that is not team 1 is
    # credited to team 2.
    winner_metrics = team_1_metrics if winning_team == team_1_number else team_2_metrics

    # Side-specific win: attacking or defending, whichever the winner was playing.
    if winning_team == attacking_team:
        winner_metrics['Attacking Half Wins'] += 1
    elif winning_team == defending_team:
        winner_metrics['Defending Half Wins'] += 1

    winner_metrics['Total Wins'] += 1

    # Rounds 1 and 13 are treated as the pistol rounds.
    if round_number in (1, 13):
        winner_metrics['Pistol Round Wins'] += 1

# One record per team, converted to a DataFrame in a single call
all_team_metrics = [team_1_metrics, team_2_metrics]
team_pf = pd.DataFrame(all_team_metrics)



In [5]:
# Display the per-team summary table (one row per team).
team_pf

Unnamed: 0,Team,Total Wins,Attacking Half Wins,Defending Half Wins,Pistol Round Wins
0,21,13,9,4,2
1,22,7,4,3,0


## player_pf

In [6]:
# Build player_pf: one row per player with combat totals accumulated from the
# damage events that fall inside the rounds listed in rounds_dict.
# NOTE(review): damage events outside those start/end ranges are not counted.

# Zeroed counters copied for every player the first time they appear.
blank_stats = {
    'kills': 0, 'deaths': 0, 'damage_dealt': 0, 'damage_taken': 0, 'total_hits': 0, 'headshots': 0
}

# player id -> running counters
player_metrics = {}

for round_number, indices in rounds_dict.items():
    start_index = indices['start_index']
    end_index = indices['end_index']

    # .loc slicing is label-based and includes both the start and the end row.
    round_damage_events = raw_data.loc[start_index:end_index, 'damageEvent'].dropna()

    for event in round_damage_events:
        causer_id = event['causerId']['value']
        victim_id = event['victimId']['value']
        damage_amount = event['damageAmount']
        kill_event = event['killEvent']
        location = event['location']

        # Register the causer first, then the victim, so first-seen order is kept.
        causer_stats = player_metrics.setdefault(causer_id, dict(blank_stats))
        victim_stats = player_metrics.setdefault(victim_id, dict(blank_stats))

        # Every damage event counts as one hit for the causer.
        causer_stats['damage_dealt'] += damage_amount
        causer_stats['total_hits'] += 1
        if location == 'HEAD':
            causer_stats['headshots'] += 1

        victim_stats['damage_taken'] += damage_amount

        # A truthy killEvent is one kill for the causer and one death for the victim.
        if kill_event:
            causer_stats['kills'] += 1
            victim_stats['deaths'] += 1

# Headshot percentage = headshots / hits * 100, or 0 for players with no hits.
for stats in player_metrics.values():
    hits = stats['total_hits']
    stats['headshot_percentage'] = (stats['headshots'] / hits * 100) if hits > 0 else 0

# One row per player; the dictionary key becomes the playerID column.
player_pf = (
    pd.DataFrame.from_dict(player_metrics, orient='index')
    .reset_index()
    .rename(columns={'index': 'playerID'})
)


In [20]:
# Add 'Assists' and 'TotalScore' columns to player_pf from the final snapshot.
#
# Use the last NON-NULL snapshot rather than the last row of raw_data: the other
# event columns are read through dropna()/notna elsewhere in this notebook, so
# the last row of the log is not guaranteed to carry a snapshot payload.
snapshots = raw_data['snapshot'].dropna()
if snapshots.empty:
    raise ValueError("raw_data contains no snapshot events; cannot read assists and scores")
final_row = snapshots.iloc[-1]

# Copy each snapshot player's assists and combat score onto their player_pf row
for player in final_row['players']:
    player_id = player['playerId']['value']

    # Players that never appeared in a counted damage event have no row; skip them.
    matching_rows = player_pf.index[player_pf['playerID'] == player_id]
    if len(matching_rows) == 0:
        continue
    player_index = matching_rows[0]

    # .at creates the column on first use; rows never written stay NaN.
    player_pf.at[player_index, 'Assists'] = player['assists']
    player_pf.at[player_index, 'TotalScore'] = player['scores']['combatScore']['totalScore']


In [12]:
# Display the per-player table (one row per playerID).
# NOTE(review): the saved output below has no Assists/TotalScore columns and the
# execution counts are out of order ([20] before [12]); restart the kernel and
# run all cells to refresh it.
player_pf

Unnamed: 0,playerID,kills,deaths,damage_dealt,damage_taken,total_hits,headshots,headshot_percentage
0,10,20,18,2822.73664,2823.297726,59,15,25.423729
1,2,7,14,1318.44572,2030.1,30,4,13.333333
2,9,14,14,1998.442848,2244.164459,40,10,25.0
3,6,16,12,2331.223366,1858.332002,48,11,22.916667
4,5,19,15,2553.462225,2253.999974,34,17,50.0
5,3,15,17,1927.724972,2389.999943,34,15,44.117647
6,4,13,16,2383.175523,2107.332547,33,13,39.393939
7,7,13,12,1525.64093,1970.0,109,10,9.174312
8,1,16,15,2778.28042,2149.999989,99,14,14.141414
9,8,14,14,2252.908669,2064.814672,31,5,16.129032
