# In [None]:
import pandas as pd
import numpy as np
import json

# Read the raw playoff-series export
file_path = 'playoff_series.csv'
df = pd.read_csv(file_path)

# Scale PTS down by a factor of 100, then switch the player identifier
# columns to the names used by the rest of the script
df['PTS'] = df['PTS'].div(100)
df = df.rename(columns={'PLAYER_ID': 'nba_id', 'PLAYER_NAME': 'player_name'})

# Columns carried forward, in output order
columns_to_keep = (
    # row identity
    ['nba_id', 'player_name', 'year', 'round', 'OPP']
    # usage / shooting-efficiency rates
    + ['USG_PCT', 'TS_PCT', 'year_avg_ts', 'OPP_TS_PCT', 'rOPP_TS_PCT']
    # volume and context
    + ['GP', 'PTS', 'MIN', 'AGE', 'AST_PCT', 'POSS', 'OFF_RATING']
    # shooting splits
    + ['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA']
    # remaining box-score columns
    + ['OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA']
    + ['PF', 'PFD', 'PLUS_MINUS']
)

# Work on an independent copy so new columns never touch the raw frame
df_selected = df.loc[:, columns_to_keep].copy()

# TSA = FGA + 0.44 * FTA
df_selected['TSA'] = df_selected['FGA'].add(df_selected['FTA'].mul(0.44))


# Function to safely divide
def safe_divide(a, b):
    """Divide ``a`` by ``b`` element-wise, yielding 0 where ``b`` is 0.

    The division is only carried out where the denominator is non-zero,
    so zero denominators neither raise ``ZeroDivisionError`` (scalar
    inputs) nor trigger NumPy divide-by-zero warnings (array inputs).

    Args:
        a: Numerator; a scalar or array-like (e.g. a pandas Series).
        b: Denominator; a scalar or array-like that broadcasts with ``a``.

    Returns:
        A float ``numpy.ndarray`` with the broadcast shape of ``a`` and
        ``b`` (0-dimensional for scalar inputs). Inputs are combined by
        position, not by pandas index label.
    """
    numerator = np.asarray(a, dtype=float)
    denominator = np.asarray(b, dtype=float)

    # Start from zeros: positions skipped by `where` keep this value.
    result = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    return result


# Function for weighted average
def weighted_average(group, value_col, weight_col):
    """Return the average of ``group[value_col]`` weighted by ``group[weight_col]``.

    Args:
        group: Mapping of column name to values (e.g. a DataFrame).
        value_col: Name of the column holding the values to average.
        weight_col: Name of the column holding the weights.

    Returns:
        sum(value * weight) / sum(weight).
    """
    weights = group[weight_col]
    weighted_total = (group[value_col] * weights).sum()
    return weighted_total / weights.sum()


# Columns rolled up with a plain aggregation, listed in output order.
# FG_PCT and FG3_PCT are the unweighted mean of the per-row percentages.
_ROLLUP_AGGREGATIONS = {
    'player_name': 'first',
    'AGE': 'first',
    'PTS': 'sum',
    'GP': 'sum',
    'MIN': 'sum',
    'POSS': 'sum',
    'TSA': 'sum',
    'FGM': 'sum',
    'FGA': 'sum',
    'FG_PCT': 'mean',
    'FG3M': 'sum',
    'FG3A': 'sum',
    'FG3_PCT': 'mean',
    'FTM': 'sum',
    'FTA': 'sum',
    'OREB': 'sum',
    'DREB': 'sum',
    'REB': 'sum',
    'AST': 'sum',
    'TOV': 'sum',
    'STL': 'sum',
    'BLK': 'sum',
    'BLKA': 'sum',
    'PF': 'sum',
    'PFD': 'sum',
    'PLUS_MINUS': 'sum',
}

# Rate columns rolled up as weighted averages: value column -> weight column
_ROLLUP_WEIGHTS = {
    'AST_PCT': 'POSS',
    'OFF_RATING': 'POSS',
    'USG_PCT': 'POSS',
    'TS_PCT': 'TSA',
    'year_avg_ts': 'TSA',
    'OPP_TS_PCT': 'TSA',
    'rOPP_TS_PCT': 'TSA',
}


def _roll_up(frame, keys, labels):
    """Collapse ``frame`` to one row per combination of ``keys``.

    A dict-style ``groupby(...).agg`` hands each function a single column,
    so it cannot compute an average weighted by another column. The
    weighted averages are therefore built from grouped sums of
    value * weight divided by the grouped sum of the weight.

    Args:
        frame: DataFrame containing ``keys`` and every column named in
            ``_ROLLUP_AGGREGATIONS`` and ``_ROLLUP_WEIGHTS``.
        keys: List of column names to group by.
        labels: Mapping of column name to the constant value that column
            takes on every rolled-up row (e.g. ``{'round': 'All'}``).

    Returns:
        DataFrame with the key columns, ``player_name``, the label
        columns, the plain aggregations and the weighted averages.
    """
    # Pre-multiply each rate by its weight so a grouped sum yields the
    # numerator of the weighted average.
    products = {
        '_wsum_' + value_col: frame[value_col] * frame[weight_col]
        for value_col, weight_col in _ROLLUP_WEIGHTS.items()
    }
    grouped = frame.assign(**products).groupby(keys)

    rolled = grouped.agg(_ROLLUP_AGGREGATIONS)
    for value_col, weight_col in _ROLLUP_WEIGHTS.items():
        numerator = grouped['_wsum_' + value_col].sum()
        rolled[value_col] = numerator / rolled[weight_col]

    # Place the constant label columns directly after player_name.
    for position, (column, label) in enumerate(labels.items(), start=1):
        rolled.insert(position, column, label)

    return rolled.reset_index()


# Group by nba_id and year to roll up stats to the year
grouped_df_corrected = _roll_up(df_selected, ['nba_id', 'year'],
                                {'round': 'All', 'OPP': 'All'})

# Append the yearly rolled up rows to the per-series rows
df_final_corrected = pd.concat([df_selected, grouped_df_corrected],
                               ignore_index=True)

# Roll up stats for each player across their career, starting from the
# yearly 'All' rows
career_grouped_df = _roll_up(
    df_final_corrected[df_final_corrected['round'] == 'All'], ['nba_id'],
    {'year': 'Career', 'round': 'Career', 'OPP': 'All'})

# Append the career rolled up rows to the combined dataframe
df_final_with_career = pd.concat([df_final_corrected, career_grouped_df],
                                 ignore_index=True)

# Derived rate statistics. safe_divide is used for every ratio so that a
# zero denominator yields 0.
_stats = df_final_with_career  # alias: columns are added to the same frame
_games = _stats['GP']
_possessions = _stats['POSS']

# Per-game rates
_stats['MPG'] = safe_divide(_stats['MIN'], _games)
_stats['PPG'] = safe_divide(_stats['PTS'], _games)

# Per-75-possession rates
for _total_col in ('PTS', 'AST', 'REB', 'TOV'):
    _stats[_total_col + '/75'] = safe_divide(_stats[_total_col],
                                             _possessions) * 75

# TSA per 100 possessions
_stats['TSA/100'] = safe_divide(_stats['TSA'] * 100, _possessions)

# TS_ADD scales TSA by rOPP_TS_PCT; TS_ADD_1 does the same with
# rOPP_TS_PCT shifted up by 0.01
_tsa = _stats['TSA']
_relative_ts = _stats['rOPP_TS_PCT']
_stats['TS_ADD'] = _tsa * _relative_ts * 2
_stats['TS_ADD_1'] = _tsa * (_relative_ts + 0.01) * 2

# Both TS_ADD variants per 100 possessions
for _added_col in ('TS_ADD', 'TS_ADD_1'):
    _stats[_added_col + '_PER_100'] = safe_divide(_stats[_added_col] * 100,
                                                  _possessions)

# Separate the rows for 'Career'
df_career = df_final_with_career[df_final_with_career['round'] == 'Career']

# Separate the rows for 'All'
df_all = df_final_with_career[df_final_with_career['round'] == 'All']

# Get the other rounds
df_rounds = df_final_with_career[(df_final_with_career['round'] != 'All')
                                 & (df_final_with_career['round'] != 'Career')]

# Index the round rows by (nba_id, year) once, instead of filtering
# df_rounds again for every 'All' row. sort=False keeps the groups in
# order of first appearance; rows inside a group keep their frame order.
rounds_by_player_year = {
    key: group.to_dict(orient='records')
    for key, group in df_rounds.groupby(['nba_id', 'year'], sort=False)
}

# Create a list to hold the 'All' rows with their nested rounds
nested_data = []

for _, row in df_all.iterrows():
    # Rounds for this player and year; empty list when there are none
    rounds_list = rounds_by_player_year.get((row['nba_id'], row['year']), [])

    # Store the rounds on the 'All' row as a JSON string
    row['rounds'] = json.dumps(rounds_list)

    # Append to the nested data list
    nested_data.append(row)

# Convert the nested data list to a DataFrame
df_nested = pd.DataFrame(nested_data)

# Concatenate with the career rows
df_final_nested = pd.concat([df_nested, df_career], ignore_index=True)

# Save to CSV
output_path = 'nested_playoff_series.csv'
df_final_nested.to_csv(output_path, index=False)