In [13]:
import pandas as pd
import numpy as np
import time
from nba_api.stats.endpoints import leaguegamefinder, playergamelogs
from nba_api.stats.static import teams, players

# Constants
RELEVANT_STATS = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA']
PERCENTILE_THRESHOLDS = [1, 2, 3, 4, 5, 10, 25, 50]

def load_team_data():
    nba_teams = teams.get_teams()
    team_df = pd.DataFrame(nba_teams)
    return team_df[['id', 'full_name', 'abbreviation']]

# Helper Functions

def fetch_player_id_by_name(player_name, debug=False):
    """Fetch player ID based on player name."""
    try:
        player = players.find_players_by_full_name(player_name)[0]
        if debug:
            print(f"Fetched ID for player {player_name}: {player['id']}")
        return player['id']
    except Exception as e:
        if debug:
            print(f"Error fetching ID for player {player_name}: {e}")
        return None

def get_champion(season, debug=False):
    """Fetch the champion team for a given NBA season."""
    try:
        games = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable='Playoffs').get_data_frames()[0]
        games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
        last_game = games.sort_values('GAME_DATE').iloc[-2:]
        winner = last_game[last_game['WL'] == 'W'].iloc[0]
        if debug:
            print(f"Champion for season {season}: {winner['TEAM_NAME']} ({winner['TEAM_ID']})")
        return winner['TEAM_NAME']
    except Exception as e:
        if debug:
            print(f"Error fetching champion for season {season}: {e}")
        return None

def get_champions(start_year, end_year, debug=False):
    """Fetch champions for each season from start_year to end_year."""
    champions = []
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year+1)[-2:]}"
        champ_name = get_champion(season, debug)
        if champ_name:
            champions.append({'Season': season, 'ChampionTeamName': champ_name})
        elif debug:
            print(f"Champion data not available for season {season}")
        time.sleep(1)  # To avoid overwhelming the API
    if debug:
        print(f"Champions data: {champions}")
    return pd.DataFrame(champions)

def calculate_average_top_percentiles(top_percentile_counts_df, debug=False):
    """Calculate the average percentiles for all champion teams."""
    average_percentiles = {}

    # Calculate average percentiles for each threshold
    for col in RELEVANT_STATS:
        for threshold in PERCENTILE_THRESHOLDS:
            count_key = f'{col}_Top_{threshold}_count'
            avg_key = f'{col}_Avg_Top_{threshold}_percentile'
            
            # Calculate the mean of counts across all seasons
            if count_key in top_percentile_counts_df.columns:
                avg_value = top_percentile_counts_df[count_key].mean()
            else:
                if debug:
                    print(f"Warning: {count_key} not found in top_percentile_counts columns.")
                avg_value = 0  # Set to 0 if not found
            
            avg_value = avg_value if pd.notnull(avg_value) else 0  # Ensure zero is shown if average is NaN
            average_percentiles[avg_key] = avg_value
            
            if debug:
                print(f"{col} Avg Top {threshold}% Count: {avg_value}")
    
    return pd.DataFrame([average_percentiles])

def calculate_champion_percentiles(league_percentiles, champions, debug=False):
    """Extract percentiles for players in champion teams based on league percentiles."""
    champion_data = league_percentiles[league_percentiles['TEAM_NAME'].isin(champions['ChampionTeamName'])].copy()
    if debug:
        print("Champion Data Percentiles:")
        print(champion_data.head())
    return champion_data

def fetch_all_player_data(seasons, debug=False):
    """Fetch player game logs data for all players across multiple seasons."""
    all_data = pd.DataFrame()
    for season in seasons:
        try:
            player_logs = playergamelogs.PlayerGameLogs(season_nullable=season).get_data_frames()[0]
            player_logs['SEASON'] = season
            all_data = pd.concat([all_data, player_logs], ignore_index=True)
            if debug:
                print(f"Fetched {len(player_logs)} player logs for the league in season {season}")
        except Exception as e:
            if debug:
                print(f"Error fetching player data for the league in season {season}: {e}")
    if debug:
        print(f"Total logs fetched: {len(all_data)}")
    return all_data

def calculate_player_stats(player_data, debug=False):
    """Calculate average player statistics from game logs."""
    # Calculate stats per game for players
    player_stats = player_data.groupby(['SEASON', 'TEAM_NAME', 'PLAYER_NAME'])[RELEVANT_STATS].mean().reset_index()
    
    # Rename columns to include '_per_game'
    for stat in RELEVANT_STATS:
        player_stats.rename(columns={stat: f'{stat}_per_game'}, inplace=True)

    if debug:
        print("Sample player stats (entire league):")
        print(player_stats.head())  # Show head of the player stats
    return player_stats

def calculate_percentiles(stats_df, debug=False):
    """Calculate percentile ranks for each stat in the DataFrame by season."""
    percentiles = {}

    for col in RELEVANT_STATS:
        col_per_game = f'{col}_per_game'
        if col_per_game in stats_df.columns:
            # Calculate percentiles across the entire dataset
            stats_df[f'{col}_percentile'] = stats_df[col_per_game].rank(pct=True, method='min')
            # Ensure no NaN values before calculating percentiles
            if not stats_df[col_per_game].isna().any():
                percentiles[col] = np.percentile(stats_df[col_per_game], [100 - t for t in PERCENTILE_THRESHOLDS])
            else:
                if debug:
                    print(f"NaN values found in {col_per_game} column.")
            if debug:
                print(f"Calculated percentiles for {col_per_game}:")
                print(stats_df[['TEAM_NAME', 'PLAYER_NAME', col_per_game, f'{col}_percentile']].head())
    return stats_df, percentiles

def count_top_percentiles(player_percentiles, percentiles, team_name, debug=False):
    """Count how many players in a specific team fall within top percentiles."""
    top_counts = {f'{stat}_Top_{threshold}_count': 0 for stat in RELEVANT_STATS for threshold in PERCENTILE_THRESHOLDS}
    team_data = player_percentiles[player_percentiles['TEAM_NAME'] == team_name]

    if debug:
        print(f"\n{team_name} player percentiles data:\n{team_data}")

    for col in RELEVANT_STATS:
        col_per_game = f'{col}_per_game'
        if col in percentiles:  # Ensure we have valid percentiles calculated
            for idx, threshold in enumerate(PERCENTILE_THRESHOLDS):
                count_key = f'{col}_Top_{threshold}_count'
                top_counts[count_key] = (team_data[col_per_game] >= percentiles[col][idx]).sum()

                if debug:
                    print(f"{col} Top {threshold}% Count: {top_counts[count_key]}")
    
    return top_counts

def simulate_trade(player_stats, players_from_team_a, players_from_team_b, team_a_name, team_b_name, debug=False):
    """Simulate a trade by swapping players between two teams."""
    # Swap players between the two teams
    player_stats.loc[player_stats['PLAYER_NAME'].isin(players_from_team_a), 'TEAM_NAME'] = team_b_name
    player_stats.loc[player_stats['PLAYER_NAME'].isin(players_from_team_b), 'TEAM_NAME'] = team_a_name
    
    if debug:
        print("\nAfter trade simulation:")
        print(player_stats[(player_stats['PLAYER_NAME'].isin(players_from_team_a + players_from_team_b))][['PLAYER_NAME', 'TEAM_NAME']])

    return player_stats

def create_comparison_table(before_trade, after_trade, average_percentiles, team_name):
    """Create a comparison table for a team before and after the trade."""
    data = {'Team': [team_name] * len(PERCENTILE_THRESHOLDS), 'Percentile': PERCENTILE_THRESHOLDS}
    
    for stat in RELEVANT_STATS:
        before_counts = [before_trade[f'{stat}_Top_{threshold}_count'] for threshold in PERCENTILE_THRESHOLDS]
        after_counts = [after_trade[f'{stat}_Top_{threshold}_count'] for threshold in PERCENTILE_THRESHOLDS]
        champ_avg = [average_percentiles[f'{stat}_Avg_Top_{threshold}_percentile'][0] for threshold in PERCENTILE_THRESHOLDS]
        
        data[f'{stat}_Before'] = before_counts
        data[f'{stat}_After'] = after_counts
        data[f'{stat}_Champ_Avg'] = champ_avg
    
    df = pd.DataFrame(data)
    df.set_index('Percentile', inplace=True)
    return df

def main(debug=False):
    seasons = ["2023-24"]  # Add more seasons as needed
    champion_info = get_champions(2023, 2023, debug)  # Fetch champions for specified seasons
    
    # Fetch player data for all specified seasons
    all_player_data = fetch_all_player_data(seasons, debug)
    
    # Calculate player-level stats
    player_stats = calculate_player_stats(all_player_data, debug)
    
    # Calculate percentiles for all players in the league
    league_percentiles, league_percentiles_ref = calculate_percentiles(player_stats, debug)
    
    # Calculate percentiles for champion teams based on the whole league
    champion_percentiles = calculate_champion_percentiles(league_percentiles, champion_info, debug)
    
    if debug:
        print("Champion Percentiles:")
        print(champion_percentiles)
    
    # Count top percentiles for each champion team and calculate averages
    top_percentile_counts = champion_percentiles.groupby('TEAM_NAME').apply(lambda x: count_top_percentiles(x, league_percentiles_ref, x.iloc[0]['TEAM_NAME'], debug)).apply(pd.Series)
    average_top_percentiles_df = calculate_average_top_percentiles(top_percentile_counts, debug)

    if debug:
        print("\nAverage Champion Percentiles:")
        print(average_top_percentiles_df)

    #------------------
    chosen_season = "2023-24"
    team_a_name = "Boston Celtics"
    team_b_name = "Golden State Warriors"

    # Fetch player data for the entire league
    all_player_data = fetch_all_player_data([chosen_season], debug)
    
    # Calculate player-level stats and percentiles
    player_stats = calculate_player_stats(all_player_data, debug)
    player_percentiles, percentiles = calculate_percentiles(player_stats, debug)
    
    # Count top percentiles before the trade
    print(f"\nBefore Trade:")
    team_a_top_percentile_counts = count_top_percentiles(player_percentiles, percentiles, team_a_name, debug)
    team_b_top_percentile_counts = count_top_percentiles(player_percentiles, percentiles, team_b_name, debug)

    # Simulate a trade between Boston Celtics and Golden State Warriors
    players_from_boston = ["Jaylen Brown", "Jayson Tatum"]  # Example player names from Boston Celtics
    players_from_warriors = ["Jordan Poole", "Kevon Looney"]  # Example player names from Golden State Warriors

    # Apply the trade
    player_stats = simulate_trade(player_stats, players_from_boston, players_from_warriors, team_a_name, team_b_name, debug)

    # Recalculate percentiles after the trade
    player_percentiles, percentiles = calculate_percentiles(player_stats, debug)

    # Count top percentiles after the trade
    print(f"\nAfter Trade:")
    team_a_top_percentile_counts_after = count_top_percentiles(player_percentiles, percentiles, team_a_name, debug)
    team_b_top_percentile_counts_after = count_top_percentiles(player_percentiles, percentiles, team_b_name, debug)

    # Create comparison tables with champion average percentiles
    celtics_comparison_table = create_comparison_table(team_a_top_percentile_counts, team_a_top_percentile_counts_after, average_top_percentiles_df, team_a_name)
    warriors_comparison_table = create_comparison_table(team_b_top_percentile_counts, team_b_top_percentile_counts_after, average_top_percentiles_df, team_b_name)
    
    # Display tables
    print("\nBoston Celtics Comparison Table:")
    print(celtics_comparison_table)
    
    print("\nGolden State Warriors Comparison Table:")
    print(warriors_comparison_table)

if __name__ == "__main__":
    # Set debug to True for detailed output, or False for summary output
    main(debug=True)


Champion for season 2015-16: Cleveland Cavaliers (1610612739)
Champion for season 2016-17: Golden State Warriors (1610612744)
Champion for season 2017-18: Golden State Warriors (1610612744)
Champion for season 2018-19: Toronto Raptors (1610612761)
Champion for season 2019-20: Los Angeles Lakers (1610612747)
Champion for season 2020-21: Milwaukee Bucks (1610612749)
Champion for season 2021-22: Golden State Warriors (1610612744)
Champion for season 2022-23: Denver Nuggets (1610612743)
Champion for season 2023-24: Boston Celtics (1610612738)
Champions data: [{'Season': '2015-16', 'ChampionTeamName': 'Cleveland Cavaliers'}, {'Season': '2016-17', 'ChampionTeamName': 'Golden State Warriors'}, {'Season': '2017-18', 'ChampionTeamName': 'Golden State Warriors'}, {'Season': '2018-19', 'ChampionTeamName': 'Toronto Raptors'}, {'Season': '2019-20', 'ChampionTeamName': 'Los Angeles Lakers'}, {'Season': '2020-21', 'ChampionTeamName': 'Milwaukee Bucks'}, {'Season': '2021-22', 'ChampionTeamName': 'Gold

In [11]:
# %%writefile ../src/salary_predict/updated/overall_team_trade_impact.py

import pandas as pd
import numpy as np
from nba_api.stats.endpoints import playergamelogs, leaguegamefinder
from tabulate import tabulate
import time
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.static import teams, players

# Constants
RELEVANT_STATS = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA']

def load_team_data():
    nba_teams = teams.get_teams()
    team_df = pd.DataFrame(nba_teams)
    return team_df[['id', 'full_name', 'abbreviation']]

def fetch_player_id_by_name(player_name, debug=False):
    """Fetch player ID based on player name."""
    try:
        player = players.find_players_by_full_name(player_name)[0]
        if debug:
            print(f"Fetched ID for player {player_name}: {player['id']}")
        return player['id']
    except Exception as e:
        if debug:
            print(f"Error fetching ID for player {player_name}: {e}")
        return None

def fetch_player_info(player_id, debug=False):
    """Fetch player information based on player ID."""
    try:
        player_info = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()[0]
        if debug:
            print(f"Fetched info for player ID {player_id}: {player_info['DISPLAY_FIRST_LAST'].values[0]}")
        return player_info
    except Exception as e:
        if debug:
            print(f"Error fetching info for player ID {player_id}: {e}")
        return None

def fetch_season_data_by_year(year, debug=False):
    """Fetch player game logs data for a given starting year of the NBA season."""
    season = f"{year}-{str(year+1)[-2:]}"
    if debug:
        print(f"Fetching player data for season {season}")
    try:
        player_logs = playergamelogs.PlayerGameLogs(season_nullable=season).get_data_frames()[0]
        player_logs['SEASON'] = season
        player_logs['GAME_DATE'] = pd.to_datetime(player_logs['GAME_DATE'])
        if debug:
            print(f"Player data for season {season} contains {player_logs.shape[0]} rows.")
        return player_logs
    except Exception as e:
        if debug:
            print(f"Error fetching player data for season {season}: {e}")
        return None

# Helper Functions
def get_champion(season, debug=False):
    """Fetch the champion team for a given NBA season."""
    try:
        games = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable='Playoffs').get_data_frames()[0]
        games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
        last_game = games.sort_values('GAME_DATE').iloc[-2:]
        winner = last_game[last_game['WL'] == 'W'].iloc[0]
        if debug:
            print(f"Champion for season {season}: {winner['TEAM_NAME']} ({winner['TEAM_ID']})")
        return winner['TEAM_NAME']
    except Exception as e:
        if debug:
            print(f"Error fetching champion for season {season}: {e}")
        return None

def get_champions(start_year, end_year, debug=False):
    """Fetch champions for each season from start_year to end_year."""
    champions = {}
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year+1)[-2:]}"
        champ_name = get_champion(season, debug)
        if champ_name:
            champions[season] = {'ChampionTeamName': champ_name}
        elif debug:
            print(f"Champion data not available for season {season}")
        time.sleep(1)  # To avoid overwhelming the API
    if debug:
        print(f"Champions data: {champions}")
    return champions

def calculate_percentiles(stats_df, debug=False):
    """Calculate percentiles for stats after averages are computed."""
    # Group by season and calculate percentiles for each season separately
    for season in stats_df['SEASON'].unique():
        season_data = stats_df[stats_df['SEASON'] == season]
        for stat in RELEVANT_STATS + ['eFG%']:
            stat_per_game = f'{stat}_per_game'
            if stat_per_game in season_data.columns:
                stats_df.loc[season_data.index, f'{stat}_percentile'] = season_data[stat_per_game].rank(pct=True)
                if debug:
                    print(f"Calculated percentiles for {stat} in season {season}:")
                    print(stats_df.loc[season_data.index, [stat_per_game, f'{stat}_percentile']].head())
    return stats_df

def calculate_team_stats(player_data, period, debug=False):
    """Calculate team-level statistics, including averages."""
    if debug:
        print(f"Calculating {period} team-level statistics.")
        print("Initial player_data head:")
        print(player_data.head())

    # Calculate team-level stats by summing player stats for each team and season
    team_stats = (
        player_data.groupby(['SEASON', 'TEAM_NAME'])[RELEVANT_STATS]
        .sum()
        .reset_index()
    )

    # Calculate the number of games played by each team
    games_played = player_data.groupby(['SEASON', 'TEAM_NAME'])['GAME_ID'].nunique().reset_index(name='GAMES_PLAYED')

    # Merge games played with team stats
    team_stats = pd.merge(team_stats, games_played, on=['SEASON', 'TEAM_NAME'])

    # Calculate stats per game
    for stat in RELEVANT_STATS:
        team_stats[f'{stat}_per_game'] = team_stats[stat] / team_stats['GAMES_PLAYED']

    # Add period column
    team_stats['PERIOD'] = period

    if debug:
        print(f"{period} team-level statistics head:")
        print(team_stats.head())

    return team_stats

def process_champion_team_data(player_data, champions, debug=False):
    """Process the game logs to get data for the champion teams."""
    champion_team_stats = pd.DataFrame()

    for season, champ_info in champions.items():
        champ_name = champ_info['ChampionTeamName']

        # Filter player data for champion team
        champ_data = player_data[(player_data['SEASON'] == season) & (player_data['TEAM_NAME'] == champ_name)]

        if champ_data.empty:
            if debug:
                print(f"No data found for champion team {champ_name} in season {season}")
            continue

        # Calculate team statistics
        champ_stats = calculate_team_stats(champ_data, 'Champion', debug)
        champ_stats['ChampionTeamName'] = champ_name

        champion_team_stats = pd.concat([champion_team_stats, champ_stats], ignore_index=True)

    # Calculate eFG%
    champion_team_stats['eFG%_per_game'] = (
        (champion_team_stats['FGM_per_game'] + 0.5 * champion_team_stats['FG3M_per_game']) / champion_team_stats['FGA_per_game']
    )

    # Calculate percentiles for champion teams within their season
    champion_team_stats = calculate_percentiles(champion_team_stats, debug)

    return champion_team_stats


def calculate_average_champion_stats(champion_team_data, debug=False):
    """Calculate the average statistics for all champion teams."""
    if debug:
        print("Calculating average champion team statistics.")
    
    # Calculate average stats for all champion teams
    avg_stats = champion_team_data[RELEVANT_STATS + [f'{stat}_per_game' for stat in RELEVANT_STATS] + ['eFG%_per_game']].mean()

    # Create a DataFrame for the average stats
    avg_row = pd.DataFrame([avg_stats], columns=champion_team_data.columns)
    avg_row['SEASON'] = 'Average'
    avg_row['TEAM_NAME'] = 'Average Champion'
    avg_row['PERIOD'] = 'Champion'
    avg_row['ChampionTeamName'] = 'Average Champion'

    # Append the average row to the champion team data
    champion_team_data = pd.concat([champion_team_data, avg_row], ignore_index=True)

    # Recalculate percentiles for champion teams within their data
    champion_team_data = calculate_percentiles(champion_team_data, debug)
    
    if debug:
        print("\nChampion Team Stats with Average:")
        print(tabulate(champion_team_data, headers='keys', tablefmt='grid'))

    # Return the updated champion data with the new average
    return champion_team_data

def compare_team_performance(percentiles, average_champion_stats, traded_teams, debug=False):
    """Generate a comparison table for team performance before and after trades."""
    if debug:
        print("Comparing team performance:")
        print("Percentiles data head:")
        print(percentiles.head())

    comparison_data = []
    
    for team in traded_teams:
        pre_trade_stats = percentiles[(percentiles['TEAM_NAME'] == team) & (percentiles['PERIOD'] == 'Pre-trade')]
        post_trade_stats = percentiles[(percentiles['TEAM_NAME'] == team) & (percentiles['PERIOD'] == 'Post-trade')]
        
        if not pre_trade_stats.empty and not post_trade_stats.empty:
            team_comparison = {'Team': team}
            for stat in RELEVANT_STATS + ['eFG%']:
                team_comparison[f'{stat} Pre-trade'] = pre_trade_stats[f'{stat}_per_game'].values[0]
                team_comparison[f'{stat} Pre-trade Percentile'] = pre_trade_stats[f'{stat}_percentile'].values[0]
                team_comparison[f'{stat} Post-trade'] = post_trade_stats[f'{stat}_per_game'].values[0]
                team_comparison[f'{stat} Post-trade Percentile'] = post_trade_stats[f'{stat}_percentile'].values[0]
                team_comparison[f'{stat} Champion'] = average_champion_stats[f'{stat}_per_game'].values[0]
            
            comparison_data.append(team_comparison)
        else:
            if debug:
                print(f"No data available for comparison for {team}.")
                print("Pre-trade stats head:")
                print(pre_trade_stats.head())
                print("Post-trade stats head:")
                print(post_trade_stats.head())

    comparison_df = pd.DataFrame(comparison_data)

    if debug:
        print("\nComparison Results:")
        print(tabulate(comparison_df, headers='keys', tablefmt='grid'))

    return comparison_df

def validate_post_trade_stats(player_data, trade_date, traded_teams, post_trade_stats, debug=False):
    """Validate the post-trade statistics calculation."""
    trade_date = pd.to_datetime(trade_date)
    post_trade_data = player_data[player_data['GAME_DATE'] >= trade_date]

    validation_results = {}

    for team in traded_teams:
        team_data = post_trade_data[post_trade_data['TEAM_NAME'] == team]
        
        total_points = team_data['PTS'].sum()
        games_played = team_data['GAME_ID'].nunique()
        calculated_ppg = total_points / games_played if games_played > 0 else 0

        reported_ppg = post_trade_stats[post_trade_stats['TEAM_NAME'] == team]['PTS_per_game'].values[0]

        validation_results[team] = {
            'Calculated PPG': calculated_ppg,
            'Reported PPG': reported_ppg,
            'Difference': calculated_ppg - reported_ppg,
            'Games Played': games_played
        }

    if debug:
        print("\nPost-Trade Statistics Validation:")
        print(tabulate(pd.DataFrame(validation_results).T, headers='keys', tablefmt='grid'))

    return validation_results

def calculate_post_trade_team_stats(player_data, traded_players, trade_date, season_data, debug=False):
    """Calculate post-trade team-level statistics, using entire season if necessary."""
    if debug:
        print("Calculating post-trade team-level statistics.")

    # Convert trade_date to datetime
    trade_date = pd.to_datetime(trade_date)

    # Determine the start of the season based on the SEASON column
    season_start_year = int(player_data['SEASON'].iloc[0].split('-')[0])
    season_start_date = pd.to_datetime(f"{season_start_year}-10-01")  # NBA season typically starts in October

    # Determine whether to use entire season data or data after trade date
    if trade_date < season_start_date:
        if debug:
            print(f"Warning: Trade date {trade_date} is earlier than the start of the season {season_start_date}. Using entire season data.")
        post_trade_data = season_data  # Use the entire season data
    else:
        post_trade_data = player_data[player_data['GAME_DATE'] >= trade_date].copy()

    if debug:
        print("Post-trade player data head:")
        print(post_trade_data.head())

    # Calculate post-trade stats
    post_trade_stats = calculate_team_stats(post_trade_data, 'Post-trade', debug)

    # Calculate traded players' post-trade averages
    traded_player_stats = {}
    for player_name, new_team_name in traded_players.items():
        player_id = fetch_player_id_by_name(player_name, debug)
        player_post_trade_stats = post_trade_data[post_trade_data['PLAYER_ID'] == player_id][RELEVANT_STATS].mean()
        traded_player_stats[player_name] = player_post_trade_stats.to_dict()
        if debug:
            print(f"{player_name} averages post-trade (to {new_team_name}): {traded_player_stats[player_name]}")

    # Adjust post-trade stats based on traded players
    for player_name, new_team_name in traded_players.items():
        player_id = fetch_player_id_by_name(player_name, debug)
        old_team_name = player_data[player_data['PLAYER_ID'] == player_id]['TEAM_NAME'].iloc[0]
        post_trade_games = post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == new_team_name, 'GAMES_PLAYED'].values[0]
        
        if debug:
            print(f"\nAdjusting stats for trade: {player_name} from {old_team_name} to {new_team_name}")

        # Remove player's stats from old team
        for stat in RELEVANT_STATS:
            if debug:
                print(f"  Before adjustment - {old_team_name} {stat}: {post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat].values[0]}")
            post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
            if debug:
                print(f"  After adjustment - {old_team_name} {stat}: {post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat].values[0]}")

        # Add player's stats to new team
        for stat in RELEVANT_STATS:
            if debug:
                print(f"  Before adjustment - {new_team_name} {stat}: {post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == new_team_name, stat].values[0]}")
            post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == new_team_name, stat] += traded_player_stats[player_name][stat] * post_trade_games
            if debug:
                print(f"  After adjustment - {new_team_name} {stat}: {post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == new_team_name, stat].values[0]}")

    # Recalculate per-game stats
    for stat in RELEVANT_STATS:
        post_trade_stats[f'{stat}_per_game'] = post_trade_stats[stat] / post_trade_stats['GAMES_PLAYED']

    if debug:
        print("Post-trade team stats calculated successfully.")
        print("Post-trade team stats head:")
        print(post_trade_stats.head())

    return post_trade_stats

def main(debug=True):
    start_year = 2020
    end_year = 2023
    trade_date = '2023-12-20'  # Example trade date
    
    # Traded players with new team names
    traded_players = {
        'Jayson Tatum': 'Los Angeles Lakers',  # Example Player and new team
        'Austin Reaves': 'Boston Celtics'      # Example Player and new team
    }
    
    # Fetch champion data
    champions = get_champions(start_year, end_year, debug)
    
    # Fetch player data for each season
    player_data = pd.DataFrame()
    season_data = pd.DataFrame()  # To store the full season data
    for year in range(start_year, end_year + 1):
        data = fetch_season_data_by_year(year, debug)
        if data is not None:
            player_data = pd.concat([player_data, data], ignore_index=True)
            season_data = player_data  # Assuming season_data should hold the entire season's data

    if player_data.empty:
        print("Failed to fetch player data. Exiting.")
        return

    # Process champion team data
    champion_team_data = process_champion_team_data(player_data, champions, debug)

    # Calculate pre-trade and post-trade team statistics
    if player_data['GAME_DATE'].min() > pd.to_datetime(trade_date):
        pre_trade_team_stats = calculate_team_stats(player_data, 'Pre-trade', debug)
    else:
        pre_trade_team_stats = calculate_team_stats(player_data[player_data['GAME_DATE'] < pd.to_datetime(trade_date)], 'Pre-trade', debug)
        
    post_trade_team_stats = calculate_post_trade_team_stats(player_data, traded_players, trade_date, season_data, debug)

    # Combine pre-trade and post-trade stats
    combined_stats = pd.concat([pre_trade_team_stats, post_trade_team_stats], ignore_index=True)

    # Calculate eFG% for the combined dataset
    combined_stats['eFG%_per_game'] = (combined_stats['FGM_per_game'] + 0.5 * combined_stats['FG3M_per_game']) / combined_stats['FGA_per_game']

    # Calculate percentiles for the combined stats
    percentiles = calculate_percentiles(combined_stats, debug)
    
    if debug:
        print("\nCombined Team Stats and Percentiles:")
        print(tabulate(percentiles, headers='keys', tablefmt='grid'))
    
    # Calculate average champion stats
    average_champion_stats = calculate_average_champion_stats(champion_team_data, debug)

    # Compare pre-trade and post-trade stats for traded teams
    traded_teams = list(set([team_name for _, team_name in traded_players.items()]))
    comparison_table = compare_team_performance(percentiles, average_champion_stats, traded_teams, debug)
    
    # Print the comparison table
    if debug:
        print("\nTrade Impact Comparison:")
        print(tabulate(comparison_table, headers='keys', tablefmt='grid'))

    # Validate post-trade statistics
    validation_results = validate_post_trade_stats(player_data, trade_date, traded_teams, post_trade_team_stats, debug)

    return validation_results

if __name__ == "__main__":
    main(debug=True)

Champion for season 2020-21: Milwaukee Bucks (1610612749)
Champion for season 2021-22: Golden State Warriors (1610612744)
Champion for season 2022-23: Denver Nuggets (1610612743)
Champion for season 2023-24: Boston Celtics (1610612738)
Champions data: {'2020-21': {'ChampionTeamName': 'Milwaukee Bucks'}, '2021-22': {'ChampionTeamName': 'Golden State Warriors'}, '2022-23': {'ChampionTeamName': 'Denver Nuggets'}, '2023-24': {'ChampionTeamName': 'Boston Celtics'}}
Fetching player data for season 2020-21
Player data for season 2020-21 contains 23054 rows.
Fetching player data for season 2021-22
Player data for season 2021-22 contains 26039 rows.
Fetching player data for season 2022-23
Player data for season 2022-23 contains 25894 rows.
Fetching player data for season 2023-24
Player data for season 2023-24 contains 26401 rows.
Calculating Champion team-level statistics.
Initial player_data head:
    SEASON_YEAR  PLAYER_ID             PLAYER_NAME  NICKNAME     TEAM_ID  \
9       2020-21     2

  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_stats['TEAM_NAME'] == old_team_name, stat] -= traded_player_stats[player_name][stat] * post_trade_games
  post_trade_stats.loc[post_trade_

Calculated percentiles for PTS in season 2020-21:
   PTS_per_game  PTS_percentile
0       120.125             1.0
Calculated percentiles for AST in season 2020-21:
   AST_per_game  AST_percentile
0     25.472222             1.0
Calculated percentiles for TOV in season 2020-21:
   TOV_per_game  TOV_percentile
0     13.388889             1.0
Calculated percentiles for STL in season 2020-21:
   STL_per_game  STL_percentile
0         8.125             1.0
Calculated percentiles for BLK in season 2020-21:
   BLK_per_game  BLK_percentile
0      4.638889             1.0
Calculated percentiles for OREB in season 2020-21:
   OREB_per_game  OREB_percentile
0      10.291667              1.0
Calculated percentiles for DREB in season 2020-21:
   DREB_per_game  DREB_percentile
0      37.833333              1.0
Calculated percentiles for FGM in season 2020-21:
   FGM_per_game  FGM_percentile
0     44.736111             1.0
Calculated percentiles for FG3M in season 2020-21:
   FG3M_per_game  FG3M_perc

In [5]:
# %%writefile ../src/salary_predict/updated/nba_rules_trade_impact.py
# https://www.hoopsrumors.com/2023/09/salary-matching-rules-for-trades-during-2023-24-season.html

# for trade rules

import pandas as pd

# Constants for the 2023/24 season
FIRST_TAX_APRON_2023 = 172_346_000
SALARY_CAP_2023 = 136_021_000

# Percentages based on rules
UP_TO_7500K_MULTIPLIER = 2.0
UP_TO_7500K_BONUS = 250_000 / SALARY_CAP_2023

BETWEEN_7501K_AND_29M_BONUS = 7_500_000 / SALARY_CAP_2023

ABOVE_29M_MULTIPLIER = 1.25
ABOVE_29M_BONUS = 250_000 / SALARY_CAP_2023

ABOVE_FIRST_APRON_MULTIPLIER = 1.10

def check_salary_matching_rules(outgoing_salary, incoming_salary, team_salary_before_trade, salary_cap, first_tax_apron, debug=False):
    if debug:
        print(f"Debug: Checking salary matching rules:")
        print(f"  Outgoing Salary: ${outgoing_salary:,.2f}")
        print(f"  Incoming Salary: ${incoming_salary:,.2f}")
        print(f"  Team Salary Before Trade: ${team_salary_before_trade:,.2f}")
        print(f"  Salary Cap: ${salary_cap:,.2f}")
        print(f"  First Tax Apron: ${first_tax_apron:,.2f}")

    if team_salary_before_trade < first_tax_apron:
        if outgoing_salary <= 7_500_000:
            max_incoming_salary = (UP_TO_7500K_MULTIPLIER * outgoing_salary + UP_TO_7500K_BONUS * salary_cap)
            rule = "200% of outgoing + 250,000 (up to 7,500,000)"
            percentage_limit = (UP_TO_7500K_MULTIPLIER * outgoing_salary + UP_TO_7500K_BONUS * salary_cap) / outgoing_salary
        elif outgoing_salary <= 29_000_000:
            max_incoming_salary = outgoing_salary + BETWEEN_7501K_AND_29M_BONUS * salary_cap
            rule = "outgoing + 7,500,000 (7,500,001 to 29,000,000)"
            percentage_limit = (outgoing_salary + BETWEEN_7501K_AND_29M_BONUS * salary_cap) / outgoing_salary
        else:
            max_incoming_salary = (ABOVE_29M_MULTIPLIER * outgoing_salary + ABOVE_29M_BONUS * salary_cap)
            rule = "125% of outgoing + 250,000 (above 29,000,000)"
            percentage_limit = (ABOVE_29M_MULTIPLIER * outgoing_salary + ABOVE_29M_BONUS * salary_cap) / outgoing_salary
    else:
        max_incoming_salary = ABOVE_FIRST_APRON_MULTIPLIER * outgoing_salary
        rule = "110% of outgoing (above first tax apron)"
        percentage_limit = ABOVE_FIRST_APRON_MULTIPLIER

    if debug:
        print(f"  Max Incoming Salary Allowed: ${max_incoming_salary:,.2f}")
        print(f"  Rule Applied: {rule}")
        print(f"  Percentage Limit: {percentage_limit:.2f}")

    return incoming_salary <= max_incoming_salary, max_incoming_salary, rule, percentage_limit

def analyze_trade_scenario(player1, player2, predictions_df, season, debug=False):
    # Filter the dataframe for the specified season
    season_data = predictions_df[predictions_df['Season'] == season]

    # Determine salary cap and tax apron for the season
    salary_cap = SALARY_CAP_2023
    first_tax_apron = FIRST_TAX_APRON_2023

    player1_data = season_data[season_data['Player'] == player1]
    player2_data = season_data[season_data['Player'] == player2]

    if player1_data.empty or player2_data.empty:
        print(f"Could not find data for one or both players: {player1}, {player2}")
        return None

    # Extract relevant data
    player1_salary = player1_data['Salary'].values[0]
    player1_predicted_salary = player1_data['Predicted_Salary'].values[0] * salary_cap
    player2_salary = player2_data['Salary'].values[0]
    player2_predicted_salary = player2_data['Predicted_Salary'].values[0] * salary_cap

    # Check salary matching rules for both teams
    team1_salary_before_trade = season_data[season_data['Team'] == player1_data['Team'].values[0]]['Salary'].sum()
    team2_salary_before_trade = season_data[season_data['Team'] == player2_data['Team'].values[0]]['Salary'].sum()

    # Determine tax apron status
    team1_tax_apron_status = "Below" if team1_salary_before_trade < first_tax_apron else "Above"
    team2_tax_apron_status = "Below" if team2_salary_before_trade < first_tax_apron else "Above"

    trade_works_for_team1, team1_max_incoming_salary, team1_rule, team1_percentage_limit = check_salary_matching_rules(
        player1_salary, player2_salary, team1_salary_before_trade, salary_cap, first_tax_apron, debug
    )
    trade_works_for_team2, team2_max_incoming_salary, team2_rule, team2_percentage_limit = check_salary_matching_rules(
        player2_salary, player1_salary, team2_salary_before_trade, salary_cap, first_tax_apron, debug
    )

    if debug:
        print("\nDebug: Trade Analysis Results:")
        print(f"Player 1: {player1}")
        print(f"  Real Salary: ${player1_salary:,.2f}")
        print(f"  Predicted Salary: ${player1_predicted_salary:,.2f} (as a percentage of the salary cap)")
        print(f"  Team Salary Before Trade: ${team1_salary_before_trade:,.2f} ({team1_tax_apron_status} First Tax Apron)")
        print(f"  Max Incoming Salary Allowed: ${team1_max_incoming_salary:,.2f} (Rule: {team1_rule})")
        print(f"  Percentage Limit: {team1_percentage_limit:.2f}")

        print(f"Player 2: {player2}")
        print(f"  Real Salary: ${player2_salary:,.2f}")
        print(f"  Predicted Salary: ${player2_predicted_salary:,.2f} (as a percentage of the salary cap)")
        print(f"  Team Salary Before Trade: ${team2_salary_before_trade:,.2f} ({team2_tax_apron_status} First Tax Apron)")
        print(f"  Max Incoming Salary Allowed: ${team2_max_incoming_salary:,.2f} (Rule: {team2_rule})")
        print(f"  Percentage Limit: {team2_percentage_limit:.2f}")

    print(f"Trade Works for Team 1: {'Yes' if trade_works_for_team1 else 'No'}")
    if not trade_works_for_team1:
        print(f"  Trade fails for Team 1 because incoming salary exceeds max allowed under rule: {team1_rule}")
        print(f"  Team 1 is {team1_tax_apron_status} the First Tax Apron.")

    print(f"Trade Works for Team 2: {'Yes' if trade_works_for_team2 else 'No'}")
    if not trade_works_for_team2:
        print(f"  Trade fails for Team 2 because incoming salary exceeds max allowed under rule: {team2_rule}")
        print(f"  Team 2 is {team2_tax_apron_status} the First Tax Apron.")

    if trade_works_for_team1 and trade_works_for_team2:
        print("The trade is valid according to salary matching rules.")
    else:
        print("The trade does not satisfy salary matching rules.")

    return trade_works_for_team1 and trade_works_for_team2

if __name__ == "__main__":
    # Create a mock dataframe for predictions_df
    data = {
        'Season': [2023, 2023],
        'Player': ['Anthony Davis', 'Jayson Tatum'],
        'Team': ['LAL', 'BOS'],
        'Salary': [40_600_080, 32_600_060],
        'Predicted_Salary': [0.31, 0.24],  # Predicted salary as a percentage of the salary cap
        'Team Salary Before Trade': [167_302_433, 184_550_024]
    }

    predictions_df = pd.DataFrame(data)

    # Specify two players for the trade scenario
    player1_name = "Anthony Davis"
    player2_name = "Jayson Tatum"

    # Analyze the trade scenario for the specified season with debugging enabled
    season = 2023
    print(f"Analyzing trade for the {season} season:")
    analyze_trade_scenario(player1_name, player2_name, predictions_df, season, debug=False)


Analyzing trade for the 2023 season:
Trade Works for Team 1: Yes
Trade Works for Team 2: Yes
The trade is valid according to salary matching rules.
