In [9]:
%%writefile ../src/trade_impact/percentile_count_trade_impact.py

import pandas as pd
import numpy as np
import os
import time
from nba_api.stats.endpoints import leaguegamefinder, playergamelogs
from nba_api.stats.static import teams, players

# Constants
# Box-score columns that are averaged per player and then ranked.
RELEVANT_STATS = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA']
# "Top t%" cut-offs; each value t is turned into the (100 - t)th percentile of a stat.
PERCENTILE_THRESHOLDS = [1, 2, 3, 4, 5, 10, 25, 50]
# CSV cache of top-percentile counts (relative path, so it depends on the working directory).
CACHE_FILE_PATH = '../data/processed/top_percentile_cache.csv'

# Helper Functions
def load_team_data():
    """Return the id, full name and abbreviation of every team from ``teams.get_teams()``."""
    wanted_columns = ['id', 'full_name', 'abbreviation']
    all_teams = pd.DataFrame(teams.get_teams())
    return all_teams[wanted_columns]

def load_saved_percentile_counts():
    """Read the cached top-percentile counts from ``CACHE_FILE_PATH``.

    Returns:
        The cached CSV as a DataFrame, or an empty DataFrame when the cache
        file does not exist.
    """
    if not os.path.exists(CACHE_FILE_PATH):
        # Nothing has been cached yet.
        return pd.DataFrame()
    return pd.read_csv(CACHE_FILE_PATH)

def save_percentile_counts(percentile_counts_df):
    """Write the top-percentile counts to the CSV cache at ``CACHE_FILE_PATH``.

    Args:
        percentile_counts_df: DataFrame of counts to persist. An empty frame
            is ignored so an existing cache is never overwritten with nothing.
    """
    if percentile_counts_df.empty:
        return

    # Make sure the target directory exists; to_csv does not create it.
    cache_dir = os.path.dirname(CACHE_FILE_PATH)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    percentile_counts_df.to_csv(CACHE_FILE_PATH, index=False)
    print(f"Top percentile counts saved to {CACHE_FILE_PATH}")

def get_champion_for_percentile(season, debug=False):
    """Look up the champion team name for one season.

    The season is first passed through ``normalize_season``; the lookup itself
    is delegated to ``get_champion_team_name`` (called with ``timeout=90``,
    ``retries=3`` and ``use_live=True``).

    Args:
        season: Season identifier accepted by ``normalize_season``.
        debug: When true, print the resolved champion.

    Returns:
        Whatever ``get_champion_team_name`` returns for the normalized season.
    """
    from trade_impact.utils.nba_api_utils import get_champion_team_name, normalize_season

    normalized = normalize_season(season)
    champion = get_champion_team_name(
        normalized, timeout=90, retries=3, use_live=True, debug=debug
    )
    if debug:
        print(f"Champion for season {normalized}: {champion}")
    return champion


def get_champions_for_percentile(start_year, end_year, debug=False):
    """Fetch the champion of every season starting in ``start_year``..``end_year``.

    Args:
        start_year: First season start year (inclusive), e.g. 2019 for "2019-20".
        end_year: Last season start year (inclusive).
        debug: When true, report seasons for which no champion was found.

    Returns:
        DataFrame with columns ``Season`` ("YYYY-YY") and ``ChampionTeamName``,
        one row per season that yielded a champion. The columns are present
        even when no champion was found, so callers can index them safely.
    """
    champions = []
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year + 1)[-2:]}"
        champ_name = get_champion_for_percentile(season, debug)
        if champ_name:
            champions.append({'Season': season, 'ChampionTeamName': champ_name})
        elif debug:
            print(f"Champion data not available for season {season}")
        time.sleep(1)  # To avoid overwhelming the API
    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(champions, columns=['Season', 'ChampionTeamName'])

# Updated get_champion_percentiles function to work with cached data
def get_champion_percentiles(seasons, debug=False):
    """Fetch or load champion top-percentile counts and average them over ``seasons``.

    Seasons already present in the CSV cache are reused; only the missing ones
    are fetched, counted and appended to the cache.

    Args:
        seasons: Iterable of season strings in "YYYY-YY" form (any order).
        debug: When true, print progress information.

    Returns:
        Single-row DataFrame produced by ``calculate_average_top_percentiles``
        for the requested seasons.
    """
    saved_percentiles_df = load_saved_percentile_counts()

    # Determine which seasons are missing from the cache
    existing_seasons = saved_percentiles_df['Season'].unique() if not saved_percentiles_df.empty else []
    new_seasons = [season for season in seasons if season not in existing_seasons]

    updated_percentiles_df = saved_percentiles_df

    if new_seasons:
        if debug:
            print(f"Fetching new data for seasons: {new_seasons}")

        # Use min/max so the range is valid even when `seasons` is unsorted.
        start_years = [int(season.split('-')[0]) for season in new_seasons]
        champion_info = get_champions_for_percentile(min(start_years), max(start_years), debug)

        # The year range can span seasons that are already cached; keep only
        # the missing ones so they are not appended to the cache a second time.
        if 'Season' in champion_info.columns:
            champion_info = champion_info[champion_info['Season'].isin(new_seasons)]

        if champion_info.empty:
            if debug:
                print(f"No champion data available for seasons: {new_seasons}")
        else:
            player_stats, league_percentiles, league_percentiles_ref = fetch_and_process_season_data(new_seasons, debug)

            # Calculate champion percentiles for new seasons
            champion_percentiles = calculate_champion_percentiles(league_percentiles, champion_info, debug)

            # Iterate the groups explicitly: each group frame keeps its
            # TEAM_NAME / Season columns, which count_top_percentiles filters on.
            new_rows = [
                {
                    'TEAM_NAME': team_name,
                    'Season': season,
                    **count_top_percentiles(team_rows, league_percentiles_ref, team_name, season, debug),
                }
                for (team_name, season), team_rows in champion_percentiles.groupby(['TEAM_NAME', 'Season'])
            ]

            if new_rows:
                new_top_percentile_counts = pd.DataFrame(new_rows)
                if saved_percentiles_df.empty:
                    updated_percentiles_df = new_top_percentile_counts
                else:
                    updated_percentiles_df = pd.concat(
                        [saved_percentiles_df, new_top_percentile_counts], ignore_index=True
                    )
                # Save the updated cache
                save_percentile_counts(updated_percentiles_df)
    else:
        if debug:
            print(f"All requested seasons are already saved: {existing_seasons}")

    # Filter the saved or updated data for the requested seasons
    filtered_percentiles_df = updated_percentiles_df[updated_percentiles_df['Season'].isin(seasons)]

    # Calculate the average percentiles across the requested seasons
    average_top_percentiles_df = calculate_average_top_percentiles(filtered_percentiles_df, debug)

    return average_top_percentiles_df

def calculate_champion_percentiles(league_percentiles, champions, debug=False):
    """Select the rows of ``league_percentiles`` that belong to champion teams.

    Rows are matched on team name and, when both frames carry a season column
    (``SEASON`` on the league side, ``Season`` on the champion side), on the
    season as well, so a champion's rows from seasons in which it did not win
    the title are not included.

    Args:
        league_percentiles: Per-player frame with at least ``TEAM_NAME`` and
            ``PLAYER_NAME`` (and normally ``SEASON``).
        champions: Frame with ``ChampionTeamName`` and ``Season``.
        debug: When true, print a preview of the matched rows.

    Returns:
        Inner merge of the two frames; carries both the league columns and the
        champion columns.
    """
    season_aware = 'SEASON' in league_percentiles.columns and 'Season' in champions.columns
    if season_aware:
        left_keys = ['TEAM_NAME', 'SEASON']
        right_keys = ['ChampionTeamName', 'Season']
    else:
        # No season information on one side: fall back to matching by team only.
        left_keys = ['TEAM_NAME']
        right_keys = ['ChampionTeamName']

    # The inner merge both filters to champion teams and attaches their Season.
    champion_data = pd.merge(league_percentiles, champions, left_on=left_keys, right_on=right_keys)

    if debug:
        print("Champion Data Percentiles with Season:")
        print(champion_data[['TEAM_NAME', 'Season', 'PLAYER_NAME']].head())

    return champion_data

def fetch_and_process_season_data(seasons, debug=False):
    """Run the fetch -> per-player averages -> percentile pipeline for ``seasons``.

    Returns:
        A 3-tuple ``(player_stats, league_percentiles, league_percentiles_ref)``:
        the per-player averages, the frame returned by
        ``calculate_player_percentiles`` and its dictionary of threshold values.
        ``calculate_player_percentiles`` annotates and returns its input, so
        the first two items refer to the same DataFrame object.
    """
    raw_logs = fetch_all_player_data(seasons, debug)

    # Per-player per-game averages
    per_player = calculate_player_stats(raw_logs, debug)

    # Percentile ranks plus the threshold values used for counting
    ranked, thresholds = calculate_player_percentiles(per_player, debug)

    return per_player, ranked, thresholds

def fetch_all_player_data(seasons, debug=False):
    """Fetch league-wide player game logs for several seasons and stack them.

    Each season is normalized with ``normalize_season`` and fetched through
    ``get_playergamelogs_df`` (``timeout=90``, ``retries=3``, ``use_live=True``).
    A season whose fetch raises is skipped so the remaining seasons still load.

    Args:
        seasons: Iterable of season identifiers.
        debug: When true, print per-season progress and fetch errors.

    Returns:
        One DataFrame with the logs of every season that loaded, or an empty
        DataFrame when none did.
    """
    from trade_impact.utils.nba_api_utils import get_playergamelogs_df, normalize_season

    season_frames = []
    for season in seasons:
        season_norm = normalize_season(season)
        try:
            logs = get_playergamelogs_df(season_norm, timeout=90, retries=3, use_live=True, debug=debug)
        except Exception as e:
            if debug:
                print(f"Error fetching player data for season {season_norm}: {e}")
            # do not fill; continue so we can see exactly which seasons failed
            continue

        # Downstream code groups by 'SEASON'; tag the logs when the helper did
        # not provide that column (mirrors fetch_season_data_by_year in the
        # sibling module). NOTE(review): confirm the helper's schema.
        if 'SEASON' not in logs.columns:
            logs = logs.assign(SEASON=season_norm)

        season_frames.append(logs)
        if debug:
            print(f"Fetched {len(logs)} player logs for the league in season {season_norm}")
        # gentle pacing to avoid rate limiting
        time.sleep(0.5)

    # Concatenate once instead of growing a frame inside the loop.
    all_data = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()
    if debug:
        print(f"Total logs fetched across requested seasons: {len(all_data)}")
    return all_data


def calculate_player_stats(player_data, debug=False):
    """Average every relevant stat per (season, team, player) from game logs.

    Returns:
        DataFrame keyed by ``SEASON``, ``TEAM_NAME`` and ``PLAYER_NAME`` with
        one ``<STAT>_per_game`` column for each entry of ``RELEVANT_STATS``.
    """
    group_keys = ['SEASON', 'TEAM_NAME', 'PLAYER_NAME']
    per_game_names = {stat: f'{stat}_per_game' for stat in RELEVANT_STATS}

    player_stats = (
        player_data.groupby(group_keys)[RELEVANT_STATS]
        .mean()
        .reset_index()
        .rename(columns=per_game_names)
    )

    if debug:
        print("Sample player stats (entire league):")
        print(player_stats.head())  # Show head of the player stats
    return player_stats

def calculate_player_percentiles(stats_df, debug=False):
    """Add percentile-rank columns and compute top-percentile threshold values.

    For each stat in ``RELEVANT_STATS`` that has a ``<STAT>_per_game`` column,
    a ``<STAT>_percentile`` column is added to ``stats_df`` in place. Ranks and
    thresholds are computed over all rows passed in (they are not split by
    season).

    Returns:
        ``(stats_df, thresholds)`` where ``stats_df`` is the same object that
        was passed in and ``thresholds`` maps a stat name to the array of
        values at the ``100 - t`` percentiles for each ``t`` in
        ``PERCENTILE_THRESHOLDS``. Stats whose per-game column contains NaN
        get no entry in ``thresholds``.
    """
    cutoffs = [100 - threshold for threshold in PERCENTILE_THRESHOLDS]
    thresholds_by_stat = {}

    for stat in RELEVANT_STATS:
        per_game_col = f'{stat}_per_game'
        if per_game_col not in stats_df.columns:
            continue

        rank_col = f'{stat}_percentile'
        stats_df[rank_col] = stats_df[per_game_col].rank(pct=True, method='min')

        # np.percentile is only applied to NaN-free columns.
        contains_nan = stats_df[per_game_col].isna().any()
        if not contains_nan:
            thresholds_by_stat[stat] = np.percentile(stats_df[per_game_col], cutoffs)
        elif debug:
            print(f"NaN values found in {per_game_col} column.")

        if debug:
            print(f"Calculated percentiles for {per_game_col}:")
            print(stats_df[['TEAM_NAME', 'PLAYER_NAME', per_game_col, rank_col]].head())

    return stats_df, thresholds_by_stat

def count_top_percentiles(player_percentiles, percentiles, team_name, season, debug=False):
    """Count a team's players at or above each top-percentile threshold value.

    Args:
        player_percentiles: Per-player frame with ``TEAM_NAME``, ``SEASON`` and
            the ``<STAT>_per_game`` columns.
        percentiles: Mapping of stat name to threshold values, ordered like
            ``PERCENTILE_THRESHOLDS``.
        team_name: Team whose players are counted.
        season: Value compared against the ``SEASON`` column.
        debug: When true, print FG3M-focused diagnostics.

    Returns:
        Dict with a ``<STAT>_Top_<t>_count`` entry for every stat/threshold
        pair; stats missing from ``percentiles`` keep a count of 0.
    """
    top_counts = {
        f'{stat}_Top_{threshold}_count': 0
        for stat in RELEVANT_STATS
        for threshold in PERCENTILE_THRESHOLDS
    }

    # Restrict to the requested team and season
    on_team = player_percentiles['TEAM_NAME'] == team_name
    in_season = player_percentiles['SEASON'] == season
    team_data = player_percentiles[on_team & in_season]

    if debug:
        print(f"\n{team_name} player percentiles data for season {season}:\n{team_data[['PLAYER_NAME', 'FG3M_per_game', 'FG3M_percentile']]}")

    for col in RELEVANT_STATS:
        if col not in percentiles:
            # No thresholds were computed for this stat; leave its counts at 0.
            continue
        col_per_game = f'{col}_per_game'
        for idx, threshold in enumerate(PERCENTILE_THRESHOLDS):
            cutoff = percentiles[col][idx]
            count_key = f'{col}_Top_{threshold}_count'
            top_counts[count_key] = (team_data[col_per_game] >= cutoff).sum()

            if debug and col == 'FG3M':
                qualifying = team_data[team_data[col_per_game] >= cutoff][['PLAYER_NAME', col_per_game, f'{col}_percentile']]
                print(f"{col} Top {threshold}% Count for season {season}: {top_counts[count_key]}")
                print(f"Players in Top {threshold}% for {col} in season {season}: {qualifying}")

    return top_counts

# Function to simulate trades and recalculate percentiles
def simulate_trade(player_stats, players_from_team_a, players_from_team_b, team_a_name, team_b_name, debug=False):
    """Reassign traded players to the other team, modifying ``player_stats`` in place.

    Players are matched by ``PLAYER_NAME`` only: every row of a player listed
    in ``players_from_team_a`` is set to ``team_b_name`` and vice versa.

    Returns:
        The same ``player_stats`` object, after the reassignment.
    """
    def _show_traded_rows(heading):
        involved = player_stats['PLAYER_NAME'].isin(players_from_team_a + players_from_team_b)
        print(heading)
        print(player_stats[involved][['PLAYER_NAME', 'TEAM_NAME']])

    # The masks depend only on player names, so both can be built up front.
    leaving_team_a = player_stats['PLAYER_NAME'].isin(players_from_team_a)
    leaving_team_b = player_stats['PLAYER_NAME'].isin(players_from_team_b)

    if debug:
        _show_traded_rows("\nBefore trade simulation:")

    player_stats.loc[leaving_team_a, 'TEAM_NAME'] = team_b_name
    player_stats.loc[leaving_team_b, 'TEAM_NAME'] = team_a_name

    if debug:
        _show_traded_rows("\nAfter trade simulation:")

    return player_stats

def calculate_average_top_percentiles(top_percentile_counts_df, debug=False):
    """Average the top-percentile counts across seasons.

    For every stat/threshold pair the counts are averaged within each
    ``Season`` first and those per-season means are then averaged. A result
    that is null (for example when there are no rows) is reported as 0.

    Returns:
        Single-row DataFrame with one ``<STAT>_Avg_Top_<t>_percentile`` column
        per stat/threshold pair.
    """
    by_season = top_percentile_counts_df.groupby('Season')
    averages = {}

    for col in RELEVANT_STATS:
        for threshold in PERCENTILE_THRESHOLDS:
            season_means = by_season[f'{col}_Top_{threshold}_count'].mean()
            overall = season_means.mean()
            if pd.isnull(overall):
                overall = 0
            averages[f'{col}_Avg_Top_{threshold}_percentile'] = overall

            if debug:
                print(f"{col} Avg Top {threshold}% Count across seasons: {overall}")

    return pd.DataFrame([averages])

def create_comparison_table(before_trade, after_trade, average_percentiles, team_name):
    """Build a per-threshold table of a team's counts before and after a trade.

    Args:
        before_trade: Mapping with ``<STAT>_Top_<t>_count`` entries before the trade.
        after_trade: Same mapping after the trade.
        average_percentiles: Object whose ``<STAT>_Avg_Top_<t>_percentile``
            entries are read at label ``0`` (the champion averages).
        team_name: Value repeated in the ``Team`` column.

    Returns:
        DataFrame indexed by ``Percentile`` with ``<STAT>_Before``,
        ``<STAT>_After`` and ``<STAT>_Champ_Avg`` columns for every stat.
    """
    table = {
        'Team': [team_name] * len(PERCENTILE_THRESHOLDS),
        'Percentile': PERCENTILE_THRESHOLDS,
    }

    for stat in RELEVANT_STATS:
        count_keys = [f'{stat}_Top_{threshold}_count' for threshold in PERCENTILE_THRESHOLDS]
        avg_keys = [f'{stat}_Avg_Top_{threshold}_percentile' for threshold in PERCENTILE_THRESHOLDS]

        table[f'{stat}_Before'] = [before_trade[key] for key in count_keys]
        table[f'{stat}_After'] = [after_trade[key] for key in count_keys]
        table[f'{stat}_Champ_Avg'] = [average_percentiles[key][0] for key in avg_keys]

    return pd.DataFrame(table).set_index('Percentile')

def compare_teams_before_after_trade(season, team_a_name, team_b_name, players_from_team_a, players_from_team_b, debug=False):
    """Count both teams' top-percentile players before and after a simulated trade.

    The season's data is fetched and processed, counts are taken for both
    teams, the trade is simulated, percentiles are recalculated and the counts
    are taken again. The threshold values from the first calculation are used
    for both counts.

    Returns:
        ``(team_a_before, team_a_after, team_b_before, team_b_after)``, each a
        dict as returned by ``count_top_percentiles``.
    """
    player_stats, ranked_before, thresholds = fetch_and_process_season_data([season], debug)

    # Baseline counts (taken before the frame is modified by the trade)
    before_a = count_top_percentiles(ranked_before, thresholds, team_a_name, season, debug)
    before_b = count_top_percentiles(ranked_before, thresholds, team_b_name, season, debug)

    # Swap the players and re-rank
    traded_stats = simulate_trade(
        player_stats, players_from_team_a, players_from_team_b, team_a_name, team_b_name, debug
    )
    ranked_after, _ = calculate_player_percentiles(traded_stats, debug)

    if debug:
        print("\nAfter trade percentiles calculation:")
        print(ranked_after[['TEAM_NAME', 'PLAYER_NAME', 'FG3M_per_game', 'FG3M_percentile']])

    # Post-trade counts
    after_a = count_top_percentiles(ranked_after, thresholds, team_a_name, season, debug)
    after_b = count_top_percentiles(ranked_after, thresholds, team_b_name, season, debug)

    return before_a, after_a, before_b, after_b

# Function to generate comparison tables using updated champion percentile data
def generate_comparison_tables(season, team_a_name, team_b_name, players_from_team_a, players_from_team_b, average_top_percentiles_df, debug=False):
    """Produce one before/after comparison table per team involved in the trade.

    Returns:
        ``(team_a_table, team_b_table)``, each built by
        ``create_comparison_table`` against ``average_top_percentiles_df``.
    """
    before_a, after_a, before_b, after_b = compare_teams_before_after_trade(
        season, team_a_name, team_b_name, players_from_team_a, players_from_team_b, debug
    )

    team_a_table = create_comparison_table(before_a, after_a, average_top_percentiles_df, team_a_name)
    team_b_table = create_comparison_table(before_b, after_b, average_top_percentiles_df, team_b_name)

    return team_a_table, team_b_table

def main(debug=False):
    """Run the demo: champion averages for five seasons, then one sample trade.

    The sample trade swaps Jaylen Brown (Boston Celtics) and Trae Young
    (Atlanta Hawks) in the last listed season and prints both comparison
    tables.
    """
    seasons = ["2019-20", "2020-21", "2021-22", "2022-23", "2023-24"]

    # Champion benchmark (served from the cache where possible)
    champion_averages = get_champion_percentiles(seasons, debug)

    if debug:
        print("\nAverage Champion Percentiles:")
        print(champion_averages)

    team_a_name, team_a_players = "Boston Celtics", ["Jaylen Brown"]
    team_b_name, team_b_players = "Atlanta Hawks", ["Trae Young"]

    # Before/after tables for the most recent season
    team_a_table, team_b_table = generate_comparison_tables(
        seasons[-1], team_a_name, team_b_name, team_a_players, team_b_players, champion_averages, debug
    )

    print("\nTeam A Comparison Table:")
    print(team_a_table)

    print("\nTeam B Comparison Table:")
    print(team_b_table)

# Script entry point: run the demo analysis with debug output enabled.
if __name__ == "__main__":
    main(debug=True)


Overwriting ../src/trade_impact/percentile_count_trade_impact.py


# calculations

final comparison: 
pre-trade games / post-trade games / no-trade games / pre-trade metric / post-trade metric / no-trade metric / champion metric / pre-trade percentile / post-trade percentile / no-trade percentile / champion percentile


1. Pre-Trade Scenario:

    Definition: This period includes all games played by the teams before the trade date.
    Process:
        Data Collection: Filter the season data to include only games that occurred before the specified trade date.
        Statistical Calculations: Calculate the total points and the number of games played for each team during this period.
        Averaging: Compute the average points per game for each team during the pre-trade period.
        Percentile Ranking: Rank the teams based on their average points per game during this period.

2. Post-Trade Scenario:

    Definition: This period includes all games played by the teams after the trade date.
    Process:
        Data Collection: Filter the season data to include only games that occurred on or after the specified trade date.
        Player Averages: Calculate the average points for traded players based on their performance post-trade.
        Simulating Game Logs:
            Use the calculated player averages to simulate additional game logs for the post-trade period. This simulates the impact of the traded players joining new teams.
            Each game log is based on the new team's schedule after the trade date.
        Statistical Calculations:
            Combine the simulated game logs with the actual post-trade data.
            Calculate the total points and the number of games played by each team.
        Averaging: Compute the average points per game for each team during the post-trade period, including the simulated data.
        Percentile Ranking: Rank the teams based on their average points per game during this period.

3. No-Trade Scenario:

    Definition: This period considers the entire season as if no trades occurred.
    Process:
        Data Collection: Use the full season data, without filtering based on the trade date.
        Statistical Calculations: Calculate the total points and the number of games played for each team across the entire season.
        Averaging: Compute the average points per game for each team for the full season.
        Percentile Ranking: Rank the teams based on their average points per game for the entire season.

4. Final Comparison:

    Data Aggregation: Collect and organize the results from the pre-trade, post-trade, and no-trade scenarios for each team involved in the trade.
    Metrics Compared:
        Total Points: Sum of points scored during the respective period.
        Games Played: Number of games played during the respective period.
        Average Points per Game: Total points divided by the number of games played.
        Percentile Ranking: How each team's average points per game ranks within the league during that period.
        Champion Metric: The average points per game of the championship team in that season, used as a benchmark for comparison.

5. Documentation:

    The rules and processes described above should be documented thoroughly to ensure clarity and reproducibility. This includes the rationale behind each step, the handling of edge cases (e.g., missing data, incomplete records), and any assumptions made during the analysis.

In [10]:
%%writefile ../src/trade_impact/overall_team_trade_impact.py

import pandas as pd
import numpy as np
from nba_api.stats.endpoints import playergamelogs, leaguegamefinder
from nba_api.stats.static import players, teams
import os
import pickle

# Set the cache directory and file path (relative, so they depend on the
# working directory the module is imported from).
CACHE_DIR = "../data/processed/"
CACHE_FILE = os.path.join(CACHE_DIR, "champion_stats_cache.pkl")

# Ensure the directory exists. exist_ok=True avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs(CACHE_DIR, exist_ok=True)

def load_cache():
    """Return the pickled champion-stats cache, or an empty dict when none exists.

    NOTE: the file is read with ``pickle.load``, which can execute arbitrary
    code; only load cache files this project wrote itself.
    """
    if not os.path.exists(CACHE_FILE):
        return {}
    with open(CACHE_FILE, 'rb') as cache_file:
        return pickle.load(cache_file)

def save_cache(cache):
    """Pickle ``cache`` to ``CACHE_FILE``, replacing any previous contents."""
    with open(CACHE_FILE, 'wb') as cache_file:
        cache_file.write(pickle.dumps(cache))

def get_champion(season, debug=False):
    """Look up the champion team name for one season.

    The season is normalized with ``normalize_season`` and the lookup is
    delegated to ``get_champion_team_name`` (``timeout=90``, ``retries=3``,
    ``use_live=True``).

    Returns:
        Whatever ``get_champion_team_name`` returns for the normalized season.
    """
    from trade_impact.utils.nba_api_utils import get_champion_team_name, normalize_season

    normalized = normalize_season(season)
    champion = get_champion_team_name(
        normalized, timeout=90, retries=3, use_live=True, debug=debug
    )
    if debug:
        print(f"Champion for season {normalized}: {champion}")
    return champion


def get_champion_team_stats(seasons, relevant_stats, debug=False):
    """Collect champion team stats for ``seasons`` and append their average.

    Seasons missing from the pickle cache are fetched, reduced to team-level
    stats with percentiles, narrowed to the champion's row and cached.

    Args:
        seasons: Iterable of season identifiers used as cache keys.
        relevant_stats: Stat column names forwarded to the team-stat helpers.
        debug: When true, print progress information.

    Returns:
        DataFrame with the cached champion rows for the requested seasons
        followed by one ``'Average Champion'`` row holding the mean of the
        numeric columns; an empty DataFrame when no champion data is available.
    """
    cache = load_cache()

    # List of seasons that need to be fetched (not in cache)
    missing_seasons = [season for season in seasons if season not in cache]

    if missing_seasons:
        if debug:
            print(f"Fetching data for missing seasons: {missing_seasons}")

        cache_changed = False
        for season in missing_seasons:
            season_data = fetch_season_data_by_year(season, debug)
            if season_data is None:
                continue  # Skip if no data

            team_stats = calculate_team_stats(season_data, 'No-trade', relevant_stats, debug)
            team_stats = calculate_percentiles(team_stats, relevant_stats, debug)

            # Identify the champion team
            champ_name = get_champion(season, debug)
            if not champ_name:
                continue

            champ_stats = team_stats[team_stats['TEAM_NAME'] == champ_name]
            if champ_stats.empty:
                # Caching an empty slice would mark the season as done forever
                # and it would never be refetched.
                if debug:
                    print(f"No team stats matched champion {champ_name} for season {season}; not caching.")
                continue

            cache[season] = champ_stats  # Store the stats in the cache
            cache_changed = True

        # Only rewrite the cache file when something was added.
        if cache_changed:
            save_cache(cache)

    # Collect cached rows for the requested seasons; empty entries (possibly
    # left behind by earlier runs) are ignored.
    champion_frames = [
        cache[season] for season in seasons
        if season in cache and not cache[season].empty
    ]
    if not champion_frames:
        return pd.DataFrame()

    # A single concat of non-empty frames keeps the numeric dtypes intact.
    all_team_stats = pd.concat(champion_frames)

    # Calculate average champion stats
    numeric_cols = all_team_stats.select_dtypes(include=[np.number]).columns
    average_champion = all_team_stats[numeric_cols].mean().to_frame().T
    average_champion['TEAM_NAME'] = 'Average Champion'
    average_champion['SEASON'] = 'Multiple Seasons'

    return pd.concat([all_team_stats, average_champion])


def fetch_player_id_by_name(player_name, debug=False):
    """Return the id of the first player matching ``player_name``, or None.

    The lookup uses ``players.find_players_by_full_name``; any exception
    (including an empty match list) results in ``None``.
    """
    try:
        matches = players.find_players_by_full_name(player_name)
        first_match = matches[0]
        if debug:
            print(f"Fetched ID for player {player_name}: {first_match['id']}")
        return first_match['id']
    except Exception as e:
        if debug:
            print(f"Error fetching ID for player {player_name}: {e}")
        return None

def fetch_season_data_by_year(year, debug=False):
    """Fetch league-wide player game logs for one season.

    The input is normalized with ``normalize_season`` and fetched through
    ``get_playergamelogs_df`` (``timeout=90``, ``retries=3``, ``use_live=True``).

    Returns:
        The logs with a ``SEASON`` column set to the normalized season and
        ``GAME_DATE`` converted to datetimes, or ``None`` when the fetch raises.
    """
    from trade_impact.utils.nba_api_utils import get_playergamelogs_df, normalize_season

    season_label = normalize_season(year)
    if debug:
        print(f"Fetching data for season: {season_label}")

    try:
        logs = get_playergamelogs_df(season_label, timeout=90, retries=3, use_live=True, debug=debug)
    except Exception as e:
        if debug:
            print(f"Error fetching data for season {season_label}: {e}")
        return None

    # Tag the rows with their season and make dates comparable.
    logs['SEASON'] = season_label
    logs['GAME_DATE'] = pd.to_datetime(logs['GAME_DATE'])

    if debug:
        print(f"Fetched season data with {len(logs)} records.")
    return logs



def _na_team_stats(relevant_stats):
    """Build the single-row 'N/A' placeholder returned when there is no usable data."""
    placeholder = {"SEASON": ["N/A"], "TEAM_NAME": ["N/A"], "GAMES_PLAYED": ["N/A"]}
    placeholder.update({f'{stat}_per_game': ["N/A"] for stat in relevant_stats})
    return pd.DataFrame(placeholder)


def calculate_team_stats(player_data, period, relevant_stats, debug=False):
    """Aggregate player game logs into per-team totals and per-game averages.

    Args:
        player_data: Game logs with ``SEASON``, ``TEAM_NAME``, ``GAME_ID`` and
            the columns named in ``relevant_stats``.
        period: Label stored in the ``PERIOD`` column (and used in messages).
        relevant_stats: List of stat column names to aggregate.
        debug: When true, print progress and a preview of the result.

    Returns:
        One row per (season, team) with stat totals, ``GAMES_PLAYED`` (distinct
        game ids), ``<STAT>_per_game`` columns and ``PERIOD``. When the input is
        empty, or empty after dropping rows with missing stats, a single-row
        frame filled with ``'N/A'`` is returned instead.

    Raises:
        KeyError: If any of ``relevant_stats`` is not a column of ``player_data``.
    """
    if player_data.empty:
        if debug:
            print(f"No data available for {period}. Returning 'N/A' values.")
        return _na_team_stats(relevant_stats)

    absent = [stat for stat in relevant_stats if stat not in player_data.columns]
    if absent:
        raise KeyError(f"Missing columns in player_data: {absent}")

    if debug:
        print(f"Calculating {period} team-level statistics.")

    # Only rows with every relevant stat present take part in the totals.
    usable_rows = player_data.dropna(subset=relevant_stats)
    if usable_rows.empty:
        if debug:
            print(f"No valid data after dropping NA for {period}. Returning 'N/A' values.")
        return _na_team_stats(relevant_stats)

    team_keys = ['SEASON', 'TEAM_NAME']
    by_team = usable_rows.groupby(team_keys)

    totals = by_team[relevant_stats].sum().reset_index()
    games = by_team['GAME_ID'].nunique().reset_index(name='GAMES_PLAYED')
    team_stats = pd.merge(totals, games, on=team_keys)

    for stat in relevant_stats:
        team_stats[f'{stat}_per_game'] = team_stats[stat] / team_stats['GAMES_PLAYED']

    team_stats['PERIOD'] = period

    if debug:
        print(f"{period} team-level statistics:")
        preview_cols = team_keys + ['GAMES_PLAYED'] + [f'{stat}_per_game' for stat in relevant_stats]
        print(team_stats[preview_cols].head(), "\n")

    return team_stats


def calculate_percentiles(stats_df, relevant_stats, debug=False):
    """Add per-season percentile ranks for each stat, modifying ``stats_df`` in place.

    For every season and every stat with a ``<STAT>_per_game`` column, the
    rows of that season receive their ``rank(pct=True)`` in a
    ``<STAT>_percentile`` column.

    Returns:
        The same ``stats_df`` object with the percentile columns added.
    """
    if debug:
        print("Calculating percentiles for each team and season.\n")

    for season in stats_df['SEASON'].unique():
        in_season = stats_df['SEASON'] == season
        season_rows = stats_df[in_season]

        for stat in relevant_stats:
            per_game_col = f'{stat}_per_game'
            if per_game_col not in season_rows.columns:
                continue

            rank_col = f'{stat}_percentile'
            stats_df.loc[in_season, rank_col] = season_rows[per_game_col].rank(pct=True)

            if debug:
                print(f"Calculated percentiles for {stat} in season {season}:")
                print(stats_df.loc[in_season, [per_game_col, rank_col]].head(), "\n")

    return stats_df


def calculate_player_averages(post_trade_data, traded_players, relevant_stats, debug=False):
    """Compute each traded player's mean stats over the post-trade game logs.

    Args:
        post_trade_data: Game logs with ``PLAYER_ID`` and the relevant stats.
        traded_players: Mapping of player name to new team name (only the
            names are used here).
        relevant_stats: Stat columns to average.
        debug: When true, print skipped players and computed averages.

    Returns:
        Dict mapping player id to a Series of mean stats. Players whose id
        cannot be resolved, or who have no post-trade rows, are omitted.
    """
    averages_by_id = {}

    for player_name in traded_players:
        player_id = fetch_player_id_by_name(player_name, debug)
        if player_id is None:
            if debug:
                print(f"Skipping player {player_name} due to missing ID.")
            continue

        # Rows belonging to this player after the trade
        player_rows = post_trade_data[post_trade_data['PLAYER_ID'] == player_id]
        if player_rows.empty:
            if debug:
                print(f"No post-trade data found for player {player_name}.")
            continue

        mean_stats = player_rows[relevant_stats].mean()
        averages_by_id[player_id] = mean_stats
        if debug:
            print(f"Averages for {player_name} after trade: {mean_stats.to_dict()}")

    return averages_by_id

def simulate_game_logs(post_trade_data, player_averages, traded_players, no_trade_data, trade_date, relevant_stats, debug=False):
    """Replace traded players' post-trade logs with simulated logs on their new teams.

    For each traded player with known averages, the player's real rows are
    removed from ``post_trade_data`` and one simulated row (carrying the
    player's average stats) is created for every distinct game the new team
    has in ``no_trade_data`` on or after ``trade_date``.

    Args:
        post_trade_data: Game logs from the trade date onwards.
        player_averages: Mapping of player id to average stats.
        traded_players: Mapping of player name to new team name.
        no_trade_data: Full game logs, used to read the new team's schedule.
        trade_date: Value compared against ``GAME_DATE``.
        relevant_stats: Stat names copied from the averages into each row.
        debug: When true, print progress information.

    Returns:
        The post-trade logs with the simulated rows appended. When nothing is
        simulated and no rows are removed, the input frame itself is returned.
    """
    simulated_rows = []

    for player_name, new_team_name in traded_players.items():
        player_id = fetch_player_id_by_name(player_name, debug)
        if player_id is None or player_id not in player_averages:
            if debug:
                print(f"Skipping simulation for player {player_name}.")
            continue

        # Drop the player's real logs from the post-trade dataset
        not_this_player = post_trade_data['PLAYER_ID'] != player_id
        post_trade_data = post_trade_data[not_this_player]

        # The new team's schedule after the trade, one row per game
        on_new_team = no_trade_data['TEAM_NAME'] == new_team_name
        after_trade = no_trade_data['GAME_DATE'] >= trade_date
        team_schedule = no_trade_data[on_new_team & after_trade].drop_duplicates(
            subset=['GAME_ID', 'TEAM_NAME']
        )

        if team_schedule.empty:
            if debug:
                print(f"No games found for team {new_team_name} after trade date {trade_date}.")
            continue

        # The averages are the same for every game, so look them up once.
        averages = player_averages[player_id]
        stat_values = {stat: averages[stat] for stat in relevant_stats}

        simulated_rows.extend(
            {
                'SEASON': game['SEASON'],
                'PLAYER_ID': player_id,
                'PLAYER_NAME': player_name,
                'TEAM_ID': game['TEAM_ID'],
                'TEAM_ABBREVIATION': game['TEAM_ABBREVIATION'],
                'TEAM_NAME': new_team_name,
                'GAME_ID': game['GAME_ID'],
                'GAME_DATE': game['GAME_DATE'],
                'MATCHUP': game['MATCHUP'],
                **stat_values,
            }
            for _, game in team_schedule.iterrows()
        )

        if debug:
            print(f"Simulated {len(team_schedule)} logs for {player_name} with {new_team_name}.\n")

    # Append the simulated rows to what is left of the real post-trade data
    if simulated_rows:
        simulated_logs = pd.DataFrame(simulated_rows)
        if debug:
            print(f"Total simulated logs created: {len(simulated_logs)}")
            print(simulated_logs.head(), "\n")

        post_trade_data = pd.concat([post_trade_data, simulated_logs], ignore_index=True)

    if debug:
        print(f"Post-trade data now has {len(post_trade_data)} records after simulation.\n")

    return post_trade_data



def trade_impact_analysis(start_season, end_season, trade_date, traded_players, team_a_name, team_b_name, champion_seasons, relevant_stats, debug=False):
    """Build one comparison table per stat for the two teams involved in a trade.

    Game logs for every season from ``start_season`` through ``end_season`` are
    fetched and summarised four ways: pre-trade (games before ``trade_date``),
    post-trade (games from ``trade_date`` on, with each traded player's logs
    simulated on the new team), overall (pre-trade plus simulated post-trade)
    and no-trade (the data exactly as fetched).

    Args:
        start_season: First season to load, formatted like ``"2023-24"``.
        end_season: Last season to load (inclusive), same format.
        trade_date: Trade date; anything ``pd.to_datetime`` accepts.
        traded_players: Mapping of player name -> name of the team the player
            is traded to.
        team_a_name: Full name of the first team to report on.
        team_b_name: Full name of the second team to report on.
        champion_seasons: Seasons forwarded to ``get_champion_team_stats``.
        relevant_stats: Stat column names (e.g. ``'PTS'``) to compare.
        debug: When true, print intermediate data and add games/totals
            columns to each table.

    Returns:
        dict mapping each stat in ``relevant_stats`` to a DataFrame with one
        row per team. Missing values are reported as the string ``"N/A"``.
    """
    # Fetch full season data; collect the frames and concatenate once instead
    # of growing a DataFrame inside the loop.
    start_year = int(start_season.split('-')[0])
    end_year = int(end_season.split('-')[0])
    season_frames = []
    for season in range(start_year, end_year + 1):
        season_str = f"{season}-{str(season + 1)[-2:]}"
        data = fetch_season_data_by_year(season_str, debug)
        if data is not None:
            season_frames.append(data)
    player_data = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

    if debug:
        print(f"\nTotal player data records: {len(player_data)}")
        print(f"Sample data:\n{player_data.head()}\n")

    # Convert trade date to datetime
    trade_date = pd.to_datetime(trade_date)
    trade_month = trade_date.month

    # NBA season typically runs from October (month 10) to June (month 6)
    in_season_trade = trade_month in [10, 11, 12, 1, 2, 3, 4, 5, 6]

    # Determine if the trade is during the season or offseason
    if not in_season_trade:
        if debug:
            print("Trade date is in the offseason. Considering the full season for analysis.")
        pre_trade_data = pd.DataFrame()  # No pre-trade data since it's offseason
        post_trade_data = player_data.copy()  # Consider the full season as post-trade
    else:
        # Step 1: Create pre-trade and post-trade datasets
        pre_trade_data = player_data[player_data['GAME_DATE'] < trade_date].copy()
        post_trade_data = player_data[player_data['GAME_DATE'] >= trade_date].copy()

        if debug:
            pre_trade_points = pre_trade_data['PTS'].sum()
            pre_trade_games = pre_trade_data['GAME_ID'].nunique()
            print("Pre-trade Dataset:")
            print(f"Total Points: {pre_trade_points}")
            print(f"Total Games Played: {pre_trade_games}")

            post_trade_points = post_trade_data['PTS'].sum()
            post_trade_games = post_trade_data['GAME_ID'].nunique()
            print("Post-trade Dataset:")
            print(f"Total Points: {post_trade_points}")
            print(f"Total Games Played: {post_trade_games}\n")

    if pre_trade_data.empty and in_season_trade:
        print("No data available before the trade date.")
        # The placeholder must carry the percentile columns too: the comparison
        # loop below reads f'{stat}_percentile' from the matching team row and
        # would otherwise raise KeyError.
        pre_trade_stats = pd.DataFrame({
            "TEAM_NAME": [team_a_name, team_b_name],
            "GAMES_PLAYED": ["N/A", "N/A"],
            **{f"{stat}_per_game": ["N/A", "N/A"] for stat in relevant_stats},
            **{f"{stat}_percentile": ["N/A", "N/A"] for stat in relevant_stats},
        })
    else:
        pre_trade_stats = calculate_team_stats(pre_trade_data, 'Pre-trade', relevant_stats, debug)
        pre_trade_stats = calculate_percentiles(pre_trade_stats, relevant_stats, debug)

    # Get champion data for the selected seasons
    champion_team_data = get_champion_team_stats(champion_seasons, relevant_stats, debug)

    # Step 2: Calculate and simulate player averages post-trade, one player at
    # a time so each simulation sees the result of the previous one.
    for player_name, new_team_name in traded_players.items():
        player_averages = calculate_player_averages(post_trade_data, {player_name: new_team_name}, relevant_stats, debug)
        post_trade_data = simulate_game_logs(post_trade_data, player_averages, {player_name: new_team_name}, player_data, trade_date, relevant_stats, debug)

    # Step 3: Recalculate team statistics after all player simulations
    post_trade_stats = calculate_team_stats(post_trade_data, 'Post-trade', relevant_stats, debug)
    post_trade_stats = calculate_percentiles(post_trade_stats, relevant_stats, debug)

    no_trade_stats = calculate_team_stats(player_data, 'No-trade', relevant_stats, debug)
    no_trade_stats = calculate_percentiles(no_trade_stats, relevant_stats, debug)

    overall_trade_stats = calculate_team_stats(pd.concat([pre_trade_data, post_trade_data]), 'Overall-trade', relevant_stats, debug)
    overall_trade_stats = calculate_percentiles(overall_trade_stats, relevant_stats, debug)

    def _team_row(stats_df, team, stat):
        """Return the team's row from stats_df, or an all-"N/A" stand-in if it has none."""
        row = stats_df[stats_df['TEAM_NAME'] == team]
        if row.empty:
            row = pd.DataFrame({"GAMES_PLAYED": ["N/A"], f'{stat}_per_game': ["N/A"], f'{stat}_percentile': ["N/A"]})
        return row

    # Step 4: Final Comparison - Create a separate table for each metric
    comparison_tables = {}

    for stat in relevant_stats:
        comparison_data = []
        # The champion reference row does not depend on the team, so select it once per stat.
        champion_row = champion_team_data[champion_team_data['TEAM_NAME'] == 'Average Champion']

        for team in [team_a_name, team_b_name]:
            pre_trade = _team_row(pre_trade_stats, team, stat)
            post_trade = _team_row(post_trade_stats, team, stat)
            no_trade = _team_row(no_trade_stats, team, stat)
            overall_trade = _team_row(overall_trade_stats, team, stat)

            # Build the comparison data
            comparison_entry = {
                'Team': team,
                'Pre-trade Metric': pre_trade[f'{stat}_per_game'].values[0],
                'Post-trade Metric': post_trade[f'{stat}_per_game'].values[0],
                'Overall-trade Metric': overall_trade[f'{stat}_per_game'].values[0],
                'No-trade Metric': no_trade[f'{stat}_per_game'].values[0],
                'Champion Metric': champion_row[f'{stat}_per_game'].max(),
                'Pre-trade Percentile': pre_trade[f'{stat}_percentile'].values[0],
                'Post-trade Percentile': post_trade[f'{stat}_percentile'].values[0],
                'Overall-trade Percentile': overall_trade[f'{stat}_percentile'].values[0],
                'No-trade Percentile': no_trade[f'{stat}_percentile'].values[0],
                'Champion Percentile': champion_row[f'{stat}_percentile'].max(),
            }

            # Optionally include games and totals if debug is true
            if debug:
                comparison_entry.update({
                    'Pre-trade Games': pre_trade['GAMES_PLAYED'].values[0],
                    'Post-trade Games': post_trade['GAMES_PLAYED'].values[0],
                    'Overall-trade Games': overall_trade['GAMES_PLAYED'].values[0],
                    'No-trade Games': no_trade['GAMES_PLAYED'].values[0],
                    'Pre-trade Total': pre_trade[stat].sum() if stat in pre_trade.columns else "N/A",
                    'Post-trade Total': post_trade[stat].sum() if stat in post_trade.columns else "N/A",
                    'Overall-trade Total': overall_trade[stat].sum() if stat in overall_trade.columns else "N/A",
                    'No-trade Total': no_trade[stat].sum() if stat in no_trade.columns else "N/A",
                })

            comparison_data.append(comparison_entry)

        comparison_tables[stat] = pd.DataFrame(comparison_data)

        if debug:
            print(f"Comparison Table for {stat}:")
            print(comparison_tables[stat], "\n")

    return comparison_tables




def main(debug=True):
    """Run a sample Mavericks/Hornets trade through the analysis and print every table."""
    season = "2023-24"
    # September lies outside the October-June window that trade_impact_analysis
    # treats as in-season, so this date exercises the offseason path.
    trade_date = '2023-09-20'

    mavericks = "Dallas Mavericks"
    hornets = "Charlotte Hornets"

    # Roster lookups for both teams; the returned rosters are not used below.
    get_players_for_team(mavericks, season)
    get_players_for_team(hornets, season)

    # Players leaving each team
    leaving_mavericks = ["Grant Williams", "Seth Curry"]
    leaving_hornets = ["P.J. Washington"]

    # Map every traded player to the team he joins
    destinations = {
        **{name: hornets for name in leaving_mavericks},
        **{name: mavericks for name in leaving_hornets},
    }

    # Seasons whose champions form the benchmark
    benchmark_seasons = ["2014-15", "2015-16", "2016-17", "2017-18", "2018-19", "2019-20", "2021-22", "2022-23", "2023-24"]

    # Stats to analyse; extend this list to compare more metrics
    stats_to_compare = ['PTS']

    tables = trade_impact_analysis(
        season, season, trade_date, destinations, mavericks, hornets,
        benchmark_seasons, stats_to_compare, debug=debug,
    )

    # Print all comparison tables
    for stat_name in tables:
        print(f"Comparison Table for {stat_name}:")
        print(tables[stat_name])

if __name__ == "__main__":
    main(debug=True)




Overwriting ../src/trade_impact/overall_team_trade_impact.py


Reference for the salary-matching trade rules (2023-24 season): https://www.hoopsrumors.com/2023/09/salary-matching-rules-for-trades-during-2023-24-season.html


FIRST_TAX_APRON = 172_346_000

def check_salary_matching_rules(outgoing_salary, incoming_salary, team_salary_before_trade):
    """Return whether incoming_salary fits under the salary-matching limit.

    Teams whose pre-trade salary is not below FIRST_TAX_APRON may take back
    110% of what they send out. Teams below it get a tiered allowance based on
    the outgoing amount.
    """
    # At or above the apron: one flat rule, no tiers.
    if not team_salary_before_trade < FIRST_TAX_APRON:
        return incoming_salary <= 1.10 * outgoing_salary

    # Below the apron: pick the tier from the outgoing amount.
    if outgoing_salary <= 7_500_000:
        ceiling = 2 * outgoing_salary + 250_000
    elif outgoing_salary <= 29_000_000:
        ceiling = outgoing_salary + 7_500_000
    else:
        ceiling = 1.25 * outgoing_salary + 250_000

    return incoming_salary <= ceiling

In [11]:
%%writefile ../src/trade_impact/nba_rules_trade_impact.py

# https://www.hoopsrumors.com/2023/09/salary-matching-rules-for-trades-during-2023-24-season.html

import pandas as pd

# Constants for the 2023/24 season
FIRST_TAX_APRON_2023 = 172_346_000
SALARY_CAP_2023 = 136_021_000

# Percentages based on rules. The flat bonuses are stored as a fraction of the
# 2023/24 cap so they scale with whatever salary_cap the caller passes in.
UP_TO_7500K_MULTIPLIER = 2.0
UP_TO_7500K_BONUS = 250_000 / SALARY_CAP_2023
BETWEEN_7501K_AND_29M_BONUS = 7_500_000 / SALARY_CAP_2023
ABOVE_29M_MULTIPLIER = 1.25
ABOVE_29M_BONUS = 250_000 / SALARY_CAP_2023
ABOVE_FIRST_APRON_MULTIPLIER = 1.10

def check_salary_matching_rules(outgoing_salary, incoming_salary, team_salary_before_trade, salary_cap, first_tax_apron, debug=False):
    """Check one team's side of a trade against the salary-matching limits.

    Args:
        outgoing_salary: Total salary the team sends out.
        incoming_salary: Total salary the team takes back.
        team_salary_before_trade: Team payroll before the trade; compared
            against ``first_tax_apron`` to pick the rule set.
        salary_cap: Salary cap used to scale the flat bonuses.
        first_tax_apron: First tax apron threshold.
        debug: When true, the returned text describes the inputs and the
            rule that was applied; otherwise it is an empty string.

    Returns:
        Tuple ``(is_allowed, max_incoming_salary, rule, percentage_limit,
        debug_text)``. ``percentage_limit`` is ``max_incoming_salary`` divided
        by ``outgoing_salary``, or ``inf`` when nothing is sent out.
    """
    debug_info = []
    if debug:
        debug_info.append("Debug: Checking salary matching rules:")
        debug_info.append(f"  Outgoing Salary: ${outgoing_salary:,.2f}")
        debug_info.append(f"  Incoming Salary: ${incoming_salary:,.2f}")
        debug_info.append(f"  Team Salary Before Trade: ${team_salary_before_trade:,.2f}")
        debug_info.append(f"  Salary Cap: ${salary_cap:,.2f}")
        debug_info.append(f"  First Tax Apron: ${first_tax_apron:,.2f}")

    if team_salary_before_trade < first_tax_apron:
        if outgoing_salary <= 7_500_000:
            max_incoming_salary = UP_TO_7500K_MULTIPLIER * outgoing_salary + UP_TO_7500K_BONUS * salary_cap
            rule = "200% of outgoing + 250,000 (up to 7,500,000)"
        elif outgoing_salary <= 29_000_000:
            max_incoming_salary = outgoing_salary + BETWEEN_7501K_AND_29M_BONUS * salary_cap
            rule = "outgoing + 7,500,000 (7,500,001 to 29,000,000)"
        else:
            max_incoming_salary = ABOVE_29M_MULTIPLIER * outgoing_salary + ABOVE_29M_BONUS * salary_cap
            rule = "125% of outgoing + 250,000 (above 29,000,000)"
        # A team sending out no salary has no finite ratio; report infinity
        # instead of raising ZeroDivisionError.
        if outgoing_salary:
            percentage_limit = max_incoming_salary / outgoing_salary
        else:
            percentage_limit = float("inf")
    else:
        max_incoming_salary = ABOVE_FIRST_APRON_MULTIPLIER * outgoing_salary
        rule = "110% of outgoing (above first tax apron)"
        percentage_limit = ABOVE_FIRST_APRON_MULTIPLIER

    if debug:
        debug_info.append(f"  Max Incoming Salary Allowed: ${max_incoming_salary:,.2f}")
        debug_info.append(f"  Rule Applied: {rule}")
        debug_info.append(f"  Percentage Limit: {percentage_limit:.2f}")

    return incoming_salary <= max_incoming_salary, max_incoming_salary, rule, percentage_limit, "\n".join(debug_info)

def analyze_trade_scenario(players1, players2, predictions_df, season, debug=False):
    """Validate a two-team trade against the salary-matching rules.

    Args:
        players1: Player names sent out by the first team.
        players2: Player names sent out by the second team.
        predictions_df: DataFrame with at least ``Season``, ``Player``,
            ``Team`` and ``Salary`` columns. It is not modified.
        season: Season such as ``"2023-24"``; only the first four characters
            are used to select rows.
        debug: When true, include the detailed rule breakdown in the report.

    Returns:
        ``(trade_status, report)``. ``trade_status`` is ``True``/``False``
        for a checked trade, or ``None`` when the inputs fail validation, in
        which case ``report`` holds the error message.
    """
    debug_info = []

    # Convert the season (e.g., 2023-24) to just the first four characters (e.g., 2023)
    season_filter = str(season)[:4]

    # Match on a string view of Season without writing the converted column
    # back into the caller's DataFrame.
    season_as_text = predictions_df['Season'].astype(str)
    season_data = predictions_df[season_as_text.str.startswith(season_filter)]

    # Check if players are found in the dataset
    known_players = season_data['Player'].values
    for player in [*players1, *players2]:
        if player not in known_players:
            return None, f"Error: Player {player} is not found in the dataset."

    rows1 = season_data[season_data['Player'].isin(players1)]
    rows2 = season_data[season_data['Player'].isin(players2)]

    # Ensure all players in each list are from the same team. Every listed
    # player was found above, so the only way to fail is a list spanning
    # several teams (or an empty list).
    teams1 = rows1['Team'].unique()
    teams2 = rows2['Team'].unique()

    if len(teams1) != 1:
        return None, "Error: All players in the first list must be from the same team."

    if len(teams2) != 1:
        return None, "Error: All players in the second list must be from the same team."

    team1 = teams1[0]
    team2 = teams2[0]

    if team1 == team2:
        return None, "Error: The two teams involved in the trade must be different."

    # Calculate total salaries for each group of players; what one team sends
    # out is exactly what the other takes back.
    outgoing_salary_team1 = rows1['Salary'].sum()
    incoming_salary_team1 = rows2['Salary'].sum()

    outgoing_salary_team2 = incoming_salary_team1
    incoming_salary_team2 = outgoing_salary_team1

    # Check salary matching rules for both teams
    team1_salary_before_trade = season_data[season_data['Team'] == team1]['Salary'].sum()
    team2_salary_before_trade = season_data[season_data['Team'] == team2]['Salary'].sum()

    # Determine tax apron status
    team1_tax_apron_status = "Below" if team1_salary_before_trade < FIRST_TAX_APRON_2023 else "Above"
    team2_tax_apron_status = "Below" if team2_salary_before_trade < FIRST_TAX_APRON_2023 else "Above"

    trade_works_for_team1, team1_max_incoming_salary, team1_rule, team1_percentage_limit, team1_debug = check_salary_matching_rules(
        outgoing_salary_team1, incoming_salary_team1, team1_salary_before_trade, SALARY_CAP_2023, FIRST_TAX_APRON_2023, debug
    )
    trade_works_for_team2, team2_max_incoming_salary, team2_rule, team2_percentage_limit, team2_debug = check_salary_matching_rules(
        outgoing_salary_team2, incoming_salary_team2, team2_salary_before_trade, SALARY_CAP_2023, FIRST_TAX_APRON_2023, debug
    )

    if debug:
        debug_info.append(team1_debug)
        debug_info.append(team2_debug)
        debug_info.append("\nDebug: Trade Analysis Results:")
        debug_info.append(f"Team 1 ({team1}):")
        debug_info.append(f"  Total Outgoing Salary: ${outgoing_salary_team1:,.2f}")
        debug_info.append(f"  Max Incoming Salary Allowed: ${team1_max_incoming_salary:,.2f} (Rule: {team1_rule})")
        debug_info.append(f"  Percentage Limit: {team1_percentage_limit:.2f}")
        debug_info.append(f"Team 2 ({team2}):")
        debug_info.append(f"  Total Outgoing Salary: ${outgoing_salary_team2:,.2f}")
        debug_info.append(f"  Max Incoming Salary Allowed: ${team2_max_incoming_salary:,.2f} (Rule: {team2_rule})")
        debug_info.append(f"  Percentage Limit: {team2_percentage_limit:.2f}")

    # The per-team verdict lines are part of the report regardless of debug.
    trade_status = True
    if not trade_works_for_team1:
        debug_info.append("Trade Works for Team 1: No")
        debug_info.append(f"  Trade fails for Team 1 because incoming salary exceeds max allowed under rule: {team1_rule}")
        debug_info.append(f"  Team 1 is {team1_tax_apron_status} the First Tax Apron.")
        trade_status = False
    else:
        debug_info.append("Trade Works for Team 1: Yes")

    if not trade_works_for_team2:
        debug_info.append("Trade Works for Team 2: No")
        debug_info.append(f"  Trade fails for Team 2 because incoming salary exceeds max allowed under rule: {team2_rule}")
        debug_info.append(f"  Team 2 is {team2_tax_apron_status} the First Tax Apron.")
        trade_status = False
    else:
        debug_info.append("Trade Works for Team 2: Yes")

    if trade_status:
        debug_info.append("The trade is valid according to salary matching rules.")
    else:
        debug_info.append("The trade does not satisfy salary matching rules.")

    return trade_status, "\n".join(debug_info)

if __name__ == "__main__":
    # Load the real predictions dataframe
    predictions_df = pd.read_csv('../data/processed/predictions_df.csv')

    # Specify two lists of players for the trade scenario
    players1 = ["Anthony Davis", "LeBron James"]
    players2 = ["Jayson Tatum", "Jaylen Brown"]

    # Analyze the trade scenario for the specified season with debugging enabled
    season = "2023-24"
    print(f"Analyzing trade for the {season} season:")
    results, debug_output = analyze_trade_scenario(players1, players2, predictions_df, season, debug=True)
    # Label each value with its own name: debug_output is the text report,
    # results is the pass/fail flag (None when validation failed).
    print("debug_output =", debug_output)
    print("results =", results)




Overwriting ../src/trade_impact/nba_rules_trade_impact.py


In [12]:
%%writefile ../src/trade_impact/combined_trade_analysis.py

import pandas as pd
from trade_impact.percentile_count_trade_impact import get_champion_percentiles, generate_comparison_tables
from trade_impact.overall_team_trade_impact import trade_impact_analysis
from trade_impact.nba_rules_trade_impact import analyze_trade_scenario
from trade_impact.shot_chart.nba_efficiency import calculate_compatibility_between_players
from trade_impact.shot_chart.nba_shots import fetch_shots_for_multiple_players

def analyze_player_salaries(players, predictions_df):
    """Compare each player's actual salary with the predicted one.

    The predicted salary is ``Predicted_Salary`` multiplied by
    ``Salary_Cap_Inflated``, taken from the player's first matching row.
    Players with no row in ``predictions_df`` are left out of the result.

    Returns a DataFrame with one row per matched player and a ``Status`` of
    "Overpaid", "Underpaid" or "Fairly Paid".
    """
    records = []

    for name in players:
        rows = predictions_df[predictions_df['Player'] == name]
        if rows.empty:
            continue

        paid = rows['Salary'].values[0]
        cap = rows['Salary_Cap_Inflated'].values[0]
        expected = rows['Predicted_Salary'].values[0] * cap
        gap = paid - expected

        if gap > 0:
            verdict = "Overpaid"
        elif gap < 0:
            verdict = "Underpaid"
        else:
            verdict = "Fairly Paid"

        records.append({
            'Player': name,
            'Actual Salary': paid,
            'Predicted Salary': expected,
            'Difference': gap,
            'Status': verdict,
        })

    return pd.DataFrame(records)

def combined_trade_analysis(team_a_name, team_b_name, selected_players_team_a, selected_players_team_b, 
                            trade_date, champion_seasons, trade_season, relevant_stats, predictions_df, debug=False):
    """
    Run every trade analysis for one proposed trade and bundle the results.

    The steps run in a fixed order: salary-matching check, champion
    percentiles, before/after percentile tables, per-stat trade impact
    tables, player salary analysis and shot-area compatibility. The return
    value is a dict holding the output of each step.
    """
    # 1. Salary-matching rules
    is_valid, rules_report = analyze_trade_scenario(
        selected_players_team_a,
        selected_players_team_b,
        predictions_df=predictions_df,
        season=trade_season,
        debug=debug,
    )

    # 2. Champion percentiles
    champion_percentiles = get_champion_percentiles(champion_seasons, debug)

    # 3. Percentile comparison tables for each team in the trade season
    table_a, table_b = generate_comparison_tables(
        trade_season, team_a_name, team_b_name,
        selected_players_team_a, selected_players_team_b,
        champion_percentiles, debug,
    )

    # 4. Per-stat trade impact; each player maps to the team he joins
    destinations = {
        **{name: team_b_name for name in selected_players_team_a},
        **{name: team_a_name for name in selected_players_team_b},
    }
    stat_tables = trade_impact_analysis(
        start_season=trade_season,
        end_season=trade_season,
        trade_date=trade_date,
        traded_players=destinations,
        team_a_name=team_a_name,
        team_b_name=team_b_name,
        champion_seasons=champion_seasons,
        relevant_stats=relevant_stats,
        debug=debug,
    )

    # 5. Overpaid / underpaid verdict for everyone in the trade
    everyone = selected_players_team_a + selected_players_team_b
    salary_report = analyze_player_salaries(everyone, predictions_df)

    # 6. Shot-area compatibility between the traded players
    shots = fetch_shots_for_multiple_players(everyone, season=trade_season, court_areas='all')
    compatibility = calculate_compatibility_between_players(shots)

    bundle = {}
    bundle['average_champion_percentiles'] = champion_percentiles
    bundle['team_a_comparison_table'] = table_a
    bundle['team_b_comparison_table'] = table_b
    bundle['comparison_tables'] = stat_tables
    bundle['trade_analysis'] = rules_report   # text report from the salary-matching check
    bundle['trade_valid'] = is_valid          # validity flag from the salary-matching check
    bundle['salary_analysis'] = salary_report
    bundle['compatibility_analysis'] = compatibility
    return bundle

def main():
    """Run the combined analysis on a sample Lakers/Hawks trade and print each result."""
    # Load the predictions data
    predictions = pd.read_csv('../data/processed/predictions_df.csv')

    # Sample scenario
    lakers = "Los Angeles Lakers"
    hawks = "Atlanta Hawks"
    leaving_lakers = ["LeBron James", "Anthony Davis"]
    leaving_hawks = ["Dejounte Murray"]
    when = "2023-09-15"  # Example trade date
    benchmark_seasons = ["2020-21", "2021-22", "2022-23"]
    season = "2023-24"
    stats = ["PTS", "AST", "REB", "STL", "BLK"]
    verbose = True  # Set to True to see debug information

    report = combined_trade_analysis(
        lakers, hawks, leaving_lakers, leaving_hawks,
        when, benchmark_seasons, season, stats, predictions, verbose,
    )

    # Salary-matching report first, then the verdict
    print("Trade Analysis Debug Info:\n", report['trade_analysis'])

    verdict = (
        "The trade satisfies NBA salary matching rules."
        if report['trade_valid']
        else "The trade does NOT satisfy NBA salary matching rules."
    )
    print(verdict)

    # Percentile and per-stat tables
    print("Average Champion Percentiles:\n", report['average_champion_percentiles'])
    print(f"{lakers} Comparison Table:\n", report['team_a_comparison_table'])
    print(f"{hawks} Comparison Table:\n", report['team_b_comparison_table'])
    for stat_name in report['comparison_tables']:
        print(f"Comparison Table for {stat_name}:\n", report['comparison_tables'][stat_name])

    # Salary analysis
    print("\nPlayer Salary Analysis:")
    print(report['salary_analysis'])

    # Compatibility analysis
    print("\nPlayer Compatibility Analysis:")
    print(report['compatibility_analysis'])

if __name__ == "__main__":
    main()


Overwriting ../src/trade_impact/combined_trade_analysis.py


In [13]:
%%writefile ../src/trade_impact/utils/nba_api_utils.py
import os
import time
import random
from typing import Optional, Tuple

import pandas as pd

from nba_api.stats.static import teams as static_teams
from nba_api.stats.endpoints import (
    playergamelogs as _playergamelogs,
    commonteamroster as _commonteamroster,
    leaguegamefinder as _leaguegamefinder,
)

# -------------------------
# Paths
# -------------------------
# Defined first: the optional HTTP cache below stores its SQLite file in this
# directory, so the name must exist before that block runs.
_CACHE_DIR = os.path.join("..", "data", "processed", "cache_nba_api")
os.makedirs(_CACHE_DIR, exist_ok=True)

# ---- optional global HTTP response cache (safe no-op if lib missing) ----
try:
    from datetime import timedelta
    from requests_cache import install_cache
except ImportError:
    # requests_cache is optional; without it responses are simply not cached.
    pass
else:
    try:
        _HTTP_CACHE_PATH = os.path.join(_CACHE_DIR, "http_cache_sqlite")
        install_cache(cache_name=_HTTP_CACHE_PATH, backend="sqlite", expire_after=timedelta(hours=12))
    except Exception as _http_cache_err:
        # Still best-effort (the module must import), but report the failure
        # instead of hiding it.
        import warnings
        warnings.warn(f"[nba_api_utils] HTTP response cache disabled: {_http_cache_err}", RuntimeWarning)

# ---- optional Streamlit cache decorators (work outside Streamlit too) ----
try:
    import streamlit as st
    def _st_cache_data(**kw):
        return st.cache_data(**kw)
except Exception:
    def _st_cache_data(**kw):
        # no-op decorator outside Streamlit
        def deco(fn): return fn
        return deco

# -------------------------
# Simple disk cache
# -------------------------

def _cache_path(key: str) -> str:
    """Return the CSV path inside the cache directory for a cache key."""
    # Slashes, backslashes and spaces become underscores so the key is a
    # single file name.
    sanitized = key.translate(str.maketrans({"/": "_", "\\": "_", " ": "_"}))
    return os.path.join(_CACHE_DIR, sanitized + ".csv")

def _load_cache_df(key: str) -> Optional[pd.DataFrame]:
    """Read the cached CSV for ``key``; return None if it is missing or unreadable."""
    csv_file = _cache_path(key)
    if not os.path.exists(csv_file):
        return None
    try:
        frame = pd.read_csv(csv_file)
    except Exception:
        # An unreadable cache file is treated the same as a cache miss.
        return None
    return frame

def _save_cache_df(key: str, df: pd.DataFrame) -> None:
    """Write ``df`` to the cache CSV for ``key``; None or empty frames are not written."""
    has_rows = df is not None and not df.empty
    if has_rows:
        df.to_csv(_cache_path(key), index=False)

# -------------------------
# Season normalization
# -------------------------
def normalize_season(season_or_year: str | int) -> str:
    """
    Accepts '2023-24' or 2023 or '2023' and returns '2023-24'.
    """
    if isinstance(season_or_year, int):
        y = season_or_year
        return f"{y}-{str(y + 1)[-2:]}"
    s = str(season_or_year)
    if "-" in s and len(s) >= 7:
        return s  # already 'YYYY-YY'
    if s.isdigit():
        y = int(s)
        return f"{y}-{str(y + 1)[-2:]}"
    raise ValueError(f"Unrecognized season format: {season_or_year}")

# -------------------------
# Retry helper
# -------------------------
def _with_retries(fn, *, retries=3, base_delay=1.5, jitter=0.75, debug=False):
    """Call ``fn()`` until it succeeds or the attempts run out.

    Args:
        fn: Zero-argument callable to invoke.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so ``fn`` is always tried at least once.
        base_delay: Seconds multiplied by the attempt number to get the wait
            before the next attempt.
        jitter: Upper bound of the random extra wait added to each delay.
        debug: When true, print each failed attempt.

    Returns:
        Whatever ``fn()`` returns on the first successful attempt.

    Raises:
        The exception from the final failed attempt.
    """
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception as e:
            if debug:
                print(f"[nba_api_utils] attempt {attempt} failed: {e}")
            if attempt == attempts:
                # Out of attempts: re-raise with the original traceback.
                raise
            delay = base_delay * attempt + random.random() * jitter
            time.sleep(delay)

# -------------------------
# Team lookup
# -------------------------
def get_team_id_by_full_name(team_full_name: str) -> Optional[int]:
    """Return the id of the first static team whose full_name matches exactly, else None."""
    matching_ids = (
        int(team["id"])
        for team in static_teams.get_teams()
        if team.get("full_name") == team_full_name
    )
    return next(matching_ids, None)

# -------------------------
# Light roster fetch (preferred for populating UI)
# -------------------------
@_st_cache_data(persist="disk", ttl=60*60*12, max_entries=128)
def get_commonteamroster_df(team_id: int, season: str | int,
                            *, timeout=60, retries=3, use_live=True, debug=False) -> pd.DataFrame:
    """
    Fetch a team's roster for a season, backed by the CSV cache.

    With ``use_live=False`` a cached copy is returned when one exists;
    otherwise the live endpoint is called with retries and the result is
    cached. If the live call fails, the cached copy is used, and if there is
    none an empty DataFrame with PLAYER/TEAM_ID/SEASON columns is returned.
    """
    season_label = normalize_season(season)
    key = f"commonteamroster_{team_id}_{season_label}"

    if not use_live:
        offline_copy = _load_cache_df(key)
        if offline_copy is not None:
            return offline_copy

    def fetch_roster():
        endpoint = _commonteamroster.CommonTeamRoster(
            team_id=team_id,
            season=season_label,
            timeout=timeout,
        )
        return endpoint.get_data_frames()[0]

    try:
        roster = _with_retries(fetch_roster, retries=retries, debug=debug)
        _save_cache_df(key, roster)
        return roster
    except Exception as err:
        if debug:
            print(f"[nba_api_utils] roster live fetch failed for team_id={team_id} season={season_label}: {err}")
        stale = _load_cache_df(key)
        if stale is None:
            # Nothing cached either: hand back an empty frame with the
            # expected columns so callers can degrade gracefully.
            return pd.DataFrame(columns=["PLAYER", "TEAM_ID", "SEASON"])
        if debug:
            print(f"[nba_api_utils] Using cached roster for {team_id} {season_label}")
        return stale



# -------------------------
# Heavier logs fetch (only when truly needed)
# -------------------------
@_st_cache_data(persist="disk", ttl=60*60*12, max_entries=32)
def get_playergamelogs_df(season: str | int,
                          *, timeout=90, retries=3, use_live=True, debug=False) -> pd.DataFrame:
    """
    Fetch league-wide player game logs for a season, backed by the CSV cache.

    With ``use_live=False`` a cached copy is returned when one exists;
    otherwise the live endpoint is called with retries, a SEASON column is
    added and the result is cached. If the live call fails, the cached copy
    is used, and if there is none an empty DataFrame with the key log
    columns is returned.
    """
    season_label = normalize_season(season)
    key = f"playergamelogs_league_{season_label}"

    if not use_live:
        offline_copy = _load_cache_df(key)
        if offline_copy is not None:
            return offline_copy

    def fetch_logs():
        endpoint = _playergamelogs.PlayerGameLogs(
            season_nullable=season_label,
            timeout=timeout,
        )
        logs = endpoint.get_data_frames()[0]
        logs["SEASON"] = season_label
        return logs

    try:
        logs = _with_retries(fetch_logs, retries=retries, debug=debug)
        _save_cache_df(key, logs)
        return logs
    except Exception as err:
        if debug:
            print(f"[nba_api_utils] logs live fetch failed for season={season_label}: {err}")
        stale = _load_cache_df(key)
        if stale is None:
            # Nothing cached either: empty frame with the expected columns.
            return pd.DataFrame(columns=["SEASON","TEAM_NAME","PLAYER_NAME","GAME_DATE","GAME_ID"])
        if debug:
            print(f"[nba_api_utils] Using cached logs for {season_label}")
        return stale


# -------------------------
# Champion helper with cache
# -------------------------
def get_champion_team_name(season: str | int, *, timeout=90, retries=3, use_live=True, debug=False) -> Optional[str]:
    """
    Return the name of the team that won the last playoff game of a season.

    The live path queries ``LeagueGameFinder`` for playoff games, sorts them by
    date, and takes the ``WL == "W"`` row among the final two rows. The
    result is cached as a one-row frame with a ``TEAM_NAME`` column. When
    ``use_live`` is False a usable cache entry is returned first; when the live
    path fails the cache is consulted, and ``None`` is returned if it is unusable.
    """
    season_norm = normalize_season(season)
    cache_key = f"champion_team_{season_norm}"

    def _has_team_name(frame):
        # A cache entry is usable only if it is a non-empty frame with TEAM_NAME.
        return isinstance(frame, pd.DataFrame) and not frame.empty and "TEAM_NAME" in frame

    if not use_live:
        offline = _load_cache_df(cache_key)
        if _has_team_name(offline):
            return offline["TEAM_NAME"].iloc[0]

    def _call():
        games = _leaguegamefinder.LeagueGameFinder(
            season_nullable=season_norm,
            season_type_nullable="Playoffs",
            timeout=timeout,
        ).get_data_frames()[0]
        games["GAME_DATE"] = pd.to_datetime(games["GAME_DATE"])
        final_rows = games.sort_values("GAME_DATE").iloc[-2:]
        winning_row = final_rows[final_rows["WL"] == "W"].iloc[0]
        return pd.DataFrame([{"TEAM_NAME": winning_row["TEAM_NAME"]}])

    try:
        winner_df = _with_retries(_call, retries=retries, debug=debug)
        _save_cache_df(cache_key, winner_df)
        return winner_df["TEAM_NAME"].iloc[0]
    except Exception as exc:
        fallback = _load_cache_df(cache_key)
        if not _has_team_name(fallback):
            return None
        if debug:
            print(f"[nba_api_utils] Using cached champion for {season_norm} due to error: {exc}")
        return fallback["TEAM_NAME"].iloc[0]


Overwriting ../src/trade_impact/utils/nba_api_utils.py


In [None]:
%%writefile ../src/streamlit_app_helpers.py
"""
Fast player list accessors for Streamlit app.
Replaces heavy gamelog calls with lightweight season index lookups.
"""

import pandas as pd
from salary_nba_data_pull.data_utils import read_season_player_index
from salary_nba_data_pull.fetch_utils import network_env_diagnostics, fetch_season_players


def get_players_for_season_fast(season: str,
                                *,
                                debug: bool = True) -> pd.DataFrame:
    """
    UI helper: tiny DataFrame [Player, PlayerID, Team, TeamID] for the season.

    Priority:
      1) local season index (instant)
      2) ONE roster call via nba_api (diagnostics only; Team=None by design)

    No filling. Every return path, including the empty ones, carries the same
    four columns so callers can select them without checking for presence.

    Args:
        season: Season identifier passed through to the index/roster readers.
        debug: When True, print diagnostics about which source was used.

    Returns:
        DataFrame with columns Player, PlayerID, Team, TeamID (possibly empty).
    """
    from salary_nba_data_pull.data_utils import read_season_player_index
    from salary_nba_data_pull.fetch_utils import network_env_diagnostics, fetch_season_players

    columns = ["Player", "PlayerID", "Team", "TeamID"]

    idx = read_season_player_index(season, debug=debug)
    if not idx.empty:
        out = idx[columns].drop_duplicates().reset_index(drop=True)
        if debug:
            print(f"[players-fast] season={season} source=index rows={len(out)}")
            print(f"[players-fast] Team sample: {out['Team'].dropna().astype(str).unique()[:12]}")
        return out

    # Only attempt the roster call when the stats host answered the probe.
    diag = network_env_diagnostics(timeout_sec=5)
    if diag.get("nba_stats") not in (200, 301, 302):
        if debug:
            print(f"[players-fast] stats.nba.com not reachable (diag={diag}); returning empty result.")
        return pd.DataFrame(columns=columns)

    roster = fetch_season_players(season, debug=debug)
    rows = [{"Player": key.upper(),
             "PlayerID": meta.get("player_id"),
             "Team": None,
             "TeamID": meta.get("team_id")} for key, meta in roster.items()]
    # Passing `columns` keeps the schema stable even when the roster is empty
    # (a bare pd.DataFrame([]) would have no columns at all).
    out = pd.DataFrame(rows, columns=columns)
    if debug:
        print(f"[players-fast] season={season} source=roster rows={len(out)}; Team is None; TeamID populated variably.")
    return out


def get_season_list_fast(*, debug: bool = True) -> list[str]:
    """
    List the seasons that have a parquet file in the local season index.

    Purely a filesystem lookup under ``DATA_PROCESSED_DIR / "season_index"``;
    file names are expected to look like ``season=<label>.parquet``. The result
    is sorted in descending string order (newest first), or is an empty list
    when the directory does not exist.
    """
    from pathlib import Path
    from salary_nba_data_pull.settings import DATA_PROCESSED_DIR

    index_dir = Path(DATA_PROCESSED_DIR) / "season_index"
    if not index_dir.exists():
        if debug:
            print(f"[season-list] index directory not found at {index_dir}")
        return []

    # Strip the "season=" prefix from each file stem and order newest first.
    seasons = sorted(
        (entry.stem.replace("season=", "") for entry in index_dir.glob("season=*.parquet")),
        reverse=True,
    )
    if debug:
        print(f"[season-list] found {len(seasons)} seasons: {seasons[:5]}...")
    return seasons


def check_network_connectivity(*, debug: bool = True) -> dict:
    """
    Probe external services and return the diagnostics for troubleshooting.

    Thin wrapper around ``network_env_diagnostics`` with a 5-second timeout.
    The ``debug`` flag is accepted for signature parity with the other helpers
    but is not used here.
    """
    diagnostics = network_env_diagnostics(timeout_sec=5)
    return diagnostics


In [None]:
%%writefile ../src/trade_impact_section_st_app.py

# Required imports
import streamlit as st
import pandas as pd
from nba_api.stats.static import teams
from datetime import date
from trade_impact.combined_trade_analysis import combined_trade_analysis
from streamlit_app_helpers import get_players_for_season_fast, check_network_connectivity

# --- Season-format helper; the name is kept because the app calls it ---
def convert_season_format(year):
    """
    Normalize a year or season label to the season format used by the app.

    Delegates entirely to ``normalize_season``; e.g. 2023 or '2023' is expected
    to become '2023-24', and an input already in 'YYYY-YY' form is expected to
    come back unchanged.
    """
    from trade_impact.utils.nba_api_utils import normalize_season

    normalized = normalize_season(year)
    return normalized


def get_players_for_team(team_name, season="2023-24", *, use_live: bool = True, debug: bool = False):
    """
    Fast + robust players list for a specific team in a season.

    Priority:
      1) Season index (local parquet): rows are matched on TeamID first, and on
         the Team label (full name or abbreviation) only when no TeamID matched.
      2) Roster fallback via fetch_team_roster(team_id, season), used whenever
         the index yields fewer than MIN_TEAM_SIZE players.

    If both sources are sparse, the index result is preferred when it is
    non-empty; otherwise the (sparse) roster result is returned.

    Args:
        team_name: Full team name or abbreviation (matched case-insensitively).
        season: Season label; normalized with normalize_season.
        use_live: Accepted for interface compatibility; not used by this function.
        debug: When True, print diagnostics for each step.

    Returns:
        Sorted list of player names; [] when the team cannot be resolved or no
        source returns any player.
    """
    import pandas as pd
    from nba_api.stats.static import teams as _static_teams
    from trade_impact.utils.nba_api_utils import normalize_season
    from streamlit_app_helpers import get_players_for_season_fast
    from salary_nba_data_pull.fetch_utils import fetch_team_roster

    # Bound up front: the roster fallback below needs both names even when the
    # season index is empty and the index branch never runs.
    MIN_TEAM_SIZE = 8  # Reasonable minimum for a full roster
    players = []

    season_norm = normalize_season(season)

    # Resolve team metadata
    all_teams = _static_teams.get_teams()
    by_full = {t["full_name"].casefold(): t for t in all_teams}
    by_abbr = {t["abbreviation"].casefold(): t for t in all_teams}
    meta = by_full.get(team_name.casefold()) or by_abbr.get(team_name.casefold())
    if meta is None:
        if debug:
            print(f"[get_players_for_team] cannot resolve team metadata for '{team_name}'")
        return []
    team_id = int(meta["id"])
    abbr = meta["abbreviation"]
    full = meta["full_name"]

    # Try the season index first (fast path)
    idx = get_players_for_season_fast(season_norm, debug=debug).copy()
    if not idx.empty:
        if debug:
            print(f"[get_players_for_team] source=index  rows={len(idx)}  "
                  f"cols={list(idx.columns)}  season={season_norm} team={full}({team_id})")
        try:
            has_team_id = "TeamID" in idx.columns
            id_mask = pd.Series(False, index=idx.index)
            if has_team_id and idx["TeamID"].notna().any():
                # Nullable-int comparison yields <NA> for missing IDs; treat those as no match.
                id_mask = (idx["TeamID"].astype("Int64") == team_id).fillna(False).astype(bool)

            label_mask = pd.Series(False, index=idx.index)
            if "Team" in idx.columns:
                labels = idx["Team"].astype(str)
                label_mask = labels.str.casefold().eq(full.casefold()) | labels.str.upper().eq(abbr.upper())

            # Label matching is only a fallback for when no TeamID matched.
            mask = id_mask if id_mask.any() else label_mask
            players = (idx.loc[mask, ["Player", "PlayerID"]]
                          .dropna(subset=["Player"])
                          .drop_duplicates()
                          .sort_values("Player")["Player"].tolist())
            if debug:
                via_id = int(id_mask.sum()) if has_team_id else -1
                via_lbl = int(label_mask.sum())
                print(f"[get_players_for_team] index-match via TeamID={via_id}, via label≈{via_lbl}, final={len(players)}")
        except Exception as e:
            # Best effort: a malformed index must not block the roster fallback.
            if debug:
                print(f"[get_players_for_team][index-path][ERROR] {e}")
            players = []

        # Check if index returned a reasonable team size
        if len(players) >= MIN_TEAM_SIZE:
            if debug:
                print(f"[get_players_for_team] using index result ({len(players)} players)")
            return players
        if players and debug:
            print(f"[get_players_for_team] index returned only {len(players)} players, trying roster fallback")

    # Authoritative fallback: CommonTeamRoster
    if debug:
        print(f"[get_players_for_team] source=CommonTeamRoster fallback  season={season_norm} team={full}({team_id})")
    roster = fetch_team_roster(team_id=team_id, season=season_norm, debug=debug)
    if roster.empty:
        if debug:
            print(f"[get_players_for_team] roster fallback returned 0 rows")
        # Return whatever the index had as last resort
        return players

    if "PLAYER" not in roster.columns:
        if debug:
            print(f"[get_players_for_team] roster columns unexpected: {list(roster.columns)}")
        # Return whatever the index had as last resort
        return players

    roster_players = roster[["PLAYER"]].dropna().drop_duplicates().sort_values("PLAYER")["PLAYER"].tolist()
    if debug:
        print(f"[get_players_for_team] roster fallback returned {len(roster_players)} players")

    # Prefer roster result if it's substantial, otherwise fall back to index
    if len(roster_players) >= MIN_TEAM_SIZE:
        return roster_players
    if players:
        if debug:
            print(f"[get_players_for_team] roster fallback also sparse ({len(roster_players)}), using index result")
        return players
    # The index had nothing for this team, so a sparse roster beats an empty list.
    if debug:
        print(f"[get_players_for_team] roster fallback sparse ({len(roster_players)}) and index had no players, using roster result")
    return roster_players


def get_trade_season(trade_date):
    """
    Map a calendar date to its season label 'YYYY-YY'.

    Dates in October through December belong to the season that starts in the
    same calendar year; all other months belong to the season that started in
    the previous calendar year.
    """
    starts_this_year = trade_date.month >= 10
    start_year = trade_date.year if starts_this_year else trade_date.year - 1
    end_suffix = str(start_year + 1)[-2:]
    return f"{start_year}-{end_suffix}"


def get_last_n_seasons(current_season, n=10):
    """
    Return the ``n`` season labels ending with ``current_season``, oldest first.

    ``current_season`` is a 'YYYY-YY' string; only the part before the first
    '-' is used to determine the final start year.
    """
    final_start_year = int(current_season.split('-')[0])
    first_start_year = final_start_year - n + 1
    labels = []
    for start_year in range(first_start_year, final_start_year + 1):
        labels.append(f"{start_year}-{str(start_year + 1)[-2:]}")
    return labels  # ascending order


def display_trade_impact_results(results, team_a_name, team_b_name):
    """
    Render every section of the trade-analysis ``results`` dict on the page.

    The salary-matching text is shown with ``st.text``; every other section is
    shown as a dataframe under its own heading, in a fixed order. Keys are
    looked up one section at a time, in display order.
    """
    st.write("### Trade Scenario Analysis:")
    st.text(results['trade_analysis'])

    summary_sections = (
        ("### Average Champion Percentiles:", 'average_champion_percentiles'),
        (f"### {team_a_name} Comparison Table:", 'team_a_comparison_table'),
        (f"### {team_b_name} Comparison Table:", 'team_b_comparison_table'),
    )
    for heading, key in summary_sections:
        st.write(heading)
        st.dataframe(results[key])

    # One table per stat, in the dict's own iteration order.
    per_stat_tables = results['comparison_tables']
    for stat in per_stat_tables:
        st.write(f"### Comparison Table for {stat}:")
        st.dataframe(per_stat_tables[stat])

    closing_sections = (
        ("### Overpaid/Underpaid Player Analysis:", 'salary_analysis'),
        ("### Player Compatibility Analysis:", 'compatibility_analysis'),
    )
    for heading, key in closing_sections:
        st.write(heading)
        st.dataframe(results[key])


def get_unique_game_dates(season, *, use_live: bool = True, debug: bool = False):
    """
    List the distinct game dates of a season as sorted ``date`` objects.

    The dates come from the league-wide player game logs returned by
    ``get_playergamelogs_df``. An empty list is returned when the logs are
    empty or have no ``GAME_DATE`` column.
    """
    import pandas as pd
    from trade_impact.utils.nba_api_utils import get_playergamelogs_df, normalize_season

    season_norm = normalize_season(season)
    logs = get_playergamelogs_df(season_norm, use_live=use_live, debug=debug)

    has_dates = "GAME_DATE" in logs.columns and not logs.empty
    if not has_dates:
        if debug:
            print(f"[get_unique_game_dates] No game dates found for {season_norm}")
        return []

    # Reduce timestamps to calendar dates before de-duplicating.
    game_days = pd.to_datetime(logs["GAME_DATE"]).dt.date
    return sorted(game_days.unique())



def trade_impact_simulator_app(selected_season="2023"):
    """
    Render the Streamlit page for the NBA trade impact simulator.

    The page lets the user pick two teams, players from each, the champion
    seasons to compare against, and either a full-season analysis or a specific
    trade date; pressing the button runs ``combined_trade_analysis`` and shows
    the results with ``display_trade_impact_results``.

    Args:
        selected_season: Year or season label; normalized with normalize_season.
    """
    from trade_impact.utils.nba_api_utils import normalize_season
    formatted_season = normalize_season(selected_season)
    season_start_year = int(str(formatted_season)[:4])

    st.title(f"NBA Trade Impact Analysis - {formatted_season}")

    st.sidebar.subheader("Data Source")
    use_live_api = st.sidebar.checkbox(
        "Use live NBA API", value=True,
        help="Uncheck to use cached data only. If live calls fail, cached data will be used when available.")

    # Debug option for troubleshooting
    debug_mode = st.sidebar.checkbox(
        "Enable Debug Mode", value=False,
        help="Show detailed debug information in the console/logs for troubleshooting team player loading issues.")

    st.markdown("""
    ## About This App

    This application allows you to analyze the impact of a trade between two NBA teams. It includes the following components:

    ### 1. Trade Scenario Analysis:
    - Ensure the trade satisfies NBA salary matching rules based on the provided player salaries.

    ### 2. Percentile Counts:
    - The count of top 1, 2, 3, 4, 5, 10, 25, 50 percentiles of the team's performance before and after the trade, compared to the last 'n' seasons selected in the champion season filter.

    ### 3. Overall Trade Impact:
    - **Pre-Trade Scenario**:
        * **Data Collection:** Filter season data to include only games before the trade date.
        * **Statistical Calculations:** Calculate total points and games played before the trade.
        * **Averaging:** Calculate average points per game before the trade.
        * **Percentile Ranking:** Rank teams based on pre-trade performance.

    - **Post-Trade Scenario**:
        * **Data Collection:** Filter season data for games on or after the trade date.
        * **Player Averages:** Calculate average points for traded players post-trade.
        * **Simulating Game Logs:** Simulate additional game logs using calculated player averages.
        * **Statistical Calculations:** Combine simulated and actual post-trade data for calculations.
        * **Averaging:** Calculate average points per game post-trade.
        * **Percentile Ranking:** Rank teams based on post-trade performance.

    - **No-Trade Scenario**:
        * **Data Collection:** Use full season data assuming no trades occurred.
        * **Statistical Calculations:** Calculate total points and games played for the entire season.
        * **Averaging:** Calculate average points per game for the full season.
        * **Percentile Ranking:** Rank teams based on full-season performance.

    - **Final Comparison**:
        * **Aggregation:** Organize pre-trade, post-trade, and no-trade results.
        * **Metrics Compared:** Total points, games played, average points per game, and percentile rankings.

    ### 4. Overpaid/Underpaid Player Analysis:
    - Analyze whether the players involved in the trade are overpaid or underpaid based on predicted salaries.

    ### 5. Player Compatibility Analysis:
    - Calculate the compatibility between the players being traded based on their shooting areas.
    """)

    # Load predictions (path is relative to the process working directory)
    predictions_df = pd.read_csv('data/processed/predictions_df.csv')

    # Team and player selectors
    all_teams = [team['full_name'] for team in teams.get_teams()]
    team_a_name = st.selectbox("Select Team A", all_teams, key="team_a")
    team_b_name = st.selectbox("Select Team B", [t for t in all_teams if t != team_a_name], key="team_b")

    # Safely populate player lists; a roster failure degrades to an empty selector
    try:
        players_a_options = get_players_for_team(team_a_name, formatted_season, use_live=use_live_api, debug=debug_mode)
    except Exception as e:
        players_a_options = []
        st.warning(f"Could not load roster for {team_a_name}: {e}")

    try:
        players_b_options = get_players_for_team(team_b_name, formatted_season, use_live=use_live_api, debug=debug_mode)
    except Exception as e:
        players_b_options = []
        st.warning(f"Could not load roster for {team_b_name}: {e}")

    players_from_team_a = st.multiselect(f"Select Players from {team_a_name}", players_a_options)
    players_from_team_b = st.multiselect(f"Select Players from {team_b_name}", players_b_options)

    # Champion seasons: the ten seasons ending with the selected one, oldest first
    last_10_seasons = [
        f"{yr}-{str(yr + 1)[-2:]}"
        for yr in range(season_start_year - 9, season_start_year + 1)
    ]
    champion_seasons = st.multiselect("Select Champion Seasons for Comparison", last_10_seasons, default=last_10_seasons)
    champion_seasons = sorted(champion_seasons)

    analysis_option = st.radio("Select Analysis Period", options=["Full Season", "Specific Date"])

    if analysis_option == "Specific Date":
        try:
            # Honour the sidebar debug toggle instead of always printing diagnostics.
            unique_dates = get_unique_game_dates(formatted_season, use_live=use_live_api, debug=debug_mode)
        except Exception as e:
            st.error(f"Could not load game dates for {formatted_season}: {e}")
            return
        if not unique_dates:
            # With no options the selectbox would yield None as the trade date.
            st.error(f"No game dates available for {formatted_season}.")
            return
        trade_date = st.selectbox("Select Trade Date", unique_dates)
    else:
        trade_date = date(season_start_year, 8, 15)  # offseason default

    st.write(f"### Analysis Criteria:\n- **Team A:** {team_a_name}\n- **Team B:** {team_b_name}\n- **Season:** {formatted_season}\n- **Champion Seasons:** {', '.join(champion_seasons)}")

    include_debug_columns = st.checkbox("Include Debug Columns (Games and Totals)", value=False)

    if st.button("Analyze Trade Impact"):
        if not players_from_team_a or not players_from_team_b:
            st.error("Please select at least one player from each team.")
            return

        with st.spinner('Analyzing trade impact...'):
            try:
                results = combined_trade_analysis(
                    team_a_name, team_b_name, players_from_team_a, players_from_team_b,
                    trade_date, champion_seasons, formatted_season,
                    ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA'],
                    predictions_df, debug=include_debug_columns
                )
                display_trade_impact_results(results, team_a_name, team_b_name)
            except Exception as e:
                # Top-level UI boundary: surface the failure on the page instead of crashing.
                st.error(f"An error occurred: {e}")


# Render the simulator with its default season when this module is run as the main script.
if __name__ == "__main__":
    trade_impact_simulator_app()


Overwriting ../src/trade_impact_section_st_app.py
