In [None]:
import pandas as pd
import numpy as np

def process_champions_data(champions_data, start_season, end_season):
    # Assuming champions_data is already loaded with the correct structure
    
    # Calculate percentiles for each stat within each season
    percentile_cols = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA', 'eFG%']
    for col in percentile_cols:
        champions_data[f'{col}_percentile'] = champions_data.groupby('SEASON')[col].rank(pct=True)

    # Define percentile thresholds
    thresholds = [0.99, 0.98, 0.97, 0.96, 0.95, 0.90, 0.75, 0.50]
    
    # Create a new dataframe to store percentile counts
    percentile_counts = pd.DataFrame()

    for col in percentile_cols:
        for i, threshold in enumerate(thresholds):
            col_name = f'{col}_top_{int(100 - threshold * 100)}%'
            percentile_counts[col_name] = (champions_data[f'{col}_percentile'] >= threshold).astype(int)

    # Sum the counts for all seasons
    total_counts = percentile_counts.sum()

    # Calculate the number of seasons
    num_seasons = champions_data['SEASON'].nunique()

    # Calculate average counts per season
    avg_counts = total_counts / num_seasons

    return champions_data, percentile_counts, avg_counts

# Usage:
# processed_champions_data, champions_percentile_counts, champions_avg_counts = process_champions_data(champions_data, start_season, end_season)

def process_current_season_data(current_season_data):
    # Calculate percentiles for each stat
    percentile_cols = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA', 'eFG%']
    for col in percentile_cols:
        current_season_data[f'{col}_percentile'] = current_season_data[col].rank(pct=True)

    # Define percentile thresholds
    thresholds = [0.99, 0.98, 0.97, 0.96, 0.95, 0.90, 0.75, 0.50]
    
    # Create a new dataframe to store percentile counts
    percentile_counts = pd.DataFrame()

    for col in percentile_cols:
        for i, threshold in enumerate(thresholds):
            col_name = f'{col}_top_{int(100 - threshold * 100)}%'
            percentile_counts[col_name] = (current_season_data[f'{col}_percentile'] >= threshold).astype(int)

    return current_season_data, percentile_counts

# Usage:
# processed_current_season_data, current_season_percentile_counts = process_current_season_data(current_season_data)


def analyze_team_trade(before_trade_data, after_trade_data, league_avg, champion_avg):
    teams = before_trade_data['TEAM_NAME'].unique()
    
    results = []
    for team in teams:
        before = before_trade_data[before_trade_data['TEAM_NAME'] == team].mean()
        after = after_trade_data[after_trade_data['TEAM_NAME'] == team].mean()
        
        diff = after - before
        diff['TEAM_NAME'] = team
        diff['TYPE'] = 'Difference'
        
        before['TEAM_NAME'] = team
        before['TYPE'] = 'Before Trade'
        
        after['TEAM_NAME'] = team
        after['TYPE'] = 'After Trade'
        
        results.extend([before, after, diff])
    
    results_df = pd.DataFrame(results)
    
    # Add league average and champion average
    league_avg['TEAM_NAME'] = 'League Average'
    league_avg['TYPE'] = 'League Average'
    champion_avg['TEAM_NAME'] = 'Champion Average'
    champion_avg['TYPE'] = 'Champion Average'
    
    results_df = pd.concat([results_df, league_avg, champion_avg], ignore_index=True)
    
    return results_df

# Usage:
# trade_analysis_results = analyze_team_trade(before_trade_data, after_trade_data, league_avg, champion_avg)

def compare_percentile_counts(before_trade_counts, after_trade_counts, champ_avg_counts):
    teams = before_trade_counts['TEAM_NAME'].unique()
    
    results = []
    for team in teams:
        before = before_trade_counts[before_trade_counts['TEAM_NAME'] == team].iloc[0]
        after = after_trade_counts[after_trade_counts['TEAM_NAME'] == team].iloc[0]
        
        diff = after - before
        diff['TEAM_NAME'] = team
        diff['TYPE'] = 'Difference'
        
        before['TEAM_NAME'] = team
        before['TYPE'] = 'Before Trade'
        
        after['TEAM_NAME'] = team
        after['TYPE'] = 'After Trade'
        
        results.extend([before, after, diff])
    
    results_df = pd.DataFrame(results)
    
    # Add champion average counts
    champ_avg_counts['TEAM_NAME'] = 'Champion Average'
    champ_avg_counts['TYPE'] = 'Champion Average'
    
    results_df = pd.concat([results_df, champ_avg_counts], ignore_index=True)
    
    return results_df

# Usage:
# percentile_comparison_results = compare_percentile_counts(before_trade_counts, after_trade_counts, champ_avg_counts)


def analyze_historical_trade(trade_date, players_traded, team_from, team_to, seasons_data):
    # Determine the season based on the trade date
    trade_season = get_season_from_date(trade_date)
    
    # Filter data for the relevant season
    season_data = seasons_data[seasons_data['SEASON'] == trade_season]
    
    # Get data for the teams involved in the trade
    team_from_data = season_data[season_data['TEAM_NAME'] == team_from]
    team_to_data = season_data[season_data['TEAM_NAME'] == team_to]
    
    # Simulate the trade
    before_trade, after_trade = simulate_trade_with_players(team_from_data, team_to_data, players_traded)
    
    # Process the data
    processed_before_trade, before_trade_counts = process_current_season_data(before_trade)
    processed_after_trade, after_trade_counts = process_current_season_data(after_trade)
    
    # Get league average for the season
    league_avg = season_data.groupby('TEAM_NAME').mean().mean()
    
    # Get champion average (assuming we have this data)
    champion_avg = get_champion_average(trade_season)
    
    # Analyze the trade
    trade_analysis = analyze_team_trade(processed_before_trade, processed_after_trade, league_avg, champion_avg)
    
    # Compare percentile counts
    percentile_comparison = compare_percentile_counts(before_trade_counts, after_trade_counts, get_champion_percentile_counts(trade_season))
    
    return trade_analysis, percentile_comparison

# Usage:
# trade_analysis, percentile_comparison = analyze_historical_trade('2022-02-10', ['Player1', 'Player2'], 'Team A', 'Team B', all_seasons_data)





In [3]:
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import leaguegamefinder, playergamelogs
import time

RELEVANT_STATS = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA']
PERCENTILE_THRESHOLDS = [99, 98, 97, 96, 95, 90, 75, 50]

def get_champion(season):
    games = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable='Playoffs').get_data_frames()[0]
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
    last_game = games.sort_values('GAME_DATE').iloc[-2:]
    winner = last_game[last_game['WL'] == 'W'].iloc[0]
    return winner['TEAM_ID'], winner['TEAM_NAME']

def get_champions(start_year, end_year):
    champions = {}
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year+1)[-2:]}"
        champ_id, champ_name = get_champion(season)
        champions[season] = {'ChampionTeamID': champ_id, 'ChampionTeamName': champ_name}
        time.sleep(1)  # To avoid overwhelming the API
    return champions

def get_season_from_date(date):
    year = int(date[:4])
    month = int(date[5:7])
    if month >= 10:
        return f"{year}-{str(year+1)[2:]}"
    else:
        return f"{year-1}-{str(year)[2:]}"

def analyze_leaguegamefinder_endpoint(start_season, end_season):
    all_seasons_data = []
    
    for season in range(int(start_season[:4]), int(end_season[:4]) + 1):
        season_str = f"{season}-{str(season+1)[2:]}"
        print(f"Fetching data for season {season_str}")
        
        games = leaguegamefinder.LeagueGameFinder(
            season_nullable=season_str,
            season_type_nullable='Regular Season'
        ).get_data_frames()[0]
        
        games['SEASON'] = games['GAME_DATE'].apply(get_season_from_date)
        all_seasons_data.append(games)
        
        time.sleep(1)  # To avoid overwhelming the API
    
    return pd.concat(all_seasons_data, ignore_index=True)

def calculate_per_game_stats(games_df):
    per_game_stats = games_df.groupby(['SEASON', 'TEAM_ID', 'TEAM_NAME'])[RELEVANT_STATS].mean().reset_index()
    
    # Calculate eFG%
    per_game_stats['eFG%'] = (per_game_stats['FGM'] + 0.5 * per_game_stats['FG3M']) / per_game_stats['FGA']
    
    return per_game_stats

def calculate_percentiles(stats_df):
    percentile_cols = RELEVANT_STATS + ['eFG%']
    
    for col in percentile_cols:
        stats_df[f'{col}_percentile'] = stats_df.groupby('SEASON')[col].rank(pct=True)
    
    return stats_df

def get_current_season_stats(all_seasons_data, current_season):
    current_season_data = all_seasons_data[all_seasons_data['SEASON'] == current_season]
    per_game_stats = calculate_per_game_stats(current_season_data)
    percentile_stats = calculate_percentiles(per_game_stats)
    
    # Calculate league average
    league_avg = per_game_stats[RELEVANT_STATS + ['eFG%']].mean()
    league_avg['TEAM_NAME'] = 'League Average'
    league_avg['SEASON'] = current_season
    league_avg['TEAM_ID'] = 'AVG'
    league_avg = pd.DataFrame(league_avg).transpose()
    
    # Combine team stats with league average
    combined_stats = pd.concat([percentile_stats, league_avg], ignore_index=True)
    return combined_stats

def get_champions_stats(all_seasons_data, start_season, end_season):
    champions = {}
    for year in range(int(start_season[:4]), int(end_season[:4]) + 1):
        season = f"{year}-{str(year+1)[-2:]}"
        champ_id, champ_name = get_champion(season)
        champions[season] = {'ChampionTeamID': champ_id, 'ChampionTeamName': champ_name}
    
    champions_data = all_seasons_data[all_seasons_data.apply(lambda row: row['TEAM_ID'] == champions.get(row['SEASON'], {}).get('ChampionTeamID'), axis=1)]
    champions_stats = calculate_per_game_stats(champions_data)
    return calculate_percentiles(champions_stats)

def compare_stats(current_stats, champions_stats, league_avg):
    # Compare current stats to champions average and league average
    champs_avg = champions_stats[RELEVANT_STATS + ['eFG%']].mean()
    
    comparison = current_stats.copy()
    for stat in RELEVANT_STATS + ['eFG%']:
        comparison[f'{stat}_vs_champs'] = comparison[stat] - champs_avg[stat]
        comparison[f'{stat}_vs_league'] = comparison[stat] - league_avg[stat]
    
    return comparison

def get_team_data(all_seasons_data, team_names, current_season):
    team_data = all_seasons_data[(all_seasons_data['SEASON'] == current_season) & (all_seasons_data['TEAM_NAME'].isin(team_names))]
    return calculate_per_game_stats(team_data)

def simulate_trade(all_seasons_data, team_from, team_to, trade_impact, current_season):
    before_trade = get_team_data(all_seasons_data, [team_from, team_to], current_season)
    
    # Simulate the trade by adjusting team stats
    after_trade = before_trade.copy()
    numeric_columns = before_trade.select_dtypes(include=[np.number]).columns
    
    for stat in numeric_columns:
        if stat in trade_impact:
            after_trade.loc[after_trade['TEAM_NAME'] == team_from, stat] -= trade_impact[stat]
            after_trade.loc[after_trade['TEAM_NAME'] == team_to, stat] += trade_impact[stat]
    
    # Recalculate eFG% for both teams
    for team in [team_from, team_to]:
        team_data = after_trade[after_trade['TEAM_NAME'] == team]
        after_trade.loc[after_trade['TEAM_NAME'] == team, 'eFG%'] = (
            (team_data['FGM'] + 0.5 * team_data['FG3M']) / team_data['FGA']
        ).values[0]
    
    return before_trade, after_trade

def get_player_game_logs(team_id, season):
    player_logs = playergamelogs.PlayerGameLogs(team_id_nullable=team_id, season_nullable=season).get_data_frames()[0]
    return player_logs

def process_player_data(player_logs):
    player_stats = player_logs.groupby(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_NAME'])[RELEVANT_STATS].mean().reset_index()
    player_stats['eFG%'] = (player_stats['FGM'] + 0.5 * player_stats['FG3M']) / player_stats['FGA']
    return player_stats

def get_champions_player_data(champions, start_season, end_season):
    all_champion_players = []
    for season in range(int(start_season[:4]), int(end_season[:4]) + 1):
        season_str = f"{season}-{str(season+1)[2:]}"
        champ_id = champions[season_str]['ChampionTeamID']
        player_logs = get_player_game_logs(champ_id, season_str)
        player_stats = process_player_data(player_logs)
        player_stats['SEASON'] = season_str
        all_champion_players.append(player_stats)
        time.sleep(1)  # To avoid overwhelming the API
    return pd.concat(all_champion_players, ignore_index=True)

def get_current_season_player_data(all_seasons_data, current_season):
    current_teams = all_seasons_data[all_seasons_data['SEASON'] == current_season]['TEAM_ID'].unique()
    all_current_players = []
    for team_id in current_teams:
        player_logs = get_player_game_logs(team_id, current_season)
        player_stats = process_player_data(player_logs)
        player_stats['SEASON'] = current_season
        all_current_players.append(player_stats)
        time.sleep(1)  # To avoid overwhelming the API
    return pd.concat(all_current_players, ignore_index=True)

def simulate_trade_with_players(team_from_data, team_to_data, traded_players):
    before_trade = pd.concat([team_from_data, team_to_data])
    
    # Move traded players between teams
    traded_from = team_from_data[team_from_data['PLAYER_NAME'].isin(traded_players)].copy()
    traded_to = team_to_data[team_to_data['PLAYER_NAME'].isin(traded_players)].copy()
    
    team_from_after = team_from_data[~team_from_data['PLAYER_NAME'].isin(traded_players)]
    team_to_after = pd.concat([team_to_data[~team_to_data['PLAYER_NAME'].isin(traded_players)], traded_from])
    
    after_trade = pd.concat([team_from_after, team_to_after])
    
    return before_trade, after_trade

def analyze_trade_impact(before_trade, after_trade):
    team_totals_before = before_trade.groupby('TEAM_NAME')[RELEVANT_STATS + ['eFG%']].sum().reset_index()
    team_totals_after = after_trade.groupby('TEAM_NAME')[RELEVANT_STATS + ['eFG%']].sum().reset_index()
    
    trade_impact = team_totals_after.set_index('TEAM_NAME').subtract(team_totals_before.set_index('TEAM_NAME')).reset_index()
    return trade_impact

def main():
    start_season = "2022-23"
    end_season = "2023-24"
    current_season = end_season
    all_seasons_data = analyze_leaguegamefinder_endpoint(start_season, end_season)
    
    # 1. Get champions for the past 10 seasons
    champions = get_champions(int(start_season[:4]), int(end_season[:4]))
    
    # 2. Get player-level data for champions
    champions_player_data = get_champions_player_data(champions, start_season, end_season)
    print("Champions Player Data (Past 10 Seasons):")
    print(champions_player_data)
    
    # 3. Get current season player-level data
    current_season_player_data = get_current_season_player_data(all_seasons_data, current_season)
    print("\nCurrent Season Player Data:")
    print(current_season_player_data)
    
    # 4. Load predictions dataframe
    predictions_df = pd.read_csv('../data/processed/predictions_df.csv')
    print("\nPredictions DataFrame (first few rows):")
    print(predictions_df.head())
    
    # 5. Simulate trade
    team_from = "Los Angeles Lakers"
    team_to = "Boston Celtics"
    traded_players = ["Anthony Davis", "Jayson Tatum"]  # Example players
    
    team_from_data = current_season_player_data[current_season_player_data['TEAM_NAME'] == team_from]
    team_to_data = current_season_player_data[current_season_player_data['TEAM_NAME'] == team_to]
    
    print("\nTeam Data Before Trade:")
    print(pd.concat([team_from_data, team_to_data]))
    
    before_trade, after_trade = simulate_trade_with_players(team_from_data, team_to_data, traded_players)
    
    print("\nTeam Data After Trade:")
    print(after_trade)
    
    # 6. Analyze trade impact
    trade_impact = analyze_trade_impact(before_trade, after_trade)
    print("\nTrade Impact (Difference in Team Stats):")
    print(trade_impact)
    
    # 7. Compare traded players to champions
    traded_player_stats = before_trade[before_trade['PLAYER_NAME'].isin(traded_players)]
    champion_avg = champions_player_data.groupby('SEASON')[RELEVANT_STATS + ['eFG%']].mean().mean()
    
    print("\nTraded Players vs. Champions Average:")
    for _, player in traded_player_stats.iterrows():
        print(f"\n{player['PLAYER_NAME']}:")
        for stat in RELEVANT_STATS + ['eFG%']:
            diff = player[stat] - champion_avg[stat]
            print(f"{stat}: {player[stat]:.2f} (Diff from Champs Avg: {diff:.2f})")
    
    # 8. Analyze salary based on predictions
    traded_players_salary = predictions_df[predictions_df['Player'].isin(traded_players)]
    print("\nSalary Analysis for Traded Players:")
    print(traded_players_salary[['Player', 'Salary', 'Predicted_Salary']])

if __name__ == "__main__":
    main()

Fetching data for season 2022-23
Fetching data for season 2023-24
Champions Player Data (Past 10 Seasons):
    PLAYER_ID               PLAYER_NAME     TEAM_ID       TEAM_NAME  \
0      201145                Jeff Green  1610612743  Denver Nuggets   
1      201599            DeAndre Jordan  1610612743  Denver Nuggets   
2      202397                 Ish Smith  1610612743  Denver Nuggets   
3      202704            Reggie Jackson  1610612743  Denver Nuggets   
4      203484  Kentavious Caldwell-Pope  1610612743  Denver Nuggets   
5      203932              Aaron Gordon  1610612743  Denver Nuggets   
6      203999              Nikola Jokic  1610612743  Denver Nuggets   
7     1627750              Jamal Murray  1610612743  Denver Nuggets   
8     1628418             Thomas Bryant  1610612743  Denver Nuggets   
9     1628427             Vlatko Cancar  1610612743  Denver Nuggets   
10    1628432                Davon Reed  1610612743  Denver Nuggets   
11    1628971               Bruce Brown  

  return pd.concat(all_current_players, ignore_index=True)


In [11]:
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import leaguegamefinder, playergamelogs
import time

# Constants
RELEVANT_STATS = ['PTS', 'AST', 'TOV', 'STL', 'BLK', 'OREB', 'DREB', 'FGM', 'FG3M', 'FGA']
#RELEVANT_STATS = ['PTS']
PERCENTILE_THRESHOLDS = [99, 98, 97, 96, 95, 90, 75, 50]

# Helper Functions
def get_champion(season):
    games = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable='Playoffs').get_data_frames()[0]
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
    last_game = games.sort_values('GAME_DATE').iloc[-2:]
    winner = last_game[last_game['WL'] == 'W'].iloc[0]
    return winner['TEAM_ID'], winner['TEAM_NAME']

def get_champions(start_year, end_year):
    champions = {}
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year+1)[-2:]}"
        champ_id, champ_name = get_champion(season)
        champions[season] = {'ChampionTeamID': champ_id, 'ChampionTeamName': champ_name}
        time.sleep(1)  # To avoid overwhelming the API
    return champions

def get_season_from_date(date):
    year = int(date[:4])
    month = int(date[5:7])
    if month >= 10:
        return f"{year}-{str(year+1)[2:]}"
    else:
        return f"{year-1}-{str(year)[2:]}"

def analyze_leaguegamefinder_endpoint(start_season, end_season):
    all_seasons_data = []
    for season in range(int(start_season[:4]), int(end_season[:4]) + 1):
        season_str = f"{season}-{str(season+1)[2:]}"
        print(f"Fetching data for season {season_str}")
        games = leaguegamefinder.LeagueGameFinder(
            season_nullable=season_str,
            season_type_nullable='Regular Season'
        ).get_data_frames()[0]
        games['SEASON'] = games['GAME_DATE'].apply(get_season_from_date)
        all_seasons_data.append(games)
        time.sleep(1)  # To avoid overwhelming the API
    return pd.concat(all_seasons_data, ignore_index=True)

def calculate_percentiles(stats_df):
    print("Validating input data for calculate_percentiles:")
    print(f"head: {stats_df.head()}")
    print(f"Columns: {stats_df.columns}")
    
    for col in RELEVANT_STATS:
        stats_df[f'{col}_percentile'] = stats_df.groupby('SEASON')[col].rank(pct=True)
    
    print("Validation after percentile calculation:")
    print(f"head(): {stats_df.head()}")
    print(f"Columns: {stats_df.columns}")
    
    return stats_df

def process_champions_data(champions_data, all_seasons_data, start_season, end_season):
    print("Validating input data for process_champions_data:")
    print(f"Champions data head:\n{champions_data.head()}")
    print(f"All seasons data head:\n{all_seasons_data.head()}")
    
    all_seasons_percentiles = calculate_percentiles(calculate_per_game_stats(all_seasons_data))
    
    champions_with_percentiles = pd.merge(champions_data, 
                                          all_seasons_percentiles[['SEASON', 'TEAM_ID'] + [f'{col}_percentile' for col in RELEVANT_STATS]], 
                                          on=['SEASON', 'TEAM_ID'])
    
    print("Validation after merging champions data:")
    print(f"Shape: {champions_with_percentiles.shape}")
    print(f"Columns: {champions_with_percentiles.columns}")
    
    percentile_counts = pd.DataFrame()
    for season in champions_with_percentiles['SEASON'].unique():
        season_data = champions_with_percentiles[champions_with_percentiles['SEASON'] == season]
        season_counts = pd.DataFrame({'SEASON': season}, index=[0])
        
        for col in RELEVANT_STATS:
            for threshold in PERCENTILE_THRESHOLDS:
                col_name = f'{col}_top_{threshold}%'
                season_counts[col_name] = (season_data[f'{col}_percentile'] >= (threshold / 100)).sum()
        
        percentile_counts = pd.concat([percentile_counts, season_counts], ignore_index=True)
    
    print("Validation of percentile counts:")
    print(f"Percentile counts:\n{percentile_counts}")
    
    avg_counts = percentile_counts.drop('SEASON', axis=1).mean()
    
    return champions_with_percentiles, percentile_counts, avg_counts



def process_current_season_data(current_season_player_data):
    current_season_percentiles = calculate_percentiles(current_season_player_data)
    
    percentile_counts = pd.DataFrame()
    for col in RELEVANT_STATS:
        for threshold in PERCENTILE_THRESHOLDS:
            col_name = f'{col}_top_{int(100 - threshold * 100)}%'
            percentile_counts[col_name] = (current_season_percentiles[f'{col}_percentile'] >= threshold).sum()
    
    return current_season_percentiles, percentile_counts

def calculate_per_game_stats(games_df):
    numeric_cols = games_df.select_dtypes(include=[np.number]).columns
    relevant_numeric_stats = [col for col in RELEVANT_STATS if col in numeric_cols]
    
    per_game_stats = games_df.groupby(['SEASON', 'TEAM_ID', 'TEAM_NAME'])[relevant_numeric_stats].mean().reset_index()
    if 'FGM' in per_game_stats.columns and 'FG3M' in per_game_stats.columns and 'FGA' in per_game_stats.columns:
        per_game_stats['eFG%'] = (per_game_stats['FGM'] + 0.5 * per_game_stats['FG3M']) / per_game_stats['FGA']
    return per_game_stats

def analyze_team_trade(before_trade_data, after_trade_data, league_avg, champion_avg):
    print("Validating input data for analyze_team_trade:")
    print(f"Before trade data shape: {before_trade_data.shape}")
    print(f"After trade data shape: {after_trade_data.shape}")
    print(f"League avg type: {type(league_avg)}")
    print(f"Champion avg type: {type(champion_avg)}")
    
    teams = before_trade_data['TEAM_NAME'].unique()
    
    results = []
    for team in teams:
        before = before_trade_data[before_trade_data['TEAM_NAME'] == team]
        after = after_trade_data[after_trade_data['TEAM_NAME'] == team]
        
        # Handle both DataFrame and Series cases
        if isinstance(before, pd.DataFrame):
            before = before.select_dtypes(include=[np.number]).mean()
        if isinstance(after, pd.DataFrame):
            after = after.select_dtypes(include=[np.number]).mean()
        
        diff = after.subtract(before, fill_value=0)
        diff['TEAM_NAME'] = team
        diff['TYPE'] = 'Difference'
        
        before['TEAM_NAME'] = team
        before['TYPE'] = 'Before Trade'
        
        after['TEAM_NAME'] = team
        after['TYPE'] = 'After Trade'
        
        results.extend([before, after, diff])
    
    results_df = pd.DataFrame(results)
    
    # Handle both DataFrame and Series cases for league_avg and champion_avg
    if isinstance(league_avg, pd.DataFrame):
        league_avg = league_avg.select_dtypes(include=[np.number]).mean()
    league_avg['TEAM_NAME'] = 'League Average'
    league_avg['TYPE'] = 'League Average'
    
    if isinstance(champion_avg, pd.DataFrame):
        champion_avg = champion_avg.select_dtypes(include=[np.number]).mean()
    champion_avg['TEAM_NAME'] = 'Champion Average'
    champion_avg['TYPE'] = 'Champion Average'
    
    results_df = pd.concat([results_df, pd.DataFrame([league_avg]), pd.DataFrame([champion_avg])], ignore_index=True)
    
    print("Validation of analyze_team_trade output:")
    print(f"Shape: {results_df.shape}")
    print(f"Columns: {results_df.columns}")
    
    return results_df

def compare_percentile_counts(before_trade_counts, after_trade_counts, champ_avg_counts):
    teams = before_trade_counts['TEAM_NAME'].unique()
    
    results = []
    for team in teams:
        before = before_trade_counts[before_trade_counts['TEAM_NAME'] == team].iloc[0]
        after = after_trade_counts[after_trade_counts['TEAM_NAME'] == team].iloc[0]
        
        diff = after - before
        diff['TEAM_NAME'] = team
        diff['TYPE'] = 'Difference'
        
        before['TEAM_NAME'] = team
        before['TYPE'] = 'Before Trade'
        
        after['TEAM_NAME'] = team
        after['TYPE'] = 'After Trade'
        
        results.extend([before, after, diff])
    
    results_df = pd.DataFrame(results)
    
    champ_avg_counts['TEAM_NAME'] = 'Champion Average'
    champ_avg_counts['TYPE'] = 'Champion Average'
    
    results_df = pd.concat([results_df, pd.DataFrame([champ_avg_counts])], ignore_index=True)
    
    return results_df

def analyze_historical_trade(trade_date, players_traded, team_from, team_to, all_seasons_data, champions_data):
    trade_season = get_season_from_date(trade_date)
    
    champions_with_percentiles, champions_percentile_counts, champions_avg_counts = process_champions_data(champions_data, all_seasons_data, trade_season, trade_season)
    
    season_data = all_seasons_data[all_seasons_data['SEASON'] == trade_season]
    current_season_percentiles, current_season_percentile_counts = process_current_season_data(calculate_per_game_stats(season_data))
    
    team_from_data = current_season_percentiles[current_season_percentiles['TEAM_NAME'] == team_from]
    team_to_data = current_season_percentiles[current_season_percentiles['TEAM_NAME'] == team_to]
    
    # Note: We can't simulate player trades with team-level data, so we'll skip this step
    before_trade = pd.concat([team_from_data, team_to_data])
    after_trade = before_trade.copy()  # In this simplified version, we're not modifying the data
    
    league_avg = season_data.groupby('TEAM_NAME')[RELEVANT_STATS].mean().mean()
    champion_avg = champions_with_percentiles[champions_with_percentiles['SEASON'] == trade_season][RELEVANT_STATS].mean()
    
    trade_analysis = analyze_team_trade(before_trade, after_trade, league_avg, champion_avg)
    
    before_trade_counts = calculate_percentile_counts(before_trade)
    after_trade_counts = calculate_percentile_counts(after_trade)
    percentile_comparison = compare_percentile_counts(before_trade_counts, after_trade_counts, champions_avg_counts)
    
    return trade_analysis, percentile_comparison

def calculate_percentile_counts(data):
    counts = pd.DataFrame()
    for team in data['TEAM_NAME'].unique():
        team_data = data[data['TEAM_NAME'] == team]
        team_counts = pd.DataFrame({'TEAM_NAME': team}, index=[0])
        for col in RELEVANT_STATS:
            for threshold in PERCENTILE_THRESHOLDS:
                col_name = f'{col}_top_{int(100 - threshold * 100)}%'
                team_counts[col_name] = (team_data[f'{col}_percentile'] >= threshold).sum()
        counts = pd.concat([counts, team_counts], ignore_index=True)
    return counts

def main():
    start_season = "2022-23"  # 10 seasons ago
    end_season = "2023-24"
    current_season = end_season
    all_seasons_data = analyze_leaguegamefinder_endpoint(start_season, end_season)
    
    champions = get_champions(int(start_season[:4]), int(end_season[:4]))
    champions_player_data = get_champions_player_data(champions, start_season, end_season)
    
    champions_with_percentiles, champions_percentile_counts, champions_avg_counts = process_champions_data(champions_player_data, all_seasons_data, start_season, end_season)
    print("Champions Data Analysis:")
    print(champions_avg_counts)
    
    current_season_player_data = get_current_season_player_data(all_seasons_data, current_season)
    current_season_percentiles, current_season_percentile_counts = process_current_season_data(current_season_player_data)
    print("\nCurrent Season Data Analysis:")
    print(current_season_percentile_counts)
    
    # Example trade analysis
    trade_date = "2023-11-15"  # Example date
    team_from = "Los Angeles Lakers"
    team_to = "Boston Celtics"
    traded_players = ["Anthony Davis", "Jayson Tatum"]  # Example players
    
    trade_analysis, percentile_comparison = analyze_historical_trade(trade_date, traded_players, team_from, team_to, all_seasons_data, champions_player_data)
    
    print("\nTrade Analysis:")
    print(trade_analysis)
    
    print("\nPercentile Comparison:")
    print(percentile_comparison)

if __name__ == "__main__":
    main()


Fetching data for season 2022-23
Fetching data for season 2023-24
Validating input data for process_champions_data:
Champions data head:
   PLAYER_ID               PLAYER_NAME     TEAM_ID       TEAM_NAME        PTS  \
0     201145                Jeff Green  1610612743  Denver Nuggets   7.803571   
1     201599            DeAndre Jordan  1610612743  Denver Nuggets   5.102564   
2     202397                 Ish Smith  1610612743  Denver Nuggets   2.511628   
3     202704            Reggie Jackson  1610612743  Denver Nuggets   7.937500   
4     203484  Kentavious Caldwell-Pope  1610612743  Denver Nuggets  10.815789   

        AST       TOV       STL       BLK      OREB      DREB       FGM  \
0  1.232143  0.821429  0.321429  0.303571  0.678571  1.892857  2.857143   
1  0.871795  1.230769  0.307692  0.589744  1.333333  3.846154  2.256410   
2  2.325581  1.023256  0.186047  0.162791  0.116279  1.139535  1.209302   
3  3.125000  1.187500  0.562500  0.062500  0.187500  1.562500  3.062500   
4

  return pd.concat(all_current_players, ignore_index=True)


Validating input data for calculate_percentiles:
head:   PLAYER_ID       PLAYER_NAME     TEAM_ID        TEAM_NAME        PTS  \
0    201568  Danilo Gallinari  1610612749  Milwaukee Bucks   2.764706   
1    201572       Brook Lopez  1610612749  Milwaukee Bucks  12.531646   
2    201577       Robin Lopez  1610612749  Milwaukee Bucks   1.125000   
3    201976  Patrick Beverley  1610612749  Milwaukee Bucks   6.038462   
4    203081    Damian Lillard  1610612749  Milwaukee Bucks  24.315068   

        AST       TOV       STL       BLK      OREB      DREB       FGM  \
0  0.705882  0.588235  0.352941  0.058824  0.352941  0.705882  0.823529   
1  1.607595  1.000000  0.544304  2.392405  1.481013  3.696203  4.632911   
2  0.250000  0.125000  0.062500  0.187500  0.187500  0.125000  0.437500   
3  2.615385  0.615385  0.730769  0.500000  0.730769  2.884615  2.076923   
4  6.958904  2.643836  0.986301  0.246575  0.493151  3.890411  7.410959   

       FG3M        FGA      eFG%   SEASON  
0  0.176471

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [18]:
import pandas as pd
import numpy as np

def calculate_per_game_stats(games_df):
    numeric_cols = games_df.select_dtypes(include=[np.number]).columns
    relevant_numeric_stats = [col for col in RELEVANT_STATS if col in numeric_cols]
    
    group_cols = ['SEASON', 'TEAM_ID', 'TEAM_NAME', 'PLAYER_ID', 'PLAYER_NAME']
    per_game_stats = games_df.groupby(group_cols)[relevant_numeric_stats].mean().reset_index()
    
    if 'FGM' in per_game_stats.columns and 'FG3M' in per_game_stats.columns and 'FGA' in per_game_stats.columns:
        per_game_stats['eFG%'] = (per_game_stats['FGM'] + 0.5 * per_game_stats['FG3M']) / per_game_stats['FGA']
    
    return per_game_stats

def calculate_percentiles(stats_df):
    for col in RELEVANT_STATS:
        stats_df[f'{col}_percentile'] = stats_df.groupby('SEASON')[col].rank(pct=True)
    return stats_df

def process_champion_percentiles(champions_data, all_seasons_data, season):
    print(f"Processing champion data for season {season}")
    
    # Filter data for the specific season
    season_data = all_seasons_data[all_seasons_data['SEASON'] == season]
    champion_team = champions_data[champions_data['SEASON'] == season]['TEAM_NAME'].iloc[0]
    champion_data = champions_data[(champions_data['SEASON'] == season) & (champions_data['TEAM_NAME'] == champion_team)]
    
    print(f"Champion team for {season}: {champion_team}")
    
    # Calculate league-wide percentiles
    all_seasons_percentiles = calculate_percentiles(calculate_per_game_stats(season_data))
    
    # Merge champion data with percentiles
    champion_with_percentiles = pd.merge(champion_data, 
                                         all_seasons_percentiles[['SEASON', 'TEAM_ID', 'PLAYER_ID'] + [f'{col}_percentile' for col in RELEVANT_STATS]], 
                                         on=['SEASON', 'TEAM_ID', 'PLAYER_ID'])
    
    print("Champion players with percentiles:")
    print(champion_with_percentiles[['PLAYER_NAME'] + [f'{col}_percentile' for col in RELEVANT_STATS]])
    
    # Count players in each percentile range
    percentile_counts = pd.DataFrame({'SEASON': season}, index=[0])
    for col in RELEVANT_STATS:
        for threshold in PERCENTILE_THRESHOLDS:
            col_name = f'{col}_top_{threshold}%'
            percentile_counts[col_name] = (champion_with_percentiles[f'{col}_percentile'] >= (threshold / 100)).sum()
    
    print("\nPercentile counts for champion team:")
    print(percentile_counts)
    
    return champion_with_percentiles, percentile_counts

def compare_percentile_counts(before_trade_counts, after_trade_counts, champ_avg_counts):
    teams = before_trade_counts['TEAM_NAME'].unique()
    
    results = []
    for team in teams:
        before = before_trade_counts[before_trade_counts['TEAM_NAME'] == team].iloc[0]
        after = after_trade_counts[after_trade_counts['TEAM_NAME'] == team].iloc[0]
        
        # Ensure we're only working with numeric columns
        numeric_cols = before.select_dtypes(include=[np.number]).index
        
        diff = after[numeric_cols].subtract(before[numeric_cols], fill_value=0)
        diff['TEAM_NAME'] = team
        diff['TYPE'] = 'Difference'
        
        before['TYPE'] = 'Before Trade'
        after['TYPE'] = 'After Trade'
        
        results.extend([before, after, diff])
    
    results_df = pd.DataFrame(results)
    
    # Ensure champ_avg_counts is numeric
    champ_avg_counts = champ_avg_counts.select_dtypes(include=[np.number])
    champ_avg_counts['TEAM_NAME'] = 'Champion Average'
    champ_avg_counts['TYPE'] = 'Champion Average'
    
    results_df = pd.concat([results_df, pd.DataFrame([champ_avg_counts])], ignore_index=True)
    
    return results_df

def main():
    start_season = "2023-24"
    end_season = "2023-24"
    all_seasons_data = analyze_leaguegamefinder_endpoint(start_season, end_season)
    
    champions = get_champions(int(start_season[:4]), int(end_season[:4]))
    champions_player_data = get_champions_player_data(champions, start_season, end_season)
    
    champion_with_percentiles, percentile_counts = process_champion_percentiles(champions_player_data, all_seasons_data, end_season)
    
    print("\nChampion Team Analysis for", end_season)
    print(percentile_counts)

    # You can add more analysis here, such as comparing with other teams or analyzing trades

if __name__ == "__main__":
    main()

Fetching data for season 2023-24
Processing champion data for season 2023-24
Champion team for 2023-24: Boston Celtics


KeyError: 'PLAYER_ID'