In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the three input tables (adjust the filenames to match your local copies)
full_data = pd.read_csv("nba_merged_data.csv")
playoffs_boxscore = pd.read_csv("nba_team_boxscores_playoffs.csv")
playoff_wins_df = pd.read_csv("nba_playoff_wins_2015_2025.csv")

# Attach playoff win totals to the playoff box scores by season and team,
# discard every row that contains a missing value, and renumber rows from 0.
real_playoffs_df = (
    pd.merge(
        playoffs_boxscore,
        playoff_wins_df,
        how="left",
        on=["SEASON_YEAR", "TEAM_NAME"],
    )
    .dropna()
    .reset_index(drop=True)
)


In [None]:
# Distinct (season, team) pairs among rows whose PLAYOFF_WINS equals 16
has_sixteen_wins = full_data['PLAYOFF_WINS'] == 16
unique_season_team = full_data.loc[has_sixteen_wins, ['SEASON_YEAR', 'TEAM_NAME']].drop_duplicates()
print(unique_season_team)

In [None]:
# Print the column index of full_data to see which fields are available
print(full_data.columns)

In [None]:
# Update the 'Type' column for playoff teams based on PLAYOFF_WINS
def categorize_playoff_type(wins):
    """Translate a playoff win total into a category label.

    Args:
        wins: Number of playoff wins for one team-season.

    Returns:
        "Non-Playoff" for 0 wins, "First-Round" for 1-3, "Second-Round" for
        4-7, "Conference-Finals" for 8-11, "Finals" for 12-15, "Champions"
        for exactly 16, and "Playoff" for any value matching none of those
        (e.g. NaN, negative, fractional values between brackets, or > 16).
    """
    if wins == 0:
        return "Non-Playoff"

    # Inclusive (low, high) win brackets, checked in ascending order.
    round_brackets = (
        (1, 3, "First-Round"),
        (4, 7, "Second-Round"),
        (8, 11, "Conference-Finals"),
        (12, 15, "Finals"),
    )
    for low, high, label in round_brackets:
        if low <= wins <= high:
            return label

    return "Champions" if wins == 16 else "Playoff"

# Full-stats frame: label each row via categorize_playoff_type, then discard
# every column whose name ends with 'RANK'.
playoffs = (
    full_data
    .assign(Type=full_data['PLAYOFF_WINS'].apply(categorize_playoff_type))
    .loc[:, lambda frame: ~frame.columns.str.endswith('RANK')]
)

# Playoff box-score frame: same two steps.
real_playoffs_df = (
    real_playoffs_df
    .assign(Type=real_playoffs_df['PLAYOFF_WINS'].apply(categorize_playoff_type))
    .loc[:, lambda frame: ~frame.columns.str.endswith('RANK')]
)

# Display the playoff box-score rows that were labelled 'Non-Playoff'
real_playoffs_df.loc[real_playoffs_df['Type'].eq('Non-Playoff')]


In [None]:
# Category order used on the x-axis when all six categories are shown
type_order = ['Non-Playoff', 'First-Round', 'Second-Round', 'Conference-Finals', 'Finals', 'Champions']

# Box/bar color for each category
colors = {
    'Non-Playoff': 'lightcoral',
    'First-Round': 'coral',
    'Second-Round': 'darkorange',
    'Conference-Finals': 'orange',
    'Finals': 'orangered',
    'Champions': 'red'
}

# Same palette and order without the 'Non-Playoff' category
colors2 = {category: shade for category, shade in colors.items() if category != 'Non-Playoff'}
type_order2 = type_order[1:]

# Names of the columns in `playoffs` whose dtype is float64 or int64.
# Columns stored with any other dtype are not selected by this filter.
numeric_columns = playoffs.select_dtypes(include=['float64', 'int64']).columns

In [None]:
# For every numeric column, draw two box plots that share a y-axis:
# left = `playoffs` with all six categories, right = `real_playoffs_df`
# with the five playoff-only categories.
for column in numeric_columns:
    fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    panel_specs = (
        (playoffs, colors, type_order, f'Box Plot of {column} (Regular Season)'),
        (real_playoffs_df, colors2, type_order2, f'Box Plot of {column} (playoffs)'),
    )
    for ax, (frame, palette, order, title) in zip(axes, panel_specs):
        sns.boxplot(data=frame, x='Type', y=column, palette=palette, order=order, ax=ax)
        ax.set_title(title, fontsize=16)
        ax.set_xlabel('Type', fontsize=12)

    # Only the left panel gets an explicitly sized y-label
    axes[0].set_ylabel(column, fontsize=12)

    plt.tight_layout()
    plt.show()

In [None]:
# One standalone box plot of `playoffs` per numeric column, grouped by 'Type'
for column in numeric_columns:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=playoffs, x='Type', y=column, palette=colors, order=type_order, ax=ax)
    ax.set_title(f'Box Plot of {column}', fontsize=16)
    ax.set_ylabel(column, fontsize=12)
    plt.show()

In [None]:
# Mean of every numeric column per 'Type', with rows arranged in type_order
averages = (
    playoffs
    .groupby('Type')[numeric_columns]
    .mean()
    .reindex(type_order)
)

# The bar colors and tick positions do not depend on the column being plotted;
# categories missing from `colors` fall back to gray.
bar_palette = [colors.get(category, 'gray') for category in type_order]
tick_positions = range(len(type_order))

# One bar chart per numeric column
for column in numeric_columns:
    fig, ax = plt.subplots(figsize=(10, 6))
    averages[column].plot(kind='bar', color=bar_palette, ax=ax)
    ax.set_title(f'Average {column} by Type', fontsize=16)
    ax.set_xlabel('Type', fontsize=12)
    ax.set_ylabel(f'Average {column}', fontsize=12)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(type_order, rotation=45)
    plt.show()

In [None]:
# De-duplicated rows of full_data whose PLAYOFF_WINS equals 16
sixteen_win_rows = full_data['PLAYOFF_WINS'].eq(16)
champions = full_data.loc[sixteen_win_rows].drop_duplicates()
champions

In [None]:
# Per-season mean of the champions' numeric stats.
# numeric_only=True is required because `champions` also carries text columns
# (e.g. TEAM_NAME, TEAM_ABBREVIATION); without it pandas >= 2.0 raises
# TypeError when it tries to average strings.
grouped_data = champions.groupby('SEASON_YEAR').mean(numeric_only=True)
grouped_data

In [None]:
# Map colors to each team based on TEAM_ABBREVIATION
team_colors = {'BOS': 'green', 'CLE': 'maroon', 'DEN': 'blue', 'GSW': 'gold', 'LAL': 'purple',
               'MIL': 'darkgreen', 'TOR': 'red'}

# One row per championship season (rows with PLAYOFF_WINS == 16).
# This is built BEFORE bar_colors reads it; the previous statement order raised
# NameError on a fresh kernel. Rows are aligned to grouped_data's index so that
# bar i, color i and label i always describe the same season, rather than
# relying on a [::-1] reversal of whatever row order the CSV happened to have.
team_abbrevs = (
    full_data.loc[full_data['PLAYOFF_WINS'] == 16, ['SEASON_YEAR', 'TEAM_ABBREVIATION']]
    .drop_duplicates(subset='SEASON_YEAR')
    .set_index('SEASON_YEAR')
    .reindex(grouped_data.index)
    .reset_index()
)

# astype(str) keeps the concatenation valid whether SEASON_YEAR is text or numeric
team_abbrevs['Label'] = (
    team_abbrevs['TEAM_ABBREVIATION'] + ' (' + team_abbrevs['SEASON_YEAR'].astype(str) + ')'
)

# Get the colors for the bars; teams missing from team_colors fall back to gray
# instead of producing a NaN color.
bar_colors = team_abbrevs['TEAM_ABBREVIATION'].map(team_colors).fillna('gray')

fig, ax = plt.subplots(figsize=(10, 6))
grouped_data['FG3_PCT'].plot(kind='bar', color=bar_colors.tolist(), ax=ax)
ax.set_title('Champion Average 3-Point Shooting %', fontsize=16)
ax.set_xlabel('Team (Year Won)', fontsize=12)
ax.set_ylabel('3P%', fontsize=12)
ax.set_xticks(range(len(team_abbrevs)))
ax.set_xticklabels(team_abbrevs['Label'], rotation=45)
plt.show()