In [3]:
import pandas as pd

# Load the three raw inputs: per-team season stats, team payroll and team revenue.
advanced_stats = pd.read_csv("Advanced_Stats_Cleaned.csv")
payroll = pd.read_csv("TeamPayroll2000-2025.csv")
revenue = pd.read_csv("TeamRevenue2000-2025.csv")

# Alias mapping for team names: short / colloquial labels -> full team name.
# Series.replace() leaves any label that is not a key here unchanged.
alias_mapping = {
    "NY Knicks": "New York Knicks",
    "LA Lakers": "Los Angeles Lakers",
    "CHI Bulls": "Chicago Bulls",
    "Portland Blazers": "Portland Trail Blazers",
    "Philly 76ers": "Philadelphia 76ers",
    "Brooklyn": "Brooklyn Nets",
    "Miami": "Miami Heat",
    "Cleveland": "Cleveland Cavaliers",
    "Golden State": "Golden State Warriors",
    "Boston": "Boston Celtics",
    "OKC Thunder": "Oklahoma City Thunder",
    "LA Clippers": "Los Angeles Clippers",
    "Phoenix": "Phoenix Suns"
}

# Prepare Advanced Stats
# The first four characters of 'Season' (e.g. "2000-2001") are the start year,
# which is used as the season half of the merge key.
advanced_stats['SeasonStartYear'] = advanced_stats['Season'].str[:4].astype(int)
advanced_stats['Team_standardized'] = advanced_stats['Team'].replace(alias_mapping)

# Prepare Payroll
# The patterns are raw strings so that `\$` reaches the regex engine verbatim;
# in a normal string literal `\$` is an invalid escape sequence and Python
# emits a DeprecationWarning / SyntaxWarning for it.
payroll['team_standardized'] = payroll['team'].replace(alias_mapping)
payroll['payroll'] = payroll['payroll'].replace(r'[\$,]', '', regex=True).astype(float)
payroll['inflationAdjPayroll'] = payroll['inflationAdjPayroll'].replace(r'[\$,]', '', regex=True).astype(float)

# Prepare Revenue
# Strip '$', ',' and 'M', then scale by one million.
# NOTE(review): presumably the file stores revenue in millions (e.g. "$82M") — confirm.
revenue['team_standardized'] = revenue['team'].replace(alias_mapping)
revenue['revenue'] = revenue['revenue'].replace(r'[\$,M]', '', regex=True).astype(float) * 1_000_000

# Merge datasets
# Left joins keep every advanced-stats row; rows without a matching
# (team, season start year) pair in payroll / revenue get NaN in those columns.
# The right-hand key columns are carried into `merged` as extra columns and are
# simply not selected below.
# NOTE(review): the recorded output of this cell shows NaN payroll on every
# displayed row — verify that team names / seasons in the payroll file line up
# with the advanced-stats keys.
merged = advanced_stats.merge(
    payroll[['team_standardized', 'seasonStartYear', 'payroll', 'inflationAdjPayroll']],
    left_on=['Team_standardized', 'SeasonStartYear'],
    right_on=['team_standardized', 'seasonStartYear'],
    how='left'
).merge(
    revenue[['team_standardized', 'seasonStartYear', 'revenue']],
    left_on=['Team_standardized', 'SeasonStartYear'],
    right_on=['team_standardized', 'seasonStartYear'],
    how='left'
)

# Select final columns
final_table = merged[[
    'Team_standardized', 'SeasonStartYear', 'Season', 'Wins', 'Losses',
    'payroll', 'inflationAdjPayroll', 'revenue'
]]

# Output result
print(final_table)

# Optional: Save to CSV
# final_table.to_csv("NBA_Merged_Data.csv", index=False)


        Team_standardized  SeasonStartYear     Season  Wins  Losses  payroll  \
0                    Team             2000  2000-2001   NaN     NaN      NaN   
1       San Antonio Spurs             2000  2000-2001  58.0    24.0      NaN   
2        Sacramento Kings             2000  2000-2001  55.0    27.0      NaN   
3               Utah Jazz             2000  2000-2001  53.0    29.0      NaN   
4      Philadelphia 76ers             2000  2000-2001  56.0    26.0      NaN   
..                    ...              ...        ...   ...     ...      ...   
791             Utah Jazz             2024  2024-2025  17.0    65.0      NaN   
792     Charlotte Hornets             2024  2024-2025  19.0    63.0      NaN   
793  New Orleans Pelicans             2024  2024-2025  21.0    61.0      NaN   
794    Washington Wizards             2024  2024-2025  18.0    64.0      NaN   
795        League Average             2024  2024-2025   NaN     NaN      NaN   

     inflationAdjPayroll      revenue  

In [4]:
# Keep only rows whose team label is neither 'Team' nor 'League Average'
# (presumably a stray header row and the league-wide summary row — confirm).
final_table = final_table.loc[
    ~final_table['Team_standardized'].isin(['Team', 'League Average'])
]


In [None]:
import pandas as pd

# Load the three raw inputs: per-team season stats, team payroll and team revenue.
advanced_stats = pd.read_csv("Advanced_Stats_Cleaned.csv")
payroll = pd.read_csv("TeamPayroll2000-2025.csv")
revenue = pd.read_csv("TeamRevenue2000-2025.csv")

# Alias mapping for team names: every listed label (including former names of
# relocated / renamed teams) -> the single name used as the merge key.
# Identity entries are kept so the full set of expected labels is visible;
# Series.replace() leaves any label that is not a key here unchanged.
alias_mapping = {
    # Atlantic Division
    "Boston Celtics": "Boston Celtics",
    "Brooklyn Nets": "Brooklyn Nets",
    "New Jersey Nets": "Brooklyn Nets",  # Pre-2012
    "New York Knicks": "New York Knicks",
    "Philadelphia 76ers": "Philadelphia 76ers",
    "Toronto Raptors": "Toronto Raptors",

    # Central Division
    "Chicago Bulls": "Chicago Bulls",
    "Cleveland Cavaliers": "Cleveland Cavaliers",
    "Detroit Pistons": "Detroit Pistons",
    "Indiana Pacers": "Indiana Pacers",
    "Milwaukee Bucks": "Milwaukee Bucks",

    # Southeast Division
    "Miami Heat": "Miami Heat",
    "Atlanta Hawks": "Atlanta Hawks",
    "Charlotte Hornets": "Charlotte Hornets",
    "Charlotte Bobcats": "Charlotte Hornets",  # For 2004-2014
    "Orlando Magic": "Orlando Magic",
    "Washington Wizards": "Washington Wizards",

    # Northwest Division
    "Denver Nuggets": "Denver Nuggets",
    "Minnesota Timberwolves": "Minnesota Timberwolves",
    "Oklahoma City Thunder": "Oklahoma City Thunder",
    "Seattle SuperSonics": "Oklahoma City Thunder",  # Pre-2008
    "Portland Trail Blazers": "Portland Trail Blazers",
    "Utah Jazz": "Utah Jazz",

    # Pacific Division
    "Golden State Warriors": "Golden State Warriors",
    "LA Clippers": "Los Angeles Clippers",
    "Los Angeles Clippers": "Los Angeles Clippers",
    "Los Angeles Lakers": "Los Angeles Lakers",
    "Phoenix Suns": "Phoenix Suns",
    "Sacramento Kings": "Sacramento Kings",

    # Southwest Division
    "Dallas Mavericks": "Dallas Mavericks",
    "Houston Rockets": "Houston Rockets",
    "Memphis Grizzlies": "Memphis Grizzlies",
    "Vancouver Grizzlies": "Memphis Grizzlies",  # Pre-2001
    "New Orleans Hornets": "New Orleans Pelicans",  # 2002-2013
    "New Orleans Pelicans": "New Orleans Pelicans",
    "New Orleans/Oklahoma City Hornets": "New Orleans Pelicans",  # 2005-2007 (Katrina Years)
    "San Antonio Spurs": "San Antonio Spurs"
}


# Prepare Advanced Stats
# The first four characters of 'Season' (e.g. "2000-2001") are the start year,
# which is used as the season half of the merge key.
advanced_stats['SeasonStartYear'] = advanced_stats['Season'].str[:4].astype(int)
advanced_stats['Team_standardized'] = advanced_stats['Team'].replace(alias_mapping)

# Prepare Payroll
# The patterns are raw strings so that `\$` reaches the regex engine verbatim;
# in a normal string literal `\$` is an invalid escape sequence and Python
# emits a DeprecationWarning / SyntaxWarning for it.
payroll['team_standardized'] = payroll['team'].replace(alias_mapping)
payroll['payroll'] = payroll['payroll'].replace(r'[\$,]', '', regex=True).astype(float)
payroll['inflationAdjPayroll'] = payroll['inflationAdjPayroll'].replace(r'[\$,]', '', regex=True).astype(float)

# Prepare Revenue
# Strip '$', ',' and 'M', then scale by one million.
# NOTE(review): presumably the file stores revenue in millions (e.g. "$82M") — confirm.
revenue['team_standardized'] = revenue['team'].replace(alias_mapping)
revenue['revenue'] = revenue['revenue'].replace(r'[\$,M]', '', regex=True).astype(float) * 1_000_000

# Merge datasets
# Left joins keep every advanced-stats row; rows without a matching
# (team, season start year) pair in payroll / revenue get NaN in those columns.
# The right-hand key columns are carried into `merged` as extra columns and are
# simply not selected below.
# NOTE(review): the recorded output of this cell still shows NaN payroll for the
# first displayed rows while revenue is populated — verify that the payroll file
# covers those seasons and uses team names present in alias_mapping.
merged = advanced_stats.merge(
    payroll[['team_standardized', 'seasonStartYear', 'payroll', 'inflationAdjPayroll']],
    left_on=['Team_standardized', 'SeasonStartYear'],
    right_on=['team_standardized', 'seasonStartYear'],
    how='left'
).merge(
    revenue[['team_standardized', 'seasonStartYear', 'revenue']],
    left_on=['Team_standardized', 'SeasonStartYear'],
    right_on=['team_standardized', 'seasonStartYear'],
    how='left'
)

# Select final columns
final_table = merged[[
    'Team_standardized', 'SeasonStartYear', 'Season', 'Wins', 'Losses',
    'payroll', 'inflationAdjPayroll', 'revenue'
]]

# Remove non-team rows: labels 'League Average' and 'Team' are not franchises.
final_table = final_table[~final_table['Team_standardized'].isin(['League Average', 'Team'])]

# Output result
print(final_table)

# Optional: Save to CSV
# final_table.to_csv("NBA_Merged_Cleaned.csv", index=False)


          Team_standardized  SeasonStartYear     Season  Wins  Losses  \
1         San Antonio Spurs             2000  2000-2001  58.0    24.0   
2          Sacramento Kings             2000  2000-2001  55.0    27.0   
3                 Utah Jazz             2000  2000-2001  53.0    29.0   
4        Philadelphia 76ers             2000  2000-2001  56.0    26.0   
5    Portland Trail Blazers             2000  2000-2001  50.0    32.0   
..                      ...              ...        ...   ...     ...   
790           Brooklyn Nets             2024  2024-2025  26.0    56.0   
791               Utah Jazz             2024  2024-2025  17.0    65.0   
792       Charlotte Hornets             2024  2024-2025  19.0    63.0   
793    New Orleans Pelicans             2024  2024-2025  21.0    61.0   
794      Washington Wizards             2024  2024-2025  18.0    64.0   

         payroll  inflationAdjPayroll      revenue  
1            NaN                  NaN   82000000.0  
2            NaN 