In [4]:
# ---------------------------------------------
#   WNBA PLAYER PERFORMANCE + SALARY MODEL
# ---------------------------------------------

import pandas as pd
import numpy as np

# --- Load WNBA Data ---
wnba = pd.read_csv("Full WNBA Dataset.csv")

# --- Fill missing numeric values with 0 ---
# Only columns that actually exist in the loaded file are touched;
# names listed here but absent from the CSV are ignored.
numeric_cols = [
    'FP', 'AGE', 'FGM', 'FGA', 'FTA', 'DREB', 'BLK', 'PTS',
    'TOV', 'L', 'PF', 'STL', 'FT%', 'ERROR', 'MIN',
]
present_numeric_cols = [name for name in numeric_cols if name in wnba.columns]
for name in present_numeric_cols:
    wnba[name] = wnba[name].fillna(0)

# --- Feature Importances from XGBoost ---
# Column name -> weight applied to that column in the performance index.
feat_importance = {
    'FP': 0.379349, 'AGE': 0.080611, 'FGM': 0.071855,
    'FTA': 0.042036, 'FGA': 0.038424, 'DREB': 0.031454,
    'BLK': 0.028346, 'PTS': 0.027907, 'TOV': 0.027045,
    'L': 0.023641, 'PF': 0.023190, 'STL': 0.023018,
    'FT%': 0.022908, 'ERROR': 0.022655, 'MIN': 0.022253,
}

# --- Compute Weighted Performance Index ---
# Accumulate column * weight in dictionary order. Features that are not
# columns of the dataset contribute nothing; if none are present the raw
# index is 0 for every row.
weighted_total = 0
for feature, weight in feat_importance.items():
    if feature not in wnba.columns:
        continue
    weighted_total = weighted_total + wnba[feature] * weight
wnba['Performance_Index_Raw'] = weighted_total

# --- Normalize to 0–100 ---
# Min-max scale the raw index over the whole dataset, then round to
# two decimals. The lowest raw score maps to 0 and the highest to 100.
raw_index = wnba['Performance_Index_Raw']
lowest, highest = raw_index.min(), raw_index.max()
unit_scaled = (raw_index - lowest) / (highest - lowest)
wnba['Performance_Index'] = (100 * unit_scaled).round(2)

# --- Create 10 tiers (Tier 1 = best) ---
# Order rows from highest to lowest index, then cut into deciles.
wnba = wnba.sort_values(by='Performance_Index', ascending=False).reset_index(drop=True)
# qcut labels deciles 0 (lowest scores) .. 9 (highest scores); flip them
# so that the top decile becomes Tier 1 and the bottom decile Tier 10.
decile = pd.qcut(wnba['Performance_Index'], q=10, labels=False)
wnba['Tier'] = 10 - decile

# -------------------------------------------------------
# WNBA REVENUE BY YEAR + PLAYER SHARE (50%)
# -------------------------------------------------------
# League revenue per season, in dollars.
revenue_by_year = {
    2021: 110_000_000,   # estimate
    2022: 140_000_000,   # estimate
    2023: 190_000_000,   # Bloomberg range mid-point
    2024: 200_000_000,   # reported estimate
    2025: 250_000_000    # projection
}

# Fraction of revenue paid out to players: 0.50 means 50%.
# NOTE: this comment and the section header previously said 9.3%, which
# contradicted the value actually used here; a 9.3% share would be 0.093.
player_share_pct = 0.50

# --- Compute payroll available per season ---
# season -> dollars available for player salaries (revenue * share).
payroll_by_year = {year: rev * player_share_pct for year, rev in revenue_by_year.items()}

# -------------------------------------------------------
#   SALARY CALCULATION BASED ON PERFORMANCE INDEX
# -------------------------------------------------------
# Each season's payroll pool is split among that season's players in
# proportion to Performance_Index. Rows whose season has no payroll
# entry, or whose season total is zero, keep the 0.0 default.
wnba['Predicted_Salary'] = 0.0

for season, payroll in payroll_by_year.items():
    in_season = wnba['Season'] == season
    season_index = wnba.loc[in_season, 'Performance_Index']

    # Payroll year with no matching rows in the dataset.
    if season_index.empty:
        continue

    # A zero total would make the share computation divide by zero.
    index_total = season_index.sum()
    if index_total == 0:
        continue

    # Player salary = (own index / season index total) * season payroll.
    wnba.loc[in_season, 'Predicted_Salary'] = season_index / index_total * payroll

# Round to cents.
wnba['Predicted_Salary'] = wnba['Predicted_Salary'].round(2)

# -------------------------------------------------------
#   SAVE RESULTS
# -------------------------------------------------------
# The same column subset is written to disk and previewed below.
output_cols = ['PLAYER', 'Season', 'Tier', 'Performance_Index', 'Predicted_Salary']
salary_results = wnba[output_cols]
salary_results.to_csv("WNBA_Performance_Index_SalaryModel.csv", index=False)

# Preview output: first 20 rows in the current (index-descending) order.
print("✔ WNBA Salaries Computed Using Performance Index + Revenue Share")
print(salary_results.head(20).to_string(index=False))


✔ WNBA Salaries Computed Using Performance Index + Revenue Share
          PLAYER  Season  Tier  Performance_Index  Predicted_Salary
     a'ja wilson    2024     1             100.00      1,833,943.73
     jewell loyd    2023     1              93.74      1,523,977.23
arike ogunbowale    2024     1              92.65      1,699,148.87
     a'ja wilson    2025     1              91.55      1,986,503.49
 breanna stewart    2023     1              90.24      1,467,076.01
    tina charles    2021     1              90.10        875,622.86
napheesa collier    2025     1              89.69      1,946,144.17
napheesa collier    2023     1              87.66      1,425,131.69
arike ogunbowale    2023     1              87.62      1,424,481.39
     a'ja wilson    2023     1              87.34      1,419,929.29
 breanna stewart    2021     1              86.14        837,138.21
   caitlin clark    2024     1              85.71      1,571,873.17
napheesa collier    2024     1              85.39  

In [14]:
# ---------------------------------------------
#   WNBA PLAYER PERFORMANCE + SALARY MODEL
# ---------------------------------------------

import pandas as pd
import numpy as np

# --- Load WNBA Data ---
wnba = pd.read_csv("Full WNBA Dataset.csv")

# --- Fill missing numeric values with 0 ---
# Only columns that actually exist in the loaded file are touched.
# SALARY is included, so a missing salary becomes 0 from here on.
numeric_cols = [
    'FP', 'AGE', 'FGM', 'FGA', 'FTA', 'DREB', 'BLK', 'PTS',
    'TOV', 'L', 'PF', 'STL', 'FT%', 'ERROR', 'MIN', 'SALARY',
]
present_numeric_cols = [name for name in numeric_cols if name in wnba.columns]
for name in present_numeric_cols:
    wnba[name] = wnba[name].fillna(0)

# --- Feature Importances from XGBoost ---
# Column name -> weight applied to that column in the performance index.
feat_importance = {
    'FP': 0.379349, 'AGE': 0.080611, 'FGM': 0.071855,
    'FTA': 0.042036, 'FGA': 0.038424, 'DREB': 0.031454,
    'BLK': 0.028346, 'PTS': 0.027907, 'TOV': 0.027045,
    'L': 0.023641, 'PF': 0.023190, 'STL': 0.023018,
    'FT%': 0.022908, 'ERROR': 0.022655, 'MIN': 0.022253,
}

# --- Compute Weighted Performance Index ---
# Accumulate column * weight in dictionary order. Features that are not
# columns of the dataset contribute nothing; if none are present the raw
# index is 0 for every row.
weighted_total = 0
for feature, weight in feat_importance.items():
    if feature not in wnba.columns:
        continue
    weighted_total = weighted_total + wnba[feature] * weight
wnba['Performance_Index_Raw'] = weighted_total

# --- Normalize to 0–100 ---
# Min-max scale the raw index over the whole dataset, then round to
# two decimals. The lowest raw score maps to 0 and the highest to 100.
raw_index = wnba['Performance_Index_Raw']
lowest, highest = raw_index.min(), raw_index.max()
unit_scaled = (raw_index - lowest) / (highest - lowest)
wnba['Performance_Index'] = (100 * unit_scaled).round(2)

# --- Create 10 tiers (Tier 1 = best) ---
# Order rows from highest to lowest index, then cut into deciles.
wnba = wnba.sort_values(by='Performance_Index', ascending=False).reset_index(drop=True)
# qcut labels deciles 0 (lowest scores) .. 9 (highest scores); flip them
# so that the top decile becomes Tier 1 and the bottom decile Tier 10.
decile = pd.qcut(wnba['Performance_Index'], q=10, labels=False)
wnba['Tier'] = 10 - decile

# -------------------------------------------------------
# WNBA REVENUE BY YEAR + PLAYER SHARE (9.3%)
# -------------------------------------------------------
# League revenue per season, in dollars.
revenue_by_year = {
    2021: 110_000_000,
    2022: 140_000_000,
    2023: 190_000_000,
    2024: 200_000_000,
    2025: 250_000_000
}

# Fraction of revenue paid out to players: 0.093 means 9.3%.
# NOTE: the previous comment said "50% revenue to players", which
# contradicted the value actually used here; a 50% share would be 0.50.
player_share_pct = 0.093

# season -> dollars available for player salaries (revenue * share).
payroll_by_year = {year: rev * player_share_pct for year, rev in revenue_by_year.items()}

# -------------------------------------------------------
#   SALARY CALCULATION BASED ON PERFORMANCE INDEX
# -------------------------------------------------------
# Each season's payroll pool is split among that season's players in
# proportion to Performance_Index. Rows whose season has no payroll
# entry, or whose season total is zero, keep the 0.0 default.
wnba['New_Salary'] = 0.0

for season, payroll in payroll_by_year.items():
    in_season = wnba['Season'] == season
    season_index = wnba.loc[in_season, 'Performance_Index']

    # Payroll year with no matching rows in the dataset.
    if season_index.empty:
        continue

    # A zero total would make the share computation divide by zero.
    index_total = season_index.sum()
    if index_total == 0:
        continue

    # Salary = performance share * pool
    wnba.loc[in_season, 'New_Salary'] = season_index / index_total * payroll

# Round to cents.
wnba['New_Salary'] = wnba['New_Salary'].round(2)

# -------------------------------------------------------
#   COMPARE PREDICTED VS ACTUAL SALARY
# -------------------------------------------------------
# Missing SALARY values were filled with 0 when the data was loaded, so a
# non-positive SALARY means there is no actual salary to compare against.
has_salary = wnba['SALARY'] > 0

# Dollar gap between the model salary and the actual salary.
wnba['Salary_Diff'] = (wnba['New_Salary'] - wnba['SALARY']).round(2)

# Flag a player as underpaid only when a real salary exists. Without the
# has_salary guard, every row with a missing (zero-filled) salary and a
# positive model salary would be reported as underpaid.
wnba['Underpaid'] = has_salary & (wnba['Salary_Diff'] > 0)

# Relative gap in percent of the actual salary; NaN where no salary exists.
wnba['Percent_Diff'] = np.where(
    has_salary,
    (wnba['New_Salary'] - wnba['SALARY']) / wnba['SALARY'],
    np.nan
)
wnba['Percent_Diff'] = (wnba['Percent_Diff'] * 100).round(2)

# -------------------------------------------------------
#   SAVE COMPARISON RESULTS
# -------------------------------------------------------
# The same column subset is written to disk and previewed below.
comparison_cols = [
    'PLAYER',
    'Season',
    'Tier',
    'Performance_Index',
    'SALARY',
    'New_Salary',
    'Salary_Diff',
    'Percent_Diff',
    'Underpaid',
]
comparison = wnba[comparison_cols]
comparison.to_csv("WNBA_Predicted_vs_Actual_Salary.csv", index=False)

# -------------------------------------------------------
#   PREVIEW OUTPUT
# -------------------------------------------------------
# First 20 rows in the current (index-descending) order.
print("✔ Comparison of Predicted vs Actual Salaries")
print(comparison.head(20).to_string(index=False))


✔ Comparison of Predicted vs Actual Salaries
          PLAYER  Season  Tier  Performance_Index  SALARY  New_Salary  Salary_Diff  Percent_Diff  Underpaid
     a'ja wilson    2024     1             100.00  227303  341,113.53   113,810.53         50.07       True
     jewell loyd    2023     1              93.74  234936  283,459.77    48,523.77         20.65       True
arike ogunbowale    2024     1              92.65  241984  316,041.69    74,057.69         30.60       True
     a'ja wilson    2025     1              91.55  200000  369,489.65   169,489.65         84.74       True
 breanna stewart    2023     1              90.24  180000  272,876.14    92,876.14         51.60       True
    tina charles    2021     1              90.10  175000  162,865.85   -12,134.15         -6.93      False
napheesa collier    2025     1              89.69  214284  361,982.81   147,698.81         68.93       True
napheesa collier    2023     1              87.66  202154  265,074.49    62,920.49         