In [153]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
def load_contracts(url, years, extra):
    """
    Fetches the Basketball‑Reference contracts table at `url`,
    cleans it, and computes Salary Count & Average Salary.
    
    years: list of the six season‑column names, e.g.
           ["2022-23","2023-24","2024-25","2025-26","2026-27","2027-28"]
    """
    # Step 1–3: get & parse
    headers = {"User-Agent": "Mozilla/5.0"}
    resp = requests.get(url, headers=headers)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, "html.parser")
    table = soup.find("table", {"id": "player-contracts"})
    
    # Step 4: read into pandas
    df = pd.read_html(str(table))[0]
    
    # Step 5: set cols
    if extra:
        df.columns = ["Rk", "Player", "Tm"] + years + ["BIRD"] + ["Guaranteed"]
    else:
        df.columns = ["Rk", "Player", "Tm"] + years + ["Guaranteed"]
    # Step 6: drop repeats/blanks
    df = df[
        df["Player"].notna()
        & (df["Player"] != "Player")
        & (df["Player"] != "Missing value")
    ].copy()
    
    # Step 7: count non-null salaries
    df['Salary Count'] = (
        df.iloc[:, 3:3+len(years)]
          .notnull()
          .sum(axis=1)
          .astype(int)
    )
    
    # Step 8: clean & convert Guaranteed to float
    df['Guaranteed'] = (
        df['Guaranteed']
          .astype(str)
          .str.replace(r'[^0-9\.]', '', regex=True)
    )
    df['Guaranteed'] = pd.to_numeric(df['Guaranteed'], errors='coerce')
    
    # report any failures
    bad = df['Guaranteed'].isna()
    if bad.any():
        print()
        #print("Rows with non‑numeric Guaranteed:",
        #      df.loc[bad, ['Player','Guaranteed']])
    
    # Step 9: compute average
    df['Average Salary'] = df['Guaranteed'] / df['Salary Count']

    
    return df

# --- now just call it for each snapshot:
years21 = ["2020-21","2021-22","2022-23","2023-24","2024-25","2025-26"] #
url21   = "https://web.archive.org/web/20220127184320/https://www.basketball-reference.com//contracts/players.html"
contract21 = load_contracts(url21, years21, True)

years22 = ["2021-22","2022-23","2023-24","2024-25","2025-26","2026-27"]
url22   = "https://web.archive.org/web/20220127184320/https://www.basketball-reference.com//contracts/players.html"
contract22 = load_contracts(url22, years22, True)

years23 = ["2022-23","2023-24","2024-25","2025-26","2026-27","2027-28"]
url23   = "https://web.archive.org/web/20230516051257/https://www.basketball-reference.com/contracts/players.html"
contract23 = load_contracts(url23, years23, False)

years24 = ["2023-24","2024-25","2025-26","2026-27","2027-28","2028-29"]
url24   = "https://web.archive.org/web/20240603223914/https://www.basketball-reference.com/contracts/players.html"
contract24 = load_contracts(url24, years24, False)

years25 = ["2024-25","2025-26","2026-27","2027-28","2028-29","2029-30"]
url25 = "https://www.basketball-reference.com/contracts/players.html"
contract25 = load_contracts(url25, years25, False)

  df = pd.read_html(str(table))[0]





  df = pd.read_html(str(table))[0]





  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]





  df = pd.read_html(str(table))[0]


Unnamed: 0,Rk,Player,Tm,2022-23,2023-24,2024-25,2025-26,2026-27,2027-28,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$48,070,014","$51,915,615","$55,761,216","$59,606,817",,,215353662,4,5.383842e+07
1,2,Russell Westbrook,UTA,"$47,559,433",,,,,,47063478,1,4.706348e+07
2,3,LeBron James,LAL,"$44,474,988","$53,369,986","$57,639,585",,,,97844974,3,3.261499e+07
3,4,Kevin Durant,PHO,"$44,119,845","$47,649,433","$51,179,021","$54,708,609",,,197656908,4,4.941423e+07
4,5,Bradley Beal,WAS,"$43,279,250","$46,741,590","$50,203,930","$53,666,270","$57,128,610",,193891040,5,3.877821e+07
...,...,...,...,...,...,...,...,...,...,...,...,...
617,562,Charles Bassey,PHI,"$2,674,742","$2,600,000","$2,500,000","$2,500,000",,,74742,4,1.868550e+04
618,563,Trevor Keels,NYK,,,,,,,58493,0,inf
619,564,Lester Quinones,GSW,,,,,,,58493,0,inf
620,565,Stanley Umude,DET,"$58,493",,,,,,58493,1,5.849300e+04


In [193]:
display(contract21)
display(contract22)
display(contract23)
display(contract24)
display(contract25)

Unnamed: 0,Rk,Player,Tm,2020-21,2021-22,2022-23,2023-24,2024-25,2025-26,BIRD,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,261134628.0,5,52226925.6
1,2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,44310840.0,2,22155420.0
2,3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,44211146.0,2,22105573.0
3,4,James Harden,BRK,"$43,848,000","$46,872,000",,,,,Bird Rights,43848000.0,2,21924000.0
4,5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,85655532.0,2,42827766.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,529,Emanuel Terry,PHO,"$85,578",,,,,,Minimum Salary,85578.0,1,85578.0
581,530,Tyrell Terry,MEM,"$1,517,981","$1,782,621",,,,,,85578.0,2,42789.0
582,531,Admiral Schofield,ORL,,,,,,,,169706.0,0,inf
583,532,Malcolm Hill,CHI,,,,,,,,53176.0,0,inf


Unnamed: 0,Rk,Player,Tm,2021-22,2022-23,2023-24,2024-25,2025-26,2026-27,BIRD,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$45,780,966","$48,070,014","$51,915,615","$55,761,216","$59,606,817",,Bird Rights,261134628.0,5,52226925.6
1,2,John Wall,HOU,"$44,310,840","$47,366,760",,,,,Bird Rights,44310840.0,2,22155420.0
2,3,Russell Westbrook,LAL,"$44,211,146","$47,063,478",,,,,Bird Rights,44211146.0,2,22105573.0
3,4,James Harden,BRK,"$43,848,000","$46,872,000",,,,,Bird Rights,43848000.0,2,21924000.0
4,5,LeBron James,LAL,"$41,180,544","$44,474,988",,,,,Bird,85655532.0,2,42827766.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,529,Emanuel Terry,PHO,"$85,578",,,,,,Minimum Salary,85578.0,1,85578.0
581,530,Tyrell Terry,MEM,"$1,517,981","$1,782,621",,,,,,85578.0,2,42789.0
582,531,Admiral Schofield,ORL,,,,,,,,169706.0,0,inf
583,532,Malcolm Hill,CHI,,,,,,,,53176.0,0,inf


Unnamed: 0,Rk,Player,Tm,2022-23,2023-24,2024-25,2025-26,2026-27,2027-28,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$48,070,014","$51,915,615","$55,761,216","$59,606,817",,,215353662,4,5.383842e+07
1,2,Russell Westbrook,UTA,"$47,559,433",,,,,,47063478,1,4.706348e+07
2,3,LeBron James,LAL,"$44,474,988","$53,369,986","$57,639,585",,,,97844974,3,3.261499e+07
3,4,Kevin Durant,PHO,"$44,119,845","$47,649,433","$51,179,021","$54,708,609",,,197656908,4,4.941423e+07
4,5,Bradley Beal,WAS,"$43,279,250","$46,741,590","$50,203,930","$53,666,270","$57,128,610",,193891040,5,3.877821e+07
...,...,...,...,...,...,...,...,...,...,...,...,...
617,562,Charles Bassey,PHI,"$2,674,742","$2,600,000","$2,500,000","$2,500,000",,,74742,4,1.868550e+04
618,563,Trevor Keels,NYK,,,,,,,58493,0,inf
619,564,Lester Quinones,GSW,,,,,,,58493,0,inf
620,565,Stanley Umude,DET,"$58,493",,,,,,58493,1,5.849300e+04


Unnamed: 0,Rk,Player,Tm,2023-24,2024-25,2025-26,2026-27,2027-28,2028-29,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$51,915,615","$55,761,216","$59,606,817",,,,167283648.0,3,55761216.0
1,2,Kevin Durant,PHO,"$47,649,433","$51,179,021","$54,708,609",,,,153537063.0,3,51179021.0
2,3,Nikola Jokić,DEN,"$47,607,350","$51,415,938","$55,224,526","$59,033,114","$62,841,702",,213280928.0,5,42656185.6
3,4,Joel Embiid,PHI,"$47,607,350","$51,415,938","$55,224,526","$59,033,114",,,154247814.0,4,38561953.5
4,5,LeBron James,LAL,"$47,607,350","$51,415,938",,,,,47607350.0,2,23803675.0
...,...,...,...,...,...,...,...,...,...,...,...,...
611,558,Matthew Hurt,MEM,,,,,,,64343.0,0,inf
612,559,Pete Nance,CLE,"$64,343",,,,,,64343.0,1,64343.0
613,560,Malik Williams,TOR,"$77,212",,,,,,77212.0,1,77212.0
616,561,Kai Jones,LAC,"$3,175,563","$2,196,970",,,,,11608.0,2,5804.0


Unnamed: 0,Rk,Player,Tm,2024-25,2025-26,2026-27,2027-28,2028-29,2029-30,Guaranteed,Salary Count,Average Salary
0,1,Stephen Curry,GSW,"$55,761,216","$59,606,817","$62,587,158",,,,177955191,3,59318397.0
1,2,Joel Embiid,PHI,"$51,415,938","$55,224,526","$57,985,752","$62,624,612","$67,263,472",,227250828,5,45450165.6
2,3,Nikola Jokić,DEN,"$51,415,938","$55,224,526","$59,033,114","$62,841,702",,,165673578,4,41418394.5
3,4,Kevin Durant,PHO,"$51,179,021","$54,708,609",,,,,105887630,2,52943815.0
4,5,Bradley Beal,PHO,"$50,203,930","$53,666,270","$57,128,610",,,,103870200,3,34623400.0
...,...,...,...,...,...,...,...,...,...,...,...,...
539,492,Javon Freeman-Liberty,TOR,"$100,000",,,,,,100000,1,100000.0
540,493,Erik Stevenson,WAS,"$66,503",,,,,,66503,1,66503.0
541,494,Branden Carlson,OKC,"$496,519",,,,,,496519,1,496519.0
542,495,Branden Carlson,OKC,"$496,519",,,,,,496519,1,496519.0


In [None]:
def load_stats(season):
    """
    Load stats from text{season}.txt and assign column names.
    season: int or str, e.g. 21, '22', 25
    """
    fn = f'text{season}.txt'
    df = pd.read_csv(
        fn,
        sep=',',
        header=None,
        encoding='utf-8'
    )
    df.columns = [
        'Rank','Player','Age','Team','Position',
        'Games','Games Started','MP','FG','FGA','FG%',
        '3P','3PA','3P%','2P','2PA','2P%','eFG%',
        'FT','FTA','FT%','ORB','DRB','TRB',
        'AST','STL','BLK','TOV','PF','PTS',
        'Player-additional'
    ]
    return df

# Now load each season’s stats in one line:
stats21 = load_stats(21)
stats22 = load_stats(22)
stats23 = load_stats(23)
stats24 = load_stats(24)
stats25 = load_stats(25)

Unnamed: 0,Rank,Player,Age,Team,Position,Games,Games Started,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Player-additional
0,1,Stephen Curry,32.0,GSW,PG,63.0,63.0,34.2,10.4,21.7,...,0.5,5.0,5.5,5.8,1.2,0.1,3.4,1.9,32.0,curryst01
1,2,Bradley Beal,27.0,WAS,SG,60.0,60.0,35.8,11.2,23.0,...,1.2,3.5,4.7,4.4,1.2,0.4,3.1,2.3,31.3,bealbr01
2,3,Damian Lillard,30.0,POR,PG,67.0,67.0,35.8,9.0,19.9,...,0.5,3.7,4.2,7.5,0.9,0.3,3.0,1.5,28.8,lillada01
3,4,Joel Embiid,26.0,PHI,C,51.0,51.0,31.1,9.0,17.6,...,2.2,8.4,10.6,2.8,1.0,1.4,3.1,2.4,28.5,embiijo01
4,5,Giannis Antetokounmpo,26.0,MIL,PF,61.0,61.0,33.0,10.3,18.0,...,1.6,9.4,11.0,5.9,1.2,1.2,3.4,2.8,28.1,antetgi01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,538,Anžejs Pasečņiks,25.0,WAS,C,1.0,0.0,6.0,0.0,1.0,...,1.0,0.0,1.0,1.0,0.0,0.0,5.0,2.0,0.0,pasecan01
538,539,Noah Vonleh,25.0,BRK,C,4.0,0.0,2.8,0.0,0.8,...,0.0,0.3,0.3,0.3,0.0,0.0,0.5,0.5,0.0,vonleno01
539,540,Greg Whittington,27.0,DEN,PF,4.0,0.0,3.0,0.0,0.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,whittgr01
540,,League Average,,,,,,,,,...,,,,,,,,,,-9999


In [224]:
def merge_contract_stats(contract_df, stats_df):
    """
    Strips whitespace from both DataFrames' Player columns
    and returns their inner merge on Player.
    """
    # work on copies so we don’t clobber originals
    c = contract_df.copy()
    s = stats_df  .copy()
    
    c['Player'] = c['Player'].str.strip()
    s['Player'] = s['Player'].str.strip()
    
    merged = pd.merge(
        c, s,
        on='Player',
        how='inner',
        suffixes=('_contract','_stats')
    )
    return merged

In [225]:
merged25 = merge_contract_stats(contract25, stats25)
merged24 = merge_contract_stats(contract24, stats24)
merged23 = merge_contract_stats(contract23, stats23)
merged22 = merge_contract_stats(contract22, stats22)
merged21 = merge_contract_stats(contract21, stats21)


In [204]:
display(merged24)

Unnamed: 0,Rk,Player,Tm,2023-24,2024-25,2025-26,2026-27,2027-28,2028-29,Guaranteed,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Player-additional
0,1,Stephen Curry,GSW,"$51,915,615","$55,761,216","$59,606,817",,,,167283648.0,...,0.5,4.0,4.5,5.1,0.7,0.4,2.8,1.6,26.4,curryst01
1,2,Kevin Durant,PHO,"$47,649,433","$51,179,021","$54,708,609",,,,153537063.0,...,0.5,6.1,6.6,5.0,0.9,1.2,3.3,1.8,27.1,duranke01
2,3,Nikola Jokić,DEN,"$47,607,350","$51,415,938","$55,224,526","$59,033,114","$62,841,702",,213280928.0,...,2.8,9.5,12.4,9.0,1.4,0.9,3.0,2.5,26.4,jokicni01
3,4,Joel Embiid,PHI,"$47,607,350","$51,415,938","$55,224,526","$59,033,114",,,154247814.0,...,2.4,8.6,11.0,5.6,1.2,1.7,3.8,2.9,34.7,embiijo01
4,5,LeBron James,LAL,"$47,607,350","$51,415,938",,,,,47607350.0,...,0.9,6.4,7.3,8.3,1.3,0.5,3.5,1.1,25.7,jamesle01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,557,Trey Jemison,MEM,,,,,,,64343.0,...,2.6,2.8,5.4,1.1,0.5,1.1,1.4,2.8,6.8,jemistr01
526,558,Matthew Hurt,MEM,,,,,,,64343.0,...,1.1,0.9,2.0,0.5,0.4,0.4,0.3,1.0,4.0,hurtma01
527,559,Pete Nance,CLE,"$64,343",,,,,,64343.0,...,0.0,0.4,0.4,0.0,0.1,0.0,0.1,0.3,0.4,nancepe01
528,560,Malik Williams,TOR,"$77,212",,,,,,77212.0,...,2.1,3.3,5.4,0.3,0.4,0.6,0.4,2.1,2.7,willima11


In [226]:
cap2021 = 109140000 
cap2022 = 112414000
cap2023 = 123655000 
cap2024 = 136021000
cap2025 = 140588000 

In [229]:
def percent_of_cap(years, df, cap):
    """
    For each column in `years`, removes any commas (or $),
    converts to float, and then adds a
      'Percent of Cap {year}'
    column (in percent).
    """
    df = df.copy()
    
    for year in years:
        # 1) remove commas or dollar signs, then cast to float
        df[year] = (
            df[year]
              .astype(str)
              .str.replace(r'[,\$]', '', regex=True)
              .astype(float)
        )
        
        # 2) compute percent of cap
        df[f'Percent of Cap {year}'] = (df[year] / cap)
    
    return df


In [231]:
final25 = percent_of_cap(years25, merged25, cap2025)
final24 = percent_of_cap(years24, merged24, cap2024)
final23 = percent_of_cap(years23, merged23, cap2023)
final22 = percent_of_cap(years22, merged22, cap2022)
final21 = percent_of_cap(years21, merged21, cap2021)