In [8]:
import requests
import pandas as pd
from pathlib import Path
import io # To fix the FutureWarning

# --- Scrape configuration -------------------------------------------------
# Season identifier interpolated into the Spotrac cap-table URL.
YEAR = "2024"
TARGET_URL = f"https://www.spotrac.com/nba/cap/_/year/{YEAR}"

# --- Output location ------------------------------------------------------
# The project root is taken to be two directory levels above the current
# working directory; the raw scrape is written beneath <root>/data/raw/.
PROJECT_ROOT = Path.cwd().resolve().parent.parent
RAW_SALARY_FILE = PROJECT_ROOT.joinpath("data", "raw", "raw_team_caps.csv")

# Echo the resolved settings (one per line) so a wrong working directory is
# noticed before anything is fetched or written.
print(
    f"Project Root: {PROJECT_ROOT}",
    f"Target URL: {TARGET_URL}",
    f"Target File: {RAW_SALARY_FILE}",
    sep="\n",
)

Project Root: /Users/macychen/VSCodeProjects/Analysis-of-NBA-Contract-Valuation-Bias
Target URL: https://www.spotrac.com/nba/cap/_/year/2024
Target File: /Users/macychen/VSCodeProjects/Analysis-of-NBA-Contract-Valuation-Bias/data/raw/raw_team_caps.csv


In [33]:
# Fetch the Spotrac team-cap page and load its first HTML table into
# `all_teams_df`. On any fetch/parse failure a message is printed and
# `all_teams_df` is left as an empty DataFrame.

# Send a browser-like User-Agent header with the request
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Upper bound (in seconds) on connecting to / reading from the server.
# Without a timeout, requests waits indefinitely and a stalled server
# would hang the cell.
REQUEST_TIMEOUT_SECONDS = 30

# Names applied to the scraped table when it has the expected number of columns
TEAM_CAP_COLUMNS = [
    'rank', 'team', 'record', 'active_players', 'avg_team_age',
    'total_cap_used', 'remaining_cap_space', 'active_cap',
    'active_top_3', 'dead_cap',
]

# Start from an empty frame so the name is always defined, even on failure
all_teams_df = pd.DataFrame()

try:
    response = requests.get(TARGET_URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    # Use io.StringIO to avoid the FutureWarning about passing literal HTML
    tables = pd.read_html(io.StringIO(response.text))

    # read_html never returns an empty list (it raises ValueError instead),
    # so the first table can be taken directly.
    all_teams_df = tables[0]

    # Rename columns only when the table has the expected width; otherwise
    # say so explicitly, because later cells rely on these column names.
    if len(all_teams_df.columns) == len(TEAM_CAP_COLUMNS):
        all_teams_df.columns = TEAM_CAP_COLUMNS
    else:
        print(
            f"Unexpected table layout: expected {len(TEAM_CAP_COLUMNS)} columns, "
            f"got {len(all_teams_df.columns)}; columns were not renamed."
        )

except requests.exceptions.RequestException as e:
    # Connection errors, timeouts, and non-2xx responses (via raise_for_status)
    print(f"Error fetching page: {e}")
except ValueError:
    # Raised by pd.read_html when the response contains no parsable <table>
    print("No tables found.")

all_teams_df

Unnamed: 0,rank,team,record,active_players,avg_team_age,total_cap_used,remaining_cap_space,active_cap,active_top_3,dead_cap
0,1.0,DET DET,44-38,15.0,25.0,"$141,808,220","$-1,220,220","$133,989,678","$56,559,037","$7,818,542"
1,2.0,ORL ORL,41-41,15.0,25.3,"$152,959,238","$-12,371,238","$152,959,238","$63,257,000",-
2,3.0,UTA UTA,17-65,15.0,23.9,"$154,114,022","$-13,526,022","$142,648,013","$86,906,400","$11,466,009"
3,4.0,SAS SAS,34-48,15.0,25.2,"$162,809,380","$-22,221,380","$155,231,927","$83,196,166","$7,577,453"
4,5.0,OKC OKC,68-14,15.0,24.2,"$166,001,694","$-25,413,694","$162,209,242","$82,859,950","$3,792,452"
5,6.0,CHA CHA,19-63,15.0,25.2,"$167,423,486","$-26,835,486","$152,436,862","$80,445,913","$14,986,624"
6,7.0,MEM MEM,48-34,15.0,24.6,"$167,814,933","$-27,226,933","$155,850,296","$95,988,718","$8,014,853"
7,8.0,CHI CHI,39-43,15.0,24.6,"$168,006,873","$-27,418,873","$157,180,244","$59,395,348","$8,739,110"
8,9.0,PHI PHI,24-58,15.0,26.6,"$170,340,647","$-29,752,647","$169,548,876","$135,768,738","$791,771"
9,10.0,IND IND,50-32,15.0,25.7,"$171,232,577","$-30,644,577","$166,839,774","$104,281,300","$2,530,538"


In [None]:
# Build the cleaned team-cap table from the scraped frame in one chain:
#   1. discard the 'rank' column,
#   2. keep only the first whitespace-separated token of 'team'
#      (e.g. "DET DET" -> "DET"),
#   3. drop the final two rows of the table.
team_caps_df = (
    all_teams_df
    .drop(columns='rank')
    .assign(team=lambda frame: frame['team'].str.split().str[0])
    .iloc[:-2]
)

team_caps_df

Unnamed: 0,team,record,active_players,avg_team_age,total_cap_used,remaining_cap_space,active_cap,active_top_3,dead_cap
0,DET,44-38,15.0,25.0,"$141,808,220","$-1,220,220","$133,989,678","$56,559,037","$7,818,542"
1,ORL,41-41,15.0,25.3,"$152,959,238","$-12,371,238","$152,959,238","$63,257,000",-
2,UTA,17-65,15.0,23.9,"$154,114,022","$-13,526,022","$142,648,013","$86,906,400","$11,466,009"
3,SAS,34-48,15.0,25.2,"$162,809,380","$-22,221,380","$155,231,927","$83,196,166","$7,577,453"
4,OKC,68-14,15.0,24.2,"$166,001,694","$-25,413,694","$162,209,242","$82,859,950","$3,792,452"
5,CHA,19-63,15.0,25.2,"$167,423,486","$-26,835,486","$152,436,862","$80,445,913","$14,986,624"
6,MEM,48-34,15.0,24.6,"$167,814,933","$-27,226,933","$155,850,296","$95,988,718","$8,014,853"
7,CHI,39-43,15.0,24.6,"$168,006,873","$-27,418,873","$157,180,244","$59,395,348","$8,739,110"
8,PHI,24-58,15.0,26.6,"$170,340,647","$-29,752,647","$169,548,876","$135,768,738","$791,771"
9,IND,50-32,15.0,25.7,"$171,232,577","$-30,644,577","$166,839,774","$104,281,300","$2,530,538"
