In [12]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from nba_api.stats.endpoints import leaguedashteamstats, commonteamroster
from nba_api.stats.static import teams

In [3]:
# Shared configuration: output directories (relative to this notebook) and
# the seasons to download.
base_path = "../data/raw/season_stats"      # regular-season team stat CSVs
playoff_path = "../data/raw/playoff_stats"  # playoff team stat CSVs
rosters_path = "../data/raw/rosters"        # per-season roster CSVs

# Season identifiers in the "YYYY-YY" form that is passed to the nba_api
# endpoints as their `season` argument.
seasons = [
    "2014-15",
    "2015-16",
    "2016-17",
    "2017-18",
    "2018-19",
    "2019-20",
    "2020-21",
    "2021-22",
    "2022-23",
    "2023-24",
    "2024-25",
]

In [6]:
#CSV Function
def save_df(df, path):
    """Write ``df`` to ``path`` as CSV (index omitted) and print where it went.

    Any missing parent directories of ``path`` are created first, so the
    notebook also works on a fresh checkout where the output folders do not
    exist yet.

    Args:
        df: DataFrame to persist.
        path: Destination file path (string or path-like).
    """
    parent = os.path.dirname(path)
    if parent:  # empty when `path` is a bare filename in the current directory
        os.makedirs(parent, exist_ok=True)
    df.to_csv(path, index=False)
    print(f"Saved file to {path}")

In [None]:
#Regular Season Team Stats
def fetchsave(season, season_type='Regular Season', out_dir=None):
    """Download league-wide team stats for one season and save them as CSV.

    Two tables are requested from ``LeagueDashTeamStats`` -- the ``Base`` and
    the ``Advanced`` measure type -- and each one is written through
    ``save_df`` to ``{out_dir}/{season}_teams_{base|advanced}_stats.csv``.
    The function pauses one second after every request.

    Args:
        season: Season identifier handed to the endpoint, e.g. "2014-15".
        season_type: Value for the endpoint's ``season_type_all_star``
            argument. Defaults to 'Regular Season', the value this function
            previously hard-coded.
        out_dir: Directory the CSV files are written to. Defaults to the
            notebook-level ``base_path``.
    """
    if out_dir is None:
        out_dir = base_path

    # (endpoint measure type, label used in the output filename)
    for measure_type, label in (('Base', 'base'), ('Advanced', 'advanced')):
        stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            season_type_all_star=season_type,
            measure_type_detailed_defense=measure_type,
        )
        # Only the first frame returned by the endpoint is kept.
        stats_df = stats.get_data_frames()[0]
        save_df(stats_df, f"{out_dir}/{season}_teams_{label}_stats.csv")

        time.sleep(1)  # API Care: pause between consecutive requests

# Download the stat tables for every configured season, one season at a time.
for year in seasons:
    fetchsave(season=year)

print("All Seasons Downloaded")

In [None]:
#Playoff Team Stats
def fetchsave(season, season_type='Playoffs', out_dir=None):
    """Download league-wide playoff team stats for one season and save them as CSV.

    Two tables are requested from ``LeagueDashTeamStats`` -- the ``Base`` and
    the ``Advanced`` measure type -- and each one is written through
    ``save_df`` to ``{out_dir}/{season}_teams_{base|advanced}_stats.csv``.
    The function pauses one second after every request.

    Args:
        season: Season identifier handed to the endpoint, e.g. "2014-15".
        season_type: Value for the endpoint's ``season_type_all_star``
            argument. Defaults to 'Playoffs', the value this function
            previously hard-coded.
        out_dir: Directory the CSV files are written to. Defaults to the
            notebook-level ``playoff_path``.
    """
    if out_dir is None:
        out_dir = playoff_path

    # (endpoint measure type, label used in the output filename)
    for measure_type, label in (('Base', 'base'), ('Advanced', 'advanced')):
        stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            season_type_all_star=season_type,
            measure_type_detailed_defense=measure_type,
        )
        # Only the first frame returned by the endpoint is kept.
        stats_df = stats.get_data_frames()[0]
        save_df(stats_df, f"{out_dir}/{season}_teams_{label}_stats.csv")

        time.sleep(1)  # API Care: pause between consecutive requests

# Run the download defined in this cell once for each configured season.
for year in seasons:
    fetchsave(season=year)

print("All Seasons Downloaded")

Saved file to ../data/raw/playoff_stats/2014-15_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2014-15_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2015-16_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2015-16_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2016-17_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2016-17_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2017-18_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2017-18_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2018-19_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2018-19_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2019-20_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2019-20_teams_advanced_stats.csv
Saved file to ../data/raw/playoff_stats/2020-21_teams_base_stats.csv
Saved file to ../data/raw/playoff_stats/2020-21_teams_advanced_stats.csv
Saved 

In [None]:
# Fetch Team Rosters
def fetch_roster(season, path):
    """Download every team's roster for one season and save them as one CSV.

    Each team returned by ``teams.get_teams()`` is queried through
    ``CommonTeamRoster``. The first frame of every response gets a
    ``TEAM_ID`` column set to the queried team id, and all frames are
    concatenated and written to ``{path}/{season}_team_rosters.csv``.

    The download is best effort: a team whose request fails is reported and
    skipped, so the saved file may cover fewer teams than were queried. If no
    team succeeds, nothing is written.

    Args:
        season: Season identifier handed to the endpoint, e.g. "2014-15".
        path: Directory the roster CSV is written to.
    """
    nba_teams = teams.get_teams()
    all_rosters = []
    for team in nba_teams:
        team_id = team['id']
        try:
            team_roster = commonteamroster.CommonTeamRoster(team_id=team_id, season=season)
            df = team_roster.get_data_frames()[0]
            df['TEAM_ID'] = team_id
            all_rosters.append(df)
        except Exception as e:
            # Deliberately broad: one bad team must not abort the whole season.
            print(f"Failed for team {team_id}: {e}")
        # Throttle after every attempt, failed or not, so a run of errors
        # does not turn into back-to-back requests.
        time.sleep(1)
    if all_rosters:
        full_df = pd.concat(all_rosters, ignore_index=True)
        save_df(full_df, f"{path}/{season}_team_rosters.csv")
    else:
        print(f"No rosters fetched for {season}; nothing saved.")

# Build one roster CSV per configured season.
for year in seasons:
    fetch_roster(season=year, path=rosters_path)

Saved file to ../data/raw/rosters/2014-15_team_rosters.csv
Saved file to ../data/raw/rosters/2015-16_team_rosters.csv
Saved file to ../data/raw/rosters/2016-17_team_rosters.csv
Saved file to ../data/raw/rosters/2017-18_team_rosters.csv
Saved file to ../data/raw/rosters/2018-19_team_rosters.csv
Saved file to ../data/raw/rosters/2019-20_team_rosters.csv
Saved file to ../data/raw/rosters/2020-21_team_rosters.csv
Saved file to ../data/raw/rosters/2021-22_team_rosters.csv
Saved file to ../data/raw/rosters/2022-23_team_rosters.csv
Saved file to ../data/raw/rosters/2023-24_team_rosters.csv
Saved file to ../data/raw/rosters/2024-25_team_rosters.csv


In [None]:
# Scrape Playoff Success Scores
def get_playoff_results(season, timeout=30):
    """Scrape one season's playoff series and score each team by round reached.

    The Basketball-Reference playoff page for ``season`` is downloaded and the
    first ``<table>`` on it is walked row by row. A row counts as a series
    when its first cell is a recognised round name and its second cell holds
    at least two links (winner first, loser second). Both teams of a series
    are credited with that round's score, and each team keeps the highest
    score it was credited with:

        First Round = 1, Conference Semifinals = 2,
        Conference Finals = 3, Finals = 4

    Because the winner and the loser of a series receive the same score, the
    champion and the losing finalist both end up with 4.

    Args:
        season: Value interpolated into the page URL as ``NBA_{season}.html``
            and copied into the SEASON column.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pandas.DataFrame with one row per team and the columns SEASON,
        TEAM_NAME and SUCCESS_SCORE. An empty DataFrame is returned when the
        request fails, the response status is not 200, or no table is found.
    """
    url = f"https://www.basketball-reference.com/playoffs/NBA_{season}.html"
    print(f"Scraping: {url}")
    try:
        # Without a timeout an unresponsive server would hang the notebook.
        res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout)
    except requests.RequestException as exc:
        # Treat network errors like a bad status code: report, return nothing.
        print(f"Failed to fetch {url}: {exc}")
        return pd.DataFrame()
    if res.status_code != 200:
        print(f"Failed to fetch {url}: {res.status_code}")
        return pd.DataFrame()
    soup = BeautifulSoup(res.content, 'html.parser')

    # Round label as it appears in the first cell of a series row -> score.
    round_scores = {
        "Eastern Conference First Round": 1,
        "Western Conference First Round": 1,
        "Eastern Conference Semifinals": 2,
        "Western Conference Semifinals": 2,
        "Eastern Conference Finals": 3,
        "Western Conference Finals": 3,
        "Finals": 4,
    }

    # Find the playoff series table (the first table on the page)
    playoff_table = soup.find("table")
    if playoff_table is None:
        print("No playoff table found")
        return pd.DataFrame()

    teams_scores = {}
    for row in playoff_table.find_all("tr"):
        tds = row.find_all("td")
        if len(tds) < 2:
            continue
        score = round_scores.get(tds[0].get_text(strip=True))
        if score is None:
            continue  # not a series row
        # Second <td> contains "Winner over Loser"
        links = tds[1].find_all("a")
        if len(links) < 2:
            continue
        # Winner and loser both reached this round; keep each team's best.
        for link in links[:2]:
            team_name = link.get_text(strip=True)
            teams_scores[team_name] = max(score, teams_scores.get(team_name, 0))

    return pd.DataFrame([{
        "SEASON": season,
        "TEAM_NAME": team,
        "SUCCESS_SCORE": score
    } for team, score in teams_scores.items()])

# Scrape the playoff pages for the years 2015 through 2024 and combine them.
dfs = []
for season in range(2015, 2025):
    df = get_playoff_results(season)
    # A failed or unparsable season comes back empty; leave it out.
    if not df.empty:
        dfs.append(df)
    time.sleep(1)  # pause between page requests

if dfs:
    df_all = pd.concat(dfs, ignore_index=True)
    save_df(df_all, "../data/processed/nba_playoff_success.csv")
    print("Playoff success scores saved.")
else:
    # Do not write an empty CSV or report success when nothing was scraped.
    df_all = pd.DataFrame(columns=["SEASON", "TEAM_NAME", "SUCCESS_SCORE"])
    print("No playoff results were scraped; nothing saved.")