#### Imports

In [1]:
import pandas as pd
import requests
import json

#### Functions to get standard and advanced box score data

In [2]:
def getStdBoxScores(season, team, timeout=30):
    """Fetch one team's regular-season game logs from stats.nba.com.

    No ``MeasureType`` is sent, so the endpoint's default measure type is
    returned (presumably the traditional box score stats -- confirm against
    the API).

    Args:
        season: Season label sent as the ``Season`` query parameter,
            e.g. ``"2023-24"``.
        team: Team identifier sent as the ``TeamID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A DataFrame built from the first result set (``rowSet`` rows with
        ``headers`` as column names) when the response status is 200,
        otherwise ``None`` after printing the status code.

    Raises:
        Whatever ``requests.get`` raises for connection errors or timeouts;
        those are not caught here.
    """
    url = "https://stats.nba.com/stats/teamgamelogs"
    # Browser-like headers copied from a real nba.com session.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Origin": "https://www.nba.com",
        "Referer": "https://www.nba.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3.1 Safari/605.1.15"
    }

    params = {
        "DateFrom": "",
        "DateTo": "",
        "Season": season,
        "SeasonType": "Regular Season",
        "TeamID": team
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        # Explicit so callers can see that failure is signalled with None.
        return None

    result_set = response.json()['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])



In [3]:
def getAdvBoxScores(season, team, timeout=30):
    """Fetch one team's advanced regular-season game logs from stats.nba.com.

    The request sets ``MeasureType`` to ``"Advanced"`` and ``PerMode`` to
    ``"Totals"``; every other filter is left at its empty or zero value.

    Args:
        season: Season label sent as the ``Season`` query parameter,
            e.g. ``"2023-24"``.
        team: Team identifier sent as the ``TeamID`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A DataFrame built from the first result set (``rowSet`` rows with
        ``headers`` as column names) when the response status is 200,
        otherwise ``None`` after printing the status code.

    Raises:
        Whatever ``requests.get`` raises for connection errors or timeouts;
        those are not caught here.
    """
    url = "https://stats.nba.com/stats/teamgamelogs"
    # Browser-like headers copied from a real nba.com session.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Origin": "https://www.nba.com",
        "Referer": "https://www.nba.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3.1 Safari/605.1.15"
    }

    params = {
        "DateFrom": "",
        "DateTo": "",
        "GameSegment": "",
        "ISTRound": "",
        "LastNGames": "0",
        "LeagueID": "00",
        "Location": "",
        "MeasureType": "Advanced",
        "Month": "0",
        "OpponentTeamID": "0",
        "Outcome": "",
        "PORound": "0",
        "PaceAdjust": "N",
        "PerMode": "Totals",
        "Period": "0",
        "PlusMinus": "N",
        "Rank": "N",
        "Season": season,
        "SeasonSegment": "",
        "SeasonType": "Regular Season",
        "ShotClockRange": "",
        "TeamID": team,
        "VsConference": "",
        "VsDivision": ""
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        # Explicit so callers can see that failure is signalled with None.
        return None

    result_set = response.json()['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])


#### Function to return merged boxscore data

In [4]:
def mergeBoxScoreData(data_std, data_adv):
    """Join the standard and advanced game logs into one row per game.

    The descriptive and rank columns listed below are removed from
    ``data_adv`` before the join, then the remaining advanced columns are
    attached to ``data_std`` with an inner merge on ``GAME_ID``.

    Args:
        data_std: DataFrame of standard game logs; must contain ``GAME_ID``.
        data_adv: DataFrame of advanced game logs; must contain ``GAME_ID``
            and every column named in ``redundant_columns``.

    Returns:
        The merged DataFrame.
    """
    redundant_columns = [
        'SEASON_YEAR', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_DATE',
        'MATCHUP', 'WL', 'MIN', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK',
        'AVAILABLE_FLAG',
    ]
    advanced_only = data_adv.drop(columns=redundant_columns)
    return data_std.merge(advanced_only, on='GAME_ID')

#### Run box score collection for all 30 teams for the past 10 seasons

In [5]:
# Team ID -> team name, used to label progress output during collection.
nba_teams = {
    "1610612737": "Atlanta Hawks",
    "1610612738": "Boston Celtics",
    "1610612751": "Brooklyn Nets",
    "1610612766": "Charlotte Hornets",
    "1610612741": "Chicago Bulls",
    "1610612739": "Cleveland Cavaliers",
    "1610612742": "Dallas Mavericks",
    "1610612743": "Denver Nuggets",
    "1610612765": "Detroit Pistons",
    "1610612744": "Golden State Warriors",
    "1610612745": "Houston Rockets",
    "1610612754": "Indiana Pacers",
    "1610612746": "LA Clippers",
    "1610612747": "Los Angeles Lakers",
    "1610612763": "Memphis Grizzlies",
    "1610612748": "Miami Heat",
    "1610612749": "Milwaukee Bucks",
    "1610612750": "Minnesota Timberwolves",
    "1610612740": "New Orleans Pelicans",
    "1610612752": "New York Knicks",
    "1610612760": "Oklahoma City Thunder",
    "1610612753": "Orlando Magic",
    "1610612755": "Philadelphia 76ers",
    "1610612756": "Phoenix Suns",
    "1610612757": "Portland Trail Blazers",
    "1610612758": "Sacramento Kings",
    "1610612759": "San Antonio Spurs",
    "1610612761": "Toronto Raptors",
    "1610612762": "Utah Jazz",
    "1610612764": "Washington Wizards",
}

# The IDs to collect, in the same order as the mapping above (dicts keep
# insertion order), so the two can never drift apart.
team_ids = list(nba_teams)

# Seasons to collect, most recent first.
seasons = [
    '2023-24', '2022-23', '2021-22', '2020-21', '2019-20',
    '2018-19', '2017-18', '2016-17', '2015-16', '2014-15',
]

In [6]:
""" team_ids = [
    "1610612737", "1610612738"
]
seasons = ['2024-25', '2023-24'] """

# Collect one merged (standard + advanced) frame per season/team pair and
# concatenate once at the end, rather than re-copying fullData on every pass.
box_score_frames = []
failed_requests = []  # (season, team_id) pairs whose fetch returned no data

for season in seasons:
    for team_id in team_ids:
        std = getStdBoxScores(season, team_id)
        adv = getAdvBoxScores(season, team_id)

        if std is None or adv is None:
            # The fetch helpers print the HTTP status and return None on a
            # non-200 response; skip the pair so one bad response does not
            # abort the run and discard everything collected so far.
            failed_requests.append((season, team_id))
            print(f"Skipping {season}, {nba_teams.get(team_id)}: request failed")
            continue

        box_score_frames.append(mergeBoxScoreData(std, adv))
        print(f"{season}, {nba_teams.get(team_id)}")

if box_score_frames:
    fullData = pd.concat(box_score_frames)
    fullData.to_csv('nba_team_boxscores.csv', index=False)
else:
    # Nothing was fetched: keep an empty frame and leave any existing CSV alone.
    fullData = pd.DataFrame()
    print("No box score data collected; nba_team_boxscores.csv was not written")

if failed_requests:
    print(f"{len(failed_requests)} season/team requests failed: {failed_requests}")

2023-24, Atlanta Hawks
2023-24, Boston Celtics
2023-24, Brooklyn Nets
2023-24, Charlotte Hornets
2023-24, Chicago Bulls
2023-24, Cleveland Cavaliers
2023-24, Dallas Mavericks
2023-24, Denver Nuggets
2023-24, Detroit Pistons
2023-24, Golden State Warriors
2023-24, Houston Rockets
2023-24, Indiana Pacers
2023-24, LA Clippers
2023-24, Los Angeles Lakers
2023-24, Memphis Grizzlies
2023-24, Miami Heat
2023-24, Milwaukee Bucks
2023-24, Minnesota Timberwolves
2023-24, New Orleans Pelicans
2023-24, New York Knicks
2023-24, Oklahoma City Thunder
2023-24, Orlando Magic
2023-24, Philadelphia 76ers
2023-24, Phoenix Suns
2023-24, Portland Trail Blazers
2023-24, Sacramento Kings
2023-24, San Antonio Spurs
2023-24, Toronto Raptors
2023-24, Utah Jazz
2023-24, Washington Wizards
2022-23, Atlanta Hawks
2022-23, Boston Celtics
2022-23, Brooklyn Nets
2022-23, Charlotte Hornets
2022-23, Chicago Bulls
2022-23, Cleveland Cavaliers
2022-23, Dallas Mavericks
2022-23, Denver Nuggets
2022-23, Detroit Pistons
202

In [7]:
# Display the combined box-score frame built by the collection loop.
fullData

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,PACE_RANK,PIE_RANK
0,2023-24,1610612737,ATL,Atlanta Hawks,0022301188,2024-04-14T00:00:00,ATL @ IND,L,48.0,39,...,67,50,66,62,78,65,59,45,7,81
1,2023-24,1610612737,ATL,Atlanta Hawks,0022301178,2024-04-12T00:00:00,ATL @ MIN,L,48.0,40,...,62,62,54,9,30,54,63,68,36,72
2,2023-24,1610612737,ATL,Atlanta Hawks,0022301159,2024-04-10T00:00:00,ATL vs. CHA,L,48.0,43,...,42,2,59,4,15,78,6,11,59,36
3,2023-24,1610612737,ATL,Atlanta Hawks,0022301147,2024-04-09T00:00:00,ATL vs. MIA,L,58.0,45,...,56,53,14,3,4,53,79,81,81,52
4,2023-24,1610612737,ATL,Atlanta Hawks,0022301130,2024-04-06T00:00:00,ATL @ DEN,L,48.0,37,...,51,23,74,21,75,58,64,55,17,79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,2014-15,1610612764,WAS,Washington Wizards,0021400063,2014-11-05T00:00:00,WAS vs. IND,W,53.0,34,...,22,51,63,72,74,14,77,70,64,43
78,2014-15,1610612764,WAS,Washington Wizards,0021400051,2014-11-04T00:00:00,WAS @ NYK,W,48.0,39,...,17,18,27,80,58,25,20,30,74,10
79,2014-15,1610612764,WAS,Washington Wizards,0021400031,2014-11-01T00:00:00,WAS vs. MIL,W,48.0,40,...,49,15,65,74,69,66,6,6,7,9
80,2014-15,1610612764,WAS,Washington Wizards,0021400016,2014-10-30T00:00:00,WAS @ ORL,W,48.0,39,...,28,50,70,8,49,19,26,20,32,38


In [12]:
# Requires the nba_api package; if it is missing, install it once with: %pip install nba_api

In [None]:
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder

# Fetch playoff wins using nba_api
# NOTE(review): this rebinds `seasons` (a list of "YYYY-YY" strings in the
# box-score cells above) to integer season end years. Re-run the cell that
# defines the string list before re-running the box-score collection.
seasons = list(range(2015, 2025))
playoff_wins_list = []

for season in seasons:
    # "YYYY-YY" label for the season ending in `season`, e.g. 2015 -> "2014-15".
    season_label = f"{season-1}-{str(season)[2:]}"

    games = leaguegamefinder.LeagueGameFinder(
        season_nullable=season_label,
        league_id_nullable="00",
        season_type_nullable="Playoffs"
    )
    df = games.get_data_frames()[0]

    if df.empty:
        print(f"No playoff data found for {season}")
        continue

    # Count playoff wins per team. Teams with no playoff win have no 'W' rows
    # and therefore do not appear in this frame at all.
    wins = df[df['WL'] == 'W'].groupby('TEAM_NAME').size().reset_index(name='PLAYOFF_WINS')
    wins["SEASON_YEAR"] = season_label

    playoff_wins_list.append(wins)

# pd.concat cannot combine an empty list; fail with a message that says why.
if not playoff_wins_list:
    raise ValueError("No playoff data was returned for any season; nothing to save")

# Combine all years into one DataFrame
playoff_wins_df = pd.concat(playoff_wins_list, ignore_index=True)

# Sort by year and wins
playoff_wins_df = playoff_wins_df.sort_values(["SEASON_YEAR", "PLAYOFF_WINS"], ascending=[False, False])

# Save to CSV
playoff_wins_df.to_csv("nba_playoff_wins_2015_2025.csv", index=False)

No playoff data found for 2025


In [None]:
# Load the game stats dataset
game_stats_df = pd.read_csv("nba_team_boxscores.csv")  # Update with your actual filename

# Load the playoff wins dataset
playoff_wins_df = pd.read_csv("nba_playoff_wins_2015_2025.csv")

# Merge on season and team name
merged_df = game_stats_df.merge(playoff_wins_df, on=["SEASON_YEAR", "TEAM_NAME"], how="left")

# Fill NaN values (teams without playoff wins should have 0)
merged_df["PLAYOFF_WINS"] = merged_df["PLAYOFF_WINS"].fillna(0).astype(int)

# Reset index for clean formatting
merged_df = merged_df.reset_index(drop=True)

# Save merged data to CSV
merged_csv_filename = "nba_merged_data.csv"
merged_df.to_csv(merged_csv_filename, index=False)

# Print confirmation & preview
print(f"Saved merged data to {merged_csv_filename}")
merged_df.head(10)  # Show top 10 rows

Saved merged data to nba_merged_data.csv
  SEASON_YEAR     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME   GAME_ID  \
0     2023-24  1610612737               ATL  Atlanta Hawks  22301188   
1     2023-24  1610612737               ATL  Atlanta Hawks  22301178   
2     2023-24  1610612737               ATL  Atlanta Hawks  22301159   
3     2023-24  1610612737               ATL  Atlanta Hawks  22301147   
4     2023-24  1610612737               ATL  Atlanta Hawks  22301130   
5     2023-24  1610612737               ATL  Atlanta Hawks  22301124   
6     2023-24  1610612737               ATL  Atlanta Hawks  22301104   
7     2023-24  1610612737               ATL  Atlanta Hawks  22301091   
8     2023-24  1610612737               ATL  Atlanta Hawks  22301076   
9     2023-24  1610612737               ATL  Atlanta Hawks  22301060   

             GAME_DATE      MATCHUP WL   MIN  FGM  ...  AST_RATIO_RANK  \
0  2024-04-14T00:00:00    ATL @ IND  L  48.0   39  ...              50   
1  2024-04-12T00:0