In [1]:
import pandas as pd
import requests
import time

def pull_data(url):
    headers = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    json = response.json()

    if len(json["resultSets"]) == 1:
        data = json["resultSets"][0]["rowSet"]
        columns = json["resultSets"][0]["headers"]
    else:
        data = json["resultSets"]["rowSet"]
        columns = json["resultSets"]["headers"][1]['columnNames']
    
    df = pd.DataFrame.from_records(data, columns=columns)
    return df

def get_omatchups_by_date(season, date, mode='Offense', ps=False):
    """
    Pulls offensive or defensive matchup data for a specific date in a given season.
    mode: 'Offense' or 'Defense'
    
    """
    date=''
    stype = 'Playoffs' if ps else 'Regular%20Season'
    url = (
        'https://stats.nba.com/stats/leagueseasonmatchups?'
        f'DateFrom={date}&DateTo={date}&DefPlayerID=&DefTeamID=&LeagueID=00'
        f'&Matchup={mode}&OffPlayerID=&OffTeamID=&Outcome=&PORound=0&PerMode=Totals'
        f'&Season={season}&SeasonType={stype}'
    )
    return pull_data(url)

# === Pipeline ===

# Load game_dates.csv
df = pd.read_csv("game_dates.csv")

# Convert season and filter from 2017 onward
df['season_end_year'] = df['season'].str.split('-').str[0].astype(int) + 1
filtered_df = df[df['season_end_year'] >= 2024]

# Unique date/season pairs
unique_dates = filtered_df[['date', 'season']].drop_duplicates()
unique_dates=unique_dates.head(1)
# Change mode here: 'Offense' or 'Defense'
mode = 'Offense'  # Change to 'Defense' if needed

all_data = []

for _, row in unique_dates.iterrows():
    date = row['date']
    season = row['season']
    print(f"Pulling {mode} matchups for {date} in season {season}")
    
    try:
        matchup_df = get_omatchups_by_date(season, date, mode=mode)
        matchup_df['game_date'] = date
        matchup_df['season'] = season
        matchup_df['mode'] = mode
        all_data.append(matchup_df)
    except Exception as e:
        print(f"Failed to pull data for {date} in {season}: {e}")
    
    time.sleep(0.7)  # Be kind to the NBA API

# Combine
final_df = pd.concat(all_data, ignore_index=True)

# Save

final_df

Pulling Offense matchups for 20231024 in season 2023-24


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,TEAM_PTS,...,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC,game_date,season,mode
0,22023,201942,DeMar DeRozan,1628384,OG Anunoby,6,43:45,229.5,32,189,...,0,0,0,5,7,4,2624.7,20231024,2023-24,Offense
1,22023,203999,Nikola Jokić,1627734,Domantas Sabonis,4,43:58,219.1,53,251,...,0,0,0,7,10,4,2638.1,20231024,2023-24,Offense
2,22023,1627734,Domantas Sabonis,202685,Jonas Valančiūnas,5,40:50,215.4,56,218,...,0,0,0,8,11,6,2450.4,20231024,2023-24,Offense
3,22023,1627736,Malik Beasley,1630169,Tyrese Haliburton,5,39:36,213.3,17,237,...,0,0,0,0,0,0,2376.4,20231024,2023-24,Offense
4,22023,203076,Anthony Davis,203994,Jusuf Nurkić,5,38:17,207.9,36,188,...,0,0,0,4,5,4,2296.5,20231024,2023-24,Offense
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137689,22023,1631298,Jack White,1630561,David Duke Jr.,1,0:02,0.0,0,0,...,0,0,0,0,0,0,2.2,20231024,2023-24,Offense
137690,22023,1630587,Isaiah Livers,1627751,Jakob Poeltl,1,0:01,0.0,0,0,...,0,0,0,0,0,0,1.4,20231024,2023-24,Offense
137691,22023,1627759,Jaylen Brown,203200,Justin Holiday,1,0:00,0.0,0,0,...,0,0,0,0,0,0,0.2,20231024,2023-24,Offense
137692,22023,1630641,Ibou Badji,1629661,Cameron Johnson,1,0:00,0.0,0,0,...,0,0,0,0,0,0,0.1,20231024,2023-24,Offense
