In [6]:
import pandas as pd
import requests
import time

def pull_data(url):
    headers = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    json = response.json()

    if len(json["resultSets"]) == 1:
        data = json["resultSets"][0]["rowSet"]
        columns = json["resultSets"][0]["headers"]
    else:
        data = json["resultSets"]["rowSet"]
        columns = json["resultSets"]["headers"][1]['columnNames']
    
    df = pd.DataFrame.from_records(data, columns=columns)
    return df

def get_omatchups_by_date(season, date, mode='Offense', ps=False):
    """
    Pulls offensive or defensive matchup data for a specific date in a given season.
    mode: 'Offense' or 'Defense'
    
    """
    date=''
    stype = 'Playoffs' if ps else 'Regular%20Season'
    url = (
        'https://stats.nba.com/stats/leagueseasonmatchups?'
        f'DateFrom={date}&DateTo={date}&DefPlayerID=&DefTeamID=&LeagueID=00'
        f'&Matchup={mode}&OffPlayerID=&OffTeamID=&Outcome=&PORound=0&PerMode=Totals'
        f'&Season={season}&SeasonType={stype}'
    )
    return pull_data(url)

# === Pipeline ===

# Load game_dates.csv
df = pd.read_csv("game_dates.csv")

# Convert season and filter from 2017 onward
df['season_end_year'] = df['season'].str.split('-').str[0].astype(int) + 1
filtered_df = df[df['season_end_year'] >= 2025]

# Unique date/season pairs
unique_dates = filtered_df[['date', 'season']].drop_duplicates()
unique_dates=unique_dates.head(1)
# Change mode here: 'Offense' or 'Defense'
mode = 'Offense'  # Change to 'Defense' if needed

all_data = []

for _, row in unique_dates.iterrows():
    date = row['date']
    season = row['season']
    print(f"Pulling {mode} matchups for {date} in season {season}")
    
    try:
        matchup_df = get_omatchups_by_date(season, date, mode=mode)
        matchup_df['game_date'] = date
        matchup_df['season'] = season
        matchup_df['mode'] = mode
        all_data.append(matchup_df)
    except Exception as e:
        print(f"Failed to pull data for {date} in {season}: {e}")
    
    time.sleep(0.7)  # Be kind to the NBA API

# Combine
final_df = pd.concat(all_data, ignore_index=True)

# Save

final_df.sort_values(by='GP')

Pulling Offense matchups for 20171017 in season 2017-18


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,TEAM_PTS,...,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC,game_date,season,mode
66244,22017,1626192,Pat Connaughton,1626151,Aaron Harrison,1,0:52,4.1,0,2,...,0,0,0,0,0,0,51.9,20171017,2017-18,Offense
89281,22017,201943,Brandon Jennings,203463,Ben McLemore,1,0:31,2.2,0,2,...,0,0,0,0,0,0,30.9,20171017,2017-18,Offense
89280,22017,201569,Eric Gordon,203460,Andre Roberson,1,0:23,2.2,3,3,...,0,0,0,0,0,0,23.2,20171017,2017-18,Offense
89279,22017,1713,Vince Carter,201162,Jared Dudley,1,0:39,2.2,0,0,...,0,0,0,0,0,0,39.4,20171017,2017-18,Offense
89277,22017,201587,Nicolas Batum,201163,Wilson Chandler,1,0:35,2.2,0,5,...,0,0,0,0,0,0,34.9,20171017,2017-18,Offense
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,22017,1626163,Frank Kaminsky,101141,Ersan Ilyasova,7,20:57,98.0,20,116,...,0,0,0,6,6,3,1257.1,20171017,2017-18,Offense
10273,22017,202689,Kemba Walker,101141,Ersan Ilyasova,7,5:47,26.8,19,36,...,0,0,0,1,2,0,346.7,20171017,2017-18,Offense
18183,22017,1627747,Caris LeVert,101141,Ersan Ilyasova,7,3:55,17.8,9,29,...,0,0,0,0,0,1,235.4,20171017,2017-18,Offense
8016,22017,201960,DeMarre Carroll,101141,Ersan Ilyasova,7,6:34,31.1,5,21,...,0,0,0,0,0,2,393.7,20171017,2017-18,Offense
