In [7]:
import requests
import pandas as pd
import itertools
import time
import numpy as np

In [8]:
BASE_URL = "https://api3.natst.at"
API_KEY = "9384-3abba6"

In [55]:
def list_all_dates_for_season(league, year):
    url = f"{BASE_URL}/{API_KEY}/games/{league}/{year}"
    print(url)

    # initialize set
    dates = set()
    res = requests.get(url).json()
    for key, value in res['games'].items():
        dates.add(value['gameday'])
    next_page_url = res['meta'].get('page-next', False)
    
    # loop through pages
    while(next_page_url is not False):
        print(next_page_url)
        res = requests.get(next_page_url).json()
        for key, value in res['games'].items():
            dates.add(value['gameday'])
        next_page_url = res['meta'].get('page-next', False)
    
    return dates

def get_pbp_by_date(league, date):
    url = f"{BASE_URL}/{API_KEY}/playbyplay/{league}/{date}"
    print(url)

    # initialize df
    df = pd.DataFrame()
    res = requests.get(url).json()
    for key, value in res['playbyplay'].items():
        df_temp = pd.json_normalize(value)
        df = pd.concat([df, df_temp])
    next_page_url = res['meta'].get('page-next', False)

    # loop through pages
    while(next_page_url is not False):
        print(next_page_url)
        res = requests.get(next_page_url).json()
        for key, value in res['playbyplay'].items():
            df_temp = pd.json_normalize(value)
            df = pd.concat([df, df_temp])
        next_page_url = res['meta'].get('page-next', False)

    return df

def get_list_of_game_codes_from_single_day_pbp(df):
    return df['game.code'].unique().tolist()

def get_final_score(df, game_code):
    last_row = df[df['game.code'] == game_code].tail(1)
    home_team = last_row.iloc[0]['game.home']
    away_team = last_row.iloc[0]['game.visitor']
    winner = home_team if int(last_row.iloc[0]['game.score-home']) > int(last_row.iloc[0]['game.score-vis']) else away_team
    return {
        'gameTitle': last_row.iloc[0]['game.description'],
        'date': last_row.iloc[0]['game.gameday'],
        'home': home_team,
        'away': away_team,
        'winner': winner,
        'winnerScore': max(last_row.iloc[0]['game.score-home'], last_row.iloc[0]['game.score-vis']),
        'loseScore': min(last_row.iloc[0]['game.score-home'], last_row.iloc[0]['game.score-vis'])
    }

def clean_pbp_df(df: pd.DataFrame):
    mapping = {
        'Atlanta': 'Atlanta Hawks',
        'Boston': 'Boston Celtics',
        'Brooklyn': 'Brooklyn Nets',
        'Charlotte': 'Charlotte Hornets',
        'Chicago': 'Chicago Bulls',
        'Cleveland': 'Cleveland Cavaliers',
        'Dallas': 'Dallas Mavericks',
        'Denver': 'Denver Nuggets',
        'Detroit': 'Detroit Pistons',
        'Golden State': 'Golden State Warriors',
        'Houston': 'Houston Rockets',
        'Indiana': 'Indiana Pacers',
        'L.A. Clippers': 'Los Angeles Clippers',
        'L.A. Lakers': 'Los Angeles Lakers',
        'Memphis': 'Memphis Grizzlies',
        'Miami': 'Miami Heat',
        'Milwaukee': 'Milwaukee Bucks',
        'Minnesota': 'Minnesota Timberwolves',
        'New Orleans': 'New Orleans Pelicans',
        'New York': 'New York Knicks',
        'Oklahoma City': 'Oklahoma City Thunder',
        'Orlando': 'Orlando Magic',
        'Philadelphia': 'Philadelphia 76ers',
        'Phoenix': 'Phoenix Suns',
        'Portland': 'Portland Trail Blazers',
        'Sacramento': 'Sacramento Kings',
        'San Antonio': 'San Antonio Spurs',
        'Toronto': 'Toronto Raptors',
        'Utah': 'Utah Jazz',
        'Washington': 'Washington Wizards'
    }
    df['game.visitor'] = df['game.visitor'].map(mapping).fillna(df['game.visitor'])
    df['game.home'] = df['game.home'].map(mapping).fillna(df['game.home'])

def get_deficit_time(df, game_code):
    box_score = get_final_score(df, game_code)
    winning_team = box_score['winner']

    df_game = df[df['game.code'] == game_code]
    df_winner = df_game[df_game['team.team'] == winning_team]
    
    deficit = min(pd.to_numeric(df_winner['thediff'], errors='coerce', downcast='integer'))
    df_deficit = df_winner[df_winner['thediff'] == deficit].tail(1)
    return {
        'game title': box_score['gameTitle'],
        'date': box_score['date'],
        'winning_team': winning_team,
        'deficit': deficit,
        'period': df_deficit.iloc[0]['game.period'],
        'clock': df_deficit.iloc[0]['game.time'],
        'seconds_remaining': get_seconds_remaining(df_deficit.iloc[0]['game.period'], df_deficit.iloc[0]['game.time'])
    }

def get_seconds_remaining(period, clock: str):
    minutes, seconds = clock.split(':')
    if period < 4:
        return (4-int(period))*12*60 + int(minutes)*60 + float(seconds)
    else:
        return int(minutes)*60 + float(seconds)

In [10]:
active_game_dates = list_all_dates_for_season("NBA", 2024)
active_game_dates

https://api3.natst.at/9384-3abba6/games/NBA/2024
https://api3.natst.at/9384-3abba6/games/nba/2024/100
https://api3.natst.at/9384-3abba6/games/nba/2024/200
https://api3.natst.at/9384-3abba6/games/nba/2024/300
https://api3.natst.at/9384-3abba6/games/nba/2024/400
https://api3.natst.at/9384-3abba6/games/nba/2024/500
https://api3.natst.at/9384-3abba6/games/nba/2024/600
https://api3.natst.at/9384-3abba6/games/nba/2024/700
https://api3.natst.at/9384-3abba6/games/nba/2024/800
https://api3.natst.at/9384-3abba6/games/nba/2024/900
https://api3.natst.at/9384-3abba6/games/nba/2024/1000
https://api3.natst.at/9384-3abba6/games/nba/2024/1100
https://api3.natst.at/9384-3abba6/games/nba/2024/1200


{'2023-10-24',
 '2023-10-25',
 '2023-10-26',
 '2023-10-27',
 '2023-10-28',
 '2023-10-29',
 '2023-10-30',
 '2023-10-31',
 '2023-11-01',
 '2023-11-02',
 '2023-11-03',
 '2023-11-04',
 '2023-11-05',
 '2023-11-06',
 '2023-11-08',
 '2023-11-09',
 '2023-11-10',
 '2023-11-11',
 '2023-11-12',
 '2023-11-13',
 '2023-11-14',
 '2023-11-15',
 '2023-11-16',
 '2023-11-17',
 '2023-11-18',
 '2023-11-19',
 '2023-11-20',
 '2023-11-21',
 '2023-11-22',
 '2023-11-24',
 '2023-11-25',
 '2023-11-26',
 '2023-11-27',
 '2023-11-28',
 '2023-11-29',
 '2023-11-30',
 '2023-12-01',
 '2023-12-02',
 '2023-12-04',
 '2023-12-05',
 '2023-12-06',
 '2023-12-07',
 '2023-12-08',
 '2023-12-09',
 '2023-12-11',
 '2023-12-12',
 '2023-12-13',
 '2023-12-14',
 '2023-12-15',
 '2023-12-16',
 '2023-12-17',
 '2023-12-18',
 '2023-12-19',
 '2023-12-20',
 '2023-12-21',
 '2023-12-22',
 '2023-12-23',
 '2023-12-25',
 '2023-12-26',
 '2023-12-27',
 '2023-12-28',
 '2023-12-29',
 '2023-12-30',
 '2023-12-31',
 '2024-01-01',
 '2024-01-02',
 '2024-01-

In [11]:
len(active_game_dates)

208

In [27]:
df.columns

Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'id', 'explanation',
       'scoringplay', 'tags', 'thediff', 'players.secondary.code',
       'players.secondary.name', 'team.code', 'team.team',
       'opponent.opponent_id', 'opponent.opponent', 'game.code',
       'game.gameday', 'game.description', 'game.visitor', 'game.visitor-code',
       'game.score-vis', 'game.home', 'game.home-code', 'game.score-home',
       'game.period', 'game.time', 'game.sequence', 'game.playoffs',
       'game.finals', 'game.onfloorvis', 'game.onfloorhome', 'venue.code',
       'venue.name', 'venue.location', 'venue.longitude', 'venue.latitude',
       'gametype.series', 'gametype.seriesname', 'gametype.seriesgameno',
       'gametype.seriesstatus', 'players.primary.code', 'players.primary.name',
       'distance'],
      dtype='object')

In [48]:
df = pd.read_csv('full_output.csv')
# df = df[df['game.home'].notna()] #filter non-null rows out
# df.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0'], inplace=True)
df

  df = pd.read_csv('full_output.csv')


Unnamed: 0.1,Unnamed: 0,id,explanation,scoringplay,tags,thediff,players.secondary.code,players.secondary.name,team.code,team.team,...,venue.location,venue.longitude,venue.latitude,gametype.series,gametype.seriesname,gametype.seriesgameno,gametype.seriesstatus,players.primary.code,players.primary.name,distance
0,0,40672415,"Myles Turner vs, Al Horford (Pascal Siakam gai...",N,MISC,0,119739.0,Pascal Siakam,5,Boston Celtics,...,"Boston, MA",-71.062390,42.365340,2024-BOS|IND-CF,Conference Finals,1.0,BOS leads 1-0,,,
1,1,40672416,Aaron Nesmith bad pass (Jayson Tatum steals),N,TO,0,329820.0,Jayson Tatum,5,Boston Celtics,...,"Boston, MA",-71.062390,42.365340,2024-BOS|IND-CF,Conference Finals,1.0,BOS leads 1-0,,,
2,2,40672417,Jaylen Brown makes dunk (Jayson Tatum assists),Y,FGM|FGA|AST,-2,329820.0,Jayson Tatum,18,Indiana Pacers,...,"Boston, MA",-71.062390,42.365340,2024-BOS|IND-CF,Conference Finals,1.0,BOS leads 1-0,119722.0,Jaylen Brown,
3,3,40672418,Al Horford blocks Pascal Siakam 1-foot driving...,N,FGA|BLK,2,119739.0,Pascal Siakam,5,Boston Celtics,...,"Boston, MA",-71.062390,42.365340,2024-BOS|IND-CF,Conference Finals,1.0,BOS leads 1-0,1754.0,Al Horford,1.0
4,4,40672419,Celtics Full timeout,N,TIMEOUT,-2,84053334.0,Nae'Qwan Tomlin,18,Indiana Pacers,...,"Boston, MA",-71.062390,42.365340,2024-BOS|IND-CF,Conference Finals,1.0,BOS leads 1-0,84053334.0,Nae'Qwan Tomlin,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570996,571455,40632234,Jusuf Nurkic makes free throw 2 of 2,Y,FTM|FTA,-1,84053334.0,Nae'Qwan Tomlin,15,Sacramento Kings,...,"Sacramento, CA",-121.525154,38.525231,,,,,2867.0,Jusuf Nurkic,
570997,571456,40632235,Kings Full timeout,N,TIMEOUT,1,84053334.0,Nae'Qwan Tomlin,14,Phoenix Suns,...,"Sacramento, CA",-121.525154,38.525231,,,,,84053334.0,Nae'Qwan Tomlin,
570998,571457,40632236,Royce O'Neale replaces Jusuf Nurkic,N,SUB,-1,2867.0,Jusuf Nurkic,15,Sacramento Kings,...,"Sacramento, CA",-121.525154,38.525231,,,,,,,
570999,571458,40632237,Bradley Beal personal foul,N,PF,-1,84053334.0,Nae'Qwan Tomlin,15,Sacramento Kings,...,"Sacramento, CA",-121.525154,38.525231,,,,,242.0,Bradley Beal,


In [24]:
# continue querying
already_queried_dates = set(df['game.gameday'])
remaining = active_game_dates - already_queried_dates
len(remaining)

0

In [None]:
# for date in remaining:
#     print(f"Querying for day: {date}...")
#     single_day_pbp_df = get_pbp_by_date("NBA", date)
#     df = pd.concat([df, single_day_pbp_df])
#     print("Successfully added day. Sleeping for 5s...")
#     time.sleep(5)

Querying for day: 2024-05-07...
https://api3.natst.at/9384-3abba6/playbyplay/NBA/2024-05-07
https://api3.natst.at/9384-3abba6/playbyplay/nba/2024-05-07/500
Successfully added day. Sleeping for 5s...
Querying for day: 2023-11-22...
https://api3.natst.at/9384-3abba6/playbyplay/NBA/2023-11-22
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/500
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/1000
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/1500
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/2000
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/2500
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/3000
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/3500
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/4000
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/4500
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/5000
https://api3.natst.at/9384-3abba6/playbyplay/nba/2023-11-22/

In [49]:
clean_pbp_df(df)

In [51]:
sorted(df['game.home'].unique().tolist())

['Atlanta Hawks',
 'Boston Celtics',
 'Brooklyn Nets',
 'Charlotte Hornets',
 'Chicago Bulls',
 'Cleveland Cavaliers',
 'Dallas Mavericks',
 'Denver Nuggets',
 'Detroit Pistons',
 'Golden State Warriors',
 'Houston Rockets',
 'Indiana Pacers',
 'Los Angeles Clippers',
 'Los Angeles Lakers',
 'Memphis Grizzlies',
 'Miami Heat',
 'Milwaukee Bucks',
 'Minnesota Timberwolves',
 'New Orleans Pelicans',
 'New York Knicks',
 'Oklahoma City Thunder',
 'Orlando Magic',
 'Philadelphia 76ers',
 'Phoenix Suns',
 'Portland Trail Blazers',
 'Sacramento Kings',
 'San Antonio Spurs',
 'Toronto Raptors',
 'Utah Jazz',
 'Washington Wizards']

In [52]:
df.to_csv('full_output.csv')

In [53]:
len(get_list_of_game_codes_from_single_day_pbp(df))

1247

In [56]:
comebacks = []
for game in get_list_of_game_codes_from_single_day_pbp(df):
    output = get_deficit_time(df, game)
    comebacks.append(output)

In [57]:
comebacks_df = pd.DataFrame(comebacks)
comebacks_df

Unnamed: 0,game title,date,winning_team,deficit,period,clock,seconds_remaining
0,Indiana Pacers at Boston Celtics,2024-05-21,Boston Celtics,-5,4,1:43,103.0
1,Brooklyn Nets at Boston Celtics,2023-11-10,Boston Celtics,-1,1,10:28,2788.0
2,Charlotte Hornets at Washington Wizards,2023-11-10,Charlotte Hornets,-16,2,6:45,1845.0
3,Los Angeles Clippers at Dallas Mavericks,2023-11-10,Dallas Mavericks,-11,1,1:53,2273.0
4,Philadelphia 76ers at Detroit Pistons,2023-11-10,Philadelphia 76ers,-15,2,2:39,1599.0
...,...,...,...,...,...,...,...
1242,Houston Rockets at Portland Trail Blazers,2024-04-12,Houston Rockets,-8,1,4:58,2458.0
1243,Utah Jazz at Los Angeles Clippers,2024-04-12,Utah Jazz,-13,1,0:28.0,2188.0
1244,Toronto Raptors at Miami Heat,2024-04-12,Miami Heat,0,1,7:52,2632.0
1245,Milwaukee Bucks at Oklahoma City Thunder,2024-04-12,Oklahoma City Thunder,-3,1,9:26,2726.0


In [58]:
comebacks_df['analytical_lead_minus_five_abs'] = comebacks_df['deficit'].apply(lambda x: min(x+5, 0))
comebacks_df['analytical_time_required'] = (comebacks_df['analytical_lead_minus_five_abs'])**2
comebacks_df['analytical_diff'] = comebacks_df['analytical_time_required'] - comebacks_df['seconds_remaining']
comebacks_df

Unnamed: 0,game title,date,winning_team,deficit,period,clock,seconds_remaining,analytical_lead_minus_five_abs,analytical_time_required,analytical_diff
0,Indiana Pacers at Boston Celtics,2024-05-21,Boston Celtics,-5,4,1:43,103.0,0,0,-103.0
1,Brooklyn Nets at Boston Celtics,2023-11-10,Boston Celtics,-1,1,10:28,2788.0,0,0,-2788.0
2,Charlotte Hornets at Washington Wizards,2023-11-10,Charlotte Hornets,-16,2,6:45,1845.0,-11,121,-1724.0
3,Los Angeles Clippers at Dallas Mavericks,2023-11-10,Dallas Mavericks,-11,1,1:53,2273.0,-6,36,-2237.0
4,Philadelphia 76ers at Detroit Pistons,2023-11-10,Philadelphia 76ers,-15,2,2:39,1599.0,-10,100,-1499.0
...,...,...,...,...,...,...,...,...,...,...
1242,Houston Rockets at Portland Trail Blazers,2024-04-12,Houston Rockets,-8,1,4:58,2458.0,-3,9,-2449.0
1243,Utah Jazz at Los Angeles Clippers,2024-04-12,Utah Jazz,-13,1,0:28.0,2188.0,-8,64,-2124.0
1244,Toronto Raptors at Miami Heat,2024-04-12,Miami Heat,0,1,7:52,2632.0,0,0,-2632.0
1245,Milwaukee Bucks at Oklahoma City Thunder,2024-04-12,Oklahoma City Thunder,-3,1,9:26,2726.0,0,0,-2726.0


In [None]:
comebacks_df.to_csv('comebacks_v1.csv')