In [82]:
import pandas as pd
import arrow
from nba_api.stats.static import teams
from nba_api.stats import endpoints
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 60)


In [28]:
def get_celtics_id():
    nba_teams = teams.get_teams()
    # Select the dictionary for the Celtics, which contains their team ID
    celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
    return celtics['id']    

def get_games_for_team(team_id: int, season: str = None, start_date: arrow.Arrow = None, end_date: arrow.Arrow = None, regular_season: bool = True) -> pd.DataFrame:

    season_type = None
    if regular_season:
        season_type = "Regular Season"

    gamefinder = endpoints.leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_type_nullable=season_type)
    # The first DataFrame of those returned is what we want.
    games = gamefinder.get_data_frames()[0]
    if season:
        games = games.loc[games.SEASON_ID.str[-4:] == season, :].reset_index(drop=True)

    # IDK why it's counting some random summer leage games as regular season games..., lets drop those.
    if regular_season:
        games = games.loc[games.GAME_DATE.str[5:7] != "07", :].reset_index(drop=True)
    return games.sort_values("GAME_DATE").reset_index(drop=True)

def get_game_scoring_timeline(game_id: str):
    play_by_play = endpoints.playbyplayv2.PlayByPlayV2(game_id).get_data_frames()[0]
    return play_by_play

In [38]:
CELTICS_TEAM_ID = get_celtics_id()
games = get_games_for_team(CELTICS_TEAM_ID, season="2022")


SEASON_ID                     22022
TEAM_ID                  1610612738
TEAM_ABBREVIATION               BOS
TEAM_NAME            Boston Celtics
GAME_ID                  0022200001
GAME_DATE                2022-10-18
MATCHUP                 BOS vs. PHI
WL                                W
MIN                             240
PTS                             126
FGM                              46
FGA                              82
FG_PCT                        0.561
FG3M                             12
FG3A                             35
FG3_PCT                       0.343
FTM                              22
FTA                              28
FT_PCT                        0.786
OREB                              6
DREB                             30
REB                              36
AST                              24
STL                               8
BLK                               3
TOV                              10
PF                               24
PLUS_MINUS                  

In [139]:
def is_home_game(game: pd.Series):
    return "vs." in game.MATCHUP

def parse_timeline_event(event: pd.Series):
    if not event.SCORE:
        return None
    away_score, home_score = event.SCORE.split(" - ")

    if event.PERIOD >= 5:
        min_per_period = 5
        previous_period_mins = 48 + (event.PERIOD - 5) * 5 
    else:
        min_per_period = 12
        previous_period_mins = 12 * (event.PERIOD - 1)
    
    period_remaining_min, period_remaining_sec = event.PCTIMESTRING.split(":")

    elapsed_min = previous_period_mins + min_per_period - (int(period_remaining_min) + 1)
    elapsed_sec = 60 - int(period_remaining_sec)
    
    return pd.Series({
        "HOME_SCORE": int(home_score),
        "AWAY_SCORE": int(away_score),
        "GAME_TIMESTRING": f"{elapsed_min:02}:{elapsed_sec:02}", 
        "SCOREMARGIN": int(home_score) - int(away_score)
    })


def calculate_team_specific_timeline_stats(event: pd.Series):
    game = games.loc[games.GAME_ID == event.GAME_ID, :]
    is_home = is_home_game(game)
    return pd.Series({
        "TEAM_SCORE": event.HOME_SCORE if is_home_game else event.AWAY_SCORE,
        "OPPONENT_SCORE": event.AWAY_SCORE if is_home_game else event.HOME_SCORE,
        "TEAM_MARGIN":  event.SCOREMARGIN if is_home_game else -1 * event.SCOREMARGIN,
        "OUTCOME": game.WL
    })

def parse_timeline(timeline: pd.DataFrame):
    is_scoring_play = timeline.SCORE.notnull() & timeline.PLAYER1_NAME.notnull()
    additions = timeline.loc[is_scoring_play, :].apply(parse_timeline_event, axis=1)
    score_timeline = timeline.loc[is_scoring_play, ["GAME_ID", "PERIOD", "SCORE", "PLAYER1_ID", "PLAYER1_NAME", "PLAYER1_TEAM_ID"]].join(additions)
    team_specific_stats = score_timeline.apply(calculate_team_specific_timeline_stats, axis=1)
    return score_timeline.join(team_specific_stats)
    
# Outcome functions
def outcome_early_lead(timeline: pd.DataFrame):
    is_early_lead = (timeline.TEAM_MARGIN > 15) & (timeline.GAME_TIMESTRING < "24:00")
    results = timeline.loc[is_early_lead, "OUTCOME"]
    if results.size == 0:
        return None
    return results.iloc[0]

In [96]:
celtics_timelines = games.GAME_ID.apply(get_game_scoring_timeline)

In [140]:
celtics_2022_scoring = pd.concat(celtics_timelines.apply(parse_timeline).tolist()).reset_index()

In [141]:
celtics_2022_scoring.groupby(["GAME_ID"]).apply(outcome_early_lead)

GAME_ID       
0022200001  3     NaN
0022200022  3     NaN
0022200030  3     NaN
0022200047  3       L
0022200072  3     NaN
0022200089  5       W
0022200107  3     NaN
0022200124  3     NaN
0022200135  3     NaN
0022200152  3     NaN
0022200163  3     NaN
0022200176  11      W
0022200186  3     NaN
0022200201  3     NaN
0022200214  3     NaN
0022200231  3     NaN
0022200250  3     NaN
0022200264  17      W
0022200275  18      W
0022200294  19      W
0022200301  20      W
0022200314  3     NaN
0022200328  3     NaN
0022200346  3     NaN
0022200355  3     NaN
0022200373  3     NaN
0022200392  3     NaN
0022200408  3     NaN
0022200413  3     NaN
0022200430  3     NaN
0022200445  3     NaN
0022200469  3     NaN
0022200482  3     NaN
0022200495  3     NaN
0022200507  3     NaN
0022200525  3     NaN
0022200549  3     NaN
0022200562  37      L
0022200577  3     NaN
0022200591  3     NaN
0022200606  3     NaN
0022200621  3     NaN
0022200627  3     NaN
0022200642  43      W
0022200656  3    