In [1]:
import pandas as pd
import arrow
import json
import time
from nba_api.stats.static import teams
from nba_api.stats.static import players
from nba_api.stats import endpoints
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 60)
from nba_api.stats.library import parameters


In [2]:
def get_celtics_id():
    nba_teams = teams.get_teams()
    # Select the dictionary for the Celtics, which contains their team ID
    celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
    return celtics['id']    

def get_games_for_team(team_id: int, season: str = None, start_date: arrow.Arrow = None, end_date: arrow.Arrow = None, regular_season: bool = True) -> pd.DataFrame:

    season_type = None
    if regular_season:
        season_type = "Regular Season"

    gamefinder = endpoints.leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_type_nullable=season_type)
    # The first DataFrame of those returned is what we want.
    games = gamefinder.get_data_frames()[0]
    if season:
        games = games.loc[games.SEASON_ID.str[-4:] == season, :].reset_index(drop=True)

    # IDK why it's counting some random summer leage games as regular season games..., lets drop those.
    if regular_season:
        games = games.loc[games.GAME_DATE.str[5:7] != "07", :].reset_index(drop=True)
    return games.sort_values("GAME_DATE").reset_index(drop=True)

def get_game_scoring_timeline(game_id: str):
    play_by_play = endpoints.playbyplayv2.PlayByPlayV2(game_id).get_data_frames()[0]
    return play_by_play


def get_top_scoring_players():
    endpoints.leaderstiles.LeadersTiles().get_data_frame()

In [3]:
CELTICS_TEAM_ID = get_celtics_id()
games = get_games_for_team(CELTICS_TEAM_ID, season="2022")


In [6]:
out = endpoints.leaderstiles.LeadersTiles(
    player_or_team=parameters.PlayerOrTeam.player,
    stat=parameters.Stat.points,
    season="2023-24"
).get_dict()

In [13]:
results = out['resultSet'][0]


In [16]:
rows = []
for row in results['rowSet']:
    d = {}
    for i, v in enumerate(row):
        d[results['headers'][i]] = v
    rows.append(d)

{'RANK': 5, 'PLAYER_ID': 201142, 'PLAYER': 'Kevin Durant', 'TEAM_ID': 1610612756, 'TEAM_ABBREVIATION': 'PHX', 'TEAM_NAME': 'Phoenix Suns', 'PTS': 27.1}


In [85]:
PLAYERS = [
    "Shai Gilgeous",
    'Jokic',
    'Jayson Tatum',
    'Ja Morant',
    'Kawhi',
    'Donovan Mitchell',
    'Devin Vassell',
    'Julius Randle',
    'Anthony edwards',
    'Luka Doncic',
    'Devin Booker',
    'LeBron',
    'Zion',
    'Paolo Banchero',
    'Karl-Anthony Towns',
    'Pascal Siakam',
    'Giannis Ant',
    'Stephen Curry',
    'Embiid',
    'Wembanyama',
    'Tyrese Maxey',
    'Jamal Murray',
    'Mikal Bridges',
    'Paul George',
    'Kevin Durant',
    'Jalen Brunson',
    'Jaylen Brown',
    'De\'Aaron Fox',
    'Dejounte Murray',
    'Damian Lill',
    'Kyrie Irving',
    'Haliburton',
]

In [96]:
nba_players = players.get_players()

def find_player(player):
    output = players.find_players_by_full_name(player)
    if len(output) == 1:
        found = output[0]
        return {
            "name": found["full_name"], "id": found["id"]
        }

player_info = pd.DataFrame(find_player(player) for player in PLAYERS)    

In [141]:
# player_id = 1628983
def query_games(player_info):
    all_games = []
    for player in player_info.to_dict('records'):
        out = endpoints.PlayerNextNGames(player_id=player["id"], season_all="2024-25").get_data_frames()[0]
        print(player)
        time.sleep(3.5)
        
        out["PLAYER_ID"] = player["id"]
        out["PLAYER_NAME"] = player["name"]
        out = out.set_index(["PLAYER_ID", "PLAYER_NAME", "GAME_ID"])
        all_games.append(out)
    player_games = pd.concat([a for a in all_games]).reset_index()
    return player_games

def format_games(player_games):
    player_games["GAME_DATE_YMD"] = player_games["GAME_DATE"].apply(lambda x: arrow.get(x.lower(), "MMM DD, YYYY").format("YYYY-MM-DD"))
    player_games["MATCHUP"] = player_games["VISITOR_TEAM_NICKNAME"] + " @ " + player_games["HOME_TEAM_NICKNAME"]
    cols = [c.upper() for c in ["player_id", "player_name", "game_id", "game_date_ymd", "matchup", "game_time"]]
    
    return player_games[cols].sort_values("GAME_DATE_YMD")
    

player_games = query_games(player_info)
player_games = format_games(player_games)





In [165]:
# endpoints.CumeStatsPlayerGames(player_id=1628983, season="2023-24").get_data_frames()[0]
endpoints.CumeStatsPlayer(player_id=1628983, game_ids=["0022301196"], season="2023-24").get_data_frames()[0].to_dict('records')[0]

{'DATE_EST': '04/14/2024',
 'VISITOR_TEAM': 'Mavericks',
 'HOME_TEAM': 'Thunder',
 'GP': 1,
 'GS': 1,
 'ACTUAL_MINUTES': 15,
 'ACTUAL_SECONDS': 55,
 'FG': 5,
 'FGA': 11,
 'FG_PCT': 0.455,
 'FG3': 0,
 'FG3A': 4,
 'FG3_PCT': 0.0,
 'FT': 5,
 'FTA': 5,
 'FT_PCT': 1.0,
 'OFF_REB': 1,
 'DEF_REB': 0,
 'TOT_REB': 1,
 'AVG_TOT_REB': 1.0,
 'AST': 6,
 'PF': 1,
 'DQ': 0,
 'STL': 0,
 'TURNOVERS': 0,
 'BLK': 0,
 'PTS': 15,
 'AVG_PTS': 15.0}

In [161]:
print(p.get_request_url())

# p.headershttps://stats.nba.com/stats/cumestatsplayer?GameIDs=0022301196&LeagueID=00&PlayerID=1628983&Season=2023-24&SeasonType=Regular+Season

https://stats.nba.com/stats/cumestatsplayer?GameIDs=0022301196&LeagueID=00&PlayerID=1628983&Season=2023-24&SeasonType=Regular+Season


In [None]:
endpoints.CumeStatsPlayer(player_id=1627759, season="2023-24")

In [126]:
player_games = pd.concat([a for a in all_games]).reset_index()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,GAME_ID,GAME_DATE_YMD,MATCHUP,GAME_TIME
2080,1627759,Jaylen Brown,22400061,2024-10-22,Knicks @ Celtics,07:30 PM
160,1628369,Jayson Tatum,22400061,2024-10-22,Knicks @ Celtics,07:30 PM
560,203944,Julius Randle,22400062,2024-10-22,Timberwolves @ Lakers,10:00 PM
640,1630162,Anthony Edwards,22400062,2024-10-22,Timberwolves @ Lakers,10:00 PM
1760,1628969,Mikal Bridges,22400061,2024-10-22,Knicks @ Celtics,07:30 PM
1120,1626157,Karl-Anthony Towns,22400061,2024-10-22,Knicks @ Celtics,07:30 PM
2000,1628973,Jalen Brunson,22400061,2024-10-22,Knicks @ Celtics,07:30 PM
880,2544,LeBron James,22400062,2024-10-22,Timberwolves @ Lakers,10:00 PM
2320,203081,Damian Lillard,22400066,2024-10-23,Bucks @ 76ers,07:30 PM
320,202695,Kawhi Leonard,22400071,2024-10-23,Suns @ Clippers,10:00 PM


{'name': 'Shai Gilgeous-Alexander', 'id': 1628983}
{'name': 'Nikola Jokic', 'id': 203999}
{'name': 'Jayson Tatum', 'id': 1628369}
{'name': 'Ja Morant', 'id': 1629630}
{'name': 'Kawhi Leonard', 'id': 202695}
{'name': 'Donovan Mitchell', 'id': 1628378}
{'name': 'Devin Vassell', 'id': 1630170}
{'name': 'Julius Randle', 'id': 203944}
{'name': 'Anthony Edwards', 'id': 1630162}
{'name': 'Luka Doncic', 'id': 1629029}
{'name': 'Devin Booker', 'id': 1626164}
{'name': 'LeBron James', 'id': 2544}
{'name': 'Zion Williamson', 'id': 1629627}
{'name': 'Paolo Banchero', 'id': 1631094}
{'name': 'Karl-Anthony Towns', 'id': 1626157}
{'name': 'Pascal Siakam', 'id': 1627783}
{'name': 'Giannis Antetokounmpo', 'id': 203507}
{'name': 'Stephen Curry', 'id': 201939}
{'name': 'Joel Embiid', 'id': 203954}
{'name': 'Victor Wembanyama', 'id': 1641705}
{'name': 'Tyrese Maxey', 'id': 1630178}
{'name': 'Jamal Murray', 'id': 1627750}
{'name': 'Mikal Bridges', 'id': 1628969}
{'name': 'Paul George', 'id': 202331}
{'name'

In [139]:
def is_home_game(game: pd.Series):
    return "vs." in game.MATCHUP

def parse_timeline_event(event: pd.Series):
    if not event.SCORE:
        return None
    away_score, home_score = event.SCORE.split(" - ")

    if event.PERIOD >= 5:
        min_per_period = 5
        previous_period_mins = 48 + (event.PERIOD - 5) * 5 
    else:
        min_per_period = 12
        previous_period_mins = 12 * (event.PERIOD - 1)
    
    period_remaining_min, period_remaining_sec = event.PCTIMESTRING.split(":")

    elapsed_min = previous_period_mins + min_per_period - (int(period_remaining_min) + 1)
    elapsed_sec = 60 - int(period_remaining_sec)
    
    return pd.Series({
        "HOME_SCORE": int(home_score),
        "AWAY_SCORE": int(away_score),
        "GAME_TIMESTRING": f"{elapsed_min:02}:{elapsed_sec:02}", 
        "SCOREMARGIN": int(home_score) - int(away_score)
    })


def calculate_team_specific_timeline_stats(event: pd.Series):
    game = games.loc[games.GAME_ID == event.GAME_ID, :]
    is_home = is_home_game(game)
    return pd.Series({
        "TEAM_SCORE": event.HOME_SCORE if is_home_game else event.AWAY_SCORE,
        "OPPONENT_SCORE": event.AWAY_SCORE if is_home_game else event.HOME_SCORE,
        "TEAM_MARGIN":  event.SCOREMARGIN if is_home_game else -1 * event.SCOREMARGIN,
        "OUTCOME": game.WL
    })

def parse_timeline(timeline: pd.DataFrame):
    is_scoring_play = timeline.SCORE.notnull() & timeline.PLAYER1_NAME.notnull()
    additions = timeline.loc[is_scoring_play, :].apply(parse_timeline_event, axis=1)
    score_timeline = timeline.loc[is_scoring_play, ["GAME_ID", "PERIOD", "SCORE", "PLAYER1_ID", "PLAYER1_NAME", "PLAYER1_TEAM_ID"]].join(additions)
    team_specific_stats = score_timeline.apply(calculate_team_specific_timeline_stats, axis=1)
    return score_timeline.join(team_specific_stats)
    
# Outcome functions
def outcome_early_lead(timeline: pd.DataFrame):
    is_early_lead = (timeline.TEAM_MARGIN > 15) & (timeline.GAME_TIMESTRING < "24:00")
    results = timeline.loc[is_early_lead, "OUTCOME"]
    if results.size == 0:
        return None
    return results.iloc[0]

In [96]:
celtics_timelines = games.GAME_ID.apply(get_game_scoring_timeline)

In [140]:
celtics_2022_scoring = pd.concat(celtics_timelines.apply(parse_timeline).tolist()).reset_index()

In [141]:
celtics_2022_scoring.groupby(["GAME_ID"]).apply(outcome_early_lead)

GAME_ID       
0022200001  3     NaN
0022200022  3     NaN
0022200030  3     NaN
0022200047  3       L
0022200072  3     NaN
0022200089  5       W
0022200107  3     NaN
0022200124  3     NaN
0022200135  3     NaN
0022200152  3     NaN
0022200163  3     NaN
0022200176  11      W
0022200186  3     NaN
0022200201  3     NaN
0022200214  3     NaN
0022200231  3     NaN
0022200250  3     NaN
0022200264  17      W
0022200275  18      W
0022200294  19      W
0022200301  20      W
0022200314  3     NaN
0022200328  3     NaN
0022200346  3     NaN
0022200355  3     NaN
0022200373  3     NaN
0022200392  3     NaN
0022200408  3     NaN
0022200413  3     NaN
0022200430  3     NaN
0022200445  3     NaN
0022200469  3     NaN
0022200482  3     NaN
0022200495  3     NaN
0022200507  3     NaN
0022200525  3     NaN
0022200549  3     NaN
0022200562  37      L
0022200577  3     NaN
0022200591  3     NaN
0022200606  3     NaN
0022200621  3     NaN
0022200627  3     NaN
0022200642  43      W
0022200656  3    