In [6]:
# https://github.com/swar/nba_api

# Static headers for cdn requests, these never change.
# This dict is passed as the `headers` argument of `requests.get` when the
# play-by-play JSON is downloaded in `get_pbp_data`.
# NOTE(review): presumably these browser-like values are needed for the NBA
# endpoints to accept the request - confirm they are still required.
static_headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}


import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Directory holding the local CSV datasets; file names are appended to it
# with a '/' separator when the CSV files are read.
dataset_path = './datasets'

We will be analysing games played in the 2019-20 season between the Denver Nuggets and the Los Angeles Lakers. 
Example: https://github.com/swar/nba_api/blob/master/docs/examples/Finding%20Games.ipynb

In [2]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder
# Team records from nba_api's static data, fetched once for the notebook;
# get_team_games reads each record's 'abbreviation' and 'id' keys.
nba_teams = teams.get_teams()

def get_team_games(team_id: int = None, team_abbr: str = None, season: str = None, opponent_team: str = None) -> "pd.DataFrame":
    """ Get the games for a team, searchable by team_id or team_abbr. Keep only games from the given season.
    @team_id: ID of the team; overridden when team_abbr is given
    @team_abbr: Abbreviation of the team, eg. 'BOS', 'LAL'; takes precedence over team_id
    @season: Season in the format 'YYYY-YY' (eg. '2019-20'), if null then get all games
    @opponent_team: Abbreviation of the opponent team, eg. 'BOS', 'LAL'
    @return: First data frame returned by LeagueGameFinder, restricted to rows whose
             MATCHUP contains opponent_team when that argument is given
    @raises ValueError: If team_abbr does not match the abbreviation of any team in nba_teams
    """
    # Find the team id if the team abbreviation is given
    if team_abbr:
        team_obj = next((team for team in nba_teams if team['abbreviation'] == team_abbr), None)
        if team_obj is None:
            # Fail with a readable message instead of an IndexError on an empty list
            raise ValueError(f"Unknown team abbreviation: {team_abbr!r}")
        team_id = team_obj['id']

    # Get all games for the team in the given season, regular season and playoffs
    gamefinder = leaguegamefinder.LeagueGameFinder(
        team_id_nullable=team_id,
        season_type_nullable='Playoffs,Regular Season',
        season_nullable=season,
    )
    games = gamefinder.get_data_frames()[0]

    # If opponent team is given, keep only games against that team.
    # regex=False: the abbreviation is matched literally, not as a pattern.
    # na=False: rows with a missing MATCHUP are dropped instead of breaking the boolean mask.
    if opponent_team:
        against_opponent = games['MATCHUP'].str.contains(opponent_team, regex=False, na=False)
        games = games.loc[against_opponent]
    return games


In [3]:
# Get Denver's (DEN) games from the 2019-20 season against the Lakers (LAL)
den_games = get_team_games(
    team_id=None,
    team_abbr='DEN',
    season='2019-20',
    opponent_team='LAL',
)
den_games


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,42019,1610612743,DEN,Denver Nuggets,41900315,2020-09-26,DEN @ LAL,L,240,107,...,0.885,9,27,36,23,5,2,11,25,-10.0
1,42019,1610612743,DEN,Denver Nuggets,41900314,2020-09-24,DEN vs. LAL,L,241,108,...,0.87,6,27,33,22,4,3,11,28,-6.0
2,42019,1610612743,DEN,Denver Nuggets,41900313,2020-09-22,DEN vs. LAL,W,240,114,...,0.793,9,35,44,26,8,2,18,21,8.0
3,42019,1610612743,DEN,Denver Nuggets,41900312,2020-09-20,DEN @ LAL,L,240,103,...,0.758,7,24,31,22,12,4,19,22,-2.0
4,42019,1610612743,DEN,Denver Nuggets,41900311,2020-09-18,DEN @ LAL,L,239,114,...,0.821,9,28,37,23,3,2,16,26,-12.0
21,22019,1610612743,DEN,Denver Nuggets,21901296,2020-08-10,DEN @ LAL,L,240,121,...,0.783,10,25,35,27,6,2,14,20,-3.0
37,22019,1610612743,DEN,Denver Nuggets,21900817,2020-02-12,DEN vs. LAL,L,264,116,...,0.773,3,39,42,32,9,2,10,26,-4.0
64,22019,1610612743,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,...,0.9,17,27,44,31,13,2,8,19,24.0
74,22019,1610612743,DEN,Denver Nuggets,21900304,2019-12-03,DEN vs. LAL,L,239,96,...,0.941,7,28,35,26,9,4,11,25,-9.0


We pick three games from the df shown above, each at a different stage of the season: the first matchup of the regular season, the first matchup of the playoffs, and the last game of the Western Conference Finals (game 5). <br>
Main endpoint for processing play by play (pbp) data (enter your own game_id)
https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_'game_id'.json


In [4]:
import requests
import pandas as pd

# create function that gets pbp logs from the nba api with game id
def get_pbp_data(game_id, timeout=30):
    """Download the play-by-play log of one game from the NBA CDN.

    @game_id: Game ID as a string (eg. '0021900304'); it is concatenated into the URL as-is
    @timeout: Seconds to wait for the server before giving up
    @return: DataFrame with one row per entry of the response's ['game']['actions'] list,
             plus a 'gameid' column holding game_id
    @raises requests.HTTPError: If the server answers with an error status
    """
    play_by_play_url = "https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_" + game_id + ".json"

    # A timeout keeps a stalled connection from hanging the notebook indefinitely
    response = requests.get(url=play_by_play_url, headers=static_headers, timeout=timeout)
    # Surface HTTP errors (eg. unknown game id) here rather than as a JSON/Key error below
    response.raise_for_status()

    play_by_play = response.json()['game']['actions']
    df = pd.DataFrame(play_by_play)
    df['gameid'] = game_id
    return df

def save_pickle(df, filename):
    """Serialize `df` to the pickle file at `filename` via its `to_pickle` method."""
    df.to_pickle(path=filename)

In [7]:
# Game IDs of the matchups we want to analyse
games_list = ['0021900304', '0041900311', '0041900315']

# Download each game's play-by-play log and store it as pbp_<game_id>.pkl
for game in games_list:
    pbp = get_pbp_data(game)
    save_pickle(pbp, f'pbp_{game}.pkl')


There are 55 columns in the pbp dataframe and around 500 rows, depending on the number of events. Some of them are useless, so we will do some cleaning. 
Columns of interest:
- <b>actionType</b>: what happened (2pt shot, 3pt shot, rebound, turnover)
- subType: additional information about actionType (for a 2pt -> layup/dunk/midrange )
- qualifiers: additional information about actionType (for a 2pt -> pointsinthepaint,2ndchance)
- <b>personId</b>: ID of player - primary id in network
- description: more detailed description of what happened (eg. S. Curry STEAL (1 STL))
- <b>personIdsFilter</b>: IDs of players who were part of the play (eg. K. Thompson 27' 3PT  (3 PTS) (K. Looney 1 AST) -> 202691,1626172)
- playerName: last name of player, used for labels in networks
- shotResult: success of the shot (Made, Missed)
- <b>assistPersonId</b>: ID of player who assisted a shot

In [48]:
import networkx as nx

# Load the saved play-by-play log of one game
pbp = pd.read_pickle('pbp_0021900304.pkl')

# Two directed multigraphs that receive the same set of player nodes
game_network_assists = nx.MultiDiGraph()
game_network_shots = nx.MultiDiGraph()

# Walk through the play-by-play rows one event at a time
for index, row in pbp.iterrows():
    # Register the player behind the event in both graphs; rows with a
    # falsy personId are skipped
    if row['personId']:
        for network in (game_network_assists, game_network_shots):
            network.add_node(row['personId'], person_name=row['playerName'])

    # Shot attempts are detected here, but nothing is done with them yet
    if row['shotResult'] in ('Made', 'Missed'):
        # Get the shooter and assist person ids
        pass

In [None]:
# Dataset created by the script from article https://github.com/AniMadurkar/NBA-Flow-Network-Analysis/tree/main
# NOTE(review): `teams` below rebinds the name imported from nba_api.stats.static
# earlier in the notebook, so re-running the `teams.get_teams()` cell after this
# one will fail. The name is kept because other cells may rely on it.
# Load both CSV files from the dataset directory
players = pd.read_csv(f'{dataset_path}/2019_Playoffs_Players.csv')
teams = pd.read_csv(f'{dataset_path}/2019_Playoffs.csv')