In [1]:
import requests
import pandas as pd
import tqdm
import bs4
import json
import matplotlib.pyplot as plt
from adjustText import adjust_text
from pathlib import Path
import time

In [2]:
from nba_api.stats.endpoints.playbyplayv2 import PlayByPlayV2

In [3]:
from nba_api.stats.endpoints.playbyplay import PlayByPlay
from nba_api.stats.endpoints.leaguegamefinder import LeagueGameFinder
from nba_api.stats.static.players import find_players_by_full_name
from nba_api.stats.static.teams import find_teams_by_full_name
from nba_api.stats.endpoints.playergamelogs import PlayerGameLogs

In [4]:
path = Path('pbp_data/2021-22.csv')
data_frame = None
if path.exists():
    data_frame = pd.read_csv(str(path))
    data_frame.loc[:, 'GAME_ID'] = [f"00{item}" for item in data_frame.GAME_ID.astype(str)]

## Downloading cur Season

In [5]:
season = '2021-22'
print(f"Processing {season}")
pbp_dfs = []
game_finder = LeagueGameFinder(season_nullable=season, league_id_nullable='00', season_type_nullable='Regular Season')
df = game_finder.get_data_frames()[0]
game_ids = df.GAME_ID.unique()
for game_id in tqdm.tqdm(game_ids):
    if data_frame is not None and game_id in data_frame.GAME_ID.tolist():
        continue
    while True:
        try:
            pbp_df = PlayByPlayV2(game_id=game_id).get_data_frames()[0]
            break
        except Exception as e:
            print(f"Error occured: {e}, Game ID: {game_id}, retrying")
            time.sleep(0.1)
    pbp_dfs.append(pbp_df)
    time.sleep(0.25)
full_df = pd.concat([data_frame] + pbp_dfs)

Processing 2021-22


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 608/608 [00:39<00:00, 15.21it/s]
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [6]:
full_df.loc[:, "PCTIMESECONDS"] = [720 - (int(item.split(":")[0]) * 60 + int(item.split(":")[1])) for item in full_df.PCTIMESTRING.tolist()]

full_df = full_df.fillna({
    "HOMEDESCRIPTION": "",
    "NEUTRALDESCRIPTION": "",
    "VISITORDESCRIPTION": ""
})

In [7]:
full_df.to_csv('pbp_data/2021-22.csv', index=False)

In [11]:
full_df = pd.read_csv('pbp_data/2021-22.csv')

full_df = full_df.fillna({
    "HOMEDESCRIPTION": "",
    "NEUTRALDESCRIPTION": "",
    "VISITORDESCRIPTION": ""
})
full_df.loc[:, 'GAME_ID'] = [f"00{item}" for item in full_df.GAME_ID.astype(str)]

## Finding first shot of each game

In [10]:
game_ids = full_df.GAME_ID.unique()

In [19]:
first_shots = []
first_makes = []

for game_id in game_ids:
    cur_game = full_df.loc[full_df.GAME_ID == game_id]    
    first_shots.append(cur_game.loc[(cur_game.EVENTMSGTYPE == 2) | (cur_game.EVENTMSGTYPE == 1)].iloc[0])
    first_makes.append(cur_game.loc[(cur_game.EVENTMSGTYPE == 1)].iloc[0])

In [22]:
first_df = pd.DataFrame(first_shots)
first_makes_df = pd.DataFrame(first_makes)

In [18]:
first_df.to_csv('first_shots.csv')

In [23]:
first_makes_df.to_csv('first_makes.csv')

first_nba_shots

In [14]:
cur_game.loc[(cur_game.EVENTMSGTYPE == 2) | (cur_game.EVENTMSGTYPE == 1)]

EVENTMSGACTIONTYPE                                       6
EVENTMSGTYPE                                             2
EVENTNUM                                                 7
GAME_ID                                         0022100239
HOMEDESCRIPTION              MISS Barrett 2' Driving Layup
NEUTRALDESCRIPTION                                        
PCTIMESECONDS                                           17
PCTIMESTRING                                         11:43
PERIOD                                                   1
PERSON1TYPE                                              4
PERSON2TYPE                                              0
PERSON3TYPE                                              0
PLAYER1_ID                                         1629628
PLAYER1_NAME                                    RJ Barrett
PLAYER1_TEAM_ABBREVIATION                              NYK
PLAYER1_TEAM_CITY                                 New York
PLAYER1_TEAM_ID                                1.61061e+

In [25]:
df = game_finder.get_data_frames()[0]

In [27]:
for i in range(len(df)):
    print(f"{df.iloc[i].GAME_ID}, {df.iloc[i].MATCHUP}")

0022100305, PHI vs. ORL
0022100305, ORL @ PHI
0022100306, MIA vs. DEN
0022100306, DEN @ MIA
0022100308, HOU vs. OKC
0022100308, OKC @ HOU
0022100307, CHI vs. CHA
0022100307, CHA @ CHI
0022100309, MIN vs. IND
0022100309, IND @ MIN
0022100310, CLE @ DAL
0022100310, DAL vs. CLE
0022100311, WAS @ SAS
0022100311, SAS vs. WAS
0022100312, UTA vs. POR
0022100312, POR @ UTA
0022100313, LAC vs. NOP
0022100313, NOP @ LAC
0022100304, DET @ LAL
0022100301, MIL @ IND
0022100304, LAL vs. DET
0022100300, GSW @ LAC
0022100300, LAC vs. GSW
0022100301, IND vs. MIL
0022100303, MEM vs. SAC
0022100303, SAC @ MEM
0022100302, BOS @ TOR
0022100302, TOR vs. BOS
0022100292, PHI vs. MIN
0022100292, MIN @ PHI
0022100293, NYK @ ATL
0022100293, ATL vs. NYK
0022100299, NOP @ UTA
0022100299, UTA vs. NOP
0022100297, CHA @ HOU
0022100297, HOU vs. CHA
0022100298, DAL vs. WAS
0022100298, WAS @ DAL
0022100296, CHI vs. MIA
0022100296, MIA @ CHI
0022100295, CLE vs. ORL
0022100295, ORL @ CLE
0022100294, BKN vs. PHX
0022100294