In [1]:
from utils.utils import *

In [6]:
# Season key in 'YYYY_YY' form. Later cells turn the underscore into a dash
# before calling get_game_df, and embed the raw value in the CSV checkpoint
# file names (tov_data_<year>.csv / failed_games_<year>.csv).
# NOTE(review): the saved outputs further down show gameIds starting with 224,
# while the games listed for this season start with 214 — the outputs look like
# they come from a different season/run; confirm before relying on them.
year = '2014_15'

In [7]:
# Fetch the game table for the configured season; the helper receives the
# dash-separated form of the season key (e.g. '2014-15').
season_games = get_game_df(year.replace("_", "-"))

# Lift the column cap so wide frames are rendered in full from here on.
pd.set_option("display.max_columns", None)

# Preview the first five games.
season_games.head(n=5)

Unnamed: 0,GAME_ID,SEASON_ID,TEAM_ABBREVIATION,GAME_DATE
0,21400001,22014,"[NOP, ORL]",2014-10-28
1,21400002,22014,"[DAL, SAS]",2014-10-28
2,21400003,22014,"[HOU, LAL]",2014-10-28
3,21400004,22014,"[MIL, CHA]",2014-10-29
4,21400005,22014,"[IND, PHI]",2014-10-29


In [8]:
import pandas as pd
import time
import os
from tqdm import tqdm
import requests

# Resumable turnover scrape for every game in `season_games`.
#
# For each game the play-by-play is fetched with get_play_df, reduced to
# turnover rows with tov_processor, appended to `tov_df`, and the whole table is
# checkpointed to CSV so an interrupted run can be restarted without refetching
# games that are already present in the file.
#
# Produces / updates: tov_df, processed_games, failed_games, and the two CSV
# files named below.

# Column layout used only when no checkpoint exists yet; frames returned by
# tov_processor are concatenated onto it as-is.
tov_cols = ['team', 'player_id', 'player', 'type', 'period', 'clock', 'gameId', 'actionNumber', 'next_pos_points', 'shot_clock', 'opp_team']

# Load or initialize data
tov_save_path = f'tov_data_{year}.csv'
failed_save_path = f'failed_games_{year}.csv'

if os.path.exists(tov_save_path):
    tov_df = pd.read_csv(tov_save_path)
    processed_games = set(tov_df['gameId'].unique())
else:
    tov_df = pd.DataFrame(columns=tov_cols)
    processed_games = set()

if os.path.exists(failed_save_path):
    # dict.fromkeys drops duplicates left behind by earlier runs while keeping
    # the original order of first appearance.
    failed_games = list(dict.fromkeys(pd.read_csv(failed_save_path)['gameId'].tolist()))
else:
    failed_games = []

# Attempts per game before it is recorded as failed (loop-invariant, so it is
# set once here rather than on every iteration).
max_retries = 3

# Number of consecutive games that exhausted all retries; reset on any
# successful fetch.
fails = 0

# Main loop
# `total` lets tqdm draw a real progress bar with an ETA; itertuples() is a
# generator, so tqdm cannot infer the length on its own.
for row in tqdm(season_games.itertuples(index=False), total=len(season_games)):
    game = row.GAME_ID
    teams = row.TEAM_ABBREVIATION
    # Integer form of the id: matches the ids read back from both CSV files.
    game_key = int(game)

    if game_key in processed_games:
        continue  # Skip already processed games

    time.sleep(1)  # pause between games before hitting the endpoint again

    retries = 0

    while retries < max_retries:
        try:
            play_by_play_df = get_play_df(str(game), timeout=2)
            fails = 0
            processed_df = tov_processor(play_by_play_df, teams)

            tov_df = pd.concat([tov_df, processed_df], ignore_index=True)

            # Save progress after each successful game
            tov_df.to_csv(tov_save_path, index=False)
            processed_games.add(game_key)

            # A game that failed in an earlier run but succeeds now must not
            # stay listed as failed.
            if game_key in failed_games:
                failed_games.remove(game_key)
                pd.DataFrame({'gameId': failed_games}).to_csv(failed_save_path, index=False)

            break
        except requests.exceptions.RequestException as e:
            # Timeout is a subclass of RequestException, so this single clause
            # covers both cases the message mentions.
            print(f"Timeout or error for game {game}: {e}. Retrying...")
            time.sleep((retries + 5) * 2)  # 10s, 12s, 14s back-off
            retries += 1
    else:
        # Reached only when the while loop ran out of retries without a break.
        print(f"Failed to fetch data for game {game} after {max_retries} retries.")
        if game_key not in failed_games:
            failed_games.append(game_key)
        pd.DataFrame({'gameId': failed_games}).to_csv(failed_save_path, index=False)
        fails += 1

    # Three or more games in a row failed: take a long cool-down before
    # continuing. `fails` is only cleared by the next successful fetch.
    if fails >= 3:
        time.sleep(500)


1230it [00:00, 1657241.86it/s]


In [4]:
# Preview the first five turnover rows.
tov_df.head(n=5)

Unnamed: 0,team,player_id,player,type,period,clock,gameId,actionNumber,next_pos_points,shot_clock,opp_team,dead_ball
0,ATL,1630552,Johnson,Traveling,1,677.0,22400001,14,0,19.0,BOS,1
1,BOS,1627759,Brown,Bad Pass,1,657.0,22400001,15,0,4.0,ATL,0
2,ATL,1630700,Daniels,Bad Pass,1,655.0,22400001,17,0,22.0,BOS,0
3,BOS,1628401,White,Bad Pass,1,654.0,22400001,19,3,23.0,ATL,0
4,BOS,201950,Holiday,Bad Pass,1,327.0,22400001,108,2,16.0,ATL,0


In [5]:
# Number of distinct games present in the turnover table (a missing gameId,
# if any, is counted as one value, exactly like len(unique())).
tov_df["gameId"].nunique(dropna=False)

956

In [6]:
# Distinct game ids, in order of first appearance in the table.
tov_df.loc[:, "gameId"].unique()

array([22400001, 22400002, 22400003, 22400004, 22400005, 22400006,
       22400007, 22400008, 22400009, 22400010, 22400011, 22400012,
       22400013, 22400014, 22400015, 22400016, 22400017, 22400018,
       22400019, 22400020, 22400021, 22400022, 22400023, 22400024,
       22400025, 22400026, 22400027, 22400028, 22400029, 22400030,
       22400031, 22400032, 22400033, 22400034, 22400035, 22400036,
       22400037, 22400038, 22400039, 22400040, 22400041, 22400042,
       22400043, 22400044, 22400045, 22400046, 22400047, 22400048,
       22400049, 22400050, 22400051, 22400052, 22400053, 22400054,
       22400055, 22400056, 22400057, 22400058, 22400059, 22400060,
       22400061, 22400062, 22400063, 22400064, 22400065, 22400066,
       22400067, 22400068, 22400069, 22400070, 22400071, 22400072,
       22400073, 22400074, 22400075, 22400076, 22400077, 22400078,
       22400079, 22400080, 22400081, 22400082, 22400083, 22400084,
       22400085, 22400086, 22400087, 22400088, 22400089, 22400

In [5]:
# The 40 turnovers with the highest shot_clock value, largest first.
(
    tov_df
    .sort_values(by="shot_clock", ascending=False)
    .head(n=40)
)

Unnamed: 0,team,player_id,player,type,period,clock,gameId,actionNumber,next_pos_points,shot_clock,opp_team,dead_ball
1247,NOP,1627749,Murray,Out of Bounds - Bad Pass Turnover,1,672.0,22400042,15,2,25.5,MEM,1
44,DET,1627736,Beasley,Out of Bounds - Bad Pass Turnover,2,457.0,22400002,234,2,25.5,MIA,1
396,CHI,1629632,White,Bad Pass,1,436.0,22400013,58,2,24.5,CLE,0
1075,MIA,1629130,Robinson,Bad Pass,1,567.0,22400036,28,3,24.5,MIL,0
66,MIA,1610612748,,Excess Timeout Turnover,5,1.1,22400002,729,0,24.0,DET,1
1243,CLE,1628386,Allen,Bad Pass,4,393.0,22400041,582,0,24.0,ATL,0
636,CHA,1630182,Green,Out of Bounds - Bad Pass Turnover,2,515.0,22400022,200,0,24.0,BKN,1
684,DEN,1628427,Čančar,Out of Bounds - Bad Pass Turnover,2,322.0,22400023,322,0,24.0,MEM,1
388,ATL,1630700,Daniels,Out of Bounds Lost Ball Turnover,4,288.0,22400012,658,2,24.0,WAS,1
1032,LAC,201935,Harden,Bad Pass,2,2.9,22400034,341,0,24.0,SAC,0


In [None]:
# Look up the video clip for a single play without downloading it. The saved
# output below is a dict with a 'video' URL and a 'desc' string.
# NOTE(review): the second argument is presumably the play's action/event
# number within the game — confirm against get_play_video.
get_play_video('0022400017', '34', download=False)

{'video': 'https://videos.nba.com/nba/pbp/media/2024/11/15/0022400017/34/707dff43-f67a-3423-5ab8-386d3f1122b8_1280x720.mp4', 'desc': 'Braun Out of Bounds - Bad Pass Turnover Turnover (P1.T1)'}


In [20]:
# Disabled pause, kept from interactive use:
# time.sleep(200)
# NOTE(review): presumably a manual back-off before refetching — confirm, or
# delete the line if it is no longer needed.

# Fetch the play-by-play table for one game and preview its first 40 rows.
play_df = get_play_df('0022400017')
play_df.head(40)

Unnamed: 0,gameId,actionNumber,clock,period,teamId,teamTricode,personId,playerName,playerNameI,xLegacy,yLegacy,shotDistance,shotResult,isFieldGoal,scoreHome,scoreAway,pointsTotal,location,description,actionType,subType,videoAvailable,shotValue,actionId
0,22400017,2,720.0,1,0,,0,,,0,0,0,,0,0.0,0.0,0,,Start of 1st Period (8:10 PM EST),period,start,1,0,1
1,22400017,4,720.0,1,1610612740,NOP,1642274,Missi,Y. Missi,0,0,0,,0,,,0,h,,Jump Ball,,1,0,2
2,22400017,8,705.0,1,1610612743,DEN,1627750,Murray,J. Murray,153,201,25,Made,1,0.0,3.0,3,v,Murray 25' 3PT Jump Shot (3 PTS) (Šarić 1 AST),Made Shot,Jump Shot,1,3,3
3,22400017,10,688.0,1,1610612740,NOP,1627742,Ingram,B. Ingram,0,0,0,,0,,,0,h,Ingram Bad Pass Turnover (P1.T1),Turnover,Bad Pass,1,0,4
4,22400017,10,688.0,1,1610612743,DEN,1627750,Murray,J. Murray,0,0,0,,0,,,0,v,Murray STEAL (1 STL),,,1,0,5
5,22400017,12,685.0,1,1610612743,DEN,1627750,Murray,J. Murray,-11,261,26,Missed,1,,,0,v,MISS Murray 26' 3PT Running Pull-Up Jump Shot,Missed Shot,Running Pull-Up Jump Shot,1,3,6
6,22400017,13,682.0,1,1610612740,NOP,1642274,Missi,Y. Missi,0,0,0,,0,,,0,h,Missi REBOUND (Off:0 Def:1),Rebound,Unknown,1,0,7
7,22400017,14,671.0,1,1610612740,NOP,1629750,Green,J. Green,-218,131,25,Missed,1,,,0,h,MISS Green 25' 3PT Jump Shot,Missed Shot,Jump Shot,1,3,8
8,22400017,15,667.0,1,1610612743,DEN,1631212,Watson,P. Watson,0,0,0,,0,,,0,v,Watson REBOUND (Off:0 Def:1),Rebound,Unknown,1,0,9
9,22400017,16,660.0,1,1610612743,DEN,1629008,Porter Jr.,M. Porter Jr.,-72,239,25,Missed,1,,,0,v,MISS Porter Jr. 25' 3PT Pullup Jump Shot,Missed Shot,Pullup Jump shot,1,3,10
