In [1]:
import requests
import pandas as pd
import numpy as np
import json
import time
import datetime
import os
from datetime import timedelta

def get_team_dict():
    """Build the lookup table from team abbreviation to team ID.

    Returns:
        A new dict on every call whose keys are three-letter team
        abbreviations and whose values are the team IDs as strings.
        Insertion order is alphabetical by abbreviation.
    """
    # Kept as ordered pairs so the resulting dict iterates in this order.
    abbreviation_id_pairs = (
        ('ATL', '1610612737'),
        ('BKN', '1610612751'),
        ('BOS', '1610612738'),
        ('CHA', '1610612766'),
        ('CHI', '1610612741'),
        ('CLE', '1610612739'),
        ('DAL', '1610612742'),
        ('DEN', '1610612743'),
        ('DET', '1610612765'),
        ('GSW', '1610612744'),
        ('HOU', '1610612745'),
        ('IND', '1610612754'),
        ('LAC', '1610612746'),
        ('LAL', '1610612747'),
        ('MEM', '1610612763'),
        ('MIA', '1610612748'),
        ('MIL', '1610612749'),
        ('MIN', '1610612750'),
        ('NOP', '1610612740'),
        ('NYK', '1610612752'),
        ('OKC', '1610612760'),
        ('ORL', '1610612753'),
        ('PHI', '1610612755'),
        ('PHX', '1610612756'),
        ('POR', '1610612757'),
        ('SAC', '1610612758'),
        ('SAS', '1610612759'),
        ('TOR', '1610612761'),
        ('UTA', '1610612762'),
        ('WAS', '1610612764'),
    )
    return dict(abbreviation_id_pairs)

def determine_season(date_str):
    """Map a date string to its season label, e.g. '2024-25'.

    Args:
        date_str: Date text whose first four characters are the year and
            whose characters at index 5-6 are the month (as in 'YYYY-MM-DD').

    Returns:
        A string '<start year>-<last two digits of the following year>'.
        Months 9 through 12 belong to the season starting that year;
        months 1 through 8 belong to the season that started the year before.
    """
    start_year = int(date_str[:4])
    # Dates before September still belong to the season begun last year.
    if int(date_str[5:7]) < 9:
        start_year -= 1
    return f"{start_year}-{str(start_year + 1)[-2:]}"

def get_date_ranges(start_date, end_date):
    """Split an inclusive date span into consecutive windows of up to 7 days.

    Args:
        start_date: First day of the span, formatted 'YYYY-MM-DD'.
        end_date: Last day of the span (inclusive), formatted 'YYYY-MM-DD'.

    Returns:
        A list of (window_start, window_end) tuples of 'YYYY-MM-DD' strings.
        Both ends of each window are inclusive, windows do not overlap, and
        the last window is shortened so it never runs past end_date. The
        list is empty when start_date is after end_date.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    date_ranges = []
    current = start

    # `<=` (not `<`): end_date is inclusive, so a window that would begin
    # exactly on end_date must still be emitted as a one-day window.
    while current <= end:
        range_end = min(current + timedelta(days=6), end)
        date_ranges.append((
            current.strftime('%Y-%m-%d'),
            range_end.strftime('%Y-%m-%d'),
        ))
        # Next window starts the day after this one ends.
        current = range_end + timedelta(days=1)

    return date_ranges

def fetch_possessions(team, start_date, end_date):
    """Fetch both offensive and defensive possessions for a team in the given date range.

    Two requests are sent to the pbpstats get-possessions endpoint, first
    with OffDef="Offense" and then with OffDef="Defense", separated by a
    2-second pause. Each returned possession dict is tagged in place with
    'Team' (the abbreviation passed in) and 'IsOffense' (True/False).

    A failed request (HTTP error, connection problem, timeout, or a body
    that is not valid JSON) is reported with print() and skipped, so the
    result may contain only one side or be empty.

    Args:
        team: Team abbreviation; must be a key of get_team_dict().
        start_date: First day to query, 'YYYY-MM-DD'. Also used to derive
            the season sent to the API.
        end_date: Last day to query, 'YYYY-MM-DD'.

    Returns:
        A list of possession dicts, offensive ones first.

    Raises:
        KeyError: If team is not a known abbreviation.
    """
    team_dict = get_team_dict()
    season = determine_season(start_date)
    url = "https://api.pbpstats.com/get-possessions/nba"

    params = {
        "league": 'nba',
        "TeamId": team_dict[team],
        "Season": season,
        "SeasonType": "All",
        "OffDef": "Offense",
        "StartType": "All",
        "FromDate": start_date,
        "ToDate": end_date,
    }

    # (OffDef query value, label used in the error message, IsOffense flag)
    sides = (
        ("Offense", "offensive", True),
        ("Defense", "defensive", False),
    )

    all_possessions = []

    for index, (off_def, label, is_offense) in enumerate(sides):
        if index:
            # Pause between the two calls to go easy on the API.
            time.sleep(2)
        params["OffDef"] = off_def

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()  # Raise exception for HTTP errors
            side_possessions = response.json().get("possessions", [])
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            print(f"Error fetching {label} possessions for {team}: {e}")
            continue

        # Add team info to each possession
        for possession in side_possessions:
            possession['Team'] = team
            possession['IsOffense'] = is_offense

        all_possessions.extend(side_possessions)

    print(f"Fetched {len(all_possessions)} possessions for {team} from {start_date} to {end_date}")
    return all_possessions

def process_possessions(possessions):
    """Flatten raw possession dicts into uniform rows ready for CSV export.

    Args:
        possessions: Iterable of possession dicts (as tagged by the fetch
            step). Missing keys fall back to the defaults listed below.

    Returns:
        A list with one flat dict per possession. Each row carries the
        scalar fields below, then 'VideoUrlsCount', 'VideoUrls' and
        'VideoDescriptions' ('; '-joined 'url' / 'description' values of the
        'VideoUrls' entries), then 'EventsDescription' (the 'Events' value
        as-is) and 'EventsCount' (result of Events.count('\\n')).
    """
    # Scalar fields copied straight across, paired with their fallbacks.
    # Order matters: it fixes the column order of the exported rows.
    scalar_defaults = (
        ('Team', ''),
        ('IsOffense', True),
        ('GameId', ''),
        ('GameDate', ''),
        ('Opponent', ''),
        ('Period', ''),
        ('StartTime', ''),
        ('EndTime', ''),
        ('StartType', ''),
        ('StartScoreDifferential', 0),
        ('FG2M', 0),
        ('FG2A', 0),
        ('FG3M', 0),
        ('FG3A', 0),
        ('OffensiveRebounds', 0),
        ('Turnovers', 0),
        ('ShootingFoulsDrawn', 0),
        ('NonShootingFoulsThatResultedInFts', 0),
    )

    rows = []

    for entry in possessions:
        record = {name: entry.get(name, fallback) for name, fallback in scalar_defaults}

        # Video clips: count them and join their urls / descriptions.
        clips = entry.get('VideoUrls')
        if clips:
            record['VideoUrlsCount'] = len(clips)
            record['VideoUrls'] = '; '.join(clip.get('url', '') for clip in clips)
            record['VideoDescriptions'] = '; '.join(
                clip.get('description', '') for clip in clips
            )
        else:
            record['VideoUrlsCount'] = 0
            record['VideoUrls'] = ''
            record['VideoDescriptions'] = ''

        # Events: stored unchanged; the count is the number of '\n' matches.
        events = entry.get('Events')
        if events:
            record['EventsDescription'] = events
            record['EventsCount'] = events.count('\n')
        else:
            record['EventsDescription'] = ''
            record['EventsCount'] = 0

        rows.append(record)

    return rows

def scrape_nba_possessions(season=2025, start_date='2024-10-22', end_date='2025-02-21', output_dir='nba_possessions_data'):
    """Scrape possession data for every team and write it out as CSV files.

    For each team abbreviation, possessions are fetched one date window at
    a time, flattened into rows, and saved to
    '<output_dir>/<season>_<team>_possessions.csv'. Rows from all teams are
    additionally saved to '<output_dir>/<season>_all_teams_possessions.csv'.
    Teams (or runs) that yield no rows produce no file.

    Args:
        season: Label used only as the prefix of the output file names.
        start_date: First day to scrape, 'YYYY-MM-DD'.
        end_date: Last day to scrape, 'YYYY-MM-DD'.
        output_dir: Directory for the CSV files; created if missing.

    Returns:
        The list of all processed possession rows across teams.
    """
    team_codes = list(get_team_dict())
    windows = get_date_ranges(start_date, end_date)

    # Make sure the destination folder is there before any file is written.
    os.makedirs(output_dir, exist_ok=True)

    combined_rows = []

    for team in team_codes:
        print(f"\nProcessing data for {team}...")
        team_rows = []

        for window_start, window_end in windows:
            print(f"  Fetching {window_start} to {window_end}...")
            fetched = fetch_possessions(team, window_start, window_end)
            if fetched:
                team_rows += process_possessions(fetched)

            # Throttle: wait between windows so the API is not hammered.
            time.sleep(2)
        print(f"Processed {len(team_rows)} possessions for {team}")

        if not team_rows:
            continue

        # Per-team CSV
        team_csv = os.path.join(output_dir, f"{season}_{team}_possessions.csv")
        pd.DataFrame(team_rows).to_csv(team_csv, index=False)
        print(f"Saved {len(team_rows)} possessions for {team} to {team_csv}")

        combined_rows += team_rows

    # Combined CSV covering every team
    if combined_rows:
        combined_csv = os.path.join(output_dir, f"{season}_all_teams_possessions.csv")
        pd.DataFrame(combined_rows).to_csv(combined_csv, index=False)
        print(f"\nSaved {len(combined_rows)} total possessions to {combined_csv}")

    return combined_rows

if __name__ == "__main__":
    # Scrape window for the 2024-25 season run: the original author's notes
    # give the first date as opening night and the second as the chosen
    # cut-off (any later end date may be substituted).
    start_date, end_date = '2024-10-22', '2025-02-21'

    print(f"Starting NBA possession data scraper for {start_date} to {end_date}...")
    scrape_nba_possessions(
        season=2025,
        start_date=start_date,
        end_date=end_date,
    )

Starting NBA possession data scraper for 2024-10-22 to 2025-02-21...

Processing data for ATL...
  Fetching 2024-10-22 to 2024-10-28...
Fetched 820 possessions for ATL from 2024-10-22 to 2024-10-28
  Fetching 2024-10-29 to 2024-11-04...
Fetched 816 possessions for ATL from 2024-10-29 to 2024-11-04
  Fetching 2024-11-05 to 2024-11-11...
Fetched 618 possessions for ATL from 2024-11-05 to 2024-11-11
  Fetching 2024-11-12 to 2024-11-18...
Fetched 814 possessions for ATL from 2024-11-12 to 2024-11-18
  Fetching 2024-11-19 to 2024-11-25...
Fetched 660 possessions for ATL from 2024-11-19 to 2024-11-25
  Fetching 2024-11-26 to 2024-12-02...
Fetched 818 possessions for ATL from 2024-11-26 to 2024-12-02
  Fetching 2024-12-03 to 2024-12-09...
Fetched 646 possessions for ATL from 2024-12-03 to 2024-12-09
  Fetching 2024-12-10 to 2024-12-16...
Fetched 404 possessions for ATL from 2024-12-10 to 2024-12-16
  Fetching 2024-12-17 to 2024-12-23...
Fetched 648 possessions for ATL from 2024-12-17 to 2024-