In [1]:
import pandas as pd
import numpy as np
import os
import logging
from datetime import datetime
import sys
from collections import defaultdict

import requests
import pandas as pd
import numpy as np
import json
import time

import os

def get_team_dict():
    """Build the lookup table from NBA team abbreviation to team ID.

    Returns:
        A new dict on every call, keyed by the three-letter team
        abbreviation, whose values are the team IDs as strings.
    """
    # (abbreviation, team ID) pairs, kept in alphabetical order by abbreviation.
    abbrev_id_pairs = (
        ('ATL', '1610612737'),
        ('BKN', '1610612751'),
        ('BOS', '1610612738'),
        ('CHA', '1610612766'),
        ('CHI', '1610612741'),
        ('CLE', '1610612739'),
        ('DAL', '1610612742'),
        ('DEN', '1610612743'),
        ('DET', '1610612765'),
        ('GSW', '1610612744'),
        ('HOU', '1610612745'),
        ('IND', '1610612754'),
        ('LAC', '1610612746'),
        ('LAL', '1610612747'),
        ('MEM', '1610612763'),
        ('MIA', '1610612748'),
        ('MIL', '1610612749'),
        ('MIN', '1610612750'),
        ('NOP', '1610612740'),
        ('NYK', '1610612752'),
        ('OKC', '1610612760'),
        ('ORL', '1610612753'),
        ('PHI', '1610612755'),
        ('PHX', '1610612756'),
        ('POR', '1610612757'),
        ('SAC', '1610612758'),
        ('SAS', '1610612759'),
        ('TOR', '1610612761'),
        ('UTA', '1610612762'),
        ('WAS', '1610612764'),
    )
    return dict(abbrev_id_pairs)

def get_id_to_team_abbrev():
    """Build the reverse lookup table from team ID to team abbreviation.

    Returns:
        A new dict whose keys are the values of get_team_dict() and whose
        values are the corresponding abbreviations.
    """
    abbrev_by_id = {}
    for abbrev, team_id in get_team_dict().items():
        abbrev_by_id[team_id] = abbrev
    return abbrev_by_id

def convert_time_to_seconds(period, time_str):
    """Convert a period number and countdown clock to elapsed game seconds.

    Periods 1-4 are treated as 12-minute regulation periods and every later
    period as a 5-minute overtime. The clock counts down, so "12:00" in
    period 1 is second 0 and "0:00" in period 4 is second 2880.

    Args:
        period: 1-based period number (5 is the first overtime).
        time_str: Time remaining in the period, formatted as "MM:SS".

    Returns:
        Whole seconds elapsed since the start of the game.

    Raises:
        ValueError: If time_str is not two ':'-separated integers.
    """
    minutes, seconds = map(int, time_str.split(':'))

    regulation_periods = 4
    regulation_length = 720  # 12-minute periods
    overtime_length = 300    # 5-minute OT periods

    if period <= regulation_periods:
        current_period_length = regulation_length
        period_seconds = (period - 1) * regulation_length
    else:
        current_period_length = overtime_length
        # Only overtimes that have already finished count as elapsed, hence
        # the extra "- 1": the first overtime starts exactly at the end of
        # regulation, not one overtime length after it.
        completed_overtimes = period - regulation_periods - 1
        period_seconds = (regulation_periods * regulation_length
                          + completed_overtimes * overtime_length)

    # The clock shows time remaining, so elapsed = period length - remaining.
    time_passed = current_period_length - (minutes * 60 + seconds)

    return period_seconds + time_passed

def _configure_logging(log_path='player_tracking_log.txt'):
    """Send INFO-and-above records to a truncated log file and the console.

    The root logger is reset explicitly instead of relying on
    logging.basicConfig, which silently does nothing when the root logger
    already has a handler (for example after an earlier logging call).
    Resetting also keeps repeated calls from stacking duplicate handlers.
    """
    root = logging.getLogger('')
    for handler in list(root.handlers):
        root.removeHandler(handler)
        handler.close()
    root.setLevel(logging.INFO)

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    file_handler = logging.FileHandler(log_path, mode='w')
    file_handler.setFormatter(formatter)
    root.addHandler(file_handler)

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(formatter)
    root.addHandler(console)


def _build_rotation_lookup(rotations_df):
    """Index rotation stints as lookup[game_id][team_id] -> list of stints.

    Each stint is a dict with 'player_id' (str), 'in_time' and 'out_time',
    taken from PERSON_ID, IN_TIME_SEC and OUT_TIME_SEC. Stints keep the row
    order they have in rotations_df.
    """
    lookup = {}
    # One grouped pass instead of re-filtering the whole frame per game/team.
    for (game_id, team_id), stints in rotations_df.groupby(['GAME_ID', 'TEAM_ID']):
        lookup.setdefault(game_id, {})[team_id] = [
            {'player_id': str(person_id), 'in_time': in_time, 'out_time': out_time}
            for person_id, in_time, out_time in zip(
                stints['PERSON_ID'], stints['IN_TIME_SEC'], stints['OUT_TIME_SEC']
            )
        ]
    return lookup


def _players_on_court(rotation_lookup, team_id, time_sec, game_id):
    """Return IDs of the team's players whose stint covers time_sec.

    A stint covers the half-open interval [in_time, out_time). A warning is
    logged when the result does not contain exactly five players. Returns an
    empty list when the game or team is missing from the lookup.
    """
    if game_id not in rotation_lookup or team_id not in rotation_lookup[game_id]:
        return []

    on_court = [
        stint['player_id']
        for stint in rotation_lookup[game_id][team_id]
        if stint['in_time'] <= time_sec < stint['out_time']
    ]

    if len(on_court) != 5:
        logging.warning(f"Found {len(on_court)} players for team {team_id} at time {time_sec} in game {game_id}. Expected 5 players.")

    return on_court


def main(year=2025):
    """Annotate each possession clip with the offensive players on court.

    Reads "{year}_rotations.csv" and
    "nba_possessions_data/{year}_all_teams_possessions.csv", looks up which of
    the clip's team's players were on court at the midpoint of each clip, and
    writes one "tracking_output/{team}_{year}_clips_with_players.csv" per team
    with two added columns: 'players_on' ('|'-joined player IDs, or one of
    the markers GAME_NOT_FOUND / TEAMS_NOT_FOUND / ERROR) and 'season'.

    Args:
        year: Ending year of the season; 2025 yields season label "2024-25".

    Returns:
        True when processing finishes, None when the input files could not
        be loaded.
    """
    _configure_logging()

    season = str(year-1)+'-'+str(year)[-2:]
    logging.info("Starting player tracking process")

    # Get team ID to abbreviation mapping
    id_to_abbrev = get_id_to_team_abbrev()

    # Load the full datasets
    logging.info("Loading rotation and clips data")
    try:
        rotations_df = pd.read_csv(f"{year}_rotations.csv")
        clips_df = pd.read_csv(f"nba_possessions_data/{year}_all_teams_possessions.csv")

        # Team IDs are compared as strings everywhere below
        rotations_df['TEAM_ID'] = rotations_df['TEAM_ID'].astype(str)
        clips_df['TEAM_ID'] = clips_df['TEAM_ID'].astype(str)

        # NOTE(review): dividing by 10 presumes *_TIME_REAL is in tenths of a
        # second since tip-off - confirm against the rotation data source.
        rotations_df['IN_TIME_SEC'] = rotations_df['IN_TIME_REAL'] / 10
        rotations_df['OUT_TIME_SEC'] = rotations_df['OUT_TIME_REAL'] / 10

        logging.info(f"Loaded {len(rotations_df)} rotation records and {len(clips_df)} clip records")
    except Exception as e:
        logging.error(f"Error loading data: {str(e)}")
        return None

    # Extract year from GameDate (vectorized)
    clips_df['Year'] = pd.to_datetime(clips_df['GameDate']).dt.year

    # Add game_seconds columns (row-wise apply)
    logging.info("Converting game times to seconds")
    clips_df['start_seconds'] = clips_df.apply(
        lambda row: convert_time_to_seconds(int(row['Period']), row['StartTime']), axis=1
    )
    clips_df['end_seconds'] = clips_df.apply(
        lambda row: convert_time_to_seconds(int(row['Period']), row['EndTime']), axis=1
    )
    clips_df['mid_seconds'] = (clips_df['start_seconds'] + clips_df['end_seconds']) / 2

    # Create output directory if it doesn't exist
    output_dir = 'tracking_output'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        logging.info(f"Created output directory: {output_dir}")

    # Pre-process rotation data to create a more efficient lookup structure
    logging.info("Creating rotation lookup structure")
    rotation_lookup = _build_rotation_lookup(rotations_df)

    # Process each team in turn
    teams = clips_df['TEAM_ID'].unique()
    years = clips_df['Year'].unique()

    logging.info(f"Processing data for {len(teams)} teams across {len(years)} years")

    for team in teams:
        team_abbrev = id_to_abbrev.get(team, team)  # Get team abbreviation or use ID if not found
        # Work on a copy so the new columns are not written through a slice
        # of clips_df.
        team_clips = clips_df[clips_df['TEAM_ID'] == team].copy()

        if len(team_clips) == 0:
            logging.info(f"No clips found for {team_abbrev}, skipping")
            continue

        logging.info(f"Processing {len(team_clips)} clips for {team_abbrev}")

        # Results are keyed by row label, not appended to a list: groupby
        # visits games in sorted order and skips rows with a missing GameId,
        # so positional results would not line up with team_clips' rows.
        # Rows never visited keep the GAME_NOT_FOUND marker.
        players_on = pd.Series("GAME_NOT_FOUND", index=team_clips.index, dtype=object)

        # Group by game to minimize lookups
        for game_id, game_clips in team_clips.groupby('GameId'):
            if game_id not in rotation_lookup:
                logging.warning(f"Game {game_id} not found in rotation data")
                continue

            if len(rotation_lookup[game_id]) < 2:
                logging.warning(f"Not enough teams found for game {game_id}")
                players_on.loc[game_clips.index] = "TEAMS_NOT_FOUND"
                continue

            for row_label, mid_time in game_clips['mid_seconds'].items():
                try:
                    offensive_players = _players_on_court(
                        rotation_lookup, team, mid_time, game_id
                    )
                    players_on.at[row_label] = '|'.join(offensive_players)
                except Exception as e:
                    logging.error(f"Error processing possession: {str(e)}")
                    players_on.at[row_label] = "ERROR"

        # Add players_on column (aligned on the row index)
        team_clips['players_on'] = players_on
        team_clips['season'] = season

        # Save results using team abbreviation
        output_file = f"{output_dir}/{team_abbrev}_{year}_clips_with_players.csv"
        team_clips.to_csv(output_file, index=False)

        # Log statistics
        total_count = team_clips.shape[0]
        error_count = team_clips[team_clips['players_on'].isin(['ERROR', 'GAME_NOT_FOUND', 'TEAMS_NOT_FOUND'])].shape[0]
        success_count = total_count - error_count
        logging.info(f"Processed {total_count} clips for {team_abbrev}. "
                     f"Success: {success_count} ({success_count/total_count*100:.1f}%), "
                     f"Errors: {error_count} ({error_count/total_count*100:.1f}%)")

    logging.info("Processing complete")
    return True

if __name__ == "__main__":
    # Only record the start time here. Logging is configured inside main(),
    # and a logging call made before that would install the default root
    # handler: the message itself would be dropped (default level is WARNING)
    # and main()'s logging.basicConfig would then be skipped.
    start_time = datetime.now()

    result = main()

    end_time = datetime.now()
    execution_time = end_time - start_time
    logging.info(f"Script started at {start_time}")
    logging.info(f"Script completed at {end_time}")
    logging.info(f"Total execution time: {execution_time}")

AttributeError: module 'datetime' has no attribute 'now'