# In [1]:
import asyncio
import aiohttp
import pandas as pd
import os
from pathlib import Path
import logging
from typing import Set, Tuple
import ujson
from concurrent.futures import ThreadPoolExecutor
import nest_asyncio

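# Logging setup is unchanged and not repeated in this cell. Without a prior
# logging configuration (e.g. logging.basicConfig(level=logging.INFO)), the
# INFO messages emitted below are not displayed.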

async def fetch_wowy_data(session, url, params):
    """Send a GET request and return the decoded JSON body.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding the response (callers in this file
            pass an ``aiohttp.ClientSession``).
        url: Endpoint to query.
        params: Mapping of query-string parameters.

    Returns:
        The response body parsed as JSON with ``ujson.loads``.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (raised by ``response.raise_for_status()``).
    """
    async with session.get(url, params=params) as response:
        # Fail on HTTP error statuses here, so a failed request is reported
        # as such instead of surfacing later as a missing key in the payload.
        response.raise_for_status()
        return await response.json(loads=ujson.loads)

async def wowy_shift(session, team_id, player1_id, seasons, ps=False):
    """Fetch the two WOWY tables for one player/team and stack them.

    Two requests are sent concurrently to the pbpstats ``get-wowy-stats``
    endpoint: one filtered with ``0Exactly1OnFloor`` and one with
    ``0Exactly0OnFloor`` (presumably "player on the floor" vs. "player off
    the floor" -- confirm against the API documentation).

    Args:
        session: HTTP session handed through to ``fetch_wowy_data``.
        team_id: Value sent as the ``TeamId`` query parameter.
        player1_id: Player id used as the value of the on/off floor filter.
        seasons: Iterable of season strings; joined with commas.
        ps: ``'all'`` selects season type ``'All'``; any other truthy value
            selects ``'Playoffs'``; a falsy value selects
            ``'Regular Season'``.

    Returns:
        A DataFrame holding the ``multi_row_table_data`` rows of both
        responses, with a boolean ``on`` column (True for the
        ``0Exactly1OnFloor`` request, False for the other). The original
        row indices of the two tables are kept.
    """
    if ps == 'all':
        s_type = 'All'
    elif ps:
        s_type = 'Playoffs'
    else:
        s_type = 'Regular Season'

    wowy_url = "https://api.pbpstats.com/get-wowy-stats/nba"

    # Parameters common to both requests; the floor filter is placed first
    # so the query parameters keep their order.
    shared = {
        "TeamId": team_id,
        "Season": ",".join(seasons),
        "SeasonType": s_type,
        "Type": "Player",
    }
    requests = [
        fetch_wowy_data(session, wowy_url, {floor_key: player1_id, **shared})
        for floor_key in ("0Exactly1OnFloor", "0Exactly0OnFloor")
    ]
    payload_on, payload_off = await asyncio.gather(*requests)

    frames = []
    for payload, is_on in ((payload_on, True), (payload_off, False)):
        frame = pd.DataFrame(payload["multi_row_table_data"])
        frame['on'] = is_on
        frames.append(frame)

    return pd.concat(frames)

# Setup folders function (unchanged)

def setup_folders(base_year: int, end_year: int, ps=False) -> None:
    """Make sure a ``data/<year>`` directory exists for every season.

    Covers ``base_year`` through ``end_year`` inclusive. When ``ps`` is
    truthy the directory names carry a ``ps`` suffix (``data/<year>ps``).
    Directories that already exist are left untouched.
    """
    suffix = 'ps' if ps else ''
    season_dirs = (
        Path(f"data/{season}{suffix}")
        for season in range(base_year, end_year + 1)
    )
    for season_dir in season_dirs:
        season_dir.mkdir(parents=True, exist_ok=True)
def get_processed_combinations(year: int, ps=False) -> Set[Tuple[str, str]]:
    """Collect the (player id, team id) pairs already saved for a season.

    Every ``*.csv`` file in ``data/<year>`` (``data/<year>ps`` when ``ps``
    is truthy) is scanned: the file stem is taken as the player id and each
    distinct value of its ``TeamId`` column yields one pair.

    Args:
        year: Season year used in the directory name.
        ps: Truthy to read the postseason directory.

    Returns:
        A set of ``(file stem, team id as string)`` tuples; empty when the
        directory does not exist. Files that cannot be read (or lack a
        ``TeamId`` column) are logged and skipped.
    """
    trail = 'ps' if ps else ''
    year_dir = Path(f"data/{year}{trail}")
    processed: Set[Tuple[str, str]] = set()
    if not year_dir.exists():
        return processed

    for file in year_dir.glob("*.csv"):
        nba_id = file.stem
        try:
            # Missing cells carry no team information, so drop them.
            team_ids = (
                pd.read_csv(file, usecols=['TeamId'])['TeamId']
                .dropna()
                .unique()
            )
        except Exception as e:
            # Best effort: one unreadable file must not abort the scan.
            logging.error(f"Error reading file {file}: {e}")
            continue

        for team_id in team_ids:
            # A column containing a missing cell is loaded as float, which
            # would render ids as e.g. '1610612737.0'. Format whole-number
            # floats as integers so the key matches str(<integer id>).
            if isinstance(team_id, float) and team_id.is_integer():
                team_id = int(team_id)
            processed.add((nba_id, str(team_id)))
    return processed

async def process_player(session, nba_id, team_id, year, is_postseason, seasons, output_file):
    """Download one player/team WOWY table and persist it to ``output_file``.

    If ``output_file`` already exists, the new rows are appended to its
    contents and exact duplicate rows are dropped before the file is
    rewritten; otherwise the new rows are written as-is.

    Args:
        session: HTTP session forwarded to ``wowy_shift``.
        nba_id: Player id; sent to the API as ``str(int(nba_id))``.
        team_id: Team id forwarded to ``wowy_shift``.
        year: Season year, used only in log messages.
        is_postseason: Forwarded to ``wowy_shift`` as ``ps``.
        seasons: Season strings forwarded to ``wowy_shift``.
        output_file: ``Path`` of the CSV file to create or extend.

    Returns:
        ``(nba_id, str(team_id))`` on success, or ``None`` if any step
        raised (the error is logged, not propagated).
    """
    try:
        logging.info(f"Processing {nba_id} - {team_id} for {year}")
        fetched = await wowy_shift(
            session,
            team_id=team_id,
            player1_id=str(int(nba_id)),
            seasons=seasons,
            ps=is_postseason,
        )

        to_write = fetched
        if output_file.exists():
            # Merge with what is already on disk and drop repeated rows.
            previous = pd.read_csv(output_file)
            merged = pd.concat([previous, fetched], ignore_index=True)
            to_write = merged.drop_duplicates()
        to_write.to_csv(output_file, index=False)

        return nba_id, str(team_id)
    except Exception as e:
        logging.error(f"Error processing {nba_id} - {team_id} for {year}: {e}")
        return None

async def process_season_data(year: int, is_postseason: bool, index_df: pd.DataFrame,
                              processed_combinations: Set[Tuple[str, str]]) -> None:
    """Fetch and store WOWY data for every unprocessed player/team pair of a season.

    Rows of ``index_df`` whose ``year`` equals ``year`` are grouped by
    ``nba_id``; for each distinct ``team_id`` of a player that is not yet in
    ``processed_combinations`` a ``process_player`` call is scheduled. All
    calls share one HTTP session and run concurrently.

    Args:
        year: Season year; the API season label is ``"<year-1>-<yy>"``
            (e.g. 2024 -> ``"2023-24"``).
        is_postseason: Selects the ``data/<year>ps`` output directory and is
            forwarded to ``process_player``.
        index_df: Frame with at least ``year``, ``nba_id`` and ``team_id``
            columns. It is not modified.
        processed_combinations: Set of ``(str(nba_id), str(team_id))`` pairs
            to skip; successfully processed pairs are added to it in place.
    """
    trail = 'ps' if is_postseason else ''
    seasons = [f"{year - 1}-{str(year)[-2:]}"]

    # Cast ids on a private copy of this season's rows: the caller's frame
    # (possibly a filtered slice of a larger one) must not be mutated, and
    # only the rows actually used need to be convertible to int.
    season_rows = index_df[index_df['year'] == year].copy()
    season_rows['nba_id'] = season_rows['nba_id'].astype(int)

    pending = []
    for nba_id, group in season_rows.groupby('nba_id'):
        output_file = Path(f"data/{int(year)}{trail}/{int(nba_id)}.csv")
        for team_id in group['team_id'].unique():
            if (str(nba_id), str(team_id)) not in processed_combinations:
                pending.append((nba_id, team_id, output_file))

    # Nothing left to fetch for this season: skip opening an HTTP session.
    if not pending:
        return

    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(*(
            process_player(session, nba_id, team_id, year, is_postseason, seasons, output_file)
            for nba_id, team_id, output_file in pending
        ))

    # process_player returns None on failure; record only the successes.
    for result in results:
        if result:
            processed_combinations.add(result)

async def main():
    """Run the full scrape: regular seasons 2001-2025, then postseasons 2001-2024.

    Loads the two index files (dropping rows whose ``team`` is ``'TOT'``),
    creates the per-season output folders, and processes each season in
    order, skipping player/team pairs already present on disk. If either
    index file cannot be loaded the error is logged and nothing is processed.
    """
    # Load data
    loaded = []
    try:
        for csv_name in ('data/index_master.csv', 'data/index_master_ps.csv'):
            frame = pd.read_csv(csv_name)
            loaded.append(frame[frame.team != 'TOT'])
    except Exception as e:
        logging.error(f"Error loading index files: {e}")
        return
    index_reg, index_ps = loaded

    # Create folders
    setup_folders(2001, 2025)
    setup_folders(2001, 2024, ps=True)

    # (log label, postseason flag, index frame, last season year)
    phases = (
        ("regular season", False, index_reg, 2025),
        ("postseason", True, index_ps, 2024),
    )
    for label, is_postseason, index_df, last_year in phases:
        for year in range(2001, last_year + 1):
            logging.info(f"Processing {label} {year}")
            processed = get_processed_combinations(year, ps=is_postseason)
            await process_season_data(year, is_postseason, index_df, processed)

# Entry point of the cell. nest_asyncio.apply() patches asyncio so that
# asyncio.run() can be used while an event loop is already running
# (presumably the notebook kernel's loop -- confirm); it must be called
# before asyncio.run(main()).
nest_asyncio.apply()
asyncio.run(main())



# Notebook output: NameError: name 'setup_folders' is not defined