In [None]:
import pandas as pd
import requests
import os
import time
from pathlib import Path
import logging
from typing import Set, Tuple
import sys

def wowy_shift(team_id,player1_id,seasons,ps = False, common = False, timeout = 30):
    """Fetch on-floor and off-floor WOWY player stats for one player/team.

    Two requests are sent to the pbpstats ``get-wowy-stats`` endpoint: one
    using the ``0Exactly1OnFloor`` filter and one using ``0Exactly0OnFloor``.
    The ``multi_row_table_data`` rows of each response become a DataFrame,
    tagged with a boolean ``on`` column (True for the first query, False for
    the second), and the two frames are concatenated.

    Args:
        team_id: Value sent as the ``TeamId`` query parameter.
        player1_id: Player id used as the value of the on/off floor filter.
        seasons: Iterable of season strings (joined with commas for the
            ``Season`` parameter). A single bare string is also accepted.
        ps: ``False`` selects ``'Regular Season'``, ``'all'`` selects
            ``'All'``, anything else selects ``'Playoffs'``.
        common: Unused; kept so existing callers keep working.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame with the rows of both queries and the extra ``on`` column.
        The index is not reset, so labels repeat across the two halves.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
        KeyError: If a response has no ``multi_row_table_data`` field.
    """
    # Equality (not identity/truthiness) is kept on purpose so values such as
    # 0 or numpy.bool_(False) keep selecting the regular season as before.
    if ps == False:  # noqa: E712
        s_type = 'Regular Season'
    elif ps == 'all':
        s_type = 'All'
    else:
        s_type = 'Playoffs'

    # Joining a bare string would split it into single characters.
    if isinstance(seasons, str):
        seasons = [seasons]
    season_param = ",".join(seasons)

    wowy_url = "https://api.pbpstats.com/get-wowy-stats/nba"
    floor_filters = (
        ("0Exactly1OnFloor", True),   # player on the floor
        ("0Exactly0OnFloor", False),  # player off the floor
    )

    frames = []
    for floor_key, is_on in floor_filters:
        if frames:
            # Short pause between the two calls to go easy on the API.
            time.sleep(.5)
        wowy_params = {
            floor_key: player1_id,
            "TeamId": team_id,
            "Season": season_param,
            "SeasonType": s_type,
            "Type": "Player",  # per-player stats
        }
        # Without a timeout a stalled connection would block forever.
        wowy_response = requests.get(wowy_url, params=wowy_params, timeout=timeout)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        wowy_response.raise_for_status()
        frame = pd.DataFrame(wowy_response.json()["multi_row_table_data"])
        frame['on'] = is_on
        frames.append(frame)

    return pd.concat(frames)
# Configure the root logger once at import time: every record at INFO or
# above goes both to the 'wowy_scraper.log' file and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('wowy_scraper.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def setup_folders(base_year: int, end_year: int, ps=False) -> None:
    """Ensure a ``data/<year>`` folder exists for every year in the range.

    Both ``base_year`` and ``end_year`` are included. When ``ps`` is truthy
    the folder names get a ``ps`` suffix (``data/<year>ps``). Folders that
    already exist are left untouched.
    """
    suffix = 'ps' if ps else ''
    season_dirs = [
        Path(f"data/{season}{suffix}")
        for season in range(base_year, end_year + 1)
    ]
    for season_dir in season_dirs:
        season_dir.mkdir(parents=True, exist_ok=True)

def get_processed_combinations(year: int, ps=False) -> Set[Tuple[str, str]]:
    """Collect the (player id, team id) pairs already saved for a season.

    Every ``*.csv`` file in ``data/<year>`` (``data/<year>ps`` when ``ps`` is
    truthy) is read; the file stem is taken as the player id and each distinct
    value of its ``TeamId`` column as a team id. Both parts are stored as
    strings. Files that cannot be read are logged and skipped. The resulting
    set is logged at INFO level before being returned.
    """
    suffix = 'ps' if ps else ''
    season_dir = Path(f"data/{year}{suffix}")
    done: Set[Tuple[str, str]] = set()

    # A missing folder simply means nothing has been processed yet.
    csv_paths = season_dir.glob("*.csv") if season_dir.exists() else ()
    for csv_path in csv_paths:
        player_key = csv_path.stem
        try:
            saved = pd.read_csv(csv_path)
            done.update((player_key, str(tid)) for tid in saved['TeamId'].unique())
        except Exception as e:
            logging.error(f"Error reading file {csv_path}: {e}")

    logging.info(done)
    return done

def process_season_data(year: int, is_postseason: bool, index_df: pd.DataFrame, 
                       processed_combinations: Set[Tuple[str, str]]) -> None:
    """Download and store WOWY data for every player/team pair of one season.

    For each ``nba_id`` listed in ``index_df`` for ``year`` and each of that
    player's distinct ``team_id`` values, ``wowy_shift`` is called and the
    result is written to ``data/<year>/<nba_id>.csv`` (``data/<year>ps/...``
    for the postseason). If the file already exists the new rows are appended
    and exact duplicate rows dropped. Pairs already present in
    ``processed_combinations`` are skipped.

    Args:
        year: Season end year; 2001 maps to the season string ``"2000-01"``.
        is_postseason: Selects playoff data and the ``ps`` folder suffix.
        index_df: Frame with at least ``year``, ``nba_id`` and ``team_id``
            columns. It is not modified.
        processed_combinations: Set of ``(str(nba_id), str(team_id))`` pairs
            that are already on disk. Newly processed pairs are added to it.

    A failure for one pair is logged, followed by a 5 second pause, and the
    loop moves on to the next pair.
    """
    trail = 'ps' if is_postseason else ''
    seasons = [f"{year - 1}-{str(year)[-2:]}"]

    # Work on this season's rows only and cast ids on that derived frame, so
    # the caller's DataFrame (possibly a filtered slice) is never mutated.
    season_rows = index_df[index_df['year'] == year]
    season_rows = season_rows.assign(nba_id=season_rows['nba_id'].astype(int))

    # Group by nba_id to handle players who appeared for multiple teams.
    player_groups = season_rows.groupby('nba_id')
    print(len(processed_combinations))
    print(len(player_groups))

    for nba_id, group in player_groups:
        player_key = str(int(nba_id))
        output_file = Path(f"data/{int(year)}{trail}/{player_key}.csv")

        for team_id in group['team_id'].unique():
            combination = (player_key, str(team_id))

            # Skip if already processed
            if combination in processed_combinations:
                logging.info(f"Skipping already processed combination: {nba_id} - {team_id} for {year}")
                continue

            try:
                logging.info(f"Processing {nba_id} - {team_id} for {year}")

                # Rate limiting before hitting the API.
                time.sleep(.4)
                result = wowy_shift(
                    team_id=team_id,
                    player1_id=player_key,
                    seasons=seasons,
                    ps=is_postseason
                )

                # If file exists, append; if not, create new
                if output_file.exists():
                    existing_data = pd.read_csv(output_file)
                    combined_data = pd.concat([existing_data, result], ignore_index=True)
                    combined_data.drop_duplicates().to_csv(output_file, index=False)
                else:
                    result.to_csv(output_file, index=False)

                # Record with the same all-string key shape used by the skip
                # check above and by the set's declared element type.
                processed_combinations.add(combination)

            except Exception as e:
                logging.error(f"Error processing {nba_id} - {team_id} for {year}: {e}")
                # Back off before the next pair in case the API is struggling.
                time.sleep(5)
                continue

def main():
    """Run the full scrape: regular seasons 2001-2025, postseasons 2001-2024.

    Loads the two index CSVs (dropping rows whose ``team`` is ``'TOT'``),
    creates the per-season output folders, then processes every season in
    order. If the index files cannot be loaded the error is logged and the
    function returns without doing anything else.
    """
    # Load data
    index_frames = []
    try:
        for index_path in ('data/index_master.csv', 'data/index_master_ps.csv'):
            raw_index = pd.read_csv(index_path)
            index_frames.append(raw_index[raw_index.team != 'TOT'])
    except Exception as e:
        logging.error(f"Error loading index files: {e}")
        return
    index_reg, index_ps = index_frames

    # Create folders
    setup_folders(2001, 2025)
    setup_folders(2001, 2024, ps=True)

    # Process regular season (2001-2025)
    regular_years = range(2001, 2026)
    for year in regular_years:
        logging.info(f"Processing regular season {year}")
        already_done = get_processed_combinations(year)
        process_season_data(year, False, index_reg, already_done)

    # Process postseason (2001-2024)
    postseason_years = range(2001, 2025)
    for year in postseason_years:
        logging.info(f"Processing postseason {year}")
        already_done = get_processed_combinations(year, ps=True)
        process_season_data(year, True, index_ps, already_done)
    


# Start the scrape only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

2025-02-07 14:32:11,548 - INFO - Processing regular season 2001
2025-02-07 14:32:13,488 - INFO - {('1600', '1610612761'), ('275', '1610612752'), ('672', '1610612745'), ('84', '1610612752'), ('1500', '1610612741'), ('136', '1610612766'), ('98', '1610612758'), ('137', '1610612742'), ('2128', '1610612764'), ('1761', '1610612742'), ('349', '1610612752'), ('363', '1610612742'), ('1722', '1610612763'), ('223', '1610612744'), ('1737', '1610612737'), ('1913', '1610612741'), ('369', '1610612752'), ('435', '1610612745'), ('2042', '1610612764'), ('965', '1610612747'), ('1916', '1610612764'), ('697', '1610612741'), ('994', '1610612751'), ('270', '1610612747'), ('1565', '1610612761'), ('902', '1610612739'), ('1630', '1610612765'), ('1504', '1610612744'), ('45', '1610612743'), ('393', '1610612764'), ('103', '1610612750'), ('375', '1610612747'), ('304', '1610612762'), ('1502', '1610612744'), ('358', '1610612756'), ('966', '1610612761'), ('960', '1610612756'), ('452', '1610612760'), ('976', '161061274

490
441


2025-02-07 14:32:13,690 - INFO - Skipping already processed combination: 1564 - 1610612739 for 2001
2025-02-07 14:32:13,691 - INFO - Skipping already processed combination: 1565 - 1610612761 for 2001
2025-02-07 14:32:13,691 - INFO - Skipping already processed combination: 1594 - 1610612738 for 2001
2025-02-07 14:32:13,692 - INFO - Skipping already processed combination: 1594 - 1610612752 for 2001
2025-02-07 14:32:13,693 - INFO - Skipping already processed combination: 1600 - 1610612761 for 2001
2025-02-07 14:32:13,693 - INFO - Skipping already processed combination: 1600 - 1610612765 for 2001
2025-02-07 14:32:13,694 - INFO - Skipping already processed combination: 1607 - 1610612753 for 2001
2025-02-07 14:32:13,694 - INFO - Skipping already processed combination: 1609 - 1610612758 for 2001
2025-02-07 14:32:13,695 - INFO - Skipping already processed combination: 1612 - 1610612744 for 2001
2025-02-07 14:32:13,696 - INFO - Skipping already processed combination: 1630 - 1610612765 for 2001


470
440


2025-02-07 14:32:19,526 - INFO - Skipping already processed combination: 1886 - 1610612754 for 2002
2025-02-07 14:32:19,527 - INFO - Skipping already processed combination: 1887 - 1610612750 for 2002
2025-02-07 14:32:19,529 - INFO - Skipping already processed combination: 1888 - 1610612764 for 2002
2025-02-07 14:32:19,529 - INFO - Skipping already processed combination: 1889 - 1610612739 for 2002
2025-02-07 14:32:19,530 - INFO - Skipping already processed combination: 1890 - 1610612756 for 2002
2025-02-07 14:32:19,531 - INFO - Skipping already processed combination: 1891 - 1610612737 for 2002
2025-02-07 14:32:19,532 - INFO - Skipping already processed combination: 1892 - 1610612739 for 2002
2025-02-07 14:32:19,533 - INFO - Skipping already processed combination: 1894 - 1610612746 for 2002
2025-02-07 14:32:19,533 - INFO - Skipping already processed combination: 1895 - 1610612750 for 2002
2025-02-07 14:32:19,534 - INFO - Skipping already processed combination: 1897 - 1610612741 for 2002


456
428


2025-02-07 14:32:21,716 - INFO - Skipping already processed combination: 2058 - 1610612747 for 2003
2025-02-07 14:32:21,719 - INFO - Skipping already processed combination: 2059 - 1610612742 for 2003
2025-02-07 14:32:21,720 - INFO - Skipping already processed combination: 2060 - 1610612746 for 2003
2025-02-07 14:32:21,721 - INFO - Skipping already processed combination: 2061 - 1610612756 for 2003
2025-02-07 14:32:21,721 - INFO - Skipping already processed combination: 2062 - 1610612744 for 2003
2025-02-07 14:32:21,722 - INFO - Skipping already processed combination: 2063 - 1610612756 for 2003
2025-02-07 14:32:21,722 - INFO - Skipping already processed combination: 2066 - 1610612747 for 2003
2025-02-07 14:32:21,723 - INFO - Skipping already processed combination: 2067 - 1610612748 for 2003
2025-02-07 14:32:21,724 - INFO - Skipping already processed combination: 2068 - 1610612752 for 2003
2025-02-07 14:32:21,725 - INFO - Skipping already processed combination: 2071 - 1610612753 for 2003


517
442


2025-02-07 14:32:23,994 - INFO - Skipping already processed combination: 2063 - 1610612756 for 2004
2025-02-07 14:32:23,994 - INFO - Skipping already processed combination: 2067 - 1610612746 for 2004
2025-02-07 14:32:23,995 - INFO - Skipping already processed combination: 2072 - 1610612749 for 2004
2025-02-07 14:32:23,995 - INFO - Skipping already processed combination: 2073 - 1610612744 for 2004
2025-02-07 14:32:23,996 - INFO - Skipping already processed combination: 2074 - 1610612758 for 2004
2025-02-07 14:32:23,996 - INFO - Skipping already processed combination: 2078 - 1610612759 for 2004
2025-02-07 14:32:23,997 - INFO - Skipping already processed combination: 2079 - 1610612757 for 2004
2025-02-07 14:32:23,997 - INFO - Skipping already processed combination: 2091 - 1610612749 for 2004
2025-02-07 14:32:23,998 - INFO - Skipping already processed combination: 2098 - 1610612747 for 2004
2025-02-07 14:32:23,999 - INFO - Skipping already processed combination: 2109 - 1610612757 for 2004


526
464


2025-02-07 14:32:26,358 - INFO - Skipping already processed combination: 2246 - 1610612762 for 2005
2025-02-07 14:32:26,359 - INFO - Skipping already processed combination: 2248 - 1610612763 for 2005
2025-02-07 14:32:26,359 - INFO - Skipping already processed combination: 2249 - 1610612752 for 2005
2025-02-07 14:32:26,360 - INFO - Skipping already processed combination: 2250 - 1610612746 for 2005
2025-02-07 14:32:26,361 - INFO - Skipping already processed combination: 2254 - 1610612761 for 2005
2025-02-07 14:32:26,361 - INFO - Skipping already processed combination: 2260 - 1610612762 for 2005
2025-02-07 14:32:26,362 - INFO - Skipping already processed combination: 2306 - 1610612762 for 2005
2025-02-07 14:32:26,362 - INFO - Skipping already processed combination: 2306 - 1610612765 for 2005
2025-02-07 14:32:26,363 - INFO - Skipping already processed combination: 2321 - 1610612756 for 2005
2025-02-07 14:32:26,363 - INFO - Skipping already processed combination: 2357 - 1610612747 for 2005


512
458


2025-02-07 14:32:28,760 - INFO - Skipping already processed combination: 2566 - 1610612757 for 2006
2025-02-07 14:32:28,761 - INFO - Skipping already processed combination: 2567 - 1610612747 for 2006
2025-02-07 14:32:28,761 - INFO - Skipping already processed combination: 2568 - 1610612765 for 2006
2025-02-07 14:32:28,762 - INFO - Skipping already processed combination: 2570 - 1610612738 for 2006
2025-02-07 14:32:28,763 - INFO - Skipping already processed combination: 2571 - 1610612756 for 2006
2025-02-07 14:32:28,764 - INFO - Skipping already processed combination: 2572 - 1610612742 for 2006
2025-02-07 14:32:28,764 - INFO - Skipping already processed combination: 2573 - 1610612740 for 2006
2025-02-07 14:32:28,765 - INFO - Skipping already processed combination: 2573 - 1610612745 for 2006
2025-02-07 14:32:28,765 - INFO - Skipping already processed combination: 2574 - 1610612748 for 2006
2025-02-07 14:32:28,766 - INFO - Skipping already processed combination: 2575 - 1610612747 for 2006


487
458


2025-02-07 14:32:31,072 - INFO - Skipping already processed combination: 2774 - 1610612766 for 2007
2025-02-07 14:32:31,072 - INFO - Skipping already processed combination: 2774 - 1610612751 for 2007
2025-02-07 14:32:31,073 - INFO - Skipping already processed combination: 2776 - 1610612761 for 2007
2025-02-07 14:32:31,073 - INFO - Skipping already processed combination: 2779 - 1610612745 for 2007
2025-02-07 14:32:31,074 - INFO - Skipping already processed combination: 2782 - 1610612737 for 2007
2025-02-07 14:32:31,074 - INFO - Skipping already processed combination: 2788 - 1610612742 for 2007
2025-02-07 14:32:31,074 - INFO - Skipping already processed combination: 2804 - 1610612741 for 2007
2025-02-07 14:32:31,075 - INFO - Skipping already processed combination: 2810 - 1610612760 for 2007
2025-02-07 14:32:31,077 - INFO - Skipping already processed combination: 2824 - 1610612760 for 2007
2025-02-07 14:32:31,077 - INFO - Skipping already processed combination: 2853 - 1610612748 for 2007


527
451


2025-02-07 14:32:33,429 - INFO - Skipping already processed combination: 2772 - 1610612747 for 2008
2025-02-07 14:32:33,430 - INFO - Skipping already processed combination: 2788 - 1610612744 for 2008
2025-02-07 14:32:33,430 - INFO - Skipping already processed combination: 2788 - 1610612747 for 2008
2025-02-07 14:32:33,431 - INFO - Skipping already processed combination: 2804 - 1610612741 for 2008
2025-02-07 14:32:33,431 - INFO - Skipping already processed combination: 2810 - 1610612763 for 2008
2025-02-07 14:32:33,432 - INFO - Skipping already processed combination: 2852 - 1610612749 for 2008
2025-02-07 14:32:33,433 - INFO - Skipping already processed combination: 2853 - 1610612748 for 2008
2025-02-07 14:32:33,433 - INFO - Skipping already processed combination: 2857 - 1610612746 for 2008
2025-02-07 14:32:33,434 - INFO - Skipping already processed combination: 2863 - 1610612760 for 2008
2025-02-07 14:32:33,434 - INFO - Skipping already processed combination: 2867 - 1610612748 for 2008


48
445


2025-02-07 14:32:36,502 - INFO - Skipping already processed combination: 979 - 1610612761 for 2009
2025-02-07 14:32:36,503 - INFO - Skipping already processed combination: 979 - 1610612748 for 2009
2025-02-07 14:32:36,504 - INFO - Skipping already processed combination: 980 - 1610612739 for 2009
2025-02-07 14:32:36,505 - INFO - Skipping already processed combination: 990 - 1610612752 for 2009
2025-02-07 14:32:36,505 - INFO - Skipping already processed combination: 990 - 1610612760 for 2009
2025-02-07 14:32:36,506 - INFO - Skipping already processed combination: 1088 - 1610612743 for 2009
2025-02-07 14:32:36,506 - INFO - Skipping already processed combination: 1088 - 1610612760 for 2009
2025-02-07 14:32:36,507 - INFO - Skipping already processed combination: 1112 - 1610612739 for 2009
2025-02-07 14:32:36,508 - INFO - Skipping already processed combination: 1477 - 1610612759 for 2009
2025-02-07 14:32:36,509 - INFO - Skipping already processed combination: 1495 - 1610612759 for 2009
2025-