In [None]:
import pandas as pd
import concurrent.futures
from functools import partial
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import time
import pandas as pd
import concurrent.futures
from functools import partial
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import time
from tqdm import tqdm


import os
def create_session():
    """Build a ``requests`` session that retries HTTPS calls on server errors.

    Returns:
        A ``requests.Session`` with an ``HTTPAdapter`` mounted on the
        ``https://`` prefix. The adapter's retry policy has a total budget
        of 5 retries, a backoff factor of 0.1, and forces a retry on
        response statuses 500, 502, 503 and 504.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=0.1,
        status_forcelist=[500, 502, 503, 504],
    )
    adapter = HTTPAdapter(max_retries=retry_policy)

    http = requests.Session()
    http.mount('https://', adapter)
    return http

def playerpull(session, player_id, team_id, season, opp=False, ps=False):
    """Fetch the two WOWY stat rows (player "on" and player "off") for a team.

    Two GET requests are sent to the pbpstats ``get-wowy-stats`` endpoint.
    They differ only in the filter key that carries ``player_id``:
    ``0Exactly1OnFloor`` for the first ("on") request and
    ``0Exactly0OnFloor`` for the second ("off") request.

    Args:
        session: Object exposing ``get(url, params=..., timeout=...)``,
            e.g. a ``requests.Session``.
        player_id: Value sent under the on/off filter key.
        team_id: Value sent as ``TeamId``.
        season: Value sent as ``Season``.
        opp: When true, ``Type`` is ``"Opponent"``; otherwise ``"Team"``.
        ps: When true, ``SeasonType`` is ``"Playoffs"``; otherwise
            ``"Regular Season"``.

    Returns:
        ``(stats_on, stats_off)``: the ``"single_row_table_data"`` value of
        each JSON response, or ``(None, None)`` if a request raises a
        ``requests`` exception or the key is missing from a response.
    """
    if opp:
        term = "Opponent"
    else:
        term = "Team"
    if ps:
        s_type = "Playoffs"
    else:
        s_type = "Regular Season"

    wowy_url = "https://api.pbpstats.com/get-wowy-stats/nba"

    # Parameters common to both requests; the on/off filter key is placed
    # in front of them so the query keeps the same parameter order.
    shared_params = {
        "TeamId": team_id,
        "Season": season,
        "SeasonType": s_type,
        "Type": term,
    }

    def fetch_row(floor_key):
        """Run one WOWY query and return its single-row table payload."""
        query = {floor_key: player_id, **shared_params}
        reply = session.get(wowy_url, params=query, timeout=20)
        reply.raise_for_status()
        return reply.json()["single_row_table_data"]

    try:
        on_floor_row = fetch_row("0Exactly1OnFloor")
        off_floor_row = fetch_row("0Exactly0OnFloor")
    except (requests.exceptions.RequestException, KeyError) as e:
        print(f"Error processing player {player_id}: {str(e)}")
        return None, None
    else:
        print('Row')
        # Pause after each successful pair of requests before returning.
        time.sleep(.5)
        # Raw payloads are returned; the caller assembles the dataframe.
        return on_floor_row, off_floor_row

def process_player(session, player_id, team_id, year, opp, ps):
    """Pull one player's on/off rows for a season and tag them with metadata.

    Args:
        session: HTTP session forwarded to ``playerpull``.
        player_id: Player identifier forwarded to ``playerpull``.
        team_id: Team identifier forwarded to ``playerpull``.
        year: Integer year; the season label is built as
            ``"<year-1>-<last two digits of year>"`` (2007 -> ``"2006-07"``).
        opp: Forwarded to ``playerpull`` and stored as ``player_vs``.
        ps: Forwarded to ``playerpull``.

    Returns:
        ``[stats_on, stats_off]`` with ``player_id``, ``team_id``, ``year``,
        ``season``, ``player_vs`` and ``player_on`` added to each mapping, or
        ``None`` if either row is missing or any exception is raised (the
        exception is printed, not propagated).
    """
    try:
        first_year = year - 1
        short_year = str(year)[-2:]
        season = f"{first_year}-{short_year}"

        on_row, off_row = playerpull(
            session, player_id, team_id, season, opp=opp, ps=ps
        )
        if on_row is None or off_row is None:
            return None

        # Identifying fields stamped onto both rows, in this order.
        tags = {
            'player_id': player_id,
            'team_id': team_id,
            'year': year,
            'season': season,
            'player_vs': opp,
        }
        for row in (on_row, off_row):
            for field, value in tags.items():
                row[field] = value

        on_row['player_on'] = True
        off_row['player_on'] = False

        # Plain dictionaries are returned; the caller builds the dataframe.
        return [on_row, off_row]
    except Exception as e:
        print(f"Error processing player {player_id}: {str(e)}")
        return None

def _pending_players(season_index, old_df):
    """Return the rows of ``season_index`` whose player/team pair is not in ``old_df``.

    ``season_index`` is matched on its ``PLAYER_ID`` / ``TEAM_ID`` columns and
    ``old_df`` on its ``player_id`` / ``team_id`` columns.
    """
    # Compare (player, team) pairs. Adding the two ids into a single key lets
    # different pairs share a sum, which would wrongly mark a player as
    # already fetched.
    fetched = set(zip(old_df['player_id'], old_df['team_id']))
    keep = [
        pair not in fetched
        for pair in zip(season_index['PLAYER_ID'], season_index['TEAM_ID'])
    ]
    # An index-aligned boolean Series also behaves correctly for an empty frame.
    return season_index[pd.Series(keep, index=season_index.index, dtype=bool)]


def pull_onoff(years, opp=False, ps=False):
    """Download on/off rows for every indexed player and save one CSV per year.

    The player index CSV is read from the ``gabriel1200/shot_data`` GitHub
    repository (``player_index.csv``, or ``player_indexps.csv`` when ``ps`` is
    true) and restricted to rows with ``year > 2000``. For each requested
    year the players are fetched through a 12-worker thread pool and the
    results are written to ``<year><'vs' if opp><'ps' if ps>.csv`` in the
    current directory.

    If that output file already exists, player/team pairs already present in
    it are skipped and the new rows are appended to the existing ones, so an
    interrupted run can be resumed.

    Args:
        years: Iterable of integer years to process.
        opp: Forwarded to ``process_player``; also adds ``vs`` to the file name.
        ps: Forwarded to ``process_player``; also selects the ``ps`` player
            index and adds ``ps`` to the file name.

    Returns:
        None. Results are written to CSV files and progress is printed.
    """
    opp_string = 'vs' if opp else ''
    pstring = 'ps' if ps else ''

    player_index = pd.read_csv(
        'https://raw.githubusercontent.com/gabriel1200/shot_data/master/player_index'
        + pstring + '.csv'
    )
    player_index = player_index[player_index.year > 2000]

    # The context manager closes the session's pooled connections on exit,
    # including when a year fails part-way through.
    with create_session() as session:
        for year in years:
            season_index = player_index[player_index.year == year].reset_index(drop=True)
            out_file = f"{year}{opp_string}{pstring}.csv"
            print(out_file)

            # Read any previous output once; it is used both to skip finished
            # players and to merge with the new rows before saving.
            old_df = pd.read_csv(out_file) if os.path.exists(out_file) else None
            if old_df is not None:
                season_index = _pending_players(season_index, old_df)
            print(f"Processing {len(season_index)} players for year {year}")

            process_func = partial(process_player, session, year=year, opp=opp, ps=ps)

            all_stats = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=12) as executor:
                futures = [
                    executor.submit(process_func, row['PLAYER_ID'], row['TEAM_ID'])
                    for _, row in season_index.iterrows()
                ]
                for future in concurrent.futures.as_completed(futures):
                    result = future.result()
                    if result is not None:
                        all_stats.extend(result)

            if not all_stats:
                print(f"No data to save for year {year}")
                continue

            # Build the dataframe from all collected rows at once.
            year_frame = pd.DataFrame(all_stats)
            if old_df is not None:
                year_frame = pd.concat([old_df, year_frame])
            print(f'Year {year} completed, saving...')
            year_frame.to_csv(out_file, index=False)


if __name__ == "__main__":
    # Years 2007 through 2017 inclusive (the upper bound of range() is exclusive).
    years = [*range(2007, 2018)]
    # Regular-season pull with the opponent ("vs") stat type.
    pull_onoff(years, ps=False, opp=True)

  player_index = pd.read_csv('https://raw.githubusercontent.com/gabriel1200/shot_data/master/player_index'+pstring+'.csv')


2007vs.csv
Processing 89 players for year 2007
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Year 2007 completed, saving...
2008vs.csv
Processing 60 players for year 2008
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Year 2008 completed, saving...
2009vs.csv
Processing 42 players for year 2009
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Row
Year 2009 completed, saving...
20