In [4]:
from nba_api.stats.static import players,teams
import pandas as pd
import requests
import sys
import os
import time
from datetime import datetime

def format_date_to_url(date):
    """Turn a ``YYYYMMDD`` date into a URL-encoded ``MM/DD/YYYY`` string.

    Args:
        date: The date as an int or str in ``YYYYMMDD`` form; it is passed
            through ``str()`` before parsing.

    Returns:
        The date as ``MM%2FDD%2FYYYY``, where ``%2F`` is the percent-encoded
        slash, ready to be dropped into a query string.

    Raises:
        ValueError: If ``str(date)`` does not parse with ``%Y%m%d``.
    """
    parsed = datetime.strptime(str(date), '%Y%m%d')

    # Render the three fields separately, then glue them with the encoded '/'.
    month_day_year = parsed.strftime('%m %d %Y').split(' ')
    return '%2F'.join(month_day_year)

# Example usage: format_date_to_url(19970425) returns '04%2F25%2F1997'

def pull_data(url, timeout=30):
    """Fetch one stats.nba.com endpoint and return its rows as a DataFrame.

    Two response layouts are handled:

    * ``resultSets`` is a list: the first entry's ``headers`` and ``rowSet``
      are used.
    * ``resultSets`` is a single object: rows come from ``rowSet`` and the
      column names from ``headers[1]['columnNames']``.

    Args:
        url: Fully built request URL.
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            fails instead of blocking the whole scrape.

    Returns:
        A ``pandas.DataFrame`` with one row per ``rowSet`` entry.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status.
        requests.exceptions.Timeout: If no response arrives within
            ``timeout`` seconds.
    """
    # Browser-like headers; presumably the endpoint rejects or stalls on
    # requests without them -- confirm before trimming.
    headers = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP failures directly rather than as a JSON decoding error.
    response.raise_for_status()
    payload = response.json()
    result_sets = payload["resultSets"]

    # Decide on the container type, not its length: a list holding several
    # result sets must not be treated as the single-object layout.
    if isinstance(result_sets, list):
        first_set = result_sets[0]
        data = first_set["rowSet"]
        columns = first_set["headers"]
    else:
        data = result_sets["rowSet"]
        columns = result_sets["headers"][1]['columnNames']

    df = pd.DataFrame.from_records(data, columns=columns)

    # Pause between consecutive requests.
    time.sleep(.5)
    return df


def pull_game_level(dates, unit='Player', start_year=2021, end_year=2024):
    """Pull per-date playoff player stats and merge them into one frame.

    For every date that falls inside a year of the requested range, twelve
    stats.nba.com endpoints are fetched through ``pull_data`` (base and
    advanced box score, four tracking measures, four closest-defender shot
    splits, pull-up shooting and efficiency).  The extra frames are
    left-joined onto the base frame on ``PLAYER_ID``, adding only columns
    the base frame does not have yet.  Each year's rows are written to
    ``<year>.csv`` in the working directory.

    Args:
        dates: Iterable of numeric ``YYYYMMDD`` dates.
        unit: Value sent as ``PlayerOrTeam`` for the Passing, Drives,
            Possessions and Rebounding requests.
        start_year: First year (inclusive) to process.
        end_year: Last year (inclusive) to process.

    Returns:
        The concatenation of all per-year frames, or an empty DataFrame when
        no date falls in the requested range.
    """
    shotcolumns = ['FGA_FREQUENCY', 'FGM', 'FGA', 'FG_PCT', 'EFG_PCT', 'FG2A_FREQUENCY', 'FG2M', 'FG2A', 'FG2_PCT',
                   'FG3A_FREQUENCY', 'FG3M', 'FG3A', 'FG3_PCT']
    # The pull-up frame gets the same prefix on one additional column.
    pullup_columns = shotcolumns + ['EFG%']

    # (CloseDefDistRange query value, prefix put on the shot columns)
    defender_ranges = [
        ('0-2%20Feet%20-%20Very%20Tight', 'very_tight_'),
        ('2-4%20Feet%20-%20Tight', 'tight_'),
        ('4-6%20Feet%20-%20Open', 'open_'),
        ('6%2B%20Feet%20-%20Wide%20Open', 'wide_open_'),
    ]

    player_stats_url = (
        'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country='
        '&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope='
        '&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location='
        '&MeasureType={measure}&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N'
        '&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N'
        '&Season={season}&SeasonSegment=&SeasonType=Playoffs&ShotClockRange='
        '&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    )
    tracking_url = (
        'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country='
        '&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope='
        '&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0'
        '&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience='
        '&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType={measure}&Season={season}'
        '&SeasonSegment=&SeasonType=Playoffs&StarterBench=&TeamID=0&VsConference='
        '&VsDivision=&Weight='
    )
    defended_shot_url = (
        'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange={defender_range}'
        '&College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division='
        '&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange='
        '&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome='
        '&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition='
        '&Season={season}&SeasonSegment=&SeasonType=Playoffs&ShotClockRange='
        '&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference='
        '&VsDivision=&Weight='
    )

    # One entry per processed year.  Kept separate from the per-date list of
    # frames so the accumulator is never overwritten inside the date loop.
    year_totals = []

    for year in range(start_year, end_year + 1):
        year_dates = [d for d in dates if year * 10000 < d < (year + 1) * 10000]
        if not year_dates:
            # pd.concat raises on an empty list; a year without dates is skipped.
            continue

        season = str(year - 1) + '-' + str(year)[-2:]
        year_frame = []

        for raw_date in year_dates:
            date_num = int(raw_date)
            date = format_date_to_url(raw_date)

            df = pull_data(player_stats_url.format(date=date, season=season, measure='Base'))

            extras = [pull_data(player_stats_url.format(date=date, season=season, measure='Advanced'))]

            for measure in ('Passing', 'Drives', 'Possessions', 'Rebounding'):
                extras.append(pull_data(
                    tracking_url.format(date=date, season=season, unit=unit, measure=measure)))

            for defender_range, prefix in defender_ranges:
                shots = pull_data(defended_shot_url.format(
                    date=date, season=season, defender_range=defender_range))
                extras.append(shots.rename(columns={col: prefix + col for col in shotcolumns}))

            # These two measures are always requested at player level,
            # independent of `unit`.
            pullup = pull_data(
                tracking_url.format(date=date, season=season, unit='Player', measure='PullUpShot'))
            extras.append(pullup.rename(columns={col: 'pullup_' + col for col in pullup_columns}))
            extras.append(pull_data(
                tracking_url.format(date=date, season=season, unit='Player', measure='Efficiency')))

            for extra in extras:
                known = set(df.columns)
                # Ordered and de-duplicated, so the merged column order is
                # the same on every run.
                new_columns = [col for col in dict.fromkeys(extra.columns) if col not in known]
                new_columns.append('PLAYER_ID')
                df = df.merge(extra[new_columns], on='PLAYER_ID', how='left').reset_index(drop=True)

            df['year'] = year
            df['date'] = date_num
            print(date)

            year_frame.append(df)

        yeardata = pd.concat(year_frame)
        yeardata.to_csv(str(year) + '.csv', index=False)
        year_totals.append(yeardata)
        print(f"Year: {year}")

    if not year_totals:
        return pd.DataFrame()
    return pd.concat(year_totals)


def pull_game_level_classic(dates, start_year, end_year, unit='Player'):
    """Pull per-date playoff box-score and shot-location stats per player.

    For every date whose first four characters equal a year in
    ``start_year..end_year``, four stats.nba.com endpoints are fetched
    through ``pull_data``: base and advanced box score, shot locations by
    zone and shot locations by 5 ft range.  The extra frames are left-joined
    onto the base frame on ``PLAYER_ID``, adding only columns the base frame
    does not have yet.  Each year's rows are written to ``<year>.csv`` in
    the working directory.

    Args:
        dates: Iterable of ``YYYYMMDD`` dates (int or str).
        start_year: First year (inclusive) to process.
        end_year: Last year (inclusive) to process.
        unit: Accepted for signature compatibility; not used by this function.

    Returns:
        The concatenation of all per-year frames, or an empty DataFrame when
        no date falls in the requested range.
    """
    # Column names are assigned by position to the shot-location frames.
    # NOTE(review): this assumes the endpoint returns the zones in exactly
    # this order -- confirm against a live response.
    zone_columns = [
        'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'NICKNAME',
        'RA_FGM', 'RA_FGA', 'RA_FG_PCT',                                      # Restricted Area
        'ITP_FGM', 'ITP_FGA', 'ITP_FG_PCT',                                   # In The Paint (Non-RA)
        'MID_FGM', 'MID_FGA', 'MID_FG_PCT',                                   # Mid Range
        'LEFT_CORNER_3_FGM', 'LEFT_CORNER_3_FGA', 'LEFT_CORNER_3_FG_PCT',     # Left Corner 3
        'RIGHT_CORNER_3_FGM', 'RIGHT_CORNER_3_FGA', 'RIGHT_CORNER_3_FG_PCT',  # Right Corner 3
        'ABOVE_BREAK_3_FGM', 'ABOVE_BREAK_3_FGA', 'ABOVE_BREAK_3_FG_PCT',     # Above the Break 3
        'BACKCOURT_FGM', 'BACKCOURT_FGA', 'BACKCOURT_FG_PCT',                 # Backcourt
        'CORNER_3_FGM', 'CORNER_3_FGA', 'CORNER_3_FG_PCT',                    # All Corner 3s
    ]
    # NOTE(review): 'TEAM_ABBR' differs from the 'TEAM_ABBREVIATION' name
    # used above, so it is merged into the result as an extra column.  Kept
    # as-is to preserve the output schema.
    range_columns = [
        'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBR', 'AGE', 'NICKNAME',
        'FGM_LT_5', 'FGA_LT_5', 'FGP_LT_5',           # Less than 5 feet
        'FGM_5_9', 'FGA_5_9', 'FGP_5_9',              # 5-9 feet
        'FGM_10_14', 'FGA_10_14', 'FGP_10_14',        # 10-14 feet
        'FGM_15_19', 'FGA_15_19', 'FGP_15_19',        # 15-19 feet
        'FGM_20_24', 'FGA_20_24', 'FGP_20_24',        # 20-24 feet
        'FGM_25_29', 'FGA_25_29', 'FGP_25_29',        # 25-29 feet
        'FGM_30_34', 'FGA_30_34', 'FGP_30_34',        # 30-34 feet
        'FGM_35_39', 'FGA_35_39', 'FGP_35_39',        # 35-39 feet
        'FGM_40_PLUS', 'FGA_40_PLUS', 'FGP_40_PLUS',  # 40+ feet
    ]

    player_stats_url = (
        'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country='
        '&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope='
        '&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location='
        '&MeasureType={measure}&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N'
        '&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N'
        '&Season={season}&SeasonSegment=&SeasonType=Playoffs&ShotClockRange='
        '&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    )
    shot_location_url = (
        'https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference='
        '&Country=&DateFrom={date}&DateTo={date}&DistanceRange={distance_range}&Division='
        '&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0'
        '&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0'
        '&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition='
        '&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType=Playoffs'
        '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    )

    # One entry per processed year.  Kept separate from the per-date list of
    # frames so the accumulator is never overwritten inside the date loop.
    year_totals = []

    for year in range(start_year, end_year + 1):
        year_dates = [d for d in dates if str(d)[0:4] == str(year)]
        if not year_dates:
            # pd.concat raises on an empty list; a year without dates is skipped.
            continue

        season = str(year - 1) + '-' + str(year)[-2:]
        year_frame = []

        for raw_date in year_dates:
            date_num = int(raw_date)
            date = format_date_to_url(raw_date)

            df = pull_data(player_stats_url.format(date=date, season=season, measure='Base'))
            advanced = pull_data(player_stats_url.format(date=date, season=season, measure='Advanced'))

            by_zone = pull_data(shot_location_url.format(
                date=date, season=season, distance_range='By%20Zone'))
            by_zone.columns = zone_columns

            by_range = pull_data(shot_location_url.format(
                date=date, season=season, distance_range='5ft%20Range'))
            by_range.columns = range_columns

            for extra in (advanced, by_zone, by_range):
                known = set(df.columns)
                # Ordered and de-duplicated, so the merged column order is
                # the same on every run.
                new_columns = [col for col in dict.fromkeys(extra.columns) if col not in known]
                new_columns.append('PLAYER_ID')
                df = df.merge(extra[new_columns], on='PLAYER_ID', how='left').reset_index(drop=True)

            df['year'] = year
            df['date'] = date_num
            print(date)

            year_frame.append(df)

        yeardata = pd.concat(year_frame)
        yeardata.to_csv(str(year) + '.csv', index=False)
        year_totals.append(yeardata)
        print(f"Year: {year}")

    if not year_totals:
        return pd.DataFrame()
    return pd.concat(year_totals)

def get_dates(first_year=1996, last_year=2013, root='../team'):
    """Collect the distinct player/game rows from the per-team CSV files.

    For every year in ``first_year..last_year`` and every team returned by
    ``teams.get_teams()``, the file ``<root>/<year>ps/<team_id>.csv`` is read
    if it exists, reduced to the columns ``PLAYER_ID``, ``HTM``, ``VTM`` and
    ``GAME_DATE``, and de-duplicated.

    Args:
        first_year: First year (inclusive) to look for.
        last_year: Last year (inclusive) to look for.
        root: Directory that holds the ``<year>ps`` folders.

    Returns:
        The concatenation of all frames that were found, or an empty
        DataFrame with the four columns above when no file exists.
    """
    wanted = ['PLAYER_ID', 'HTM', 'VTM', 'GAME_DATE']
    # The team list does not depend on the year, so it is fetched once.
    team_ids = [team['id'] for team in teams.get_teams()]

    found = []
    for year in range(first_year, last_year + 1):
        for team_id in team_ids:
            path = f'{root}/{year}ps/{team_id}.csv'
            if os.path.exists(path):
                # Chained, non-inplace form: avoids mutating a column
                # selection in place.
                found.append(pd.read_csv(path)[wanted].drop_duplicates())

    if not found:
        # pd.concat raises on an empty list; hand back an empty frame that
        # still has the expected columns.
        return pd.DataFrame(columns=wanted)
    return pd.concat(found)

# Load the rows gathered by get_dates() and keep the distinct GAME_DATE values.
dateframe=get_dates()
dates=dateframe['GAME_DATE'].unique().tolist()

# Year bounds passed to the puller below.
start_year=1997
end_year=2013
# NOTE(review): pull_game_level_team is not defined in the visible part of this
# notebook; pull_game_level_classic above accepts the same
# (dates, start_year, end_year) arguments -- confirm which function is intended.
df= pull_game_level_team(dates,start_year,end_year)
# Alternative entry point, currently disabled:
#data=pull_game_level(dates)


04%2F25%2F1997
04%2F27%2F1997
04%2F29%2F1997
05%2F02%2F1997
05%2F04%2F1997
05%2F06%2F1997
05%2F08%2F1997
05%2F10%2F1997
05%2F11%2F1997
05%2F13%2F1997
04%2F30%2F1997
05%2F20%2F1997
05%2F22%2F1997
05%2F24%2F1997
05%2F26%2F1997
05%2F28%2F1997
06%2F01%2F1997
06%2F04%2F1997
06%2F06%2F1997
06%2F08%2F1997
06%2F11%2F1997
06%2F13%2F1997
04%2F24%2F1997
04%2F26%2F1997
05%2F05%2F1997
05%2F07%2F1997
05%2F09%2F1997
05%2F15%2F1997
05%2F17%2F1997
05%2F19%2F1997
05%2F21%2F1997
05%2F23%2F1997
05%2F25%2F1997
05%2F27%2F1997
05%2F29%2F1997
04%2F28%2F1997
05%2F12%2F1997
05%2F01%2F1997
05%2F14%2F1997
05%2F16%2F1997
05%2F18%2F1997
05%2F03%2F1997
Year: 1997
04%2F23%2F1998
04%2F25%2F1998
04%2F28%2F1998
05%2F01%2F1998
04%2F27%2F1998
04%2F30%2F1998
04%2F24%2F1998
04%2F26%2F1998
04%2F29%2F1998
05%2F03%2F1998
05%2F06%2F1998
05%2F08%2F1998
05%2F10%2F1998
05%2F13%2F1998
05%2F17%2F1998
05%2F19%2F1998
05%2F23%2F1998
05%2F25%2F1998
05%2F27%2F1998
05%2F29%2F1998
05%2F31%2F1998
06%2F03%2F1998
06%2F05%2F1998
06%2F07%2F1998

In [None]:
# Display the last entry of the `dates` list.
dates[-1]