In [None]:
from nba_api.stats.static import players,teams
import pandas as pd
import requests
import sys
import os
import time
from datetime import datetime

def format_date_to_url(date):
    """Turn a YYYYMMDD date into the URL-encoded MM/DD/YYYY query value.

    Args:
        date: The date as an int or string of the form YYYYMMDD; it is
            passed through ``str()`` before parsing.

    Returns:
        The date as ``MM%2FDD%2FYYYY`` (``%2F`` is the percent-encoded slash).

    Raises:
        ValueError: If ``str(date)`` does not match the ``%Y%m%d`` format.
    """
    # In strftime, '%%' emits a literal '%', so '%%2F' produces '%2F'.
    return datetime.strptime(str(date), '%Y%m%d').strftime('%m%%2F%d%%2F%Y')

# Example usage: format_date_to_url(20180415) returns '04%2F15%2F2018'

def pull_data(url, timeout=30):
    """Request a stats.nba.com URL and return its result set as a DataFrame.

    Two response layouts are handled:

    * ``resultSets`` is a list: the first entry's ``rowSet`` and ``headers``
      are used directly.
    * ``resultSets`` is a single mapping: ``rowSet`` is used for the rows and
      ``headers[1]['columnNames']`` for the column names.

    After a successful fetch the function sleeps for half a second before
    returning, which spaces out consecutive requests.

    Args:
        url: Full request URL, including the query string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A ``pandas.DataFrame`` with one row per ``rowSet`` record.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the response status is 4xx/5xx.
        KeyError: If the payload lacks the expected keys.
    """
    headers = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/",
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a JSON decoding error on an
    # error page.
    response.raise_for_status()
    payload = response.json()

    result_sets = payload["resultSets"]
    if isinstance(result_sets, list):
        result = result_sets[0]
        columns = result["headers"]
    else:
        result = result_sets
        # In this layout the second header entry carries the column names.
        columns = result["headers"][1]['columnNames']

    df = pd.DataFrame.from_records(result["rowSet"], columns=columns)

    time.sleep(.5)
    return df


# (CloseDefDistRange query value, column prefix) for each defender-distance
# bucket requested from leaguedashplayerptshot, in request order.
_SHOT_DEFENDER_RANGES = (
    ('0-2%20Feet%20-%20Very%20Tight', 'very_tight_'),
    ('2-4%20Feet%20-%20Tight', 'tight_'),
    ('4-6%20Feet%20-%20Open', 'open_'),
    ('6%2B%20Feet%20-%20Wide%20Open', 'wide_open_'),
)


def _player_stats_url(measure_type, date, season):
    """Build the leaguedashplayerstats playoff-totals URL for a single date."""
    return (
        'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom='
        + date + '&DateTo=' + date
        + '&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType='
        + measure_type
        + '&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season='
        + season
        + '&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    )


def _tracking_url(pt_measure_type, date, season):
    """Build the leaguedashptstats playoff-totals URL for a single date."""
    return (
        'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom='
        + date + '&DateTo=' + date
        + '&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType='
        + pt_measure_type + '&Season=' + season
        + '&SeasonSegment=&SeasonType=Playoffs&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    )


def _shot_url(close_def_dist_range, date, season):
    """Build the leaguedashplayerptshot playoff-totals URL for a single date."""
    return (
        'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange='
        + close_def_dist_range
        + '&College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date
        + '&Division=&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season='
        + season
        + '&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference=&VsDivision=&Weight='
    )


def _merge_new_columns(base, extra):
    """Left-join onto ``base`` the columns of ``extra`` it does not have yet."""
    known = set(base.columns)
    # Keep the source column order so the merged layout is the same on every
    # run (a set difference would give an arbitrary order).
    new_columns = [col for col in dict.fromkeys(extra.columns) if col not in known]
    new_columns.append('PLAYER_ID')
    return base.merge(extra[new_columns], on='PLAYER_ID', how='left').reset_index(drop=True)


def pull_game_level(dates, years=range(2021, 2025)):
    """Download and merge per-player playoff stats for every given game date.

    For each date, twelve stats.nba.com requests are made through
    ``pull_data`` (Base and Advanced player stats, six tracking measure
    types, and four defender-distance shot splits). The Base frame is the
    left side of successive ``PLAYER_ID`` left joins that add only columns
    not already present. Each year's rows are written to ``<year>.csv`` in
    the current directory.

    Args:
        dates: Iterable of game dates as YYYYMMDD integers.
        years: Years to process. A date belongs to ``year`` when
            ``year * 10000 < date < (year + 1) * 10000``; the season string
            sent to the API is ``"<year-1>-<last two digits of year>"``.

    Returns:
        A DataFrame with all processed years concatenated, with added
        ``year`` and ``date`` columns. An empty DataFrame is returned when
        no date falls in any requested year.
    """
    # Accumulates one frame per year; it must not be reused for the per-date
    # frames, otherwise the final concat returns the wrong data.
    yearly_frames = []
    shotcolumns = [
        'FGA_FREQUENCY',
        'FGM', 'FGA', 'FG_PCT', 'EFG_PCT', 'FG2A_FREQUENCY', 'FG2M', 'FG2A',
        'FG2_PCT', 'FG3A_FREQUENCY', 'FG3M', 'FG3A', 'FG3_PCT',
    ]
    pullup_columns = shotcolumns + ['EFG%']

    for year in years:
        year_dates = [date for date in dates if year * 10000 < date < (year + 1) * 10000]
        if not year_dates:
            # pd.concat raises on an empty list, so skip years without games.
            continue

        season = str(year - 1) + '-' + str(year)[-2:]
        year_frame = []

        for raw_date in year_dates:
            date_num = int(raw_date)
            date = format_date_to_url(raw_date)

            df = pull_data(_player_stats_url('Base', date, season))

            extras = [pull_data(_player_stats_url('Advanced', date, season))]
            for measure in ('Passing', 'Drives', 'Possessions', 'Rebounding'):
                extras.append(pull_data(_tracking_url(measure, date, season)))

            # Prefix the shooting columns so the four distance buckets do not
            # collide when merged into one frame.
            for def_range, prefix in _SHOT_DEFENDER_RANGES:
                shots = pull_data(_shot_url(def_range, date, season))
                extras.append(shots.rename(columns={col: prefix + col for col in shotcolumns}))

            pullup = pull_data(_tracking_url('PullUpShot', date, season))
            extras.append(pullup.rename(columns={col: 'pullup_' + col for col in pullup_columns}))

            extras.append(pull_data(_tracking_url('Efficiency', date, season)))

            for extra in extras:
                df = _merge_new_columns(df, extra)

            df['year'] = year
            df['date'] = date_num
            print(date)

            year_frame.append(df)

        yeardata = pd.concat(year_frame)
        yeardata.to_csv(str(year) + '.csv', index=False)
        yearly_frames.append(yeardata)
        print(f"Year: {year}")

    if not yearly_frames:
        return pd.DataFrame()
    return pd.concat(yearly_frames)



def get_dates():
    """Collect the distinct player/game rows from the per-team playoff CSVs.

    For every year from 2014 through 2024 and every team returned by
    ``teams.get_teams()``, the file ``../team/<year>ps/<team_id>.csv`` is
    read if it exists. Only the ``PLAYER_ID``, ``HTM``, ``VTM`` and
    ``GAME_DATE`` columns are kept, with duplicate rows removed per file.

    Returns:
        A DataFrame concatenating the rows from every file found. If no file
        exists, an empty DataFrame with those four columns is returned so
        callers can still select ``GAME_DATE``.
    """
    wanted = ['PLAYER_ID', 'HTM', 'VTM', 'GAME_DATE']
    # The team list does not depend on the year, so look it up once.
    team_ids = [team['id'] for team in teams.get_teams()]

    found = []
    for year in range(2014, 2025):
        for team_id in team_ids:
            path = '../team/' + str(year) + 'ps/' + str(team_id) + '.csv'
            if not os.path.exists(path):
                continue
            # Non-inplace drop_duplicates avoids mutating a column slice.
            found.append(pd.read_csv(path)[wanted].drop_duplicates())

    if not found:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=wanted)
    return pd.concat(found)

# Load the player/game rows from the per-team CSVs and reduce them to the
# list of distinct GAME_DATE values.
dateframe=get_dates()
dates=dateframe['GAME_DATE'].unique().tolist()

# Download and merge the stats for every collected date. This issues network
# requests through pull_data and writes one "<year>.csv" file per year.
#df= pull_game_level(dates)
data=pull_game_level(dates)


04%2F15%2F2018
04%2F17%2F2018
04%2F20%2F2018
04%2F22%2F2018
04%2F24%2F2018
04%2F26%2F2018
04%2F28%2F2018
04%2F30%2F2018
05%2F03%2F2018
05%2F05%2F2018
05%2F07%2F2018
05%2F09%2F2018
05%2F13%2F2018
05%2F15%2F2018
05%2F19%2F2018
05%2F21%2F2018
05%2F23%2F2018
05%2F25%2F2018
05%2F27%2F2018
04%2F18%2F2018
04%2F25%2F2018
04%2F27%2F2018
04%2F29%2F2018
05%2F01%2F2018
05%2F31%2F2018
06%2F03%2F2018
06%2F06%2F2018
06%2F08%2F2018
04%2F14%2F2018
04%2F19%2F2018
04%2F21%2F2018
05%2F04%2F2018
05%2F06%2F2018
05%2F08%2F2018
04%2F16%2F2018
05%2F14%2F2018
05%2F16%2F2018
05%2F20%2F2018
05%2F22%2F2018
05%2F24%2F2018
05%2F26%2F2018
05%2F28%2F2018
04%2F23%2F2018
05%2F02%2F2018
Year: 2018
04%2F14%2F2019
04%2F17%2F2019
04%2F19%2F2019
04%2F21%2F2019
04%2F28%2F2019
04%2F30%2F2019
05%2F03%2F2019
05%2F06%2F2019
05%2F08%2F2019
04%2F13%2F2019
04%2F16%2F2019
04%2F18%2F2019
04%2F20%2F2019
04%2F23%2F2019
04%2F25%2F2019
04%2F27%2F2019
04%2F29%2F2019
05%2F01%2F2019
05%2F05%2F2019
05%2F07%2F2019
05%2F09%2F2019
05%2F12%2F2019

In [None]:
# Notebook cell: display the last entry of the `dates` list.
dates[-1]