# 1. <a id='toc1_'></a>[Web Scraper for NBA Stats](#toc0_)

**Table of contents**<a id='toc0_'></a>    
- 1. [Web Scraper for NBA Stats](#toc1_)    
  - 1.1. [Importing libraries](#toc1_1_)    
  - 1.2. [API Requesting](#toc1_2_)    
  - 1.3. [Treating the data](#toc1_3_)    
  - 1.4. [Expanding to previous seasons](#toc1_4_)    
  - 1.5. [Loading back the data](#toc1_5_)    

<!-- vscode-jupyter-toc-config
	numbering=true
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

## 1.1. <a id='toc1_1_'></a>[Importing libraries](#toc0_)

In [48]:
import requests
import pandas as pd
import os

In [49]:
# Query parameters shared by the three endpoint URLs below.
season_id = '2022-23'
per_mode = 'PerGame'
season_type = 'Regular%20Season'  # already percent-encoded for use in a URL

# Values substituted into the {placeholders} of each URL template.
_query_values = {
    'per_mode': per_mode,
    'season_id': season_id,
    'season_type': season_type,
}

# Player stats endpoint (leaguedashplayerstats).
url_players_stats = (
    'https://stats.nba.com/stats/leaguedashplayerstats'
    '?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear='
    '&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location='
    '&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N'
    '&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N'
    '&Season={season_id}&SeasonSegment=&SeasonType={season_type}'
    '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
).format(**_query_values)

# Player bios endpoint (leaguedashplayerbiostats).
url_players_bios = (
    'https://stats.nba.com/stats/leaguedashplayerbiostats'
    '?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear='
    '&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location='
    '&Month=0&OpponentTeamID=0&Outcome=&PORound=0'
    '&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition='
    '&Season={season_id}&SeasonSegment=&SeasonType={season_type}'
    '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
).format(**_query_values)

# Player hustle endpoint (leaguehustlestatsplayer).
url_players_hustle = (
    'https://stats.nba.com/stats/leaguehustlestatsplayer'
    '?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear='
    '&GameScope=&Height=&LastNGames=0&LeagueID=00&Location='
    '&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N'
    '&PerMode={per_mode}&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N'
    '&Season={season_id}&SeasonSegment=&SeasonType={season_type}'
    '&TeamID=0&VsConference=&VsDivision=&Weight='
).format(**_query_values)


In [50]:
# HTTP headers sent with every request to stats.nba.com. They describe a
# desktop Chrome browser coming from the stats site itself.
# NOTE(review): presumably the API rejects or stalls requests without the
# browser-like and x-nba-stats-* headers - confirm before trimming any of them.
headers = dict([
    ('Connection', 'keep-alive'),
    ('Accept', 'application/json, text/plain, */*'),
    ('x-nba-stats-token', 'true'),
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'),
    ('x-nba-stats-origin', 'stats'),
    ('Sec-Fetch-Site', 'same-origin'),
    ('Sec-Fetch-Mode', 'cors'),
    ('Referer', 'https://stats.nba.com/'),
    ('Accept-Encoding', 'gzip, deflate, br'),
    ('Accept-Language', 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7'),
])

## 1.2. <a id='toc1_2_'></a>[API Requesting](#toc0_)

In [None]:
# Download the player stats payload.
# The timeout stops a stalled connection from hanging the notebook forever, and
# raise_for_status() reports an HTTP error directly instead of letting it show
# up later as a JSON decoding error or a KeyError.
http_stats = requests.get(url=url_players_stats, headers=headers, timeout=30)
http_stats.raise_for_status()
response_stats = http_stats.json()
# Keep only the data rows of the first result set.
players_stats = response_stats['resultSets'][0]['rowSet']

In [None]:
# Download the player bios payload.
# The timeout stops a stalled connection from hanging the notebook forever, and
# raise_for_status() reports an HTTP error directly instead of letting it show
# up later as a JSON decoding error or a KeyError.
http_bios = requests.get(url=url_players_bios, headers=headers, timeout=30)
http_bios.raise_for_status()
response_bios = http_bios.json()
# Keep only the data rows of the first result set.
players_bios = response_bios['resultSets'][0]['rowSet']

In [None]:
# Download the player hustle payload.
# The timeout stops a stalled connection from hanging the notebook forever, and
# raise_for_status() reports an HTTP error directly instead of letting it show
# up later as a JSON decoding error or a KeyError.
http_hustle = requests.get(url=url_players_hustle, headers=headers, timeout=30)
http_hustle.raise_for_status()
response_hustle = http_hustle.json()
# Keep only the data rows of the first result set.
players_hustle = response_hustle['resultSets'][0]['rowSet']

## 1.3. <a id='toc1_3_'></a>[Treating the data](#toc0_)

In [51]:
# Column labels for the player stats rows, in positional order:
# six identifying columns, thirty measures, then one "<measure>_RANK" column
# per measure in the same order.
_stats_identity = [
    "PLAYER_ID", "PLAYER_NAME", "NICKNAME",
    "TEAM_ID", "TEAM_ABBREVIATION", "AGE",
]
_stats_measures = """
    GP W L W_PCT MIN
    FGM FGA FG_PCT
    FG3M FG3A FG3_PCT
    FTM FTA FT_PCT
    OREB DREB REB
    AST TOV STL BLK BLKA PF PFD PTS
    PLUS_MINUS NBA_FANTASY_PTS DD2 TD3 WNBA_FANTASY_PTS
""".split()

columns_stats = (
    _stats_identity
    + _stats_measures
    + [measure + "_RANK" for measure in _stats_measures]
)

# Column labels for the player bios rows, in positional order. Written as a
# whitespace-separated block and split into a list of names.
columns_bios = """
    PLAYER_ID PLAYER_NAME TEAM_ID TEAM_ABBREVIATION AGE
    PLAYER_HEIGHT PLAYER_HEIGHT_INCHES PLAYER_WEIGHT
    COLLEGE COUNTRY
    DRAFT_YEAR DRAFT_ROUND DRAFT_NUMBER
    GP PTS REB AST
    NET_RATING OREB_PCT DREB_PCT USG_PCT TS_PCT AST_PCT
""".split()

# Column labels for the player hustle rows, in positional order, assembled
# from thematically grouped sub-lists.
columns_hustle = (
    # identification and playing time
    ["PLAYER_ID", "PLAYER_NAME", "TEAM_ID", "TEAM_ABBREVIATION", "AGE", "G", "MIN"]
    # contested shots
    + ["CONTESTED_SHOTS", "CONTESTED_SHOTS_2PT", "CONTESTED_SHOTS_3PT"]
    # deflections, charges and screens
    + ["DEFLECTIONS", "CHARGES_DRAWN", "SCREEN_ASSISTS", "SCREEN_AST_PTS"]
    # loose balls
    + [
        "OFF_LOOSE_BALLS_RECOVERED",
        "DEF_LOOSE_BALLS_RECOVERED",
        "LOOSE_BALLS_RECOVERED",
        "PCT_LOOSE_BALLS_RECOVERED_OFF",
        "PCT_LOOSE_BALLS_RECOVERED_DEF",
    ]
    # box outs
    + [
        "OFF_BOXOUTS",
        "DEF_BOXOUTS",
        "BOX_OUTS",
        "BOX_OUT_PLAYER_TEAM_REBS",
        "BOX_OUT_PLAYER_REBS",
        "PCT_BOX_OUTS_OFF",
        "PCT_BOX_OUTS_DEF",
        "PCT_BOX_OUTS_TEAM_REB",
        "PCT_BOX_OUTS_REB",
    ]
)

In [None]:
# Build the player stats table: the rows in `players_stats`, labelled with the
# names in `columns_stats`.
players_stats_df = pd.DataFrame(players_stats, columns=columns_stats)
# Bare expression so the notebook renders the table as the cell output.
players_stats_df

In [None]:
# Build the player bios table: the rows in `players_bios`, labelled with the
# names in `columns_bios`.
players_bios_df = pd.DataFrame(players_bios, columns=columns_bios)
# Bare expression so the notebook renders the table as the cell output.
players_bios_df

In [None]:
# Build the player hustle table: the rows in `players_hustle`, labelled with
# the names in `columns_hustle`.
players_hustle_df = pd.DataFrame(players_hustle, columns=columns_hustle)
# Bare expression so the notebook renders the table as the cell output.
players_hustle_df

In [None]:
# Show the current working directory as the cell output.
os.getcwd()

In [None]:
# Exporting the DataFrames as CSV files

# players_bios_df.to_csv('/home/bruno/repos/NBA_2022-2023/data/players_bios_2022-23.csv', index=False)
# players_stats_df.to_csv('/home/bruno/repos/NBA_2022-2023/data/players_stats_2022-23.csv', index=False)
# players_hustle_df.to_csv('/home/bruno/repos/NBA_2022-2023/data/players_hustle_2022-23.csv', index=False)

## 1.4. <a id='toc1_4_'></a>[Expanding to previous seasons](#toc0_)

In [None]:
# Season labels in "YYYY-YY" form, from '1995-96' through '2022-23'.
# The suffix is the last two digits of the following year, zero-padded
# (so 1999 yields '1999-00').
seasons_years = [
    '{}-{:02d}'.format(start_year, (start_year + 1) % 100)
    for start_year in range(1995, 2023)
]

# Season types to scrape; the space is already percent-encoded for URL use.
season_types = ['Regular%20Season', 'Playoffs']

In [None]:
# Accumulators: one DataFrame per (season type, season) request is appended to
# each list.
df_stats = []
df_bios = []
df_hustle = []


def _get_json(url):
    """Return the decoded JSON body of a GET request to *url*.

    The request is sent with the notebook-level ``headers``. A timeout keeps a
    stalled connection from hanging the whole scrape, and an HTTP error status
    raises ``requests.HTTPError`` immediately instead of surfacing later as a
    JSON decoding error or a ``KeyError``.
    """
    reply = requests.get(url=url, headers=headers, timeout=30)
    reply.raise_for_status()
    return reply.json()


for season_type in season_types:
    print(season_type)
    for season_id in seasons_years:
        # SeasonType comes from the loop variable in all three URLs, so rows
        # labelled with a season type were really requested for that type.
        url_players_stats = 'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode='+per_mode+'&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season='+season_id+'&SeasonSegment=&SeasonType='+season_type+'&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
        url_players_bios = 'https://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode='+per_mode+'&Period=0&PlayerExperience=&PlayerPosition=&Season='+season_id+'&SeasonSegment=&SeasonType='+season_type+'&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
        url_players_hustle = 'https://stats.nba.com/stats/leaguehustlestatsplayer?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode='+per_mode+'&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season='+season_id+'&SeasonSegment=&SeasonType='+season_type+'&TeamID=0&VsConference=&VsDivision=&Weight='

        # Requesting the three endpoints and keeping the rows of the first result set
        response_stats = _get_json(url_players_stats)
        players_stats = response_stats['resultSets'][0]['rowSet']

        response_bios = _get_json(url_players_bios)
        players_bios = response_bios['resultSets'][0]['rowSet']

        response_hustle = _get_json(url_players_hustle)
        players_hustle = response_hustle['resultSets'][0]['rowSet']

        # Turning the row lists into DataFrames
        players_stats_df = pd.DataFrame(players_stats, columns=columns_stats)
        players_bios_df = pd.DataFrame(players_bios, columns=columns_bios)
        players_hustle_df = pd.DataFrame(players_hustle, columns=columns_hustle)

        # Tagging every row with the season it was requested for
        players_stats_df['season_id'] = season_id
        players_bios_df['season_id'] = season_id
        players_hustle_df['season_id'] = season_id

        # Tagging every row with the season type it was requested for
        players_stats_df['season_type'] = season_type
        players_bios_df['season_type'] = season_type
        players_hustle_df['season_type'] = season_type

        # Collecting the per-season DataFrames
        df_stats.append(players_stats_df)
        df_bios.append(players_bios_df)
        df_hustle.append(players_hustle_df)

        # Progress output: the season that was just scraped
        print(season_id)

In [None]:
# Combine each list of per-season DataFrames into a single DataFrame.
# Each name is rebound from a list to a DataFrame here, so this cell is meant
# to run once after the scraping loop.

df_bios = pd.concat(df_bios, sort=False)
df_stats = pd.concat(df_stats, sort=False)
df_hustle = pd.concat(df_hustle, sort=False)

In [None]:
# Write each combined table to its own CSV file; index=False keeps the row
# index out of the files.
_csv_targets = (
    (df_bios, '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_bios_complete.csv'),
    (df_stats, '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_stats_complete.csv'),
    (df_hustle, '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_hustle_complete.csv'),
)
for _frame, _csv_path in _csv_targets:
    _frame.to_csv(_csv_path, index=False)

## 1.5. <a id='toc1_5_'></a>[Loading back the data](#toc0_)

In [None]:
# Reload the three scraped tables from disk with the same reader options.
# NOTE(review): low_memory=False is presumably there to avoid mixed-dtype
# column warnings on these files - confirm.
_read_options = {'low_memory': False}

df_bios = pd.read_csv(
    '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_bios_complete.csv',
    **_read_options
)
df_stats = pd.read_csv(
    '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_stats_complete.csv',
    **_read_options
)
df_hustle = pd.read_csv(
    '/home/bruno/repos/NBA_2022-2023/data/scraped_all_seasons/df_hustle_complete.csv',
    **_read_options
)

In [None]:
# Bare expression: the notebook renders the reloaded bios table as the cell output.
df_bios

In [None]:
# Bare expression: the notebook renders the reloaded stats table as the cell output.
df_stats

In [None]:
# Bare expression: the notebook renders the reloaded hustle table as the cell output.
df_hustle