In [2]:
#reference to code: https://towardsdatascience.com/web-scraping-nba-stats-4b4f8c525994

from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# NBA season we will be analyzing
year = 2020
# URL page we will scraping
url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html".format(year)
# this is the HTML from the given URL
html = urlopen(url)
soup = BeautifulSoup(html)

In [4]:
# use findALL() to get the column headers
soup.findAll('tr', limit=2)
# use getText()to extract the text we need into a list
headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
# exclude the first column as we will not need the ranking order from Basketball Reference for the analysis
headers = headers[1:]
headers

['Player',
 'Pos',
 'Age',
 'Tm',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'eFG%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS']

In [7]:
# avoid the first header row
rows = soup.findAll('tr')[1:]
player_stats = [[td.getText() for td in rows[i].findAll('td')]
            for i in range(len(rows))]

In [9]:
season_stats_df = pd.DataFrame(player_stats, columns=headers)
season_stats_df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Steven Adams,C,26,OKC,63,63,26.7,4.5,7.6,.592,...,.582,3.3,6.0,9.3,2.3,0.8,1.1,1.5,1.9,10.9
1,Bam Adebayo,PF,22,MIA,72,72,33.6,6.1,11.0,.557,...,.691,2.4,7.8,10.2,5.1,1.1,1.3,2.8,2.5,15.9
2,LaMarcus Aldridge,C,34,SAS,53,53,33.1,7.4,15.0,.493,...,.827,1.9,5.5,7.4,2.4,0.7,1.6,1.4,2.4,18.9
3,Kyle Alexander,C,23,MIA,2,0,6.5,0.5,1.0,.500,...,,1.0,0.5,1.5,0.0,0.0,0.0,0.5,0.5,1.0
4,Nickeil Alexander-Walker,SG,21,NOP,47,1,12.6,2.1,5.7,.368,...,.676,0.2,1.6,1.8,1.9,0.4,0.2,1.1,1.2,5.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,Trae Young,PG,21,ATL,60,60,35.3,9.1,20.8,.437,...,.860,0.5,3.7,4.3,9.3,1.1,0.1,4.8,1.7,29.6
673,Cody Zeller,C,27,CHO,58,39,23.1,4.3,8.3,.524,...,.682,2.8,4.3,7.1,1.5,0.7,0.4,1.3,2.4,11.1
674,Tyler Zeller,C,30,SAS,2,0,2.0,0.5,2.0,.250,...,,1.5,0.5,2.0,0.0,0.0,0.0,0.0,0.0,1.0
675,Ante Žižić,C,23,CLE,22,0,10.0,1.9,3.3,.569,...,.737,0.8,2.2,3.0,0.3,0.3,0.2,0.5,1.2,4.4


In [11]:
all_teams = season_stats_df["Tm"].unique()
all_teams

array(['OKC', 'MIA', 'SAS', 'NOP', 'MEM', 'BRK', 'NYK', 'ORL', 'HOU',
       'MIL', 'LAL', 'POR', 'TOR', 'CHI', 'TOT', 'SAC', None, 'PHO',
       'CHO', 'DAL', 'DEN', 'MIN', 'WAS', 'ATL', 'GSW', 'LAC', 'IND',
       'UTA', 'PHI', 'CLE', 'DET', 'BOS'], dtype=object)

In [13]:
free_agents_df = season_stats_df.loc[season_stats_df["Tm"] == "None"]
free_agents_df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS


In [15]:
for year in range(1950, 2020):
    print(str(year))
    # URL page we will scraping
    url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html".format(year)
    # this is the HTML from the given URL
    html = urlopen(url)
    soup = BeautifulSoup(html)
    
    # use findALL() to get the column headers
    soup.findAll('tr', limit=2)
    # use getText()to extract the text we need into a list
    headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
    # exclude the first column as we will not need the ranking order from Basketball Reference for the analysis
    headers = headers[1:]
    
    # avoid the first header row
    rows = soup.findAll('tr')[1:]
    player_stats = [[td.getText() for td in rows[i].findAll('td')] for i in range(len(rows))]
    
    season_stats_df = pd.DataFrame(player_stats, columns=headers)
    
    #uncomment here to generate the csvs
    #season_stats_df.to_csv(f"NBA_{year}_stats.csv")

    print(f"NBA stats for {year} done recorded!")

1950
NBA stats for 1950 done recorded!
1951
NBA stats for 1951 done recorded!
1952
NBA stats for 1952 done recorded!
1953
NBA stats for 1953 done recorded!
1954
NBA stats for 1954 done recorded!
1955
NBA stats for 1955 done recorded!
1956
NBA stats for 1956 done recorded!
1957
NBA stats for 1957 done recorded!
1958
NBA stats for 1958 done recorded!
1959
NBA stats for 1959 done recorded!
1960
NBA stats for 1960 done recorded!
1961
NBA stats for 1961 done recorded!
1962
NBA stats for 1962 done recorded!
1963
NBA stats for 1963 done recorded!
1964
NBA stats for 1964 done recorded!
1965
NBA stats for 1965 done recorded!
1966
NBA stats for 1966 done recorded!
1967
NBA stats for 1967 done recorded!
1968
NBA stats for 1968 done recorded!
1969
NBA stats for 1969 done recorded!
1970
NBA stats for 1970 done recorded!
1971
NBA stats for 1971 done recorded!
1972
NBA stats for 1972 done recorded!
1973
NBA stats for 1973 done recorded!
1974
NBA stats for 1974 done recorded!
1975
NBA stats for 1975 d