In [1]:
import pandas as pd
import numpy as np
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [2]:
# NBA 2023 league page on basketball-reference.com; the original #fragment is
# kept so the string is unchanged.
url = (
    "https://www.basketball-reference.com"
    "/leagues/NBA_2023.html"
    "#all_per_game_team-opponent"
)

In [3]:
# Download the page and parse it.
# - The context manager closes the HTTP response as soon as the markup has
#   been read, instead of leaving the connection open for the kernel's lifetime.
# - Naming the parser keeps the parse tree the same on every machine; without
#   it bs4 picks whichever parser happens to be installed and emits a warning.
with urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

# DATAFRAME

In [4]:
# Print the id attribute of every <table> in the parsed page, one per line
# (None is printed for a table that has no id).
table_ids = (tbl.get('id') for tbl in soup.find_all('table'))
for table_id in table_ids:
    print(table_id)

confs_standings_E
confs_standings_W
divs_standings_E
divs_standings_W
per_game-team
per_game-opponent
totals-team
totals-opponent
per_poss-team
per_poss-opponent
advanced-team
shooting-team
shooting-opponent


In [5]:
# Select the team per-game table by its id attribute.
# soup.find() returns None when nothing matches, so fail right here with a
# clear message instead of letting a later cell die with an AttributeError
# on None.
TABLE_ID = 'per_game-team'
table = soup.find('table', id=TABLE_ID)
if table is None:
    raise ValueError(f"No <table id={TABLE_ID!r}> found in the parsed page")

In [6]:
# Fetch only the first <tr> of the table, which holds the <th> header cells.
# find_all() replaces the deprecated findAll() alias, and a plain tag name
# replaces the lambda filter. The result is still a one-element list, because
# the header extraction indexes it as columns[0].
columns = table.find_all('tr', limit=1)
columns

[<tr> <th aria-label="Rk" class="ranker poptip sort_default_asc show_partial_when_sorting center" data-stat="ranker" data-tip="Rank" scope="col">Rk</th> <th aria-label="team" class="poptip center" data-stat="team" scope="col">Team</th> <th aria-label="Games" class="poptip center" data-stat="g" data-tip="Games" scope="col">G</th> <th aria-label="Minutes Played" class="poptip center" data-stat="mp" data-tip="Minutes Played" scope="col">MP</th> <th aria-label="Field Goals" class="poptip center" data-stat="fg" data-tip="Field Goals" scope="col">FG</th> <th aria-label="FGA" class="poptip center" data-stat="fga" data-tip="Field Goal Attempts" scope="col">FGA</th> <th aria-label="Field Goal Percentage" class="poptip center" data-stat="fg_pct" data-tip="Field Goal Percentage" scope="col">FG%</th> <th aria-label="3-Point Field Goals" class="poptip center" data-stat="fg3" data-tip="3-Point Field Goals" scope="col">3P</th> <th aria-label="3-Point Field Goal Attempts" class="poptip center" data-st

In [7]:
# Column names: the text of every <th> in the header row, minus the first one
# ('Rk'). The data rows are built from <td> cells only, so presumably the rank
# has no matching data column there.
# find_all()/get_text() are the snake_case names used by the bs4 docs;
# findAll() is a deprecated alias.
headers = [th.get_text() for th in columns[0].find_all('th')][1:]
headers

['Team',
 'G',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS']

In [8]:
# Every <tr> inside the table's <tbody>. find_all() replaces the deprecated
# findAll() alias, and the no-op [0:] slice is dropped (the result is still a
# list that supports len() and indexing).
rows = table.tbody.find_all('tr')

In [9]:
# One list of cell texts per table row.
# Iterate over the rows directly instead of indexing with range(len(rows)).
# Rows that contain no <td> at all (e.g. a repeated header row) are skipped;
# otherwise they would become empty lists and show up as all-None rows in the
# DataFrame built from team_stats.
row_cells = (row.find_all('td') for row in rows)
team_stats = [[td.get_text() for td in cells] for cells in row_cells if cells]

In [10]:
# Build the frame from the scraped rows, labelling columns with the header
# texts, then preview the first five rows.
df = pd.DataFrame(data=team_stats, columns=headers)
df.head(n=5)

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Boston Celtics,36,242.8,42.2,88.1,0.479,15.9,42.2,0.376,26.3,...,0.827,9.2,34.7,43.9,26.4,6.7,5.2,13.7,19.4,118.8
1,Sacramento Kings,34,240.7,42.7,87.2,0.49,13.2,37.1,0.357,29.5,...,0.777,8.6,33.3,41.9,26.9,7.0,3.2,14.7,20.8,118.3
2,New Orleans Pelicans,35,244.3,43.1,89.1,0.484,11.5,30.9,0.37,31.7,...,0.784,11.6,33.6,45.2,26.7,8.9,4.4,15.2,20.4,118.1
3,Utah Jazz,38,242.0,42.4,89.8,0.472,14.6,39.8,0.366,27.8,...,0.778,11.7,31.8,43.4,26.0,7.1,4.7,15.4,21.1,117.1
4,Denver Nuggets,35,241.4,43.7,86.0,0.508,11.8,30.0,0.394,31.9,...,0.743,10.0,32.8,42.8,28.7,7.2,4.3,15.3,19.5,116.4


In [11]:
# Sanity check: (number of rows, number of columns) of the scraped frame.
df.shape

(30, 24)

## Cleaning

In [12]:
# Count missing values per column. Note that this only detects None/NaN;
# an empty string scraped from a blank cell is not counted as missing.
df.isna().sum()

Team    0
G       0
MP      0
FG      0
FGA     0
FG%     0
3P      0
3PA     0
3P%     0
2P      0
2PA     0
2P%     0
FT      0
FTA     0
FT%     0
ORB     0
DRB     0
TRB     0
AST     0
STL     0
BLK     0
TOV     0
PF      0
PTS     0
dtype: int64

In [13]:
# Show the dtype pandas assigned to each column of the scraped frame.
df.dtypes

Team    object
G       object
MP      object
FG      object
FGA     object
FG%     object
3P      object
3PA     object
3P%     object
2P      object
2PA     object
2P%     object
FT      object
FTA     object
FT%     object
ORB     object
DRB     object
TRB     object
AST     object
STL     object
BLK     object
TOV     object
PF      object
PTS     object
dtype: object

In [14]:
# Give every column a proper dtype: 'Team' becomes a pandas string column and
# every other column is parsed as a number.
# This replaces df.apply(pd.to_numeric, errors='ignore'), which is deprecated
# in recent pandas and would silently leave a stat column as object if any
# value failed to parse. Here an unparsable value raises, so the problem is
# visible at the conversion step.
TEXT_COLUMNS = ['Team']

for col in df.columns:
    if col in TEXT_COLUMNS:
        df[col] = df[col].astype('string')
    else:
        df[col] = pd.to_numeric(df[col])

df.dtypes

Team     string
G         int64
MP      float64
FG      float64
FGA     float64
FG%     float64
3P      float64
3PA     float64
3P%     float64
2P      float64
2PA     float64
2P%     float64
FT      float64
FTA     float64
FT%     float64
ORB     float64
DRB     float64
TRB     float64
AST     float64
STL     float64
BLK     float64
TOV     float64
PF      float64
PTS     float64
dtype: object

## Stats

### PTS

In [18]:
# Rank teams by the PTS column, highest first, renumber the index from 1 so it
# reads as a rank, and show the top eight.
df = df.sort_values('PTS', ascending=False)
df.index = np.arange(1, df.shape[0] + 1)
df.head(8)

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
1,Boston Celtics,36,242.8,42.2,88.1,0.479,15.9,42.2,0.376,26.3,...,0.827,9.2,34.7,43.9,26.4,6.7,5.2,13.7,19.4,118.8
2,Sacramento Kings,34,240.7,42.7,87.2,0.49,13.2,37.1,0.357,29.5,...,0.777,8.6,33.3,41.9,26.9,7.0,3.2,14.7,20.8,118.3
3,New Orleans Pelicans,35,244.3,43.1,89.1,0.484,11.5,30.9,0.37,31.7,...,0.784,11.6,33.6,45.2,26.7,8.9,4.4,15.2,20.4,118.1
4,Utah Jazz,38,242.0,42.4,89.8,0.472,14.6,39.8,0.366,27.8,...,0.778,11.7,31.8,43.4,26.0,7.1,4.7,15.4,21.1,117.1
5,Denver Nuggets,35,241.4,43.7,86.0,0.508,11.8,30.0,0.394,31.9,...,0.743,10.0,32.8,42.8,28.7,7.2,4.3,15.3,19.5,116.4
6,Golden State Warriors,37,240.7,42.3,89.1,0.474,16.0,42.5,0.376,26.3,...,0.798,9.6,33.5,43.2,29.2,6.7,3.9,16.8,22.1,116.4
7,Los Angeles Lakers,36,242.1,42.8,88.8,0.482,10.4,30.8,0.338,32.4,...,0.799,9.4,35.5,44.9,25.0,6.5,4.8,14.8,18.5,116.0
8,Oklahoma City Thunder,35,244.3,42.8,92.9,0.461,11.6,33.5,0.347,31.2,...,0.797,11.5,32.9,44.5,24.1,8.1,5.2,14.4,21.6,115.7


In [19]:
# Rank teams by the 3P column (3-point field goals), highest first, renumber
# the index from 1 so it reads as a rank, and show the top eight.
df = df.sort_values('3P', ascending=False)
df.index = np.arange(1, df.shape[0] + 1)
df.head(8)

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
1,Golden State Warriors,37,240.7,42.3,89.1,0.474,16.0,42.5,0.376,26.3,...,0.798,9.6,33.5,43.2,29.2,6.7,3.9,16.8,22.1,116.4
2,Boston Celtics,36,242.8,42.2,88.1,0.479,15.9,42.2,0.376,26.3,...,0.827,9.2,34.7,43.9,26.4,6.7,5.2,13.7,19.4,118.8
3,Dallas Mavericks,36,243.5,39.1,83.5,0.469,15.0,41.7,0.36,24.1,...,0.739,7.8,31.4,39.2,22.1,7.2,4.1,12.6,21.0,112.3
4,Indiana Pacers,36,240.0,41.1,89.1,0.461,14.8,39.7,0.373,26.3,...,0.79,9.7,32.2,41.9,26.9,7.8,5.9,15.4,21.8,114.9
5,Utah Jazz,38,242.0,42.4,89.8,0.472,14.6,39.8,0.366,27.8,...,0.778,11.7,31.8,43.4,26.0,7.1,4.7,15.4,21.1,117.1
6,Phoenix Suns,37,242.0,42.6,90.5,0.47,13.3,34.4,0.386,29.3,...,0.79,12.2,30.8,43.0,26.9,7.2,5.4,13.8,21.5,115.1
7,Sacramento Kings,34,240.7,42.7,87.2,0.49,13.2,37.1,0.357,29.5,...,0.777,8.6,33.3,41.9,26.9,7.0,3.2,14.7,20.8,118.3
8,Milwaukee Bucks,35,242.1,41.1,89.5,0.459,12.9,37.5,0.345,28.1,...,0.738,12.2,36.7,48.9,24.4,6.5,5.5,15.4,18.8,112.9


In [20]:
# Rank teams by the 3PA column (3-point field goal attempts), highest first,
# renumber the index from 1 so it reads as a rank, and show the top eight.
df = df.sort_values('3PA', ascending=False)
df.index = np.arange(1, df.shape[0] + 1)
df.head(8)

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
1,Golden State Warriors,37,240.7,42.3,89.1,0.474,16.0,42.5,0.376,26.3,...,0.798,9.6,33.5,43.2,29.2,6.7,3.9,16.8,22.1,116.4
2,Boston Celtics,36,242.8,42.2,88.1,0.479,15.9,42.2,0.376,26.3,...,0.827,9.2,34.7,43.9,26.4,6.7,5.2,13.7,19.4,118.8
3,Dallas Mavericks,36,243.5,39.1,83.5,0.469,15.0,41.7,0.36,24.1,...,0.739,7.8,31.4,39.2,22.1,7.2,4.1,12.6,21.0,112.3
4,Utah Jazz,38,242.0,42.4,89.8,0.472,14.6,39.8,0.366,27.8,...,0.778,11.7,31.8,43.4,26.0,7.1,4.7,15.4,21.1,117.1
5,Indiana Pacers,36,240.0,41.1,89.1,0.461,14.8,39.7,0.373,26.3,...,0.79,9.7,32.2,41.9,26.9,7.8,5.9,15.4,21.8,114.9
6,Milwaukee Bucks,35,242.1,41.1,89.5,0.459,12.9,37.5,0.345,28.1,...,0.738,12.2,36.7,48.9,24.4,6.5,5.5,15.4,18.8,112.9
7,Sacramento Kings,34,240.7,42.7,87.2,0.49,13.2,37.1,0.357,29.5,...,0.777,8.6,33.3,41.9,26.9,7.0,3.2,14.7,20.8,118.3
8,Miami Heat,36,242.1,39.1,86.5,0.452,12.5,36.5,0.342,26.7,...,0.825,9.4,31.1,40.6,23.7,8.3,3.2,13.5,18.9,108.5


In [21]:
# Rank teams by the 3P% column, highest first, renumber the index from 1 so it
# reads as a rank, and show the top eight.
df = df.sort_values('3P%', ascending=False)
df.index = np.arange(1, df.shape[0] + 1)
df.head(8)

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
1,Denver Nuggets,35,241.4,43.7,86.0,0.508,11.8,30.0,0.394,31.9,...,0.743,10.0,32.8,42.8,28.7,7.2,4.3,15.3,19.5,116.4
2,Brooklyn Nets,35,240.7,42.7,83.8,0.51,12.1,31.0,0.39,30.6,...,0.801,7.7,32.7,40.5,26.4,6.9,6.8,14.7,22.0,114.3
3,Phoenix Suns,37,242.0,42.6,90.5,0.47,13.3,34.4,0.386,29.3,...,0.79,12.2,30.8,43.0,26.9,7.2,5.4,13.8,21.5,115.1
4,Portland Trail Blazers,35,241.4,40.3,84.6,0.477,12.8,33.6,0.382,27.5,...,0.775,10.8,32.5,43.3,24.2,6.4,4.1,15.3,20.3,112.3
5,Philadelphia 76ers,34,242.9,39.8,83.3,0.478,12.6,33.1,0.381,27.1,...,0.82,8.0,32.0,40.0,24.4,8.4,4.7,14.5,20.6,111.8
6,Golden State Warriors,37,240.7,42.3,89.1,0.474,16.0,42.5,0.376,26.3,...,0.798,9.6,33.5,43.2,29.2,6.7,3.9,16.8,22.1,116.4
7,Boston Celtics,36,242.8,42.2,88.1,0.479,15.9,42.2,0.376,26.3,...,0.827,9.2,34.7,43.9,26.4,6.7,5.2,13.7,19.4,118.8
8,Indiana Pacers,36,240.0,41.1,89.1,0.461,14.8,39.7,0.373,26.3,...,0.79,9.7,32.2,41.9,26.9,7.8,5.9,15.4,21.8,114.9
