# 1. Importing The Data

The data comes from https://www.basketball-reference.com and contains the per-game player statistics for the 2021/2022 NBA regular season.

In [1]:
# Import necessary libraries

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the data: scrape the per-game stats page and keep the first table found.
url = "https://www.basketball-reference.com/leagues/NBA_2022_per_game.html"
# read_html returns a list of DataFrames (one per table on the page);
# header=0 uses the first row of the table as the column names.
df = pd.read_html(url, header=0)[0]



# 2. Data Cleaning and Preparation

In [4]:
# Dimensions of the scraped table as (rows, columns), before any cleaning.
df.shape

(842, 30)

In [5]:
# Per-column summary: name, non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 842 entries, 0 to 841
Data columns (total 30 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Rk      842 non-null    object
 1   Player  842 non-null    object
 2   Pos     842 non-null    object
 3   Age     842 non-null    object
 4   Tm      842 non-null    object
 5   G       842 non-null    object
 6   GS      842 non-null    object
 7   MP      842 non-null    object
 8   FG      842 non-null    object
 9   FGA     842 non-null    object
 10  FG%     827 non-null    object
 11  3P      842 non-null    object
 12  3PA     842 non-null    object
 13  3P%     770 non-null    object
 14  2P      842 non-null    object
 15  2PA     842 non-null    object
 16  2P%     814 non-null    object
 17  eFG%    827 non-null    object
 18  FT      842 non-null    object
 19  FTA     842 non-null    object
 20  FT%     745 non-null    object
 21  ORB     842 non-null    object
 22  DRB     842 non-null    ob

In [6]:
# Preview the first 30 rows of the scraped table.
df.head(30)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,...,.595,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1
1,2,Steven Adams,C,28,MEM,76,75,26.3,2.8,5.1,...,.543,4.6,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9
2,3,Bam Adebayo,C,24,MIA,56,56,32.6,7.3,13.0,...,.753,2.4,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1
3,4,Santi Aldama,PF,21,MEM,32,0,11.3,1.7,4.1,...,.625,1.0,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1
4,5,LaMarcus Aldridge,C,36,BRK,47,12,22.3,5.4,9.7,...,.873,1.6,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9
5,6,Nickeil Alexander-Walker,SG,23,TOT,65,21,22.6,3.9,10.5,...,.743,0.6,2.3,2.9,2.4,0.7,0.4,1.4,1.6,10.6
6,6,Nickeil Alexander-Walker,SG,23,NOP,50,19,26.3,4.7,12.6,...,.722,0.7,2.6,3.3,2.8,0.8,0.4,1.7,1.8,12.8
7,6,Nickeil Alexander-Walker,SG,23,UTA,15,2,9.9,1.1,3.2,...,.917,0.1,1.5,1.5,1.1,0.3,0.3,0.5,1.0,3.5
8,7,Grayson Allen,SG,26,MIL,66,61,27.3,3.9,8.6,...,.865,0.5,2.9,3.4,1.5,0.7,0.3,0.7,1.5,11.1
9,8,Jarrett Allen,C,23,CLE,56,56,32.3,6.6,9.7,...,.708,3.4,7.3,10.8,1.6,0.8,1.3,1.7,1.7,16.1


In [7]:
# Count the missing values in each column.
df.isna().sum()

Rk         0
Player     0
Pos        0
Age        0
Tm         0
G          0
GS         0
MP         0
FG         0
FGA        0
FG%       15
3P         0
3PA        0
3P%       72
2P         0
2PA        0
2P%       28
eFG%      15
FT         0
FTA        0
FT%       97
ORB        0
DRB        0
TRB        0
AST        0
STL        0
BLK        0
TOV        0
PF         0
PTS        0
dtype: int64

In [8]:
# Replace every missing value with zero.
# NOTE(review): per the null counts shown earlier, only the percentage columns
# (FG%, 3P%, 2P%, eFG%, FT%) have gaps; presumably these are players with no
# attempts, so 0 here means "undefined" rather than "0% made" - confirm.
df = df.fillna(value=0)

In [9]:
# Confirm that no missing values remain after the fill.
df.isna().sum()


Rk        0
Player    0
Pos       0
Age       0
Tm        0
G         0
GS        0
MP        0
FG        0
FGA       0
FG%       0
3P        0
3PA       0
3P%       0
2P        0
2PA       0
2P%       0
eFG%      0
FT        0
FTA       0
FT%       0
ORB       0
DRB       0
TRB       0
AST       0
STL       0
BLK       0
TOV       0
PF        0
PTS       0
dtype: int64

In [10]:
# Data type of each column (the output shows every column is still `object`).
df.dtypes

Rk        object
Player    object
Pos       object
Age       object
Tm        object
G         object
GS        object
MP        object
FG        object
FGA       object
FG%       object
3P        object
3PA       object
3P%       object
2P        object
2PA       object
2P%       object
eFG%      object
FT        object
FTA       object
FT%       object
ORB       object
DRB       object
TRB       object
AST       object
STL       object
BLK       object
TOV       object
PF        object
PTS       object
dtype: object

In [11]:
# Rename the "Rk" column to "No", then preview the result.
df = df.rename(columns={"Rk": "No"})
df.head(30)


Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,...,.595,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1
1,2,Steven Adams,C,28,MEM,76,75,26.3,2.8,5.1,...,.543,4.6,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9
2,3,Bam Adebayo,C,24,MIA,56,56,32.6,7.3,13.0,...,.753,2.4,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1
3,4,Santi Aldama,PF,21,MEM,32,0,11.3,1.7,4.1,...,.625,1.0,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1
4,5,LaMarcus Aldridge,C,36,BRK,47,12,22.3,5.4,9.7,...,.873,1.6,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9
5,6,Nickeil Alexander-Walker,SG,23,TOT,65,21,22.6,3.9,10.5,...,.743,0.6,2.3,2.9,2.4,0.7,0.4,1.4,1.6,10.6
6,6,Nickeil Alexander-Walker,SG,23,NOP,50,19,26.3,4.7,12.6,...,.722,0.7,2.6,3.3,2.8,0.8,0.4,1.7,1.8,12.8
7,6,Nickeil Alexander-Walker,SG,23,UTA,15,2,9.9,1.1,3.2,...,.917,0.1,1.5,1.5,1.1,0.3,0.3,0.5,1.0,3.5
8,7,Grayson Allen,SG,26,MIL,66,61,27.3,3.9,8.6,...,.865,0.5,2.9,3.4,1.5,0.7,0.3,0.7,1.5,11.1
9,8,Jarrett Allen,C,23,CLE,56,56,32.3,6.6,9.7,...,.708,3.4,7.3,10.8,1.6,0.8,1.3,1.7,1.7,16.1


In [12]:
# Drop the rows whose Player cell is the literal string "Player"
# (presumably header rows repeated inside the scraped table - verify against the page).
header_row_labels = df.index[df["Player"] == "Player"]
df = df.drop(index=header_row_labels)
df.head(30)


Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,...,0.595,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1
1,2,Steven Adams,C,28,MEM,76,75,26.3,2.8,5.1,...,0.543,4.6,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9
2,3,Bam Adebayo,C,24,MIA,56,56,32.6,7.3,13.0,...,0.753,2.4,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1
3,4,Santi Aldama,PF,21,MEM,32,0,11.3,1.7,4.1,...,0.625,1.0,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1
4,5,LaMarcus Aldridge,C,36,BRK,47,12,22.3,5.4,9.7,...,0.873,1.6,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9
5,6,Nickeil Alexander-Walker,SG,23,TOT,65,21,22.6,3.9,10.5,...,0.743,0.6,2.3,2.9,2.4,0.7,0.4,1.4,1.6,10.6
6,6,Nickeil Alexander-Walker,SG,23,NOP,50,19,26.3,4.7,12.6,...,0.722,0.7,2.6,3.3,2.8,0.8,0.4,1.7,1.8,12.8
7,6,Nickeil Alexander-Walker,SG,23,UTA,15,2,9.9,1.1,3.2,...,0.917,0.1,1.5,1.5,1.1,0.3,0.3,0.5,1.0,3.5
8,7,Grayson Allen,SG,26,MIL,66,61,27.3,3.9,8.6,...,0.865,0.5,2.9,3.4,1.5,0.7,0.3,0.7,1.5,11.1
9,8,Jarrett Allen,C,23,CLE,56,56,32.3,6.6,9.7,...,0.708,3.4,7.3,10.8,1.6,0.8,1.3,1.7,1.7,16.1


In [13]:
# Keep one row per player name: the first occurrence in table order.
# NOTE(review): in the preview, a traded player's "TOT" row comes before the
# per-team rows, so that is the row kept - confirm this holds for every player.
df = df.drop_duplicates(subset=["Player"], keep="first")
df.head(30)

Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,...,0.595,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1
1,2,Steven Adams,C,28,MEM,76,75,26.3,2.8,5.1,...,0.543,4.6,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9
2,3,Bam Adebayo,C,24,MIA,56,56,32.6,7.3,13.0,...,0.753,2.4,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1
3,4,Santi Aldama,PF,21,MEM,32,0,11.3,1.7,4.1,...,0.625,1.0,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1
4,5,LaMarcus Aldridge,C,36,BRK,47,12,22.3,5.4,9.7,...,0.873,1.6,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9
5,6,Nickeil Alexander-Walker,SG,23,TOT,65,21,22.6,3.9,10.5,...,0.743,0.6,2.3,2.9,2.4,0.7,0.4,1.4,1.6,10.6
8,7,Grayson Allen,SG,26,MIL,66,61,27.3,3.9,8.6,...,0.865,0.5,2.9,3.4,1.5,0.7,0.3,0.7,1.5,11.1
9,8,Jarrett Allen,C,23,CLE,56,56,32.3,6.6,9.7,...,0.708,3.4,7.3,10.8,1.6,0.8,1.3,1.7,1.7,16.1
10,9,Jose Alvarado,PG,23,NOP,54,1,15.4,2.4,5.4,...,0.679,0.5,1.4,1.9,2.8,1.3,0.1,0.7,1.4,6.1
11,10,Justin Anderson,SF,28,TOT,16,6,19.8,2.3,5.9,...,0.789,0.3,2.6,2.9,2.1,0.5,0.4,0.5,1.4,6.4


In [14]:
df.shape

(605, 30)

In [15]:
# Save the cleaned table to disk, then load it back as the analysis frame.
# to_csv returns None when it is given a file path, so its result is not
# assigned to anything.
# NOTE(review): every column of `df` is still object dtype at this point;
# read_csv infers dtypes on load, which is presumably what lets the .max()
# comparisons in the analysis cells work on numbers - confirm with
# df_nba2022.dtypes.
df.to_csv("NBA Stats 2022.csv", index=False)
df_nba2022 = pd.read_csv("NBA Stats 2022.csv")

# 3. Exploratory Data Analysis

#### 1. NBA Player with the most Points per Game in the 2021/2022 season

In [18]:
# Rows whose PTS equals the column maximum (ties, if any, are all returned).
df_nba2022.loc[df_nba2022["PTS"] == df_nba2022["PTS"].max()]

Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
161,162,Joel Embiid,C,27,PHI,68,68,33.8,9.8,19.6,...,0.814,2.1,9.6,11.7,4.2,1.1,1.5,3.1,2.7,30.6


#### 2. NBA Player with the most Assists per Game in the 2021/2022 season

In [19]:
# Rows whose AST equals the column maximum (ties, if any, are all returned).
df_nba2022.loc[df_nba2022["AST"] == df_nba2022["AST"].max()]

Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
437,438,Chris Paul,PG,36,PHO,65,65,32.9,5.6,11.3,...,0.837,0.3,4.0,4.4,10.8,1.9,0.3,2.4,2.1,14.7


#### 3. NBA Player with the most Steals per Game in the 2021/2022 season

In [24]:
# Rows whose STL equals the column maximum (ties, if any, are all returned).
# NOTE(review): no minimum-games filter is applied, and the displayed leader
# has G = 3; consider restricting to players with enough games played before
# taking the maximum.
df_nba2022.loc[df_nba2022["STL"] == df_nba2022["STL"].max()]

Unnamed: 0,No,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
565,566,Derrick Walton,PG,26,DET,3,3,36.0,2.0,8.7,...,1.0,0.7,2.7,3.3,7.0,2.3,1.3,3.7,2.7,6.3
