In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
pd.set_option('display.max_columns', None)

data = pd.read_csv('NBA_player_data.csv')

In [4]:
data.sample(10)

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PTS,EFF
217,2010-11,Playoffs,1889,32,Andre Miller,1610612757,POR,6,32.3,5.5,11.2,0.493,0.7,1.7,0.4,3.2,4.0,0.792,0.8,2.3,3.2,5.5,0.3,0.0,2.2,14.8,15.2
1654,2014-15,Playoffs,2207,24,Joe Johnson,1610612751,BKN,6,41.5,5.7,15.7,0.362,2.0,6.8,0.293,3.2,4.0,0.792,0.8,6.8,7.7,4.8,1.2,0.0,0.8,16.5,18.5
1377,2014-15,Regular%20Season,201942,14,DeMar DeRozan,1610612761,TOR,60,35.0,6.8,16.5,0.413,0.4,1.5,0.284,6.0,7.2,0.832,0.7,3.9,4.6,3.5,1.2,0.2,2.3,20.1,16.4
4822,2022-23,Playoffs,1627777,120,Georges Niang,1610612755,PHI,11,14.3,1.6,3.3,0.5,1.1,2.4,0.462,0.0,0.0,0.0,0.0,0.4,0.4,0.2,0.0,0.2,0.5,4.4,3.0
528,2011-12,Playoffs,201188,26,Marc Gasol,1610612763,MEM,7,37.3,5.1,9.9,0.522,0.0,0.1,0.0,4.9,6.1,0.791,2.3,4.4,6.7,3.1,0.3,1.9,1.9,15.1,19.3
615,2011-12,Playoffs,2224,113,Jamaal Tinsley,1610612762,UTA,4,16.3,1.3,5.0,0.25,0.3,1.0,0.25,1.0,1.0,1.0,0.0,0.5,0.5,3.0,0.5,0.0,1.5,3.8,2.5
476,2011-12,Regular%20Season,2733,154,Shaun Livingston,1610612749,MIL,58,18.8,2.2,4.7,0.469,0.0,0.1,0.667,1.1,1.4,0.785,0.7,1.4,2.1,2.1,0.5,0.3,1.1,5.5,6.6
575,2011-12,Playoffs,201156,73,Nick Young,1610612746,LAC,11,18.2,2.6,6.1,0.433,1.5,3.0,0.515,1.5,1.6,0.889,0.1,1.0,1.1,0.3,0.3,0.4,0.6,8.3,6.0
537,2011-12,Playoffs,201568,35,Danilo Gallinari,1610612743,DEN,7,31.7,4.9,13.4,0.362,0.6,3.3,0.174,3.1,3.4,0.917,1.3,3.9,5.1,2.4,0.7,0.6,1.3,13.4,12.1
2138,2015-16,Playoffs,202684,94,Tristan Thompson,1610612739,CLE,21,29.6,2.3,4.4,0.527,0.0,0.0,0.0,2.0,3.5,0.575,4.1,4.9,9.0,0.7,0.4,0.9,0.8,6.7,13.2


In [5]:
data.shape

(4847, 27)

# DATA CLEANING AND ANALYSIS

In [6]:
data.isna().sum()

Year           0
Season_type    0
PLAYER_ID      0
RANK           0
PLAYER         0
TEAM_ID        0
TEAM           0
GP             0
MIN            0
FGM            0
FGA            0
FG_PCT         0
FG3M           0
FG3A           0
FG3_PCT        0
FTM            0
FTA            0
FT_PCT         0
OREB           0
DREB           0
REB            0
AST            0
STL            0
BLK            0
TOV            0
PTS            0
EFF            0
dtype: int64

In [7]:
data.drop(columns=['RANK','EFF'], inplace=True)


In [8]:
data['season_start_year'] =data['Year'].str[:4].astype(int)

In [9]:
data['TEAM'].replace(to_replace=['NOP','NOH'], value='NO', inplace=True)

In [10]:
data['Season_type'].replace('Regular%20Season', 'RS', inplace=True)

In [17]:
rs_df = data[data['Season_type']=='RS']
playoffs_df = data[data['Season_type']=='RS']

In [20]:
total_cols = ['MIN','FGM','FG3M','FG3A','FTM','FTA','OREB','DREB','REB','AST','STL','BLK','TOV','PTS']

# Which player stats are correlated with each other?

In [22]:
data_per_min = data.groupby(['PLAYER','PLAYER_ID','Year'])[total_cols].sum().reset_index()
for col in data_per_min.columns[4:]:
    data_per_min[col] =  data_per_min[col]/data_per_min["MIN"]

data_per_min['FG%'] = data_per_min['FGM']/data_per_min['FGA']
data_per_min['3PT%'] = data_per_min['FG3M']/data_per_min['FG3A']
data_per_min['FT%'] = data_per_min['FTM']/data_per_min['FTA']
data_per_min['FG3A%'] = data_per_min['FG3A']/data_per_min['FGA']
data_per_min['PTS/FGA'] = data_per_min['PTS']/data_per_min['FGA']
data_per_min['FG3M/FGM'] = data_per_min['FG3M']/data_per_min['FGM']
data_per_min['FTA/FGA'] = data_per_min['FTA']/data_per_min['FGA']
data_per_min['TRU%'] = 0.5*data_per_min['PTS']/(data_per_min['FGA']+0.475*data_per_min['FTA'])
data_per_min['AST_TOV'] = data_per_min['AST']/data_per_min['TOV']


data_per_min = data_per_min[data_per_min['MIN']>=50]
data_per_min.drop(columns='PLAYER_ID', inplace=True)

fig = px.imshow(data_per_min.corr())
fig.show()

Unnamed: 0,PLAYER,PLAYER_ID,Year,MIN,FGM,FG3M,FG3A,FTM,FTA,OREB,DREB,REB,AST,STL,BLK,TOV,PTS
0,AJ Griffin,1631100,2022-23,19.5,0.174359,0.071795,0.184615,0.030769,0.035897,0.025641,0.082051,0.107692,0.051282,0.030769,0.010256,0.030769,0.456410
1,AJ Price,201985,2010-11,16.0,0.162500,0.087500,0.200000,0.112500,0.125000,0.025000,0.062500,0.087500,0.075000,0.037500,0.000000,0.087500,0.525000
2,Aaron Brooks,201166,2012-13,11.2,0.196429,0.017857,0.133929,0.044643,0.071429,0.044643,0.089286,0.133929,0.160714,0.017857,0.017857,0.089286,0.446429
3,Aaron Brooks,201166,2013-14,21.6,0.148148,0.060185,0.157407,0.055556,0.060185,0.027778,0.060185,0.087963,0.148148,0.032407,0.009259,0.074074,0.416667
4,Aaron Brooks,201166,2014-15,34.0,0.176471,0.064706,0.176471,0.061765,0.079412,0.020588,0.085294,0.102941,0.120588,0.029412,0.008824,0.079412,0.473529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3463,Zaza Pachulia,2585,2017-18,14.1,0.156028,0.000000,0.000000,0.078014,0.092199,0.092199,0.241135,0.333333,0.113475,0.042553,0.014184,0.070922,0.382979
3464,Zaza Pachulia,2585,2018-19,12.9,0.100775,0.000000,0.007752,0.108527,0.139535,0.116279,0.186047,0.302326,0.100775,0.038760,0.023256,0.062016,0.302326
3465,Ziaire Williams,1630533,2021-22,38.5,0.140260,0.059740,0.194805,0.049351,0.057143,0.020779,0.075325,0.096104,0.038961,0.028571,0.005195,0.023377,0.389610
3466,Zion Williamson,1629627,2020-21,33.2,0.313253,0.006024,0.018072,0.180723,0.262048,0.081325,0.135542,0.216867,0.111446,0.027108,0.018072,0.081325,0.813253


# How many minutes played distributed?

# How has the game changed over the past 10 Years?