# Data Exploration

In [36]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [37]:
# Load dataset

file_path = "../data/cleaned_data.csv"
df = pd.read_csv(file_path)

In [38]:
df.head()

Unnamed: 0.1,Unnamed: 0,Player,Tm,Opp,Res,MP,FG,FGA,FG%,3P,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,Data
0,0,Jayson Tatum,BOS,NYK,W,30.3,14,18,0.778,8,...,4,4,10,1,1,1,1,37,38.1,2024-10-22
1,1,Anthony Davis,LAL,MIN,W,37.58,11,23,0.478,1,...,13,16,4,1,3,1,1,36,34.0,2024-10-22
2,2,Derrick White,BOS,NYK,W,26.63,8,13,0.615,6,...,3,3,4,1,0,0,1,24,22.4,2024-10-22
3,3,Jrue Holiday,BOS,NYK,W,30.52,7,9,0.778,4,...,2,4,4,1,0,0,2,18,19.5,2024-10-22
4,4,Miles McBride,NYK,BOS,L,25.85,8,10,0.8,4,...,0,0,2,0,0,1,1,22,17.8,2024-10-22


In [39]:
# TOP 10 Players by average points
player_avg_stats = df.groupby(['Player']).mean(numeric_only=True).round(2)
player_avg_stats.drop(player_avg_stats.columns[0], axis=1, inplace=True)                                                                   
top_10_pts_players = player_avg_stats.sort_values(by="PTS", ascending=False).head(10)
top_10_pts_players 

Unnamed: 0_level_0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Shai Gilgeous-Alexander,34.07,11.38,21.5,0.53,1.98,5.76,0.37,7.7,8.54,0.9,0.9,4.32,5.22,5.9,1.94,1.0,2.68,2.2,32.44,26.74
Giannis Antetokounmpo,34.95,12.64,20.86,0.61,0.14,0.76,0.07,6.19,10.74,0.54,2.33,10.0,12.33,5.98,0.79,1.36,3.45,2.45,31.62,26.38
Nikola Jokić,36.23,11.28,19.72,0.59,2.09,4.5,0.5,5.04,6.2,0.8,3.02,9.67,12.7,10.33,1.83,0.67,3.09,2.09,29.7,30.57
Luka Dončić,35.66,9.82,21.18,0.46,3.41,9.64,0.35,5.09,6.64,0.77,0.73,7.59,8.32,7.82,2.0,0.41,3.41,2.64,28.14,22.7
LaMelo Ball,33.36,9.72,23.34,0.41,4.12,12.22,0.34,4.47,5.44,0.74,0.97,4.25,5.22,7.38,1.34,0.31,3.69,3.44,28.03,18.81
Tyrese Maxey,38.03,9.66,21.75,0.44,3.34,9.66,0.34,5.18,5.93,0.88,0.27,3.16,3.43,6.2,1.93,0.39,2.48,2.3,27.84,20.47
Anthony Edwards,36.73,9.14,20.49,0.44,4.16,9.88,0.41,4.78,5.67,0.8,0.82,4.96,5.78,4.53,1.14,0.69,3.45,1.98,27.22,18.78
Kevin Durant,36.16,9.77,18.64,0.53,2.36,5.92,0.42,5.03,6.08,0.82,0.41,5.67,6.08,4.18,0.82,1.33,3.03,1.69,26.92,20.33
Jaren Jackson Jr.,32.11,9.67,19.0,0.52,2.39,5.94,0.38,4.83,6.56,0.74,1.56,4.83,6.39,2.94,1.0,1.44,2.33,3.56,26.56,19.29
Jayson Tatum,36.32,8.92,19.76,0.45,3.55,9.96,0.36,5.16,6.45,0.81,0.61,8.04,8.65,5.65,1.22,0.53,2.86,2.2,26.55,20.43


In [40]:
# TOP 10 Players by average assists                                                                 
top_10_ast_players = player_avg_stats.sort_values(by="AST", ascending=False).head(10)
top_10_ast_players 

Unnamed: 0_level_0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Trae Young,36.07,7.15,17.6,0.4,2.92,8.6,0.33,6.04,7.06,0.8,0.48,2.71,3.19,11.38,1.27,0.21,4.67,1.96,23.25,18.45
Nikola Jokić,36.23,11.28,19.72,0.59,2.09,4.5,0.5,5.04,6.2,0.8,3.02,9.67,12.7,10.33,1.83,0.67,3.09,2.09,29.7,30.57
Cade Cunningham,35.68,9.57,21.15,0.46,2.28,6.43,0.34,4.17,4.94,0.76,0.89,5.4,6.3,9.45,0.98,0.74,4.43,2.66,25.6,19.19
LeBron James,34.67,9.39,18.22,0.51,2.17,5.57,0.35,3.5,4.57,0.76,0.87,6.91,7.78,9.04,0.87,0.52,3.78,1.39,24.46,20.94
Tyrese Haliburton,34.04,6.12,13.65,0.42,2.86,7.84,0.33,2.67,3.16,0.64,0.61,2.98,3.59,8.53,1.33,0.67,1.78,1.31,17.78,17.27
James Harden,33.89,6.23,15.77,0.39,2.79,8.12,0.34,6.06,6.79,0.86,0.6,5.15,5.75,8.42,1.46,0.6,4.5,2.12,21.31,16.86
Chris Paul,29.06,3.1,7.43,0.42,1.76,4.78,0.37,1.39,1.45,0.6,0.47,3.61,4.08,8.16,1.31,0.2,1.53,1.92,9.35,11.64
Luka Dončić,35.66,9.82,21.18,0.46,3.41,9.64,0.35,5.09,6.64,0.77,0.73,7.59,8.32,7.82,2.0,0.41,3.41,2.64,28.14,22.7
Jalen Brunson,35.27,9.02,18.44,0.49,2.32,5.84,0.36,5.72,7.04,0.79,0.44,2.46,2.9,7.52,0.96,0.08,2.42,2.18,26.08,20.29
Ja Morant,28.36,7.1,15.67,0.46,1.6,4.9,0.31,4.87,5.97,0.78,0.63,3.53,4.17,7.5,1.33,0.3,3.93,1.87,20.67,15.72


In [41]:
# Overall Distribution
df.describe().round(2)

Unnamed: 0.1,Unnamed: 0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
count,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,...,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0,16512.0
mean,8255.5,22.41,3.85,8.27,0.43,1.25,3.48,0.26,1.57,2.02,...,1.03,3.07,4.1,2.45,0.77,0.47,1.27,1.75,10.52,8.58
std,4766.75,10.9,3.26,6.07,0.25,1.52,3.15,0.29,2.23,2.69,...,1.37,2.72,3.48,2.61,0.99,0.83,1.43,1.44,8.83,7.82
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-7.6
25%,4127.75,14.43,1.0,4.0,0.29,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,3.0,2.4
50%,8255.5,23.43,3.0,7.0,0.44,1.0,3.0,0.25,1.0,1.0,...,1.0,2.0,3.0,2.0,0.0,0.0,1.0,2.0,9.0,7.1
75%,12383.25,31.32,6.0,12.0,0.57,2.0,5.0,0.5,2.0,3.0,...,2.0,4.0,6.0,4.0,1.0,1.0,2.0,3.0,16.0,13.3
max,16511.0,50.48,22.0,38.0,1.0,10.0,20.0,1.0,18.0,26.0,...,12.0,23.0,28.0,22.0,8.0,10.0,11.0,6.0,60.0,54.2


In [42]:
player_avg_stats.corr()

Unnamed: 0,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
MP,1.0,0.889447,0.897081,0.559361,0.71768,0.73588,0.568136,0.744615,0.738861,0.805185,0.466843,0.754029,0.709776,0.73875,0.755169,0.466992,0.782101,0.783218,0.888468,0.891754
FG,0.889447,1.0,0.978403,0.552253,0.700942,0.707097,0.508708,0.875534,0.873504,0.844267,0.425046,0.750942,0.694594,0.759906,0.655868,0.44507,0.843822,0.649977,0.992122,0.974116
FGA,0.897081,0.978403,1.0,0.452173,0.774675,0.803235,0.552705,0.868611,0.855391,0.840863,0.321407,0.69123,0.61762,0.78286,0.676696,0.37321,0.858954,0.633444,0.985221,0.934166
FG%,0.559361,0.552253,0.452173,1.0,0.258375,0.206561,0.36593,0.406424,0.432111,0.514778,0.595531,0.55661,0.602044,0.342845,0.398911,0.483841,0.404009,0.555879,0.514911,0.599777
3P,0.71768,0.700942,0.774675,0.258375,1.0,0.97856,0.772073,0.557134,0.493462,0.595329,-0.059723,0.331324,0.229426,0.572342,0.506552,0.098192,0.583444,0.439366,0.753664,0.654052
3PA,0.73588,0.707097,0.803235,0.206561,0.97856,1.0,0.722775,0.58055,0.518683,0.613709,-0.065896,0.350743,0.24205,0.600151,0.536071,0.106392,0.615865,0.453994,0.759827,0.64843
3P%,0.568136,0.508708,0.552705,0.36593,0.772073,0.722775,1.0,0.367928,0.316811,0.422487,-0.075863,0.22828,0.147226,0.403769,0.432738,0.024331,0.390064,0.359464,0.546052,0.479619
FT,0.744615,0.875534,0.868611,0.406424,0.557134,0.58055,0.367928,1.0,0.984162,0.87856,0.341783,0.644598,0.589131,0.718306,0.545783,0.372391,0.804287,0.545303,0.907435,0.885905
FTA,0.738861,0.873504,0.855391,0.432111,0.493462,0.518683,0.316811,0.984162,1.0,0.85034,0.410422,0.683794,0.639838,0.704062,0.547177,0.412684,0.805099,0.563362,0.894114,0.882676
FT%,0.805185,0.844267,0.840863,0.514778,0.595329,0.613709,0.422487,0.87856,0.85034,1.0,0.389108,0.652259,0.609497,0.663143,0.576409,0.390489,0.756357,0.634802,0.865863,0.84885
