In [1]:
import kagglehub
# Fetch the dataset through kagglehub and keep the location it reports;
# later cells list and read the files found under `path`.
dataset_handle = "jacobbaruch/basketball-players-stats-per-season-49-leagues"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\jeroa\.cache\kagglehub\datasets\jacobbaruch\basketball-players-stats-per-season-49-leagues\versions\10


In [2]:
import os
import pandas as pd

# List files in the downloaded dataset folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep `files` (and anything that indexes into it) stable across runs and
# platforms.
files = [os.path.join(path, name) for name in sorted(os.listdir(path))]
files

['C:\\Users\\jeroa\\.cache\\kagglehub\\datasets\\jacobbaruch\\basketball-players-stats-per-season-49-leagues\\versions\\10\\players_stats_by_season_full_details.csv']

In [3]:
# Load dataset
# Select the CSV explicitly rather than trusting files[0]: the folder listing
# may contain other entries and its order is not guaranteed. Sorting makes the
# choice deterministic if more than one CSV is present.
csv_candidates = sorted(f for f in files if f.lower().endswith('.csv'))
if not csv_candidates:
    raise FileNotFoundError(f"No CSV file found among: {files}")
csv_path = csv_candidates[0]
df = pd.read_csv(csv_path)

df.columns

Index(['League', 'Season', 'Stage', 'Player', 'Team', 'GP', 'MIN', 'FGM',
       'FGA', '3PM', '3PA', 'FTM', 'FTA', 'TOV', 'PF', 'ORB', 'DRB', 'REB',
       'AST', 'STL', 'BLK', 'PTS', 'birth_year', 'birth_month', 'birth_date',
       'height', 'height_cm', 'weight', 'weight_kg', 'nationality',
       'high_school', 'draft_round', 'draft_pick', 'draft_team'],
      dtype='object')

In [12]:
import numpy as np

# Columns we want to analyze
cols = ['GP', 'MIN', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA', 'PTS', 'STL', 'BLK']

# Group stats by player and season: every row of `df` that shares the same
# (Player, Season) pair is summed into a single row.
season_totals = df.groupby(['Player', 'Season'], as_index=False)[cols].sum()

# Load data into NumPy ndarrays
gp  = season_totals['GP'].to_numpy()
# Bound to `minutes`, not `min`: assigning to `min` at notebook scope would
# shadow the builtin min() for every cell executed afterwards.
minutes = season_totals['MIN'].to_numpy()
fgm = season_totals['FGM'].to_numpy()
fga = season_totals['FGA'].to_numpy()
tpm = season_totals['3PM'].to_numpy()
tpa = season_totals['3PA'].to_numpy()
ftm = season_totals['FTM'].to_numpy()
fta = season_totals['FTA'].to_numpy()
pts = season_totals['PTS'].to_numpy()
stl = season_totals['STL'].to_numpy()
blk = season_totals['BLK'].to_numpy()


# Helper function to avoid division by zero
def safe_divide(numerator, denominator):
    """Divide element-wise, yielding NaN wherever the denominator is zero.

    Parameters
    ----------
    numerator, denominator : array-like
        Numeric values; they must be broadcastable against each other.

    Returns
    -------
    numpy.ndarray
        Float array of the broadcast shape holding ``numerator / denominator``
        where ``denominator != 0`` and ``np.nan`` everywhere else.
    """
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    # Pre-fill with NaN: np.divide only writes the positions selected by
    # `where`, so zero-denominator slots keep the NaN and no divide-by-zero
    # warning is emitted.
    out = np.full(np.broadcast(numerator, denominator).shape, np.nan)
    np.divide(numerator, denominator, out=out, where=denominator != 0)
    return out


# Shooting accuracy calculations (made / attempted)
fg_accuracy = safe_divide(fgm, fga)
three_pt_accuracy = safe_divide(tpm, tpa)
ft_accuracy = safe_divide(ftm, fta)

# Scoring calculations
points_per_minute = safe_divide(pts, minutes)
points_per_game = safe_divide(pts, gp)

# Overall shooting accuracy
# NOTE(review): if FGM/FGA already include three-point shots, 3PM/3PA are
# counted twice in this ratio -- confirm against the dataset's definitions.
overall_shooting_accuracy = safe_divide(
    fgm + tpm + ftm,
    fga + tpa + fta
)

# Defensive metrics
blocks_per_game = safe_divide(blk, gp)
steals_per_game = safe_divide(stl, gp)

# Add results back to the DataFrame
season_totals['fg_accuracy'] = fg_accuracy
season_totals['three_pt_accuracy'] = three_pt_accuracy
season_totals['ft_accuracy'] = ft_accuracy
season_totals['points_per_minute'] = points_per_minute
season_totals['points_per_game'] = points_per_game
season_totals['overall_shooting_accuracy'] = overall_shooting_accuracy
season_totals['blocks_per_game'] = blocks_per_game
season_totals['steals_per_game'] = steals_per_game

# Final result table: identifiers plus the derived metrics only
result = season_totals[[
    'Player', 'Season',
    'fg_accuracy', 'three_pt_accuracy', 'ft_accuracy',
    'points_per_minute', 'points_per_game',
    'overall_shooting_accuracy',
    'blocks_per_game', 'steals_per_game'
]]

result.head()


Unnamed: 0,Player,Season,fg_accuracy,three_pt_accuracy,ft_accuracy,points_per_minute,points_per_game,overall_shooting_accuracy,blocks_per_game,steals_per_game
0,. Rufukatijiang,2019 - 2020,0.583333,,0.703704,0.526104,6.55,0.609756,0.05,0.25
1,A'uston Calhoun,2013 - 2014,0.448598,0.304348,0.62,0.448864,9.115385,0.454839,0.230769,0.423077
2,A'uston Calhoun,2014 - 2015,0.454545,0.347826,0.823529,0.510367,18.823529,0.498753,0.705882,0.941176
3,A'uston Calhoun,2018 - 2019,0.375,0.307692,0.9,0.37561,11.0,0.405172,0.142857,0.428571
4,A.C. Green,1999 - 2000,0.440252,0.25,0.694915,0.213316,4.790476,0.489149,0.2,0.638095


In [None]:
# Leaderboard label -> name of the `result` column that leaderboard ranks.
metrics = dict([
    ('field_goal_accuracy', 'fg_accuracy'),
    ('three_point_accuracy', 'three_pt_accuracy'),
    ('free_throw_accuracy', 'ft_accuracy'),
    ('avg_points_per_game', 'points_per_game'),
    ('overall_shooting_accuracy', 'overall_shooting_accuracy'),
    ('avg_blocks_per_game', 'blocks_per_game'),
    ('avg_steals_per_game', 'steals_per_game'),
])

# One table per metric: rows lacking a value for the metric are discarded, the
# remainder is ordered from highest to lowest, the first 100 rows are kept,
# and the metric column is renamed to its leaderboard label.
Top100 = {
    label: (
        result
          .dropna(subset=[col])
          .sort_values(col, ascending=False)
          .head(100)
          .loc[:, ['Player', 'Season', col]]
          .rename(columns={col: label})
          .reset_index(drop=True)
    )
    for label, col in metrics.items()
}

Top100

{'field_goal_accuracy':                  Player       Season  field_goal_accuracy
 0       Kohei Takahashi  2019 - 2020             1.000000
 1    Nemanja Zdravkovic  2019 - 2020             1.000000
 2      Fotios Georgalas  2019 - 2020             1.000000
 3          Samuel Taiwo  2019 - 2020             1.000000
 4   Giannis Sidiroilias  2019 - 2020             0.857143
 ..                  ...          ...                  ...
 95          Isaac Butts  2019 - 2020             0.711712
 96           Sasha Kaun  2011 - 2012             0.711538
 97       William Mosley  2018 - 2019             0.711538
 98       Sitapha Savane  2016 - 2017             0.711111
 99       DeAndre Jordan  2014 - 2015             0.710569
 
 [100 rows x 3 columns],
 'three_point_accuracy':                 Player       Season  three_point_accuracy
 0      Dominykas Milka  2014 - 2015                   1.0
 1           Ivan Grgat  2001 - 2002                   1.0
 2         JaVale McGee  2012 - 2013     

In [8]:
import pandas as pd
from IPython.display import display

# Allow up to 200 rows to be rendered, enough for each 100-row table below.
pd.options.display.max_rows = 200

# Show every leaderboard under a header line carrying its label.
for label in Top100:
    table = Top100[label]
    print(f"\n=== {label} (top 100) ===")
    display(table)


=== field_goal_accuracy (top 100) ===


Unnamed: 0,Player,Season,field_goal_accuracy
0,Kohei Takahashi,2019 - 2020,1.0
1,Nemanja Zdravkovic,2019 - 2020,1.0
2,Fotios Georgalas,2019 - 2020,1.0
3,Samuel Taiwo,2019 - 2020,1.0
4,Giannis Sidiroilias,2019 - 2020,0.857143
5,Primoz Brezec,2010 - 2011,0.854545
6,Brandan Wright,2013 - 2014,0.833333
7,Tamas Harazin,2015 - 2016,0.830357
8,Theo Ratliff,2008 - 2009,0.818182
9,Jozsef Szendrei,2004 - 2005,0.810811



=== three_point_accuracy (top 100) ===


Unnamed: 0,Player,Season,three_point_accuracy
0,Dominykas Milka,2014 - 2015,1.0
1,Ivan Grgat,2001 - 2002,1.0
2,JaVale McGee,2012 - 2013,1.0
3,David Kravish,2018 - 2019,1.0
4,David Doblas,2012 - 2013,1.0
5,Ahmet Duverioglu,2019 - 2020,1.0
6,Jamelle Hagins,2014 - 2015,1.0
7,Igor Zamanskiy,2012 - 2013,1.0
8,Ilija Zolotic,2013 - 2014,1.0
9,Ilya Popov,2019 - 2020,1.0



=== free_throw_accuracy (top 100) ===


Unnamed: 0,Player,Season,free_throw_accuracy
0,Paulinho Boracini,2011 - 2012,1.098039
1,Jimmy Salem,2019 - 2020,1.0
2,Jeff Schroeder,2019 - 2020,1.0
3,Tyler Scott,2019 - 2020,1.0
4,Oscar Robertson,2019 - 2020,1.0
5,Dominik Mavra,2017 - 2018,1.0
6,Oleksandr Skutyelnik,2009 - 2010,1.0
7,Matt Geiger,2000 - 2001,1.0
8,Matt Dellavedova,2017 - 2018,1.0
9,A.J. Hess,2018 - 2019,1.0



=== avg_points_per_game (top 100) ===


Unnamed: 0,Player,Season,avg_points_per_game
0,Jonathan Gibson,2015 - 2016,41.972222
1,Pierre Jackson,2018 - 2019,39.8
2,Errick McCollum,2014 - 2015,39.578947
3,Willie Warren,2014 - 2015,38.941176
4,Darius Adams,2017 - 2018,38.731707
5,Joe Young,2019 - 2020,38.340909
6,Dominique Jones,2019 - 2020,37.785714
7,Jimmer Fredette,2016 - 2017,37.609756
8,Errick McCollum,2016 - 2017,37.472222
9,Jimmer Fredette,2017 - 2018,36.853659



=== overall_shooting_accuracy (top 100) ===


Unnamed: 0,Player,Season,overall_shooting_accuracy
0,Samuel Taiwo,2019 - 2020,0.857143
1,Kohei Takahashi,2019 - 2020,0.833333
2,Acie Law,2007 - 2008,0.833333
3,Primoz Brezec,2010 - 2011,0.8125
4,Rasko Katic,2018 - 2019,0.8
5,Emanuel Matias,2019 - 2020,0.8
6,Jozsef Szendrei,2004 - 2005,0.783333
7,Riccardo Cervi,2013 - 2014,0.77931
8,Eric Zenners,2019 - 2020,0.777778
9,Giannis Sidiroilias,2019 - 2020,0.777778



=== avg_blocks_per_game (top 100) ===


Unnamed: 0,Player,Season,avg_blocks_per_game
0,Volodymyr Koniev,2018 - 2019,4.0
1,Justin Williams,2015 - 2016,3.919355
2,Hamady Ndiaye,2012 - 2013,3.903226
3,Mouhamadou N'doye,2014 - 2015,3.8
4,Ekpe Udoh,2019 - 2020,3.75
5,Alonzo Mourning,1999 - 2000,3.674157
6,Theo Ratliff,2003 - 2004,3.611765
7,Robert Upshaw,2016 - 2017,3.611111
8,Hassan Whiteside,2015 - 2016,3.578313
9,Marcus Camby,2007 - 2008,3.578313



=== avg_steals_per_game (top 100) ===


Unnamed: 0,Player,Season,avg_steals_per_game
0,Joe Spinks,2003 - 2004,4.3
1,Kevin Rice,2004 - 2005,4.2
2,Joe Spinks,2002 - 2003,4.1
3,Antonio Bivins,2019 - 2020,4.0
4,Eric Gilchrese,2016 - 2017,3.962963
5,Brian Starr,2019 - 2020,3.95
6,Nate Green,2003 - 2004,3.870968
7,Aliaksandr Kudrautsau,2014 - 2015,3.714286
8,Ivica Maric,2000 - 2001,3.7
9,Jameil Rich,2000 - 2001,3.7
