In [1]:
import numpy as np
import csv
import os

In [3]:
# List the entries of the current working directory
# (presumably to check that the data file is reachable from here).
os.listdir(os.curdir)

['-1.14-windows.xml',
 '.anaconda',
 '.cisco',
 '.conda',
 '.condarc',
 '.continuum',
 '.gitconfig',
 '.ipynb_checkpoints',
 '.ipython',
 '.jupyter',
 '.packettracer',
 '.thumbnails',
 '.VirtualBox',
 '3D Objects',
 'AMDRM_Install.log',
 'AMD_RyzenMaster.log',
 'anaconda3',
 'ansel',
 'AppData',
 'Application Data',
 'captureMsi.log',
 'Cisco Packet Tracer 8.2.0',
 'Contacts',
 'Cookies',
 'Documents',
 'Downloads',
 'DRKLAB1.ipynb',
 'Favorites',
 'Lab2DRK-Numpy.ipynb',
 'Lab2DRK.ipynb',
 'Lab_1_Standard_Modules.ipynb',
 'Lab_1_Standard_Modules_filled.ipynb',
 'Lab_2_Numpy.ipynb',
 'Lab_2_Numpy_COMPLETED.ipynb',
 'Links',
 'Local Settings',
 'MicrosoftEdgeBackups',
 'Music',
 'My Documents',
 'NetHood',
 'NTUSER.DAT',
 'ntuser.dat.LOG1',
 'ntuser.dat.LOG2',
 'NTUSER.DAT{c3031be1-a4d5-11f0-b9cd-feda415b7d69}.TM.blf',
 'NTUSER.DAT{c3031be1-a4d5-11f0-b9cd-feda415b7d69}.TMContainer00000000000000000001.regtrans-ms',
 'NTUSER.DAT{c3031be1-a4d5-11f0-b9cd-feda415b7d69}.TMContainer000000000000

In [5]:
# Load every record of the stats file into memory as a list of dicts.
# csv.DictReader keys each row by the column names in the header line;
# newline="" lets the csv module handle line endings itself.
with open("players_stats_by_season_full_details.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    rows = list(reader)

len(rows)

53949

In [7]:
# Show the column names of the first record (the keys DictReader took from the CSV header).
rows[0].keys()

dict_keys(['League', 'Season', 'Stage', 'Player', 'Team', 'GP', 'MIN', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA', 'TOV', 'PF', 'ORB', 'DRB', 'REB', 'AST', 'STL', 'BLK', 'PTS', 'birth_year', 'birth_month', 'birth_date', 'height', 'height_cm', 'weight', 'weight_kg', 'nationality', 'high_school', 'draft_round', 'draft_pick', 'draft_team'])

In [9]:
# Keep only the records whose (League, Stage) pair is NBA regular season.
filtered_rows = [
    r for r in rows
    if (r["League"], r["Stage"]) == ("NBA", "Regular_Season")
]

len(filtered_rows)

4658

In [19]:
def to_float(values, missing=0.0):
    """Convert an iterable of raw CSV cells into a float64 NumPy array.

    Parameters
    ----------
    values : iterable
        Cells to convert. Each item may be a numeric string, a number, or
        ``None``.
    missing : float, optional
        Value substituted for missing cells (``None``, the empty string, or a
        string containing only whitespace). Defaults to ``0.0``.

    Returns
    -------
    numpy.ndarray
        1-D array of dtype ``float`` with one entry per input cell.

    Raises
    ------
    ValueError
        If a non-missing cell cannot be parsed as a float (for example
        ``"abc"``). Malformed data is surfaced rather than silently zeroed.
    """
    def _parse(cell):
        if cell is None:
            return missing
        if isinstance(cell, str):
            # Treat blank and whitespace-only cells alike; float(" ") would
            # otherwise raise even though the cell carries no value.
            cell = cell.strip()
            if not cell:
                return missing
        return float(cell)

    return np.array([_parse(v) for v in values], dtype=float)

# Label columns are kept as string arrays.
player, season = (
    np.array([r[key] for r in filtered_rows]) for key in ("Player", "Season")
)

# Games played and minutes are converted to floats (blank cells become 0.0 via to_float).
gp, mins = (
    to_float([r[key] for r in filtered_rows]) for key in ("GP", "MIN")
)

In [21]:
pts = to_float([r["PTS"] for r in filtered_rows])

# Points per minute. The result buffer starts as zeros and the division only
# writes where minutes are non-zero, so rows with no minutes stay at 0.0.
pts_per_min = np.zeros_like(pts)
np.divide(pts, mins, out=pts_per_min, where=mins != 0)

pts_per_min[:10]

array([0.74106861, 0.67402431, 0.71089243, 0.69716088, 0.57868613,
       0.61594663, 0.68659942, 0.57261795, 0.53550808, 0.63680556])

In [35]:
# Made / attempted counts for field goals, three-pointers and free throws.
fgm, fga, fg3m, fg3a, ftm, fta = (
    to_float([r[col] for r in filtered_rows])
    for col in ("FGM", "FGA", "3PM", "3PA", "FTM", "FTA")
)

# Accuracy = made / attempted. Entries with zero attempts are left at 0.0
# instead of dividing by zero.
fg_acc, fg3_acc, ft_acc = (
    np.divide(made, attempted, out=np.zeros_like(made), where=attempted != 0)
    for made, attempted in ((fgm, fga), (fg3m, fg3a), (ftm, fta))
)

In [37]:
# Preview the first ten values of each of the three accuracy arrays.
fg_acc[:10], fg3_acc[:10], ft_acc[:10]

(array([0.57417417, 0.46462264, 0.50948509, 0.42065782, 0.44837935,
        0.42778162, 0.48945148, 0.49737877, 0.45721271, 0.48320413]),
 array([0.        , 0.40254237, 0.25      , 0.34099617, 0.34038462,
        0.28819444, 0.34693878, 0.37037037, 0.40080972, 0.28421053]),
 array([0.52427184, 0.79128857, 0.797023  , 0.71290323, 0.73522459,
        0.81530343, 0.79470199, 0.76485149, 0.82018927, 0.75120773]))

In [39]:
# Combined accuracy over field goals and free throws:
# (FGM + FTM) / (FGA + FTA), left at 0.0 where there were no attempts at all.
overall_acc = np.zeros_like(fgm)
np.divide(
    np.add(fgm, ftm),
    np.add(fga, fta),
    out=overall_acc,
    where=np.add(fga, fta) != 0,
)

overall_acc[:10]


array([0.55765368, 0.5447263 , 0.60541761, 0.49766256, 0.50646242,
       0.56099773, 0.5804541 , 0.55336788, 0.51612903, 0.53975535])

In [41]:
blk, stl = (
    to_float([r[col] for r in filtered_rows]) for col in ("BLK", "STL")
)

# Per-game averages; entries with zero games played are left at 0.0.
blk_pg, stl_pg = (
    np.divide(total, gp, out=np.zeros_like(total), where=gp != 0)
    for total in (blk, stl)
)

blk_pg[:10], stl_pg[:10]


(array([3.02531646, 1.12195122, 0.86585366, 0.07142857, 0.2195122 ,
        0.43902439, 0.58108108, 1.55555556, 0.3902439 , 1.70666667]),
 array([0.4556962 , 1.34146341, 0.96341463, 2.05714286, 1.86585366,
        1.25609756, 1.39189189, 1.48148148, 1.32926829, 1.6       ]))

In [43]:
# One row per player-season, one column per label/metric. Stacking the string
# label arrays together with the float metric arrays yields a single string
# array, which is what gets written to disk with a "%s" format.
results = np.column_stack([
    player, season,
    fg_acc, fg3_acc, ft_acc,
    pts_per_min, overall_acc,
    blk_pg, stl_pg,
])
results.shape

(4658, 9)

In [45]:
header = (
    "Player,Season,FG_Accuracy,3P_Accuracy,FT_Accuracy,"
    "PTS_per_Min,Overall_Accuracy,BLK_per_Game,STL_per_Game"
)

# Write the table through the csv module rather than np.savetxt:
#  * np.savetxt with fmt="%s" does no quoting, so a comma or quote inside a
#    player name would corrupt the row; csv.writer quotes such fields.
#  * The input file is read as UTF-8, so the output is written as UTF-8 too
#    instead of relying on the platform default encoding.
# lineterminator=os.linesep keeps the same line endings a text-mode write
# produces on this platform.
with open("metrics_by_player_season.csv", "w", newline="", encoding="utf-8") as out_file:
    writer = csv.writer(out_file, lineterminator=os.linesep)
    writer.writerow(header.split(","))
    writer.writerows(results)

In [47]:
def top100(metric, name, players=None, seasons=None):
    """Write the 100 largest values of ``metric`` to ``top100_<name>.csv``.

    Rows are ordered from the largest value downwards and the file has the
    columns ``Player,Season,Value``. If fewer than 100 values are supplied,
    all of them are written.

    Parameters
    ----------
    metric : array-like
        1-D numeric values, aligned element-for-element with the labels.
    name : str
        Suffix used in the output file name (``top100_<name>.csv``), written
        to the current working directory.
    players, seasons : array-like of str, optional
        Labels aligned with ``metric``. When omitted, the notebook-level
        ``player`` and ``season`` arrays are used.

    Raises
    ------
    ValueError
        If the label arrays and ``metric`` differ in length.
    """
    # Fall back to the notebook globals only when labels are not passed in.
    players = np.asarray(player if players is None else players)
    seasons = np.asarray(season if seasons is None else seasons)
    metric = np.asarray(metric)
    if not (len(players) == len(seasons) == len(metric)):
        raise ValueError("players, seasons and metric must have the same length")

    # Ascending argsort, reversed, then truncated -> indices of the top values.
    idx = np.argsort(metric)[::-1][:100]
    data = np.column_stack((players[idx], seasons[idx], metric[idx]))

    # csv.writer quotes fields containing commas/quotes (np.savetxt with
    # fmt="%s" does not), and the file is written as UTF-8 to match how the
    # source data is read. os.linesep keeps the platform's usual line ending.
    with open(f"top100_{name}.csv", "w", newline="", encoding="utf-8") as out_file:
        writer = csv.writer(out_file, lineterminator=os.linesep)
        writer.writerow(["Player", "Season", "Value"])
        writer.writerows(data)

# Export one top-100 file per metric; the tag becomes part of the file name.
for metric_values, file_tag in (
    (fg_acc, "fg_accuracy"),
    (fg3_acc, "3p_accuracy"),
    (ft_acc, "ft_accuracy"),
    (pts_per_min, "pts_per_min"),
    (overall_acc, "overall_accuracy"),
    (blk_pg, "blk_per_game"),
    (stl_pg, "stl_per_game"),
):
    top100(metric_values, file_tag)

In [49]:
import os

# List the export files that actually exist in the working directory.
# The metrics file is included only if it is really on disk, so this cell is
# a genuine check rather than always echoing the expected name; the top-100
# names are sorted because os.listdir() returns entries in arbitrary order.
sorted(f for f in os.listdir() if f.startswith("top100_")) + [
    f for f in ["metrics_by_player_season.csv"] if os.path.isfile(f)
]


['top100_3p_accuracy.csv',
 'top100_blk_per_game.csv',
 'top100_fg_accuracy.csv',
 'top100_ft_accuracy.csv',
 'top100_overall_accuracy.csv',
 'top100_pts_per_min.csv',
 'top100_stl_per_game.csv',
 'metrics_by_player_season.csv']

In [51]:
# Display the first ten field-goal accuracy values.
fg_acc[:10]

array([0.57417417, 0.46462264, 0.50948509, 0.42065782, 0.44837935,
       0.42778162, 0.48945148, 0.49737877, 0.45721271, 0.48320413])

In [63]:
# Field-goal accuracy (made / attempted); zero-attempt entries stay at 0.0.
fg_acc = np.divide(fgm, fga, out=np.zeros_like(fgm), where=np.not_equal(fga, 0))


In [65]:
# Three-point accuracy (made / attempted); zero-attempt entries stay at 0.0.
fg3_acc = np.divide(fg3m, fg3a, out=np.zeros_like(fg3m), where=fg3a != 0)


In [68]:
# Free-throw accuracy (made / attempted); zero-attempt entries stay at 0.0.
ft_acc = np.divide(ftm, fta, out=np.zeros_like(ftm), where=np.not_equal(fta, 0))


In [70]:
# Points per minute; entries with zero minutes stay at 0.0.
pts_per_min = np.divide(
    pts,
    mins,
    out=np.zeros_like(pts),
    where=np.not_equal(mins, 0),
)


In [72]:
# (FGM + FTM) / (FGA + FTA); entries with no attempts at all stay at 0.0.
overall_acc = np.divide(
    np.add(fgm, ftm),
    np.add(fga, fta),
    out=np.zeros_like(fgm),
    where=np.not_equal(np.add(fga, fta), 0),
)

In [74]:
# Blocks per game; entries with zero games played stay at 0.0.
blk_pg = np.divide(blk, gp, out=np.zeros_like(blk), where=np.not_equal(gp, 0))


In [76]:
# Steals per game; entries with zero games played stay at 0.0.
stl_pg = np.divide(stl, gp, out=np.zeros_like(stl), where=np.not_equal(gp, 0))



In [78]:
# Spot-check a single record: pull entry k from every label/metric array,
# in the same order as the columns of the exported table.
k = 0
tuple(
    column[k]
    for column in (
        player, season,
        fg_acc, fg3_acc, ft_acc,
        pts_per_min, overall_acc,
        blk_pg, stl_pg,
    )
)


("Shaquille O'Neal",
 '1999 - 2000',
 0.5741741741741742,
 0.0,
 0.5242718446601942,
 0.741068605754031,
 0.5576536761751707,
 3.0253164556962027,
 0.45569620253164556)