In [1]:
import pandas as pd
import os

# Input CSV and the folder that receives the generated reports.
# NOTE(review): both are absolute, machine-specific paths; the notebook will
# only run as-is on a machine that has this exact directory layout.
csv_path = r"C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\players_stats_by_season_full_details.csv"
output_dir = r"C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results"
# exist_ok=True keeps a re-run of this cell from failing when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

# low_memory=False parses the file in a single pass instead of in chunks, so each
# column gets one inferred dtype rather than possibly mixed types.
df = pd.read_csv(csv_path, low_memory=False)

# Raw counting-stat columns that must be numeric before any ratio is computed.
num_cols = ['FGM','FGA','3PM','3PA','FTM','FTA','PTS','GP','BLK','STL']
for c in num_cols:
    if c in df.columns:
        # errors='coerce' turns unparseable cells into NaN instead of raising.
        df[c] = pd.to_numeric(df[c], errors='coerce')


def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, yielding NaN where the denominator is 0.

    Plain ``/`` returns +/-inf for ``x / 0``. ``dropna`` does not treat inf as
    missing, so such rows would survive the later filtering and sort to the top
    of a descending ranking. Masking zero denominators to NaN first makes those
    rows missing instead.

    Args:
        numerator: numeric Series (or expression of Series) to divide.
        denominator: numeric Series aligned with ``numerator``.

    Returns:
        Series of ``numerator / denominator`` with NaN for zero or missing
        denominators.
    """
    return numerator / denominator.where(denominator != 0)


# One entry per derived column, in the order the columns are added to ``df``:
# (output column, input columns that must all exist, numerator builder, denominator column)
derived_metrics = [
    ('FG_pct', ('FGM', 'FGA'), lambda d: d['FGM'], 'FGA'),
    ('3P_pct', ('3PM', '3PA'), lambda d: d['3PM'], '3PA'),
    ('FT_pct', ('FTM', 'FTA'), lambda d: d['FTM'], 'FTA'),
    ('PPG', ('PTS', 'GP'), lambda d: d['PTS'], 'GP'),
    # eFG adds half a field goal of extra credit for every made three-pointer.
    ('eFG', ('FGM', '3PM', 'FGA'), lambda d: d['FGM'] + 0.5 * d['3PM'], 'FGA'),
    ('BLK_pg', ('BLK', 'GP'), lambda d: d['BLK'], 'GP'),
    ('STL_pg', ('STL', 'GP'), lambda d: d['STL'], 'GP'),
]

for out_col, required_cols, build_numerator, denominator_col in derived_metrics:
    if all(req in df.columns for req in required_cols):
        df[out_col] = _safe_ratio(build_numerator(df), df[denominator_col])
    else:
        # Keep the column present (all missing) so later steps can rely on it existing.
        df[out_col] = pd.NA

# Report name (used in the output filename) -> derived column that the report ranks on.
metrics = {
    'field_goal_accuracy': 'FG_pct',
    'three_point_accuracy': '3P_pct',
    'free_throw_accuracy': 'FT_pct',
    'points_per_game': 'PPG',
    'overall_shooting_accuracy_eFG': 'eFG',
    'blocks_per_game': 'BLK_pg',
    'steals_per_game': 'STL_pg',
}

# Identifier columns copied into every report, limited to those present in the data.
key_cols = [col for col in ['Player','Season'] if col in df.columns]

# For each metric: drop rows where it is missing, rank descending, keep the
# first 100 rows and write them to "top100_<name>.csv" inside output_dir.
for name, col in metrics.items():
    if col in df.columns:
        out_sorted = (
            df[key_cols + [col]]
            .dropna(subset=[col])
            .sort_values(by=col, ascending=False)
            .head(100)
        )
        out_path = os.path.join(output_dir, f"top100_{name}.csv")
        out_sorted.to_csv(out_path, index=False)
        print(f"Wrote {out_path} ({len(out_sorted)} rows)")
    else:
        print(f"Metric column {col} not found, skipping {name}")

print('Done')

Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_field_goal_accuracy.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_three_point_accuracy.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_free_throw_accuracy.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_points_per_game.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_overall_shooting_accuracy_eFG.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_blocks_per_game.csv (100 rows)
Wrote C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results\top100_steals_per_game.csv (100 rows)
Done


In [3]:
import pandas as pd

# Same input CSV and results folder as the first cell, assigned again here.
# NOTE(review): `os` is not imported in this cell; it is only in scope because
# the first cell imports it, so this cell cannot run on its own in a fresh kernel.
csv_path = r"C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\players_stats_by_season_full_details.csv"
output_dir = r"C:\Users\Aissatou Ndiaye\Downloads\players_stats_extracted\results"
# exist_ok=True makes this a no-op when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

class SeasonStats:
    """Season totals for one player together with derived rate metrics.

    Each ``get_*`` accessor divides one stored total by another and returns
    ``0.0`` instead of dividing whenever the divisor is not greater than zero.
    """

    def __init__(self, name, season, fgm, fga, fg3m, fg3a, ftm, fta, pts, mins, games, blk, stl):
        """Store the identifying fields and the raw totals unchanged."""
        # Who and when.
        self.name = name
        self.season = season
        # Made / attempted pairs used by the accuracy accessors.
        self.fgm = fgm
        self.fga = fga
        self.fg3m = fg3m
        self.fg3a = fg3a
        self.ftm = ftm
        self.fta = fta
        # Totals and the two divisors (minutes, games) used by the rate accessors.
        self.pts = pts
        self.mins = mins
        self.games = games
        self.blk = blk
        self.stl = stl

    @staticmethod
    def _rate(total, base):
        """Return ``total / base`` when ``base > 0``, otherwise ``0.0``."""
        if base > 0:
            return total / base
        return 0.0

    def get_fg_accuracy(self):
        """Return fgm / fga, or 0.0 when fga is not positive."""
        return self._rate(self.fgm, self.fga)

    def get_3p_accuracy(self):
        """Return fg3m / fg3a, or 0.0 when fg3a is not positive."""
        return self._rate(self.fg3m, self.fg3a)

    def get_ft_accuracy(self):
        """Return ftm / fta, or 0.0 when fta is not positive."""
        return self._rate(self.ftm, self.fta)

    def get_ppm(self):
        """Return pts / mins (points per minute), or 0.0 when mins is not positive."""
        return self._rate(self.pts, self.mins)

    def get_ts_pct(self):
        """Return pts / (2 * (fga + 0.44 * fta)), or 0.0 when that divisor is not positive."""
        weighted_attempts = 2 * (self.fga + 0.44 * self.fta)
        return self._rate(self.pts, weighted_attempts)

    def get_avg_blocks(self):
        """Return blk / games, or 0.0 when games is not positive."""
        return self._rate(self.blk, self.games)

    def get_avg_steals(self):
        """Return stl / games, or 0.0 when games is not positive."""
        return self._rate(self.stl, self.games)

def process_top_100(data_list):
    """Rank records on seven metrics and keep the best 100 for each one.

    Args:
        data_list: iterable of records exposing ``name``, ``season`` and the
            zero-argument ``get_*`` accessors listed below.

    Returns:
        dict mapping each metric label to a list of ``(name, season, value)``
        tuples ordered from highest to lowest value, at most 100 per label.
    """
    # Display label -> name of the accessor method that produces the metric.
    accessor_by_label = {
        "FG Accuracy": "get_fg_accuracy",
        "3P Accuracy": "get_3p_accuracy",
        "FT Accuracy": "get_ft_accuracy",
        "Points Per Minute": "get_ppm",
        "Overall (TS%)": "get_ts_pct",
        "Blocks Per Game": "get_avg_blocks",
        "Steals Per Game": "get_avg_steals",
    }

    results = {}
    for label, accessor in accessor_by_label.items():
        # Descending sort; records with equal values keep their input order.
        ranked = sorted(
            data_list,
            key=lambda record: getattr(record, accessor)(),
            reverse=True,
        )
        results[label] = [
            (record.name, record.season, getattr(record, accessor)())
            for record in ranked[:100]
        ]
    return results