In [16]:
import pandas as pd
import numpy as np

# Absolute, machine-specific location of the input CSV file. The same value is
# assigned again inside the ``__main__`` block before it is used.
path = "/Users/m2air/Downloads/players_stats_by_season_full_details.csv"
class BasketballStatsProcessor:
    """
    Load basketball statistics from a CSV file and derive ranking metrics.

    The processor reads the file into a pandas DataFrame, adds accuracy and
    per-game / per-minute columns computed with NumPy, builds "top N" tables
    for every derived metric, and exports the results to CSV files.

    Attributes:
        data: The loaded DataFrame, or ``None`` when the file could not be
            read (the load error is printed, not raised).
    """

    # Raw columns that calculate_metrics() reads from the input data.
    REQUIRED_COLUMNS = (
        'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA',
        'PTS', 'MIN', 'GP', 'BLK', 'STL',
    )

    # Derived metric columns, in the order they are ranked and exported.
    METRICS = (
        'FG_Accuracy', '3P_Accuracy', 'FT_Accuracy', 'PTS_Per_Minute',
        'Overall_Shooting_Accuracy', 'PTS_Per_Game', 'Blocks_Per_Game',
        'Steals_Per_Game',
    )

    # Accuracy metric -> (attempts column, exclusive minimum attempts).
    # NOTE(review): 'Overall_Shooting_Accuracy' and 'PTS_Per_Minute' have no
    # volume threshold besides games played; this matches the previous
    # behavior. Consider adding entries here if low-volume rows skew them.
    ATTEMPT_THRESHOLDS = {
        'FG_Accuracy': ('FGA', 20),
        '3P_Accuracy': ('3PA', 10),
        'FT_Accuracy': ('FTA', 10),
    }

    def __init__(self, filepath):
        """
        Read ``filepath`` with ``pandas.read_csv`` and store it in ``data``.

        A load failure is reported on stdout instead of being raised, so the
        constructor itself never fails; ``data`` is left as ``None`` and the
        other methods raise ``RuntimeError`` when called.

        Args:
            filepath: Path (or any source accepted by ``pandas.read_csv``)
                of the statistics CSV.
        """
        # Always define the attribute so a failed load is detectable later
        # rather than surfacing as an unrelated AttributeError.
        self.data = None
        try:
            self.data = pd.read_csv(filepath)
        except Exception as e:
            print(f"Error loading file: {e}")
        else:
            print(f"Successfully loaded data from {filepath}")

    def _require_data(self):
        """Return the loaded DataFrame, or raise RuntimeError if none exists."""
        data = getattr(self, 'data', None)
        if data is None:
            raise RuntimeError(
                "No data loaded; the input file could not be read."
            )
        return data

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Divide element-wise, yielding 0.0 wherever the denominator is 0."""
        return np.divide(
            numerator,
            denominator,
            out=np.zeros_like(numerator, dtype=float),
            where=denominator != 0,
        )

    def calculate_metrics(self):
        """
        Add the derived metric columns to ``data`` in place.

        Each metric is a ratio of two raw columns; rows whose denominator is
        zero get 0.0 instead of a division error.

        Raises:
            RuntimeError: If no data was loaded.
            KeyError: If any column in ``REQUIRED_COLUMNS`` is missing.
        """
        data = self._require_data()

        # Report every missing column at once, before anything is modified.
        missing = [col for col in self.REQUIRED_COLUMNS if col not in data.columns]
        if missing:
            raise KeyError(f"Missing required column(s): {missing}")

        # Extracting columns as NumPy arrays for calculations
        fgm = data['FGM'].values
        fga = data['FGA'].values
        pm3 = data['3PM'].values
        pa3 = data['3PA'].values
        ftm = data['FTM'].values
        fta = data['FTA'].values
        pts = data['PTS'].values
        mins = data['MIN'].values
        gp = data['GP'].values
        blk = data['BLK'].values
        stl = data['STL'].values

        ratio = self._safe_ratio

        # Shooting accuracy: makes / attempts
        data['FG_Accuracy'] = ratio(fgm, fga)
        data['3P_Accuracy'] = ratio(pm3, pa3)
        data['FT_Accuracy'] = ratio(ftm, fta)

        # Average points scored per minute
        data['PTS_Per_Minute'] = ratio(pts, mins)

        # Overall shooting accuracy: (FGM + FTM) / (FGA + FTA)
        data['Overall_Shooting_Accuracy'] = ratio(fgm + ftm, fga + fta)

        # Per-game averages
        data['PTS_Per_Game'] = ratio(pts, gp)
        data['Blocks_Per_Game'] = ratio(blk, gp)
        data['Steals_Per_Game'] = ratio(stl, gp)

    def get_top_100_lists(self, top_n=100, min_games=5):
        """
        Build a ranked table for every metric in ``METRICS``.

        Rows with fewer than ``min_games`` games played are excluded. Metrics
        listed in ``ATTEMPT_THRESHOLDS`` additionally require more than the
        configured number of attempts.

        Args:
            top_n: Maximum number of rows kept per metric (default 100).
            min_games: Minimum value of ``GP`` for a row to be ranked
                (default 5).

        Returns:
            dict mapping each metric name to a DataFrame with the columns
            ``Player``, ``Season`` and the metric, sorted descending.

        Raises:
            RuntimeError: If no data was loaded.
            KeyError: If a needed column is missing, e.g. because
                ``calculate_metrics`` has not been called yet.
        """
        data = self._require_data()

        # The games-played filter is the same for every metric; apply it once.
        eligible = data[data['GP'] >= min_games]

        top_100_results = {}
        for metric in self.METRICS:
            candidates = eligible

            threshold = self.ATTEMPT_THRESHOLDS.get(metric)
            if threshold is not None:
                attempts_col, min_attempts = threshold
                candidates = candidates[candidates[attempts_col] > min_attempts]

            top_100_results[metric] = (
                candidates[['Player', 'Season', metric]]
                .sort_values(by=metric, ascending=False)
                .head(top_n)
            )

        return top_100_results

    def export_results(self, top_100_dict, output_dir='.'):
        """
        Export the processed data and the ranked tables to CSV files.

        Writes ``processed_player_stats.csv`` plus one
        ``top_100_<metric>.csv`` per entry of ``top_100_dict``.

        Args:
            top_100_dict: Mapping of metric name to DataFrame, as returned by
                ``get_top_100_lists``.
            output_dir: Directory that receives the files; created if it does
                not exist. Defaults to the current working directory.

        Raises:
            RuntimeError: If no data was loaded.
        """
        from pathlib import Path

        data = self._require_data()

        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        data.to_csv(target_dir / 'processed_player_stats.csv', index=False)
        for metric, df in top_100_dict.items():
            filename = f"top_100_{metric}.csv"
            df.to_csv(target_dir / filename, index=False)
        print("All files have been exported successfully.")


if __name__ == "__main__":
    # Absolute path of the input CSV file (machine-specific).
    path = '/Users/m2air/Downloads/players_stats_by_season_full_details.csv'
    
   
    # Load the CSV; the constructor prints whether loading succeeded.
    processor = BasketballStatsProcessor(path)
    
    # Add the derived metric columns to the loaded data.
    processor.calculate_metrics()
    
    # Build the ranked top-100 table for each metric.
    top_100_data = processor.get_top_100_lists()
    
    # Write the processed data and the top-100 tables to CSV files.
    processor.export_results(top_100_data)

Successfully loaded data from /Users/m2air/Downloads/players_stats_by_season_full_details.csv
All files have been exported successfully.
