#  Visualization General Public
#  The dataset is named "NBA Player Statistics".
#  The dataset was provided by data.world.
#  The data can be obtained from https://www.basketball-reference.com/ and https://colab.research.google.com/drive/1fb4y802jp9b7DE-aEq5aSyQAIhmOqzPF?usp=sharing
#  The dataset is publicly available.
# The dataset file size is approximately 2.1 MB.
# The dataset contains 14,573 items (rows).


In [123]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [124]:
# Read the player statistics CSV (path is relative to the working directory) into a DataFrame
nba_data = pd.read_csv(filepath_or_buffer="NBA_Player_Stats_2.csv")

In [125]:
# Preview the first five rows of the raw data
nba_data.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season,MVP
0,1,Mahmoud Abdul-Rauf,PG,28,SAC,31,0,17.1,3.3,8.8,...,1.0,1.2,1.9,0.5,0.0,0.6,1.0,7.3,1997-98,False
1,2,Tariq Abdul-Wahad,SG,23,SAC,59,16,16.3,2.4,6.1,...,1.2,2.0,0.9,0.6,0.2,1.1,1.4,6.4,1997-98,False
2,3,Shareef Abdur-Rahim,SF,21,VAN,82,82,36.0,8.0,16.4,...,4.3,7.1,2.6,1.1,0.9,3.1,2.5,22.3,1997-98,False
3,4,Cory Alexander,PG,24,TOT,60,22,21.6,2.9,6.7,...,2.2,2.4,3.5,1.2,0.2,1.9,1.6,8.1,1997-98,False
4,4,Cory Alexander,PG,24,SAS,37,3,13.5,1.6,3.9,...,1.1,1.3,1.9,0.7,0.1,1.3,1.4,4.5,1997-98,False


In [126]:
# Keep only the identifying columns and the stat columns used in the analysis
columns_to_keep = [
    "Rk", "Player", "Pos", "Age", "Tm", "G",
    "TRB", "AST", "STL", "BLK", "PTS",
    "Season", "MVP",
]
filtered_nba_data = nba_data.loc[:, columns_to_keep]

# Preview the first five rows of the reduced frame
filtered_nba_data.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,TRB,AST,STL,BLK,PTS,Season,MVP
0,1,Mahmoud Abdul-Rauf,PG,28,SAC,31,1.2,1.9,0.5,0.0,7.3,1997-98,False
1,2,Tariq Abdul-Wahad,SG,23,SAC,59,2.0,0.9,0.6,0.2,6.4,1997-98,False
2,3,Shareef Abdur-Rahim,SF,21,VAN,82,7.1,2.6,1.1,0.9,22.3,1997-98,False
3,4,Cory Alexander,PG,24,TOT,60,2.4,3.5,1.2,0.2,8.1,1997-98,False
4,4,Cory Alexander,PG,24,SAS,37,1.3,1.9,0.7,0.1,4.5,1997-98,False


In [127]:
# Keep only rows whose Season string sorts strictly after "2008-09", i.e. the
# 2009-10 season onwards; the 2008-09 season itself is excluded by the strict ">"
is_later_season = filtered_nba_data["Season"] > "2008-09"
filtered_nba_data = filtered_nba_data[is_later_season]

# Preview the first five remaining rows
filtered_nba_data.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,TRB,AST,STL,BLK,PTS,Season,MVP
6388,1,Arron Afflalo,SG,24,DEN,82,3.1,1.7,0.6,0.4,8.8,2009-10,False
6389,2,Alexis Ajinça,C,21,CHA,6,0.7,0.0,0.2,0.2,1.7,2009-10,False
6390,3,LaMarcus Aldridge,PF,24,POR,78,8.0,2.1,0.9,0.6,17.9,2009-10,False
6391,4,Joe Alexander,SF,23,CHI,8,0.6,0.3,0.1,0.1,0.5,2009-10,False
6392,5,Malik Allen,PF,31,DEN,51,1.6,0.3,0.2,0.1,2.1,2009-10,False


In [128]:
# Shorten "Season" to its last two characters (e.g. "2009-10" -> "10").
# assign() builds a new DataFrame instead of writing a column in place into the
# row-filtered slice, which avoids pandas' SettingWithCopyWarning.
filtered_nba_data = filtered_nba_data.assign(
    Season=filtered_nba_data["Season"].str[-2:]
)

# Display the first few rows of the updated data
filtered_nba_data.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,TRB,AST,STL,BLK,PTS,Season,MVP
6388,1,Arron Afflalo,SG,24,DEN,82,3.1,1.7,0.6,0.4,8.8,10,False
6389,2,Alexis Ajinça,C,21,CHA,6,0.7,0.0,0.2,0.2,1.7,10,False
6390,3,LaMarcus Aldridge,PF,24,POR,78,8.0,2.1,0.9,0.6,17.9,10,False
6391,4,Joe Alexander,SF,23,CHI,8,0.6,0.3,0.1,0.1,0.5,10,False
6392,5,Malik Allen,PF,31,DEN,51,1.6,0.3,0.2,0.1,2.1,10,False


In [129]:
# A player who changed teams mid-season appears once per team plus one combined
# row whose "Tm" is "TOT"; all of these rows share the same "Rk" (see Cory
# Alexander in the raw preview). Once "Tm" is gone those rows cannot be told
# apart, and summing "G" over them counts the player's games twice.
# For such players keep only the combined "TOT" row.
player_keys = ["Rk", "Player", "Season"]
is_total_row = filtered_nba_data["Tm"].eq("TOT").to_numpy()
row_keys = pd.MultiIndex.from_frame(filtered_nba_data[player_keys])
has_total_row = row_keys.isin(row_keys[is_total_row])
filtered_nba_data = filtered_nba_data[is_total_row | ~has_total_row]

# Drop the "Tm" column
filtered_nba_data = filtered_nba_data.drop(columns=["Tm"])

# Display the first few rows of the updated data
filtered_nba_data.head()

Unnamed: 0,Rk,Player,Pos,Age,G,TRB,AST,STL,BLK,PTS,Season,MVP
6388,1,Arron Afflalo,SG,24,82,3.1,1.7,0.6,0.4,8.8,10,False
6389,2,Alexis Ajinça,C,21,6,0.7,0.0,0.2,0.2,1.7,10,False
6390,3,LaMarcus Aldridge,PF,24,78,8.0,2.1,0.9,0.6,17.9,10,False
6391,4,Joe Alexander,SF,23,8,0.6,0.3,0.1,0.1,0.5,10,False
6392,5,Malik Allen,PF,31,51,1.6,0.3,0.2,0.1,2.1,10,False


In [130]:
# Build a (Player, Season) grouping; nothing is aggregated here.
# head() returns up to the first five rows of every group.
grouped = filtered_nba_data.groupby(by=["Player", "Season"])
grouped.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,G,TRB,AST,STL,BLK,PTS,Season,MVP
6388,1,Arron Afflalo,SG,24,82,3.1,1.7,0.6,0.4,8.8,10,False
6389,2,Alexis Ajinça,C,21,6,0.7,0.0,0.2,0.2,1.7,10,False
6390,3,LaMarcus Aldridge,PF,24,78,8.0,2.1,0.9,0.6,17.9,10,False
6391,4,Joe Alexander,SF,23,8,0.6,0.3,0.1,0.1,0.5,10,False
6392,5,Malik Allen,PF,31,51,1.6,0.3,0.2,0.1,2.1,10,False
...,...,...,...,...,...,...,...,...,...,...,...,...
14568,601,Thaddeus Young,PF,33,26,4.4,1.7,1.2,0.4,6.3,22,False
14569,602,Trae Young,PG,23,76,3.7,9.7,0.9,0.1,28.4,22,False
14570,603,Omer Yurtseven,C,23,56,5.3,0.9,0.3,0.4,5.3,22,False
14571,604,Cody Zeller,C,29,27,4.6,0.8,0.3,0.2,5.2,22,False


In [131]:
# Collapse the data to one row per (Player, Season). The stat columns are
# averaged using games played ("G") as the weight.
#
# Vectorized groupby aggregations are used instead of groupby.apply with a
# per-group lambda: this avoids building a Series in Python for every group and
# avoids the DeprecationWarning pandas >= 2.2 raises when apply operates on the
# grouping columns.
group_keys = ["Player", "Season"]
weighted_cols = ["TRB", "AST", "STL", "BLK", "PTS"]

by_player_season = filtered_nba_data.groupby(group_keys)

other_stats = by_player_season.agg(
    G=("G", "sum"),
    Pos=("Pos", lambda pos: pos.mode()[0]),  # most frequent position for the player
    Age=("Age", "mean"),  # average age in case there are multiple entries
    MVP=("MVP", "max"),  # True if the player is flagged MVP in any of the rows
)

# value * games gives a per-row total; summed totals / summed games is the
# games-weighted mean of the stat.
row_totals = filtered_nba_data[weighted_cols].mul(filtered_nba_data["G"], axis=0)
summed_totals = row_totals.groupby(
    [filtered_nba_data[key] for key in group_keys]
).sum()
weighted_means = summed_totals.div(other_stats["G"], axis=0)

# Column order: Player, Season, TRB, AST, STL, BLK, PTS, G, Pos, Age, MVP
weighted_stats = weighted_means.join(other_stats).reset_index()

# Merge with original dataframe to get the 'Rk' column
merged_data = pd.merge(
    filtered_nba_data[["Player", "Season", "Rk"]],
    weighted_stats,
    on=["Player", "Season"],
    how="right",
)

# The merge yields one row per original row; keep the first per (Player, Season)
unique_data = merged_data.drop_duplicates(subset=["Player", "Season"]).reset_index(drop=True)

unique_data

Unnamed: 0,Player,Season,Rk,TRB,AST,STL,BLK,PTS,G,Pos,Age,MVP
0,A.J. Hammons,17,171,1.6,0.2,0.0,0.6,2.2,22,C,24.0,False
1,A.J. Price,10,335,1.6,1.9,0.6,0.1,7.3,56,PG,23.0,False
2,A.J. Price,11,339,1.4,2.2,0.6,0.0,6.5,50,PG,24.0,False
3,A.J. Price,12,357,1.4,2.0,0.5,0.0,3.9,44,PG,25.0,False
4,A.J. Price,13,344,2.0,3.6,0.6,0.1,7.7,57,PG,26.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
6432,Álex Abrines,19,1,1.5,0.6,0.5,0.2,5.3,31,SG,25.0,False
6433,Ángel Delgado,19,132,2.0,0.0,0.5,0.0,1.5,2,C,24.0,False
6434,Ömer Aşık,11,24,3.7,0.4,0.2,0.7,2.8,82,C,24.0,False
6435,Ömer Aşık,12,21,5.3,0.5,0.5,1.0,3.1,66,C,25.0,False


In [132]:
# Order the rows by "Season" (ascending is the default) and renumber the index from 0
season_ordered = unique_data.sort_values("Season")
sorted_data = season_ordered.reset_index(drop=True)
sorted_data

Unnamed: 0,Player,Season,Rk,TRB,AST,STL,BLK,PTS,G,Pos,Age,MVP
0,O.J. Mayo,10,277,3.8,3.0,1.2,0.2,17.5,82,SG,22.0,False
1,Lamar Odom,10,315,9.8,3.3,0.9,0.7,10.8,82,PF,30.0,False
2,Tony Allen,10,7,2.7,1.3,1.1,0.4,6.1,54,SG,28.0,False
3,Antonio Anderson,10,12,1.0,0.0,0.0,0.0,2.0,1,SF,24.0,False
4,Solomon Jones,10,240,2.8,0.6,0.3,0.7,4.0,52,C,25.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...
6432,JaQuori McLaughlin,22,371,0.0,0.5,0.0,0.0,0.0,4,PG,24.0,False
6433,Otto Porter Jr.,22,453,5.7,1.5,1.1,0.5,8.2,63,PF,28.0,False
6434,Cat Barber,22,30,1.0,1.0,0.0,0.0,0.0,3,PG,27.0,False
6435,Timothé Luwawu-Cabarrot,22,342,1.6,0.8,0.3,0.1,4.4,52,SF,26.0,False


In [133]:
# Limit the weighted stat columns to two decimal places
cols_to_round = ["TRB", "AST", "STL", "BLK", "PTS"]
rounded_stats = sorted_data[cols_to_round].round(decimals=2)
sorted_data[cols_to_round] = rounded_stats

sorted_data.head(n=5)

Unnamed: 0,Player,Season,Rk,TRB,AST,STL,BLK,PTS,G,Pos,Age,MVP
0,O.J. Mayo,10,277,3.8,3.0,1.2,0.2,17.5,82,SG,22.0,False
1,Lamar Odom,10,315,9.8,3.3,0.9,0.7,10.8,82,PF,30.0,False
2,Tony Allen,10,7,2.7,1.3,1.1,0.4,6.1,54,SG,28.0,False
3,Antonio Anderson,10,12,1.0,0.0,0.0,0.0,2.0,1,SF,24.0,False
4,Solomon Jones,10,240,2.8,0.6,0.3,0.7,4.0,52,C,25.0,False


In [140]:
# Weight applied to each stat when scoring a player
weights = dict(PTS=1.5, TRB=2, AST=2, STL=3, BLK=3)

# Player Score is the weighted sum of the stats, accumulated in the dict's
# insertion order (PTS, TRB, AST, STL, BLK)
sorted_data["Player Score"] = sum(
    sorted_data[stat] * weight for stat, weight in weights.items()
)

# For every season, pick the row holding the highest Player Score
top_row_labels = sorted_data.groupby("Season")["Player Score"].idxmax()
report_columns = ["Season", "Player", "Player Score", "TRB", "AST", "STL", "BLK", "PTS"]
best_players = sorted_data.loc[top_row_labels, report_columns]

best_players


Unnamed: 0,Season,Player,Player Score,TRB,AST,STL,BLK,PTS
357,10,LeBron James,84.15,7.3,8.6,1.6,1.0,29.7
710,11,Dwight Howard,76.75,14.1,1.4,1.4,2.4,22.9
1265,12,LeBron James,76.95,7.9,6.2,1.9,0.8,27.1
1437,13,LeBron James,78.6,8.0,7.3,1.7,0.9,26.8
2075,14,Kevin Durant,79.8,7.4,5.5,1.3,0.7,32.0
2585,15,Russell Westbrook,80.85,7.3,8.6,2.1,0.2,28.1
3143,16,DeMarcus Cousins,78.95,11.5,3.3,1.6,1.4,26.9
3436,17,Russell Westbrook,95.6,10.7,10.4,1.6,0.4,31.6
3863,18,Russell Westbrook,85.2,10.1,10.3,1.8,0.3,25.4
4382,19,James Harden,90.45,6.6,7.5,2.0,0.7,36.1
