# Load the dataset NBA Regular Season 2023 - 2024

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the dataset
df = pd.read_csv('https://raw.githubusercontent.com/renatomaaliw3/public_files/refs/heads/master/Data%20Sets/NBA2324-R.csv')
df.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,PF-C,24,TOT,74,18,21.9,3.2,6.3,...,0.616,2.6,4.0,6.6,1.3,0.6,0.9,1.1,1.9,7.6
1,1,Precious Achiuwa,C,24,TOR,25,0,17.5,3.1,6.8,...,0.571,2.0,3.4,5.4,1.8,0.6,0.5,1.2,1.6,7.7
2,1,Precious Achiuwa,PF,24,NYK,49,18,24.2,3.2,6.1,...,0.643,2.9,4.3,7.2,1.1,0.6,1.1,1.1,2.1,7.6
3,2,Bam Adebayo,C,26,MIA,71,71,34.0,7.5,14.3,...,0.755,2.2,8.1,10.4,3.9,1.1,0.9,2.3,2.2,19.3
4,3,Ochai Agbaji,SG,23,TOT,78,28,21.0,2.3,5.6,...,0.661,0.9,1.8,2.8,1.1,0.6,0.6,0.8,1.5,5.8


Data Description:

Rk : Rank
Player : Player's name
Pos : Position
Age : Player's age
Tm : Team
G : Games played
GS : Games started
MP : Minutes played per game
FG : Field goals per game
FGA : Field goal attempts per game
FG% : Field goal percentage
3P : 3-point field goals per game
3PA : 3-point field goal attempts per game
3P% : 3-point field goal percentage
2P : 2-point field goals per game
2PA : 2-point field goal attempts per game
2P% : 2-point field goal percentage
eFG% : Effective field goal percentage
FT : Free throws per game
FTA : Free throw attempts per game
FT% : Free throw percentage
ORB : Offensive rebounds per game
DRB : Defensive rebounds per game
TRB : Total rebounds per game
AST : Assists per game
STL : Steals per game
BLK : Blocks per game
TOV : Turnovers per game
PF : Personal fouls per game
PTS : Points per game

In NBA statistics, "TOT" stands for "Total." It represents a player's combined statistics
when they played for multiple teams within a single season.
For instance, if a player was traded or moved between teams, their stats for each team are recorded individually,
and "TOT" aggregates these stats across all teams they played for.

# B. Answer the following questions using Native Python or SQL, or combinations of both

ANSWER THE FOLLOWING
If numerical, 2 decimal places only (no rounding)

In [7]:
# 1. Which Team has the most 3-point field goal attempts per game in the season?
team_most_3pt = df.groupby('Tm')['3PA'].mean().idxmax()
team_most_3pt

'ATL'

In [10]:
# 2. If I have to recruit the best rebounding (in terms of rebounds per game) shooting guard (SG) in the NBA with at least 1 steal per game, who should I consider?
best_rebounding_sg = df[(df['Pos'] == 'SG') & (df['STL'] >= 1)].sort_values(by='TRB', ascending=False).iloc[0]
best_rebounding_sg['Player']

'Scottie Barnes'

In [13]:
# 3.  Which Player is the best defensive player in terms of combined statistics of (total rebounds per game, steals per game, and blocks per game)?
df['Defensive_Stats'] = df['TRB'] + df['STL'] + df['BLK']
best_defensive_player = df.sort_values(by='Defensive_Stats', ascending=False).iloc[0]
best_defensive_player['Player']

'Anthony Davis'

In [15]:
# 4.  What is the average Effective field goal percentage of players with pure PG and pure SG positions combined?
average_efg_pg_sg = df[df['Pos'].isin(['PG', 'SG'])]['eFG%'].mean()
average_efg_pg_sg

0.4851362126245847

In [17]:
# 5. Can you give me the average Field goals per game (FG) of Position SG for the Tm of 'GSW'
average_fg_sg_gsw = df[(df['Pos'] == 'SG') & (df['Tm'] == 'GSW')]['FG'].mean()
average_fg_sg_gsw

2.1799999999999997

In [18]:
# 6. What is the player's first name with the highest points per game (PTS) among those who have an effective field goal percentage (eFG%) above the team average for their respective teams, and have played at least 50 games in the season?
team_avg_efg = df.groupby('Tm')['eFG%'].transform('mean')

player_high_pts_efg_above_avg = df[(df['eFG%'] > team_avg_efg) & (df['G'] >= 50)].sort_values(by='PTS', ascending=False).iloc[0]
player_high_pts_efg_above_avg['Player'].split()[0]

'Luka'

In [21]:
# 7. Which player has the highest free throw percentage (FT%) among those with at least 4 free throw attempts per game (FTA) and who play for a team in the Western Conference? You can google which teams in the NBA are Western Conference teams.
western_teams = ['DAL', 'DEN', 'GSW', 'HOU', 'LAC', 'LAL', 'MEM', 'MIN', 'NOP', 'OKC', 'PHX', 'POR', 'SAC', 'SAS', 'UTA']

best_ft_west = df[(df['Tm'].isin(western_teams)) & (df['FTA'] >= 4)].sort_values(by='FT%', ascending=False).iloc[0]
best_ft_west['Player']


'Stephen Curry'

In [23]:
# 8. Which player have names that start with the letter 'J' and play the Center (C) position with the most PTS?
center_j_players = df[(df['Pos'] == 'C') & (df['Player'].str.startswith('J'))]
player_with_most_pts = center_j_players.sort_values(by='PTS', ascending=False).iloc[0]
player_with_most_pts['Player']



'Joel Embiid'

In [24]:
# 9. Among all players, who has played the most minutes per game (MP) but has the lowest points per game (PTS)?
max_mp_min_pts = df.sort_values(by=['MP', 'PTS'], ascending=[False, True]).iloc[0]
max_mp_min_pts['Player']

'DeMar DeRozan'

In [25]:
# 10. Identify the player with the best combination of offensive and defensive performance, among those whose names contain the letter 'a'  and who have played at least 60 games. The ideal player should have high points per game (PTS) for offensive impact and high combined  defensive stats (total rebounds per game (TRB) + steals per game (STL) + blocks per game (BLK))

df['Combined_Performance'] = df['PTS'] + df['Defensive_Stats']

best_combined_perf = df[(df['Player'].str.contains('a')) & (df['G'] >= 60)].sort_values(by='Combined_Performance', ascending=False).iloc[0]
best_combined_perf['Player']

'Luka Don?i?'