In [1]:
import pandas as pd
# Never truncate columns when a wide frame is rendered.
pd.options.display.max_columns = None

# Workbook holding the per-game player box-score rows used throughout the notebook.
BOX_SCORE_PATH = '../data/gsw_box_score_player_stats.xlsx'
df = pd.read_excel(BOX_SCORE_PATH)

### Stephen Curry Stats

In [2]:
# Stephen Curry's rows, restricted to games that have a non-null MIN value.
is_curry = df['NAME'].eq('Stephen Curry')
has_minutes = df['MIN'].notna()
steph_df = df.loc[is_curry & has_minutes]

# Sanity check: the filter should leave exactly one distinct player name.
steph_df['NAME'].nunique()

1

In [3]:
# Preview the last three rows of the filtered frame.
steph_df.iloc[-3:]

Unnamed: 0.1,Unnamed: 0,ID,NAME,TEAM,OPP,STATUS,DATE,MIN,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,TREB,AST,STL,BLK,TOV,PF,PTS,+/-
2104,2104,201939,Stephen Curry,GSW,SAS,home,2025-04-09,36:36,12,24,0.5,5,14,0.357,1,1,1.0,2,6,8,3,2,0,1,0,30,13
2118,2118,201939,Stephen Curry,GSW,POR,away,2025-04-11,27:20,6,14,0.429,2,8,0.25,0,0,0.0,1,4,5,5,0,1,1,2,14,8
2164,2164,201939,Stephen Curry,GSW,LAC,home,2025-04-13,37:60,10,20,0.5,7,12,0.583,9,9,1.0,1,2,3,6,2,0,8,2,36,-16


In [4]:
def get_best_or_worst_player_stats(stat: str, lowest: bool, other_columns: list, n: int=5,
                                   player_df: "pd.DataFrame | None" = None,
                                   player_name: str = "Steph Curry"):
    """Print and return a player's ``n`` lowest or highest games for one stat.

    Parameters
    ----------
    stat : str
        Column to rank the games by (e.g. ``'PTS'``).
    lowest : bool
        ``True`` selects the ``n`` smallest values, ``False`` the ``n`` largest.
    other_columns : list
        Extra columns printed next to the base game columns and ``stat``.
    n : int, default 5
        Maximum number of games to select.
    player_df : pandas.DataFrame, optional
        Game rows to analyse. Must contain ``TEAM``, ``OPP``, ``STATUS``,
        ``DATE``, ``MIN``, ``stat`` and every entry of ``other_columns``.
        When omitted, the notebook-level ``steph_df`` is used, which keeps
        existing calls working unchanged.
    player_name : str, default "Steph Curry"
        Label used in the printed summary lines.

    Returns
    -------
    pandas.DataFrame
        The selected rows with all of their columns, ordered by ``stat``.
    """
    if player_df is None:
        # Backward-compatible default: fall back to the notebook-level frame.
        player_df = steph_df

    filtered_games = player_df.sort_values(by=stat, ascending=lowest).head(n)
    display_cols = ['TEAM', 'OPP', 'STATUS', 'DATE', 'MIN', stat] + list(other_columns)

    # Both averages are rounded the same way so the two lines are comparable.
    season_avg = round(player_df[stat].mean(), 2)
    sample_avg = round(filtered_games[stat].mean(), 2)

    print(f"\n{player_name}'s average {stat} per game this season: {season_avg}")
    # Report the real sample size: head(n) returns fewer rows when fewer exist.
    print(f"{player_name}'s average {stat} based on this {len(filtered_games)}-game sample: {sample_avg}")
    # filtered_games is already ordered by stat, so the printed table matches the returned frame.
    print(filtered_games[display_cols])
    return filtered_games

In [5]:
# Shooting splits shown next to the point totals in both tables.
pts_context_cols = ['FG%', '3P%', 'FT%']

steph_lowest_pts_games = get_best_or_worst_player_stats(
    stat='PTS', lowest=True, other_columns=pts_context_cols, n=5
)
steph_highest_pts_games = get_best_or_worst_player_stats(
    stat='PTS', lowest=False, other_columns=pts_context_cols, n=5
)


Steph Curry's average PTS per game this season: 24.54
Steph Curry's average PTS based on this 5-game sample: 7.4
     TEAM  OPP STATUS        DATE    MIN  PTS    FG%    3P%  FT%
661   GSW  MEM   away  2024-12-19  24:22    2  0.000  0.000  1.0
2045  GSW  HOU   home  2025-04-06  32:40    3  0.100  0.125  0.0
725   GSW  IND   home  2024-12-23  35:25   10  0.154  0.222  1.0
834   GSW  CLE   home  2024-12-30  29:05   11  0.286  0.273  0.0
1729  GSW  SAC   home  2025-03-13  30:16   11  0.444  0.333  1.0

Steph Curry's average PTS per game this season: 24.54
Steph Curry's average PTS based on this 5-game sample: 44.8
     TEAM  OPP STATUS        DATE    MIN  PTS    FG%    3P%    FT%
1528  GSW  ORL   away  2025-02-27  34:18   56  0.640  0.632  1.000
1945  GSW  MEM   away  2025-04-01  36:32   52  0.516  0.600  1.000
1632  GSW  BKN   away  2025-03-06  35:36   40  0.600  0.538  1.000
753   GSW  LAL   home  2024-12-25  35:37   38  0.583  0.533  1.000
1365  GSW  MIL   away  2025-02-10  33:59   38 

In [6]:
# Attempts, accuracy and points shown next to the made three-pointers.
three_pt_context_cols = ['3PA', '3P%', 'PTS']

steph_lowest_3pt_games = get_best_or_worst_player_stats(
    stat='3PM', lowest=True, other_columns=three_pt_context_cols, n=5
)
steph_highest_3pt_games = get_best_or_worst_player_stats(
    stat='3PM', lowest=False, other_columns=three_pt_context_cols, n=5
)


Steph Curry's average 3PM per game this season: 4.44
Steph Curry's average 3PM based on this 5-game sample: 0.8
     TEAM  OPP STATUS        DATE    MIN  3PM  3PA    3P%  PTS
661   GSW  MEM   away  2024-12-19  24:22    0    6  0.000    2
220   GSW  CLE   away  2024-11-08  23:46    1    4  0.250   12
1246  GSW  PHX   home  2025-01-31  31:13    1    6  0.167   14
1102  GSW  SAC   away  2025-01-22  33:50    1    4  0.250   14
2045  GSW  HOU   home  2025-04-06  32:40    1    8  0.125    3

Steph Curry's average 3PM per game this season: 4.44
Steph Curry's average 3PM based on this 5-game sample: 9.6
     TEAM  OPP STATUS        DATE    MIN  3PM  3PA    3P%  PTS
1528  GSW  ORL   away  2025-02-27  34:18   12   19  0.632   56
1945  GSW  MEM   away  2025-04-01  36:32   12   20  0.600   52
937   GSW  MIA   home  2025-01-07  33:24    8   17  0.471   31
443   GSW  BKN   home  2024-11-25  29:28    8   16  0.500   28
1338  GSW  CHI   away  2025-02-08  33:56    8   16  0.500   34


---

### Best Scorers Against GSW

In [7]:
# Opponent rows only: every row whose TEAM is not GSW.
opp_df = df.loc[df['TEAM'].ne('GSW')]
print(f"Unique teams: {opp_df['TEAM'].nunique()}")
print(f"MIN null values: {opp_df['MIN'].isna().sum()}")

# Drop rows whose MIN is null, then confirm that none remain.
opp_df = opp_df.dropna(subset=['MIN'])
print(f"MIN null values: {opp_df['MIN'].isna().sum()}")
opp_df.shape

Unique teams: 29
MIN null values: 225
MIN null values: 0


(865, 27)

In [8]:
# Ten highest single-game point totals by an opposing player against GSW.
top_scorer_cols = ['NAME', 'TEAM', 'DATE', 'FG%', '3P%', 'FT%', 'PTS']
opp_df[top_scorer_cols].sort_values(by='PTS', ascending=False).head(10)

Unnamed: 0,NAME,TEAM,DATE,FG%,3P%,FT%,PTS
1204,Shai Gilgeous-Alexander,OKC,2025-01-29,0.552,0.333,0.857,52
636,Luka Dončić,DAL,2024-12-15,0.696,0.545,0.778,45
1568,Quentin Grimes,PHI,2025-03-01,0.75,0.667,0.25,44
1405,Kyrie Irving,DAL,2025-02-12,0.6,0.7,1.0,42
1322,LeBron James,LAL,2025-02-06,0.56,0.667,0.8,42
1538,Paolo Banchero,ORL,2025-02-27,0.593,0.4,0.625,41
2149,James Harden,LAC,2025-04-13,0.565,0.625,0.889,39
516,Nikola Jokić,DEN,2024-12-03,0.583,0.75,0.778,38
1765,Aaron Gordon,DEN,2025-03-17,0.609,0.667,1.0,38
1049,Jordan Poole,WAS,2025-01-18,0.48,0.533,1.0,38


In [9]:
# Highest Opponent Player PPG (4 games or more) OPTION 1
# Number of rows (games with a recorded MIN) per opposing player.
games_played = opp_df.groupby('ID').size().reset_index(name='GAMES')

excl_cols = ['index', 'Unnamed: 0','NAME', 'TEAM', 'OPP', 'STATUS', 'DATE', 'MIN']
incl_cols = [col for col in opp_df.columns if col not in excl_cols]
info_cols = ['ID', 'NAME']

# 'ID' is the grouping key, not a stat: averaging it turns it into a float
# (e.g. 2544.0) and relies on how pandas handles a key that is also selected.
# Average only the remaining columns so the key keeps its original dtype.
stat_cols = [col for col in incl_cols if col != 'ID']
opp_avg = opp_df.groupby(by=['ID'], as_index=False)[stat_cols].mean()

# One NAME per ID (first occurrence), attached together with the game counts.
player_info = opp_df[info_cols].drop_duplicates(subset='ID')
new_opp_avg_df = pd.merge(opp_avg, games_played, on='ID')
new_opp_avg_df = pd.merge(new_opp_avg_df, player_info, on='ID')

# Put the identifying columns first, followed by the averages and GAMES.
new_cols = info_cols + [col for col in new_opp_avg_df.columns if col not in info_cols]
new_opp_avg_df = new_opp_avg_df[new_cols]
new_opp_avg_df[new_opp_avg_df['GAMES'] >= 4].sort_values(by=['PTS', 'FG%'], ascending=False).head(10)

Unnamed: 0,ID,NAME,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,TREB,AST,STL,BLK,TOV,PF,PTS,+/-,GAMES
0,2544.0,LeBron James,12.0,21.75,0.563,3.25,5.5,0.448,5.5,6.5,0.79725,0.75,7.0,7.75,9.75,1.25,0.75,2.75,1.5,32.75,5.25,4
200,1630162.0,Anthony Edwards,8.5,19.0,0.45125,4.25,9.75,0.441,4.75,5.25,0.91475,0.5,5.5,6.0,5.5,0.75,0.75,4.0,2.25,26.0,4.25,4
12,201942.0,DeMar DeRozan,8.75,15.5,0.56425,1.75,4.75,0.26775,6.0,6.5,0.927,0.0,3.5,3.5,4.0,1.25,0.5,1.5,1.75,25.25,6.0,4
18,202681.0,Kyrie Irving,9.5,18.0,0.526,2.5,5.75,0.3625,3.75,4.25,0.9,1.0,3.5,4.5,4.5,0.5,0.5,2.5,2.25,25.25,-3.75,4
258,1630559.0,Austin Reaves,7.0,16.75,0.43375,3.5,9.75,0.30325,6.5,7.0,0.90125,1.25,4.75,6.0,6.0,1.5,0.25,2.5,2.0,24.0,4.25,4
136,1628991.0,Jaren Jackson Jr.,8.5,17.75,0.481,0.75,4.5,0.1805,5.75,6.0,0.97725,1.25,5.75,7.0,2.25,1.25,1.25,1.5,3.5,23.5,4.5,4
11,201935.0,James Harden,7.25,17.75,0.39425,2.75,8.5,0.34375,5.75,6.0,0.97225,0.75,5.0,5.75,11.0,1.25,1.25,5.0,2.25,23.0,3.25,4
75,1626181.0,Norman Powell,7.75,15.75,0.48525,2.5,6.5,0.393,2.0,2.25,0.66675,0.0,1.75,1.75,0.25,1.0,0.5,2.75,3.0,20.0,8.25,4
22,202691.0,Klay Thompson,6.25,14.5,0.4365,5.25,11.0,0.4805,2.0,2.25,0.7,0.0,4.5,4.5,2.75,1.25,0.75,1.5,3.0,19.75,7.0,4
225,1630217.0,Desmond Bane,7.0,14.25,0.51375,2.5,6.75,0.375,2.0,2.25,0.6875,0.25,4.25,4.5,6.0,1.75,0.5,4.25,0.5,18.5,6.25,4


In [10]:
# Highest Opponent Player PPG (4 games or more) OPTION 2
# Number of rows (games with a recorded MIN) per opposing player.
gps = opp_df.groupby(by='ID').size().reset_index(name='GAMES')

# Columns averaged per player; NAME is carried along via its first value.
avg_stat_cols = [
    'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%',
    'OREB', 'DREB', 'TREB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', '+/-',
]
agg_spec = {'NAME': 'first', **{col: 'mean' for col in avg_stat_cols}}

# Aggregate over opp_df, not df: df also holds GSW rows and rows with a null
# MIN, so averaging it could mix in games that the GAMES count (built from
# opp_df) does not include.
opp_player_avg_df = opp_df.groupby(by='ID').agg(agg_spec).reset_index()

# Rounding all numeric columns to 2 decimal places
numeric_cols = opp_player_avg_df.select_dtypes(include='number').columns
opp_player_avg_df[numeric_cols] = opp_player_avg_df[numeric_cols].round(2)

opp_player_avg_stats_df = pd.merge(opp_player_avg_df, gps, on='ID')
opp_player_avg_stats_df[opp_player_avg_stats_df['GAMES'] >= 4].sort_values(by=['PTS', 'FG%'], ascending=False)[['NAME', 'FG%', '3P%', 'FT%', 'PTS']].head(10)

Unnamed: 0,NAME,FG%,3P%,FT%,PTS
0,LeBron James,0.56,0.45,0.8,32.75
200,Anthony Edwards,0.45,0.44,0.91,26.0
12,DeMar DeRozan,0.56,0.27,0.93,25.25
18,Kyrie Irving,0.53,0.36,0.9,25.25
258,Austin Reaves,0.43,0.3,0.9,24.0
136,Jaren Jackson Jr.,0.48,0.18,0.98,23.5
11,James Harden,0.39,0.34,0.97,23.0
75,Norman Powell,0.49,0.39,0.67,20.0
22,Klay Thompson,0.44,0.48,0.7,19.75
225,Desmond Bane,0.51,0.38,0.69,18.5


In [11]:
# Identifying columns printed alongside each stat.
opp_base_cols = ['ID', 'NAME', 'TEAM', 'STATUS', 'DATE', 'MIN']
# Counting stats to scan for the top single-game value by an opponent.
opp_cols_of_interest = [
    'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA',
    'OREB', 'DREB', 'TREB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', '+/-',
]

# For every stat, print each row that ties the maximum value in opp_df.
for col in opp_cols_of_interest:
    is_top_value = opp_df[col].eq(opp_df[col].max())
    print(opp_df.loc[is_top_value, opp_base_cols + [col]], "\n")

           ID            NAME TEAM STATUS        DATE    MIN  FGM
1568  1629656  Quentin Grimes  PHI   home  2025-03-01  36:43   18 

           ID                     NAME TEAM STATUS        DATE    MIN  FGA
1204  1628983  Shai Gilgeous-Alexander  OKC   away  2025-01-29  39:06   29 

           ID           NAME TEAM STATUS        DATE    MIN  3PM
1984  1630559  Austin Reaves  LAL   home  2025-04-03  40:03    9 

          ID            NAME TEAM STATUS        DATE    MIN  3PA
178  1642273  Kyshawn George  WAS   home  2024-11-04  37:51   17 

           ID                     NAME TEAM STATUS        DATE    MIN  FTM
1204  1628983  Shai Gilgeous-Alexander  OKC   away  2025-01-29  39:06   18 

           ID                     NAME TEAM STATUS        DATE    MIN  FTA
1204  1628983  Shai Gilgeous-Alexander  OKC   away  2025-01-29  39:06   21 

           ID            NAME TEAM STATUS        DATE    MIN  OREB
1299  1631117  Walker Kessler  UTA   home  2025-02-05  35:32     8 

          

---