In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


''''
Goal is given a player and some of his stats from the last couple games and some stats on the opponent to output a prediction on the amount of points he will score in a game.
Glossary:

Rk -- Rank
Gcar -- Career Game Number for Player
Click to see career stats through the
end of this game for this player.
Gtm -- Season Game Number for Team
Click to see season stats through the
end of this game for this player.
GS -- Games Started
MP -- Minutes Played
FG -- Field Goals
FGA -- Field Goal Attempts
FG% -- Field Goal Percentage
3P -- 3-Point Field Goals
3PA -- 3-Point Field Goal Attempts
3P% -- 3-Point Field Goal Percentage
2P -- 2-Point Field Goals
2PA -- 2-Point Field Goal Attempts
2P% -- 2-Point Field Goal Percentage
eFG% -- Effective Field Goal Percentage
This statistic adjusts for the fact that a 3-point field goal is worth one more point than a 2-point field goal.
FT -- Free Throws
FTA -- Free Throw Attempts
FT% -- Free Throw Percentage
ORB -- Offensive Rebounds
DRB -- Defensive Rebounds
TRB -- Total Rebounds
AST -- Assists
STL -- Steals
BLK -- Blocks
TOV -- Turnovers
PF -- Personal Fouls
PTS -- Points
GmSc -- Game Score
+/- -- Plus-Minus


Feature set: (all computed before the game to avoid leakage)
1) Player recent form (rolling windows)

last5_pts_avg, last10_pts_avg

last5_min_avg, last5_fga_avg, last5_3pa_avg, last5_fta_avg

last1_min (workload last game)

Shooting efficiency proxies: last5_fg_pct, last5_ts_pct (if you have FTA/FGA/3PA)

Stability: last5_pts_std (variance in role/performance)

2) Workload & rest

days_rest (since previous game)

is_b2b (back-to-back), games_past_7d

3) Opponent team defense & pace (season-to-date or last-N games)

opp_def_rating (points allowed per 100 possessions). 
NBA
+1

Opponent Four Factors (defense): opp_def_efg, opp_def_tov_pct, opp_def_drb_pct, opp_def_ft_fga. 
NBA
+1

opp_pace (possessions per 48). 
Basketball Reference

4) Context

home (1=home, 0=away)

month_sin, month_cos (seasonality)

team_pace_sd (your team’s pace season-to-date)



'''

In [50]:
numeric_cols = ["Rk","Gcar","Gtm","FG","FGA","FG%","3P","3PA","3P%","2P","2PA","2P%","eFG%","FT","FTA","FT%","ORB","DRB","TRB","AST","STL","BLK","TOV","PF","PTS","GmSc","+/-"]
giannis_2025_df = pd.read_csv("data/giannis_2025.csv", converters={c: (lambda x: pd.to_numeric(x, errors="ignore")) for c in numeric_cols})

giannis_2025_df.head()
giannis_2025_df.dtypes

  giannis_2025_df = pd.read_csv("data/giannis_2025.csv", converters={c: (lambda x: pd.to_numeric(x, errors="ignore")) for c in numeric_cols})


Rk            float64
Gcar          float64
Gtm           float64
Date           object
Team           object
Unnamed: 5     object
Opp            object
Result         object
GS             object
MP             object
FG             object
FGA            object
FG%            object
3P             object
3PA            object
3P%            object
2P             object
2PA            object
2P%            object
eFG%           object
FT             object
FTA            object
FT%            object
ORB            object
DRB            object
TRB            object
AST            object
STL            object
BLK            object
TOV            object
PF             object
PTS            object
GmSc           object
+/-            object
dtype: object

In [53]:
giannis_2025_df.rename(columns={"Unnamed: 5": "Home Game"}, inplace=True)
giannis_2025_df.head()

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Home Game,Opp,Result,GS,MP,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1.0,793.0,1.0,2024-10-23,MIL,@,PHI,W 124-109,*,31:12,...,13,14,7,0,1,3,4,25,23.3,7
1,2.0,794.0,2.0,2024-10-25,MIL,,CHI,L 122-133,*,35:08,...,10,11,5,0,0,3,1,38,28.5,-12
2,3.0,795.0,3.0,2024-10-27,MIL,@,BRK,L 102-115,*,33:19,...,11,12,7,1,1,4,3,22,21.3,-11
3,4.0,796.0,4.0,2024-10-28,MIL,@,BOS,L 108-119,*,39:05,...,9,10,6,0,0,7,4,30,14.8,-19
4,5.0,797.0,5.0,2024-10-31,MIL,@,MEM,L 99-122,*,33:31,...,10,11,4,1,1,1,4,37,32.0,0


In [54]:
giannis_2025_df["Home Game"] = giannis_2025_df["Home Game"] != "@"
giannis_2025_df.head()

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Home Game,Opp,Result,GS,MP,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1.0,793.0,1.0,2024-10-23,MIL,False,PHI,W 124-109,*,31:12,...,13,14,7,0,1,3,4,25,23.3,7
1,2.0,794.0,2.0,2024-10-25,MIL,True,CHI,L 122-133,*,35:08,...,10,11,5,0,0,3,1,38,28.5,-12
2,3.0,795.0,3.0,2024-10-27,MIL,False,BRK,L 102-115,*,33:19,...,11,12,7,1,1,4,3,22,21.3,-11
3,4.0,796.0,4.0,2024-10-28,MIL,False,BOS,L 108-119,*,39:05,...,9,10,6,0,0,7,4,30,14.8,-19
4,5.0,797.0,5.0,2024-10-31,MIL,False,MEM,L 99-122,*,33:31,...,10,11,4,1,1,1,4,37,32.0,0


In [84]:
def mmss_to_minutes(x):
    if pd.isna(x):
        return np.nan
    try:
        m, s = str(x).split(":")
        return int(m) + int(s) / 60.0
    except Exception:
        return np.nan

giannis_2025_df["MP_min"] = giannis_2025_df["MP"].apply(mmss_to_minutes)
giannis_2025_df.head()

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Home Game,Opp,Result,GS,MP,...,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-,MP_min
0,1.0,793.0,1.0,2024-10-23,MIL,False,PHI,W 124-109,*,31:12,...,14,7,0,1,3,4,25,23.3,7,31.2
1,2.0,794.0,2.0,2024-10-25,MIL,True,CHI,L 122-133,*,35:08,...,11,5,0,0,3,1,38,28.5,-12,35.133333
2,3.0,795.0,3.0,2024-10-27,MIL,False,BRK,L 102-115,*,33:19,...,12,7,1,1,4,3,22,21.3,-11,33.316667
3,4.0,796.0,4.0,2024-10-28,MIL,False,BOS,L 108-119,*,39:05,...,10,6,0,0,7,4,30,14.8,-19,39.083333
4,5.0,797.0,5.0,2024-10-31,MIL,False,MEM,L 99-122,*,33:31,...,11,4,1,1,1,4,37,32.0,0,33.516667


In [112]:
giannis_2025_df["Date"] = pd.to_datetime(giannis_2025_df["Date"], errors="coerce")

In [111]:
def get_rolling_average(cat, amount_num, df):
        rolling_average = pd.Series()
        for i in range(amount_num, len(df)):
            rolling_average[i] = pd.to_numeric(df[cat][(i - amount_num):i], errors="coerce").mean()
        return rolling_average

In [100]:
get_rolling_average("PTS", amount_num=10, df=giannis_2025_df).head(20)

10    31.555556
11    31.333333
12    33.666667
13    33.666667
14    32.555556
15    33.000000
16    33.333333
17    33.200000
18    33.444444
19    35.444444
20    33.777778
21    34.666667
22    31.444444
23    32.777778
24    34.666667
25    33.666667
26    33.222222
27    33.375000
28    33.375000
29    32.142857
dtype: float64

In [101]:
giannis_2025_df["MP_min"][5:]

5     38.866667
6           NaN
7     30.200000
8     32.200000
9     39.150000
        ...    
78    37.650000
79    25.566667
80    37.766667
81          NaN
82          NaN
Name: MP_min, Length: 78, dtype: float64

In [102]:
giannis_2025_df["MP_min"][5:].shift(1)

5           NaN
6     38.866667
7           NaN
8     30.200000
9     32.200000
        ...    
78          NaN
79    37.650000
80    25.566667
81    37.766667
82          NaN
Name: MP_min, Length: 78, dtype: float64

In [109]:
giannis_2025_df.iloc[3]

Rk                  4.0
Gcar              796.0
Gtm                 4.0
Date         2024-10-28
Team                MIL
Home Game         False
Opp                 BOS
Result        L 108-119
GS                    *
MP                39:05
FG                   14
FGA                  26
FG%               0.538
3P                    0
3PA                   2
3P%                 0.0
2P                   14
2PA                  24
2P%               0.583
eFG%              0.538
FT                    2
FTA                   6
FT%               0.333
ORB                   1
DRB                   9
TRB                  10
AST                   6
STL                   0
BLK                   0
TOV                   7
PF                    4
PTS                  30
GmSc               14.8
+/-                 -19
MP_min        39.083333
Name: 3, dtype: object

In [120]:
(giannis_2025_df["Date"] - giannis_2025_df["Date"].shift(1)).dt.days

0     NaN
1     2.0
2     2.0
3     1.0
4     3.0
     ... 
78    2.0
79    2.0
80    1.0
81    2.0
82    NaN
Name: Date, Length: 83, dtype: float64

In [119]:
giannis_2025_df["Date"]

0    2024-10-23
1    2024-10-25
2    2024-10-27
3    2024-10-28
4    2024-10-31
        ...    
78   2025-04-08
79   2025-04-10
80   2025-04-11
81   2025-04-13
82          NaT
Name: Date, Length: 83, dtype: datetime64[ns]

In [121]:
# Recent game data

giannis_feature_set_df["last5_pts_avg"] = get_rolling_average(cat="PTS", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["last5_min_avg"] = get_rolling_average(cat="MP_min", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["last5_fga_avg"] = get_rolling_average(cat="FGA", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["last5_3pa_avg"] = get_rolling_average(cat="3PA", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["last5_fta_avg"] = get_rolling_average(cat="FTA", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["last1_min_played"] = giannis_2025_df["MP_min"].shift(1)
giannis_feature_set_df["last5_efg_avg"] = get_rolling_average(cat="eFG%", amount_num=5, df=giannis_2025_df)
giannis_feature_set_df["days_rest"] = (giannis_2025_df["Date"] - giannis_2025_df["Date"].shift(1)).dt.days
giannis_feature_set_df.head(10)

Unnamed: 0,last5_pts_avg,last5_min_avg,last5_fga_avg,last5_3pa_avg,last5_fta_avg,last1_min_played,last5_efg_avg,days_rest
0,,,,,,,,
1,,,,,,31.2,,2.0
2,,,,,,35.133333,,2.0
3,,,,,,33.316667,,1.0
4,,,,,,39.083333,,3.0
5,30.4,34.45,18.6,0.6,11.4,33.516667,0.6652,2.0
6,32.2,35.983333,21.8,0.6,9.6,38.866667,0.631,2.0
7,30.75,36.195833,21.5,0.5,8.0,,0.62575,3.0
8,33.0,35.416667,24.0,0.5,8.25,30.2,0.59775,1.0
9,31.5,33.695833,22.75,0.25,8.25,32.2,0.59425,2.0
