#### Imports

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [14]:
# Read the cleaned dataset, then work on an independent copy so that the
# feature columns added below never touch df_clean itself.
df_clean = pd.read_parquet(path="../data/cleaned.parquet")
fe_df = df_clean.copy(deep=True)

In [15]:
# Rescale the per-game pts / ast / trb columns to a per-36-minute rate.
# eps is added to mp_per_g so a value of exactly 0 does not raise or yield inf.
eps = 1e-9
for stat in ("pts", "ast", "trb"):
    fe_df[f"{stat}_per_36"] = (fe_df[f"{stat}_per_g"] / (fe_df["mp_per_g"] + eps)) * 36

In [16]:
# Scoring mix: share of per-game points coming from threes and from free throws.
# NOTE(review): assumes fg3_per_g / ft_per_g count *made* shots per game — confirm against the cleaned schema.
pts_denominator = fe_df["pts_per_g"] + eps  # eps (set in an earlier cell) guards against dividing by zero
fe_df["fg3_share_of_pts"] = (3 * fe_df["fg3_per_g"]) / pts_denominator
fe_df["ft_share_of_pts"] = fe_df["ft_per_g"] / pts_denominator

In [17]:
# Offence-minus-defence balance for win shares (ows - dws) and box plus/minus
# (obpm - dbpm); a positive value means the offensive component is the larger one.
fe_df["two_way_ws_balance"] = fe_df["ows"].sub(fe_df["dws"])
fe_df["two_way_bpm_balance"] = fe_df["obpm"].sub(fe_df["dbpm"])

In [18]:
# Columns that are standardised (z-scored) within each season further down.
season_key_cols = [
    "pts_per_g",
    "ts_pct",
    "usg_pct",
    "bpm",
    "vorp",
    "ws_per_48",
    "mp_per_g",
    "g",
    "win_loss_pct",
    "mov",
]

def zscore(s: pd.Series, eps: float = 1e-9) -> pd.Series:
    """Standardise a series to zero mean and (approximately) unit variance.

    Parameters
    ----------
    s : pd.Series
        Values to standardise. NaNs are skipped by ``mean``/``std`` and stay
        NaN in the result.
    eps : float, default 1e-9
        Small constant added to the standard deviation so that a constant
        series (std == 0) yields zeros instead of a division by zero.

    Returns
    -------
    pd.Series
        ``(s - mean) / (population_std + eps)``, aligned to the index of ``s``.
    """
    centered = s - s.mean()
    # ddof=0 -> population standard deviation, so a single-row group gives 0, not NaN.
    spread = s.std(ddof=0) + eps
    return centered / spread

# Add one "<col>_z_season" column per key stat, standardised within its season group.
for stat_col in season_key_cols:
    per_season = fe_df.groupby("season")[stat_col]
    fe_df[stat_col + "_z_season"] = per_season.transform(zscore)

In [19]:
# Within-season rank of each key stat: rank 1 is the highest value in the
# season, and tied rows share the average of the ranks they span.
rank_cols = ["pts_per_g", "bpm", "vorp", "ws", "ts_pct", "win_loss_pct"]

for stat_col in rank_cols:
    season_groups = fe_df.groupby("season")[stat_col]
    fe_df[stat_col + "_rank_season"] = season_groups.rank(method="average", ascending=False)

In [20]:
# Season minutes load: per-game minutes (mp_per_g) multiplied by the g column.
fe_df["minutes_load"] = fe_df["mp_per_g"].mul(fe_df["g"])

In [21]:
# Usage efficiency: the product of usg_pct and ts_pct for each row.
fe_df["usage_efficiency"] = fe_df["usg_pct"].mul(fe_df["ts_pct"])

In [22]:
# Persist the engineered feature table as a parquet artifact; the DataFrame
# index is dropped rather than written to the file.
fe_df.to_parquet(path="../data/features.parquet", index=False)