# Generate xFG using defensive features as well


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

In [2]:
# Read the processed defensive-variables CSV into a DataFrame and preview it.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was written with its index; consider index_col=0 if it is not needed.
def_variables_csv = "../data/processed/def_variables.csv"
shots_df = pd.read_csv(def_variables_csv)
shots_df.head()


Unnamed: 0,GAME_ID,GAME_EVENT_ID,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,SHOT_MADE_FLAG,LOC_X,LOC_Y,PLAYER_ID,TEAM_ID,...,def_accel_mean,def_accel_max,shooter_speed_mean,shooter_speed_max,shooter_accel_mean,shooter_accel_max,window_frames,game_clock_release,shot_clock_release,release_idx
0,21500622,8,1,10,17,0,-25.0,219.0,201567,1610612739,...,3.020805,7.275966,16.081145,17.255065,3.380943,7.433706,26,617.03,21.72,164
1,21500622,10,1,10,10,1,168.0,178.0,202691,1610612744,...,5.548112,10.658684,9.37605,12.851462,10.286402,24.658259,26,610.24,24.0,334
2,21500622,28,1,8,36,0,-52.0,244.0,2544,1610612739,...,1.951771,4.857779,11.89017,12.715391,1.671228,3.731838,26,516.01,22.57,359
3,21500622,30,1,8,23,0,-135.0,214.0,203110,1610612744,...,4.650577,11.870451,10.710293,13.155977,6.231974,10.759305,26,503.01,23.17,309
4,21500622,34,1,8,6,0,22.0,51.0,101106,1610612744,...,4.505002,13.466623,13.65364,15.155399,3.536798,7.147216,26,486.02,22.94,163


### 1) Choose Columns


In [3]:
# Baseline expectation used as a feature (shot prior / shooter+location prior).
base_columns = ["xFG_base"]

# Defender- and shooter-state columns fed to the model alongside the prior.
defense_columns = [
    "close_def_dist_release",
    "close_def_closing_speed_mean",
    "def_speed_mean",
    "def_accel_mean",
    "shooter_speed_mean",
    "shooter_accel_mean",
]

# Full model input: prior first, then the defensive state.
feature_columns = [*base_columns, *defense_columns]

# Outcome minus baseline expectation, computed on the unfiltered frame.
# NOTE(review): not referenced again in the visible cells; the model is
# trained on shots["residual"] instead.
target = shots_df["SHOT_MADE_FLAG"].sub(shots_df["xFG_base"])


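### 2) Define Learning Target
- The target is the residual `SHOT_MADE_FLAG - xFG_base`, i.e. the "defense effect beyond expectation".
- residual > 0 means the shot went better than `xFG_base` expected; residual < 0 means it went worse.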

In [4]:
# Keep only rows where both the outcome and the baseline xFG are present,
# then add residual = outcome - baseline (both cast to float) as a new column.
shots = (
    shots_df
    .dropna(subset=["SHOT_MADE_FLAG", "xFG_base"])
    .assign(
        residual=lambda frame: (
            frame["SHOT_MADE_FLAG"].astype(float) - frame["xFG_base"].astype(float)
        )
    )
)

### 3) Fit a smooth model of residual from (xFG_base + defense state)

In [5]:
# Target and design matrix for the residual model.
y = shots["residual"]
X = shots[feature_columns]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# NOTE(review): X_test / y_test are not used in the visible cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# median-impute missing values -> standardize -> ridge regression
residual_model = Pipeline(
    [
        ("impute", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
        ("ridge", Ridge(alpha=1.0)),
    ]
)

residual_model.fit(X_train, y_train)

0,1,2
,"steps  steps: list of tuples List of (name of step, estimator) tuples that are to be chained in sequential order. To be compatible with the scikit-learn API, all steps must define `fit`. All non-last steps must also define `transform`. See :ref:`Combining Estimators ` for more details.","[('impute', ...), ('scale', ...), ...]"
,"transform_input  transform_input: list of str, default=None The names of the :term:`metadata` parameters that should be transformed by the pipeline before passing it to the step consuming it. This enables transforming some input arguments to ``fit`` (other than ``X``) to be transformed by the steps of the pipeline up to the step which requires them. Requirement is defined via :ref:`metadata routing `. For instance, this can be used to pass a validation set through the pipeline. You can only set this if metadata routing is enabled, which you can enable using ``sklearn.set_config(enable_metadata_routing=True)``. .. versionadded:: 1.6",
,"memory  memory: str or object with the joblib.Memory interface, default=None Used to cache the fitted transformers of the pipeline. The last step will never be cached, even if it is a transformer. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming. See :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py` for an example on how to enable caching.",
,"verbose  verbose: bool, default=False If True, the time elapsed while fitting each step will be printed as it is completed.",False

0,1,2
,"missing_values  missing_values: int, float, str, np.nan, None or pandas.NA, default=np.nan The placeholder for the missing values. All occurrences of `missing_values` will be imputed. For pandas' dataframes with nullable integer dtypes with missing values, `missing_values` can be set to either `np.nan` or `pd.NA`.",
,"strategy  strategy: str or Callable, default='mean' The imputation strategy. - If ""mean"", then replace missing values using the mean along  each column. Can only be used with numeric data. - If ""median"", then replace missing values using the median along  each column. Can only be used with numeric data. - If ""most_frequent"", then replace missing using the most frequent  value along each column. Can be used with strings or numeric data.  If there is more than one such value, only the smallest is returned. - If ""constant"", then replace missing values with fill_value. Can be  used with strings or numeric data. - If an instance of Callable, then replace missing values using the  scalar statistic returned by running the callable over a dense 1d  array containing non-missing values of each column. .. versionadded:: 0.20  strategy=""constant"" for fixed value imputation. .. versionadded:: 1.5  strategy=callable for custom value imputation.",'median'
,"fill_value  fill_value: str or numerical value, default=None When strategy == ""constant"", `fill_value` is used to replace all occurrences of missing_values. For string or object data types, `fill_value` must be a string. If `None`, `fill_value` will be 0 when imputing numerical data and ""missing_value"" for strings or object data types.",
,"copy  copy: bool, default=True If True, a copy of X will be created. If False, imputation will be done in-place whenever possible. Note that, in the following cases, a new copy will always be made, even if `copy=False`: - If `X` is not an array of floating values; - If `X` is encoded as a CSR matrix; - If `add_indicator=True`.",True
,"add_indicator  add_indicator: bool, default=False If True, a :class:`MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation. If a feature has no missing values at fit/train time, the feature won't appear on the missing indicator even if there are missing values at transform/test time.",False
,"keep_empty_features  keep_empty_features: bool, default=False If True, features that consist exclusively of missing values when `fit` is called are returned in results when `transform` is called. The imputed value is always `0` except when `strategy=""constant""` in which case `fill_value` will be used instead. .. versionadded:: 1.2",False

0,1,2
,"copy  copy: bool, default=True If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned.",True
,"with_mean  with_mean: bool, default=True If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.",True
,"with_std  with_std: bool, default=True If True, scale the data to unit variance (or equivalently, unit standard deviation).",True

0,1,2
,"alpha  alpha: {float, ndarray of shape (n_targets,)}, default=1.0 Constant that multiplies the L2 term, controlling regularization strength. `alpha` must be a non-negative float i.e. in `[0, inf)`. When `alpha = 0`, the objective is equivalent to ordinary least squares, solved by the :class:`LinearRegression` object. For numerical reasons, using `alpha = 0` with the `Ridge` object is not advised. Instead, you should use the :class:`LinearRegression` object. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number.",1.0
,"fit_intercept  fit_intercept: bool, default=True Whether to fit the intercept for this model. If set to false, no intercept will be used in calculations (i.e. ``X`` and ``y`` are expected to be centered).",True
,"copy_X  copy_X: bool, default=True If True, X will be copied; else, it may be overwritten.",True
,"max_iter  max_iter: int, default=None Maximum number of iterations for conjugate gradient solver. For 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. For 'lbfgs' solver, the default value is 15000.",
,"tol  tol: float, default=1e-4 The precision of the solution (`coef_`) is determined by `tol` which specifies a different convergence criterion for each solver: - 'svd': `tol` has no impact. - 'cholesky': `tol` has no impact. - 'sparse_cg': norm of residuals smaller than `tol`. - 'lsqr': `tol` is set as atol and btol of scipy.sparse.linalg.lsqr,  which control the norm of the residual vector in terms of the norms of  matrix and coefficients. - 'sag' and 'saga': relative change of coef smaller than `tol`. - 'lbfgs': maximum of the absolute (projected) gradient=max|residuals|  smaller than `tol`. .. versionchanged:: 1.2  Default value changed from 1e-3 to 1e-4 for consistency with other linear  models.",0.0001
,"solver  solver: {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}, default='auto' Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. - 'svd' uses a Singular Value Decomposition of X to compute the Ridge  coefficients. It is the most stable solver, in particular more stable  for singular matrices than 'cholesky' at the cost of being slower. - 'cholesky' uses the standard :func:`scipy.linalg.solve` function to  obtain a closed-form solution. - 'sparse_cg' uses the conjugate gradient solver as found in  :func:`scipy.sparse.linalg.cg`. As an iterative algorithm, this solver is  more appropriate than 'cholesky' for large-scale data  (possibility to set `tol` and `max_iter`). - 'lsqr' uses the dedicated regularized least-squares routine  :func:`scipy.sparse.linalg.lsqr`. It is the fastest and uses an iterative  procedure. - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses  its improved, unbiased version named SAGA. Both methods also use an  iterative procedure, and are often faster than other solvers when  both n_samples and n_features are large. Note that 'sag' and  'saga' fast convergence is only guaranteed on features with  approximately the same scale. You can preprocess the data with a  scaler from :mod:`sklearn.preprocessing`. - 'lbfgs' uses L-BFGS-B algorithm implemented in  :func:`scipy.optimize.minimize`. It can be used only when `positive`  is True. All solvers except 'svd' support both dense and sparse data. However, only 'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when `fit_intercept` is True. .. versionadded:: 0.17  Stochastic Average Gradient descent solver. .. versionadded:: 0.19  SAGA solver.",'auto'
,"positive  positive: bool, default=False When set to ``True``, forces the coefficients to be positive. Only 'lbfgs' solver is supported in this case.",False
,"random_state  random_state: int, RandomState instance, default=None Used when ``solver`` == 'sag' or 'saga' to shuffle the data. See :term:`Glossary ` for details. .. versionadded:: 0.17  `random_state` to support Stochastic Average Gradient.",


### 4) Guard Tightness Feature

In [6]:
def add_guard_tightness_feature(
    df,
    model,
    dist_col="close_def_dist_release",
    eps=0.5,        # small distance perturbation
    scale=True,
    feature_cols=None,
    base_col="xFG_base",
):
    """Return a copy of ``df`` with finite-difference "guard tightness" columns.

    The model is evaluated twice per row: once on the row as-is and once with
    ``dist_col`` increased by ``eps``. The difference quotient is the local
    sensitivity of the model output to defender distance; multiplying it by
    ``base_col`` gives the guard-tightness value.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows. Must contain ``base_col`` and every feature column.
        It is not modified.
    model : object
        Anything with a ``predict(X)`` method accepting the feature frame.
    dist_col : str
        Feature column that is perturbed by ``eps``.
    eps : float
        Non-zero finite-difference step added to ``dist_col``.
    scale : bool
        If true, min-max rescale the value using its 1st and 99th percentiles
        (NaNs ignored) and clip to [0, 1]. Scaling is skipped when the two
        percentiles coincide.
    feature_cols : sequence of str, optional
        Columns passed to ``model.predict``. Defaults to the module-level
        ``feature_columns`` list.
    base_col : str
        Column used to weight the sensitivity.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with three added columns: ``guard_tightness``,
        ``guard_tightness_rank`` (percentile rank, ties averaged) and
        ``guard_sensitivity_raw`` (the unweighted difference quotient).

    Raises
    ------
    ValueError
        If ``eps`` is zero.

    Notes
    -----
    NOTE(review): if ``model`` is linear in ``dist_col``, the difference
    quotient is identical for every row with a non-missing distance, so the
    feature reduces to a rescaling of ``base_col``. Rows where ``dist_col`` is
    NaN stay NaN after the perturbation; their sensitivity depends on how the
    model treats missing values.
    """
    if eps == 0:
        raise ValueError("eps must be non-zero: it is the finite-difference step")

    # Fall back to the notebook-level feature list when none is given.
    cols = list(feature_columns if feature_cols is None else feature_cols)

    out = df.copy()
    X = out[cols].copy()

    # prediction at the observed distance
    pred_cur = np.asarray(model.predict(X))

    # prediction with slightly MORE space between shooter and defender
    X_more = X.copy()
    X_more[dist_col] = X_more[dist_col] + eps
    pred_more = np.asarray(model.predict(X_more))

    # local sensitivity: change in model output per unit of extra space
    # (positive => more space raises the predicted value)
    sensitivity = (pred_more - pred_cur) / eps

    # weight by the baseline shot value
    guard_tightness = out[base_col].values * sensitivity

    if scale:
        # robust scaling: percentiles instead of min/max so outliers do not
        # compress the bulk of the distribution
        q1, q99 = np.nanpercentile(guard_tightness, [1, 99])
        if q99 > q1:
            guard_tightness = np.clip((guard_tightness - q1) / (q99 - q1), 0.0, 1.0)

    out["guard_tightness"] = guard_tightness
    out["guard_tightness_rank"] = out["guard_tightness"].rank(method="average", pct=True)
    out["guard_sensitivity_raw"] = sensitivity
    return out


In [11]:
# Score every row of the loaded table with the fitted residual model.
shots_with_feature = add_guard_tightness_feature(shots_df, residual_model)

# Only the last expression of a cell is displayed automatically, so the
# summary has to be printed explicitly or it is silently discarded.
print(shots_with_feature["guard_tightness_rank"].describe())
shots_with_feature["guard_tightness_rank"]


0     0.757895
1     0.694737
2     0.747368
3     0.810526
4     0.326316
        ...   
90    0.900000
91    0.900000
92    0.947368
93    0.189474
94    0.926316
Name: guard_tightness_rank, Length: 95, dtype: float64