In [2]:
import sys
sys.path.append('../')

import pandas as pd 
import numpy as np
from sklearn.neighbors import KernelDensity
from model.feature_engineering import generate_features
from sklearn.model_selection import train_test_split
import numpy as np
from nba.utils import load_config, retrieve_data
import matplotlib.pyplot as plt
import datetime as dt
from model.generate_pmf import build_kde
# https://kdepy.readthedocs.io/en/latest/examples.html

In [3]:
query = "SELECT * FROM basketball.stat_sheet ss"
data = retrieve_data(query)
input_data = pd.Series({
    "player": "Zion Williamson",
    "team": "NOP",
    "opponent": "MIA",
    "home": "NOP",
    "date": dt.date(2023, 6, 1),
    "points": 15,
    "total_rebounds": 8,
    "assists": 3
})
features = ["same_opponent", "same_player", "same_venue_status", "days_since_game"]
weights = [1] * len(features)

In [4]:
kde = build_kde(data, input_data, features, weights, bandwidth=0.5)

In [5]:
# # goal here is to build out the loss function
# # want to buidl out maybe an 80 15 5 split of train_kde, train_weights, test
# help(kde.score_samples)

In [6]:
arr = np.array(input_data[["points", "total_rebounds", "assists"]])

In [7]:
kde.score_samples(arr.reshape(1, -1))

array([-6.61573828])

KDE probability with weighting. In our case, the $w_i(\theta)$ is just a linear combination.
$$
p(x;\theta) = \frac{1}{\sum_{i=1}^Nw_i(\theta)}\sum_{i=1}^Nw_i(\theta)K\Big(\frac{x-x_i}{\sigma}\Big)
$$
Loss function
$$
L = -\text{log } p(x_{new};\theta)
$$
Gradient of loss function
$$
\nabla_\theta L(\theta) = -\frac{1}{p(x_{new};\theta)} \cdot \frac{1}{\sum_{i=1}^Nw_i(\theta)}\Big(\sum_{i=1}^N\nabla_\theta w_i(\theta) K\big(\frac{x_{new}-x_i}{\sigma}\big) - \frac{p(x_{new};\theta)}{\sum_{j=1}^Nw_j(\theta)}\sum_{i=1}^Nw_i(\theta) K\big(\frac{x_{new}-x_i}{\sigma}\big)\sum_{k=1}^N\nabla_\theta w_k(\theta)\Big)
$$
which is a little hairy. Let's break it down below.

We have defined $$p(x;\theta)$$ easily. This is just the score of the KDE, so the first term is done.
The second term is the sum of the weights, also easy.
What is $$\nabla_\theta w_i(\theta)$$? it's just $$\theta^T \phi_i$$

Kernel evaluation is also easy.
N in this case is the number of training data.
First of all, what dimension is this? Nabla with respect to theta should be of size theta.

In [8]:
p_xnew = np.exp(kde.score_samples(np.array(input_data[["points", "total_rebounds", "assists"]]).reshape(-1, 3)))

# nabla_wi is the weight of each point
nabla_wi = data[features]  * weights
# sum of the weights
resampling_weights = (nabla_wi).sum(axis=1)
sum_wi = sum(resampling_weights)


In [9]:
pra = np.array(input_data[["points", "total_rebounds", "assists"]])

In [10]:
max_pts = data["points"].max()
max_rb = data["total_rebounds"].max()
max_ast = data["assists"].max()
pra_norm = np.divide(pra, np.array([max_pts, max_rb, max_ast]))
np.linalg.norm(pra_norm)

0.3624501514710004

In [11]:
def epanechnikov(u):
    return 3/4*(1-u**2)

epanechnikov(np.linalg.norm(pra_norm))

0.6514724157739866

In [None]:
# TODO: figure out this part
sigma = 1

In [None]:
z_term = (input_data - arr)/sigma

$$
\nabla_\theta L(\theta) = -\frac{1}{p(x_{new};\theta)} \cdot \frac{1}{\sum_{i=1}^Nw_i(\theta)}\Big(\sum_{i=1}^N\nabla_\theta w_i(\theta) K\big(\frac{x_{new}-x_i}{\sigma}\big) - \frac{p(x_{new};\theta)}{\sum_{j=1}^Nw_j(\theta)}\sum_{i=1}^Nw_i(\theta) K\big(\frac{x_{new}-x_i}{\sigma}\big)\sum_{k=1}^N\nabla_\theta w_k(\theta)\Big)
$$

In [12]:
-1/p_xnew * 1/sum_wi * (np.sum(nabla_wi * epanechnikov(np.linalg.norm(z_term))) - p_xnew/sum_wi)

same_opponent        -27.650890
same_player           -0.966562
same_venue_status   -386.516258
days_since_game     -331.622134
dtype: float64

In [13]:
nabla_wi.sum(axis=0)

same_opponent         3061.000000
same_player            107.000000
same_venue_status    42788.000000
days_since_game      36711.128163
dtype: float64