In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Read the season table, using the first CSV column as the row index,
# then clear the index label so no header name is shown above it.
nba = pd.read_csv('data/nba18-19.csv', index_col=0)
nba.index.names = [None]

In [None]:
nba.head(5)

In [None]:
nba[['FG', 'AST', '3PA', 'PTS']]

In [None]:
# Feature table with a constant 'Bias' column placed first, so the model
# gets an intercept term.
X = (
    nba[['FG', 'AST', '3PA']]
    .assign(Bias=1)[['Bias', 'FG', 'AST', '3PA']]
)
X

In [None]:
# Convert the design matrix to a NumPy array. np.asarray returns an existing
# ndarray unchanged, so re-running this cell (when X is already an array)
# no longer fails with "ndarray has no attribute to_numpy".
X = np.asarray(X)
X.shape

In [None]:
# Response values as an (n, 1) column array, plus the number of rows.
Y = nba["PTS"].to_numpy().reshape(-1, 1)
n = Y.shape[0]
print("number datapoints", n)
Y[:5]

In [None]:
# Hand-picked parameter values, shaped as a 4x1 column vector.
theta_arbitrary = np.array([0.5, -1.14, 0.65, 1.52]).reshape(-1, 1)
theta_arbitrary

In [None]:
# First five predictions under the arbitrary parameters ...
display(X.dot(theta_arbitrary)[:5])

# ... next to the first five observed values.
display(Y[:5])

In [None]:
# Flat (1-D) version of the arbitrary parameter vector.
theta_arbitrary = np.array([0.5, -1.14, 0.65, 1.52])
def mse_nba(theta):
  """Mean squared error of the linear model `X @ theta` against `Y`.

  Reads the module-level design matrix `X` and response `Y`.

  Parameters
  ----------
  theta : array-like
      Parameter vector, either flat (shape (p,)) or a column (shape (p, 1)).

  Returns
  -------
  float
      The average of the squared residuals.
  """
  # Flatten both sides before subtracting. With Y of shape (n, 1) and a flat
  # theta, `Y - X @ theta` would broadcast to an (n, n) matrix and the result
  # would not be the MSE.
  residuals = np.ravel(Y) - np.ravel(X @ np.ravel(theta))
  return np.mean(residuals ** 2)

# Error of the hand-picked parameters.
mse_nba(theta=theta_arbitrary)

In [None]:
from numpy.linalg import inv

In [None]:
def least_squares_estimate(X, Y):
  """Ordinary least squares parameters for design matrix X and response Y.

  Solves the normal equations (X^T X) theta = X^T Y.

  Parameters
  ----------
  X : ndarray of shape (n, p)
      Design matrix.
  Y : ndarray of shape (n,) or (n, k)
      Response values.

  Returns
  -------
  ndarray
      The solution theta, of shape (p,) or (p, k) to match Y.

  Raises
  ------
  numpy.linalg.LinAlgError
      If X^T X is singular.
  """
  # Solve the linear system directly instead of forming the explicit inverse;
  # same solution, with less floating-point error.
  return np.linalg.solve(X.T @ X, X.T @ Y)

# Fit the full model (Bias, FG, AST, 3PA) and show the fitted parameters.
theta_hat = least_squares_estimate(X=X, Y=Y)
theta_hat

In [None]:
# Compare the error of the hand-picked parameters with the least squares fit.
print("Arbitrary theta MSE:", mse_nba(theta_arbitrary))
print("Optimal theta MSE:", mse_nba(theta_hat))

In [None]:
# Fitted values of the full model.
Y_hat = X.dot(theta_hat)

In [None]:
# Residual plot: fitted values on the x-axis, residuals (Y - Y_hat) on the
# y-axis, with a horizontal reference line at zero residual.
# Raw strings keep the LaTeX backslashes from being read as (invalid) Python
# escape sequences.
sns.scatterplot(x = Y_hat.flatten(), y = (Y - Y_hat).flatten())
sns.lineplot(x=[-1, 35], y=[0,0], label=r"$\hat{Y}=Y$", color = 'orange', linewidth = 3)
plt.xlabel(r"$\hat{Y}$")
# The plotted quantity is Y - Y_hat, so the label states it in that order.
plt.ylabel(r"$Y - \hat{Y}$")
plt.xlim([0,32])
plt.show()

In [None]:
# Ratio of the variance of the fitted values to the variance of the observed
# values, used here as the R^2 of the full model.
r2_ast_fg_3pa = Y_hat.var() / Y.var()
r2_ast_fg_3pa

In [None]:
# Design matrix for the smaller model: a constant column, then AST and 3PA.
X_3d = (
    nba[['AST', '3PA']]
    .assign(Bias=1)[['Bias', 'AST', '3PA']]
    .to_numpy()
)

# Fit, predict, and compute the variance ratio for this model.
theta_ast_3pa = least_squares_estimate(X_3d, Y)
Y_hat_ast_3pa = X_3d.dot(theta_ast_3pa)

r2_ast_3pa = Y_hat_ast_3pa.var() / Y.var()
r2_ast_3pa

In [None]:
# Design matrix for the single-feature model: a constant column, then AST.
X_slr = (
    nba[['AST']]
    .assign(Bias=1)[['Bias', 'AST']]
    .to_numpy()
)

# Fit, predict, and compute the variance ratio for this model.
theta_ast_only = least_squares_estimate(X_slr, Y)
Y_hat_ast_only = X_slr.dot(theta_ast_only)

r2_ast_only = Y_hat_ast_only.var() / Y.var()
r2_ast_only

In [None]:
# Fitted parameters of the AST-only model (intercept, AST).
theta_ast_only

In [None]:
# Fitted parameters of the full model (intercept, FG, AST, 3PA).
theta_hat

In [None]:
# Variance ratios of the three fitted models, smallest model first.
for label, r2_value in [
  ("(SLR) intercept, AST:    ", r2_ast_only),
  ("intercept, 3PA, AST:     ", r2_ast_3pa),
  ("intercept, FG, 3PA, AST: ", r2_ast_fg_3pa),
]:
  print(label, r2_value)

In [None]:
# Correlation coefficient between AST and PTS, read from the off-diagonal
# entry of the 2x2 correlation matrix, then squared.
ast_pts_corr = np.corrcoef(nba['AST'], nba['PTS'])
r = ast_pts_corr[0, 1]
r ** 2