# Modeling

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso, LassoCV
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import MultinomialNB, GaussianNB

## Read In Data

In [3]:
rockets_log_num = pd.read_csv('./data/rockets_log_num.csv')

In [4]:
rockets_log_num.head()

Unnamed: 0,HOME/AWAY,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,TRB,AST,PTS,GAME_SCORE,+/-
0,0,5,12,0.417,3,8,0.375,0,0,0.0,8,1,13,11.4,8
1,0,2,7,0.286,2,7,0.286,4,4,1.0,5,0,10,7.4,5
2,1,1,4,0.25,1,4,0.25,0,0,0.0,10,1,3,4.7,19
3,1,2,7,0.286,1,4,0.25,0,0,0.0,6,2,5,3.7,12
4,0,5,11,0.455,4,9,0.444,0,0,0.0,1,0,14,9.3,-1


## Baseline Model

In [6]:
rockets_log_num['3P'].value_counts(normalize=True)

0    0.362069
3    0.150862
1    0.146552
2    0.118534
4    0.103448
5    0.058190
6    0.047414
7    0.008621
8    0.004310
Name: 3P, dtype: float64

## Split Data

In [7]:
X = rockets_log_num.drop(columns= ['3P', '3P%'])
y = rockets_log_num['3P']

## Train/Test Split Data

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

## Scale Data

In [19]:
sc = StandardScaler()
Z_train = sc.fit_transform(X_train)
Z_test = sc.transform(X_test)

## XGBoost

In [27]:
params = {
    'n_estimators': [50, 75, 100, 125],
    'max_features': [None, 'auto', 'log2'],
    'max_depth': [2, 3, 4]
}

sXGB = XGBClassifier()
gXGB = GridSearchCV(sXGB, params, n_jobs=4)
gXGB.fit(Z_train, y_train);



In [28]:
print(f"XGBoost Training Accuracy: {gXGB.score(Z_train, y_train)} ")
print(f"XGBoost Testing Accuracy: {gXGB.score(Z_test, y_test)} ")

XGBoost Training Accuracy: 1.0 
XGBoost Testing Accuracy: 0.603448275862069 


In [29]:
gXGB.best_params_


{'max_depth': 3, 'max_features': None, 'n_estimators': 50}

## Linear Regression

In [30]:
lr = LinearRegression()


In [31]:
lr.fit(Z_train, y_train)

LinearRegression()

In [33]:
lr.score(Z_train, y_train)

1.0

In [34]:
lr.score(Z_test, y_test)

1.0

## LASSO Regression