# Predicting NHL Game Outcomes

## Sandbox Notebook

Working notebook for prototyping.
Once a component (e.g. a scraping function) is working, move it to its eventual production home.

In [1]:
# Standard Packages
import pandas as pd
import numpy as np
import requests
import re
import time
import os
import warnings

# Viz Packages
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

# Modeling Packages
## Modeling Prep
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, KFold, \
GridSearchCV, RandomizedSearchCV

## SKLearn Data Prep Modules
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, \
PolynomialFeatures, PowerTransformer, Normalizer, MaxAbsScaler

from sklearn.impute import SimpleImputer

## SKLearn Classification Models
from sklearn.linear_model import LogisticRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier,\
ExtraTreesClassifier, VotingClassifier, StackingRegressor

## SKLearn Pipeline Setup
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

## SKLearn Model Optimization
from sklearn.feature_selection import RFE, f_regression

## Boosting
from xgboost import XGBRegressor
from xgboost import XGBClassifier

## SKLearn Metrics
### Classification Scoring/Evaluation
from sklearn.metrics import classification_report, accuracy_score, recall_score, precision_score, f1_score, \
ConfusionMatrixDisplay, log_loss, confusion_matrix, RocCurveDisplay, make_scorer, roc_auc_score

In [2]:
# Notebook Config
## Python warnings: silence the Future / Deprecation categories for the whole kernel
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=DeprecationWarning)

## Pandas warnings: turn off the chained-assignment (SettingWithCopy) check
pd.set_option("mode.chained_assignment", None)

## Pandas display: no cap on the number of columns shown, no fixed output width
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)

## SKLearn display: render estimators as diagrams instead of plain text
from sklearn import set_config

set_config(display="diagram")

In [3]:
# Scraping Requirements
from bs4 import BeautifulSoup


In [None]:
# Run Baseline Model with all available features in our dataset
# FIXME(review): the right-hand side of this assignment is missing, so this cell
# raises a SyntaxError as written and breaks Restart & Run All.
# Presumably it should hold the collection of feature column names used for the
# baseline model -- confirm the intended value and fill it in before running.
all_features = 

In [None]:
# Logistic Regression 
#
# Grid-searches a LogisticRegression pipeline on several random train/test
# splits of X and y and keeps the search whose hold-out accuracy is highest.
# X and y are not created in this cell; an earlier cell must define them.

# Parameters
c = [0.1, 1, 10, 100]
max_iter = [100, 1000]
solver = ['liblinear']
penalty = ['l1', 'l2']

logreg_params = {
    'logreg__C': c,
    'logreg__max_iter': max_iter,
    'logreg__solver': solver,
    'logreg__penalty': penalty
}

# Reproducibility / search-size settings
LOGREG_SEED = 42          # base seed; each split derives its own seed from it
LOGREG_N_RESPLITS = 11    # number of additional random splits tried in the loop
LOGREG_TEST_SIZE = 0.20   # fraction of rows held out for scoring


def fit_logreg_on_split(X, y, param_grid, random_state, test_size=LOGREG_TEST_SIZE):
    """Split the data once, grid-search a logistic regression on the train part,
    and score the fitted search on the held-out part.

    Parameters
    ----------
    X, y
        Features and target, passed unchanged to ``train_test_split``.
    param_grid : dict
        Grid handed to ``GridSearchCV``; keys must use the ``logreg__`` prefix
        because the estimator is a one-step pipeline named ``"logreg"``.
    random_state : int
        Seed for the shuffle in ``train_test_split`` so the split is repeatable.
    test_size : float
        Fraction of rows placed in the hold-out set.

    Returns
    -------
    tuple
        ``(search, score, split)`` where ``search`` is the fitted
        ``GridSearchCV``, ``score`` is ``search.score`` on the hold-out set
        (accuracy, per ``scoring='accuracy'``), and ``split`` is the
        ``(X_train, X_test, y_train, y_test)`` tuple the search was built on.
    """
    split = train_test_split(
        X, y, test_size=test_size, shuffle=True, random_state=random_state
    )
    X_tr, X_te, y_tr, y_te = split

    pipe = Pipeline(steps=[("logreg", LogisticRegression(n_jobs=1))], verbose=False)
    search = GridSearchCV(estimator=pipe, param_grid=param_grid, scoring='accuracy', cv=5, verbose=0)
    search.fit(X_tr, y_tr)

    return search, search.score(X_te, y_te), split


# Model
# The first candidate is fitted on an explicit split made here, instead of
# relying on X_train / y_train left over in the kernel from some earlier run.
logreg_top_model, logreg_top_score, logreg_top_split = fit_logreg_on_split(
    X, y, logreg_params, random_state=LOGREG_SEED
)

# Model Selection
# NOTE(review): the winner is picked by its score on its own hold-out split, so
# logreg_top_score is an optimistic estimate; confirm on untouched data before
# reporting it.
for i in range(0, LOGREG_N_RESPLITS):
    print(f"Model {i}. Current Top Score: {logreg_top_score}")

    # Model Building on a fresh split (distinct seed per iteration)
    logreg_cur_gs, logreg_cur_score, logreg_cur_split = fit_logreg_on_split(
        X, y, logreg_params, random_state=LOGREG_SEED + 1 + i
    )

    # Comparing and Replacing Data
    if logreg_cur_score > logreg_top_score:
        logreg_top_model = logreg_cur_gs
        logreg_top_score = logreg_cur_score
        logreg_top_split = logreg_cur_split

# Displaying Data
# Re-bind the split and predictions to the ones that belong to the winning
# model, so later cells never score logreg_top_model on rows it was trained on.
X_train, X_test, y_train, y_test = logreg_top_split
y_pred = logreg_top_model.predict(X_test)

# Unfitted pipeline template that was handed to the winning GridSearchCV
logreg_top_model_pipe = logreg_top_model.estimator