## Sports Betting Model

In [17]:
# import statements

import numpy as np
import pandas as pd

from sklearn.linear_model import LassoCV
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

import selenium 
from bs4 import BeautifulSoup

### Functions

In [2]:
# given type of model, returns explained variance score and mean squared error

def train_and_test_model(model_type, X_true, y_true):
    
    model = model_type().fit(X_true, y_true)
    y_pred = model.predict(X_true)
    
    # optimal return is 1, 0
    return metrics.explained_variance_score(y_true, y_pred), metrics.mean_squared_error(y_true, y_pred)

In [12]:
# returns True if bet would have hit and False otherwise

def hit(line, prediction, actual):
    
    if actual < line and prediction < line:
        return True
    elif actual > line and prediction > line:
        return True
    else:
        return False

### Load Data

In [3]:
# Load Data

from sklearn.datasets import fetch_california_housing
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

In [23]:
features = fetch_california_housing()['feature_names']
features

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [4]:
X.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [5]:
y.head(5)

0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: MedHouseVal, dtype: float64

### Feature Selection

In [19]:
feature_selection_pipeline = Pipeline([
    ('scaler',StandardScaler()),
    ('model',Lasso())
])

In [20]:
search = GridSearchCV(feature_selection_pipeline,
                      {'model__alpha':np.arange(0.1,10,0.1)},
                      cv = 5, scoring="neg_mean_squared_error",verbose=3
                     )
search.fit(X,y)

Fitting 5 folds for each of 99 candidates, totalling 495 fits
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.585, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.700, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.761, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.816, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.696, total=   0.0s
[CV] model__alpha=0.2 ................................................
[CV] ................... model__alpha=0.2, score=-0.651, total=   0.0s
[CV] model__alpha=0.2 ................................................
[CV] ..........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ................... model__alpha=0.6, score=-0.941, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-1.261, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-1.046, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-1.223, total=   0.0s
[CV] model__alpha=0.7000000000000001 .................................
[CV] .... model__alpha=0.7000000000000001, score=-1.178, total=   0.0s
[CV] model__alpha=0.7000000000000001 .................................
[CV] .... model__alpha=0.7000000000000001, score=-1.045, total=   0.0s
[CV] model__alpha=0.7000000000000001 .................................
[CV] .... model__alpha=0.7000000000000001, score=-1.434, total=   0.0s
[CV] model__alpha=0.7000000000000001 .................................
[CV] .

[CV] .... model__alpha=1.9000000000000001, score=-1.514, total=   0.0s
[CV] model__alpha=2.0 ................................................
[CV] ................... model__alpha=2.0, score=-1.306, total=   0.0s
[CV] model__alpha=2.0 ................................................
[CV] ................... model__alpha=2.0, score=-1.199, total=   0.0s
[CV] model__alpha=2.0 ................................................
[CV] ................... model__alpha=2.0, score=-1.573, total=   0.0s
[CV] model__alpha=2.0 ................................................
[CV] ................... model__alpha=2.0, score=-1.259, total=   0.0s
[CV] model__alpha=2.0 ................................................
[CV] ................... model__alpha=2.0, score=-1.514, total=   0.0s
[CV] model__alpha=2.1 ................................................
[CV] ................... model__alpha=2.1, score=-1.306, total=   0.0s
[CV] model__alpha=2.1 ................................................
[CV] .

[CV] ................... model__alpha=3.2, score=-1.259, total=   0.0s
[CV] model__alpha=3.2 ................................................
[CV] ................... model__alpha=3.2, score=-1.514, total=   0.0s
[CV] model__alpha=3.3000000000000003 .................................
[CV] .... model__alpha=3.3000000000000003, score=-1.306, total=   0.0s
[CV] model__alpha=3.3000000000000003 .................................
[CV] .... model__alpha=3.3000000000000003, score=-1.199, total=   0.0s
[CV] model__alpha=3.3000000000000003 .................................
[CV] .... model__alpha=3.3000000000000003, score=-1.573, total=   0.0s
[CV] model__alpha=3.3000000000000003 .................................
[CV] .... model__alpha=3.3000000000000003, score=-1.259, total=   0.0s
[CV] model__alpha=3.3000000000000003 .................................
[CV] .... model__alpha=3.3000000000000003, score=-1.514, total=   0.0s
[CV] model__alpha=3.4000000000000004 .................................
[CV] .

[CV] ................... model__alpha=4.5, score=-1.259, total=   0.0s
[CV] model__alpha=4.5 ................................................
[CV] ................... model__alpha=4.5, score=-1.514, total=   0.0s
[CV] model__alpha=4.6 ................................................
[CV] ................... model__alpha=4.6, score=-1.306, total=   0.0s
[CV] model__alpha=4.6 ................................................
[CV] ................... model__alpha=4.6, score=-1.199, total=   0.0s
[CV] model__alpha=4.6 ................................................
[CV] ................... model__alpha=4.6, score=-1.573, total=   0.0s
[CV] model__alpha=4.6 ................................................
[CV] ................... model__alpha=4.6, score=-1.259, total=   0.0s
[CV] model__alpha=4.6 ................................................
[CV] ................... model__alpha=4.6, score=-1.514, total=   0.0s
[CV] model__alpha=4.7 ................................................
[CV] .

[CV] ................... model__alpha=5.8, score=-1.199, total=   0.1s
[CV] model__alpha=5.8 ................................................
[CV] ................... model__alpha=5.8, score=-1.573, total=   0.0s
[CV] model__alpha=5.8 ................................................
[CV] ................... model__alpha=5.8, score=-1.259, total=   0.0s
[CV] model__alpha=5.8 ................................................
[CV] ................... model__alpha=5.8, score=-1.514, total=   0.0s
[CV] model__alpha=5.9 ................................................
[CV] ................... model__alpha=5.9, score=-1.306, total=   0.0s
[CV] model__alpha=5.9 ................................................
[CV] ................... model__alpha=5.9, score=-1.199, total=   0.0s
[CV] model__alpha=5.9 ................................................
[CV] ................... model__alpha=5.9, score=-1.573, total=   0.0s
[CV] model__alpha=5.9 ................................................
[CV] .

[CV] ................... model__alpha=7.0, score=-1.306, total=   0.0s
[CV] model__alpha=7.0 ................................................
[CV] ................... model__alpha=7.0, score=-1.199, total=   0.0s
[CV] model__alpha=7.0 ................................................
[CV] ................... model__alpha=7.0, score=-1.573, total=   0.0s
[CV] model__alpha=7.0 ................................................
[CV] ................... model__alpha=7.0, score=-1.259, total=   0.0s
[CV] model__alpha=7.0 ................................................
[CV] ................... model__alpha=7.0, score=-1.514, total=   0.0s
[CV] model__alpha=7.1 ................................................
[CV] ................... model__alpha=7.1, score=-1.306, total=   0.0s
[CV] model__alpha=7.1 ................................................
[CV] ................... model__alpha=7.1, score=-1.199, total=   0.0s
[CV] model__alpha=7.1 ................................................
[CV] .

[CV] ................... model__alpha=8.1, score=-1.514, total=   0.0s
[CV] model__alpha=8.2 ................................................
[CV] ................... model__alpha=8.2, score=-1.306, total=   0.0s
[CV] model__alpha=8.2 ................................................
[CV] ................... model__alpha=8.2, score=-1.199, total=   0.0s
[CV] model__alpha=8.2 ................................................
[CV] ................... model__alpha=8.2, score=-1.573, total=   0.0s
[CV] model__alpha=8.2 ................................................
[CV] ................... model__alpha=8.2, score=-1.259, total=   0.0s
[CV] model__alpha=8.2 ................................................
[CV] ................... model__alpha=8.2, score=-1.514, total=   0.0s
[CV] model__alpha=8.3 ................................................
[CV] ................... model__alpha=8.3, score=-1.306, total=   0.0s
[CV] model__alpha=8.3 ................................................
[CV] .

[CV] ................... model__alpha=9.8, score=-1.573, total=   0.0s
[CV] model__alpha=9.8 ................................................
[CV] ................... model__alpha=9.8, score=-1.259, total=   0.0s
[CV] model__alpha=9.8 ................................................
[CV] ................... model__alpha=9.8, score=-1.514, total=   0.0s
[CV] model__alpha=9.9 ................................................
[CV] ................... model__alpha=9.9, score=-1.306, total=   0.0s
[CV] model__alpha=9.9 ................................................
[CV] ................... model__alpha=9.9, score=-1.199, total=   0.0s
[CV] model__alpha=9.9 ................................................
[CV] ................... model__alpha=9.9, score=-1.573, total=   0.0s
[CV] model__alpha=9.9 ................................................
[CV] ................... model__alpha=9.9, score=-1.259, total=   0.0s
[CV] model__alpha=9.9 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done 495 out of 495 | elapsed:    3.4s finished


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('model', Lasso())]),
             param_grid={'model__alpha': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2, 1.3,
       1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6,
       2.7, 2.8, 2.9, 3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9,
       4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2,
       5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. , 6.1, 6.2, 6.3, 6.4, 6.5,
       6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8,
       7.9, 8. , 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9. , 9.1,
       9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9])},
             scoring='neg_mean_squared_error', verbose=3)

In [28]:
coefficients = search.best_estimator_.named_steps['model'].coef_
importance = np.abs(coefficients)
selected_features = np.array(features)[importance > 0]
print(selected_features)

['MedInc' 'HouseAge' 'Latitude']


In [29]:
unselected_features = np.array(features)[importance == 0]
print(unselected_features)

['AveRooms' 'AveBedrms' 'Population' 'AveOccup' 'Longitude']


In [31]:
X = X[selected_features]
X.head(5)

Unnamed: 0,MedInc,HouseAge,Latitude
0,8.3252,41.0,37.88
1,8.3014,21.0,37.86
2,7.2574,52.0,37.85
3,5.6431,52.0,37.85
4,3.8462,52.0,37.85


### Models

In [32]:
# Train and Test Models

print(train_and_test_model(MLPRegressor, X, y))

(0.5484514539963422, 0.601357832311774)
