## Sports Betting Model

In [1]:
# import statements

import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

from sklearn.neural_network import MLPRegressor

from sklearn.neural_network import MLPClassifier

from sklearn import metrics
import statistics
import scipy.stats

import selenium 
from bs4 import BeautifulSoup

### Functions

In [2]:
# given regression model, returns r^2 score, mean squared error, and mean absolute error

def evaluate_regression_model(model, X_true, y_true):
    """Fit ``model`` on the given data and score it on that same data.

    The scores are in-sample: the model is fitted on ``X_true`` / ``y_true``
    and then asked to predict ``X_true`` again. ``model`` is fitted in place,
    so it is left trained when this function returns.

    Parameters:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_true: feature data used for both fitting and prediction.
        y_true: target values matching ``X_true``.

    Returns:
        Tuple ``(r2, mse, mae)``. A perfect fit would give ``(1, 0, 0)``.
    """
    model.fit(X_true, y_true)
    fitted_values = model.predict(X_true)

    # Scorers are applied in the order the results are reported.
    scorers = (
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    )
    return tuple(score(y_true, fitted_values) for score in scorers)

In [3]:
# given classification model, returns accuracy, precision, recall

def evaluate_classification_model(model_type, X_true, y_true):
    """Fit a classifier on the given data and score it on that same data.

    The scores are in-sample: the classifier is fitted on ``X_true`` /
    ``y_true`` and then asked to predict ``X_true`` again. The estimator is
    fitted in place, so it is left trained when this function returns.

    Parameters:
        model_type: estimator instance exposing ``fit(X, y)`` and
            ``predict(X)``. Despite the name, this is the model object itself.
        X_true: feature data used for both fitting and prediction.
        y_true: class labels matching ``X_true``.

    Returns:
        Tuple ``(accuracy, precision, recall)``. Precision and recall are
        computed with the metric functions' default settings. A perfect fit
        would give ``(1, 1, 1)``.
    """
    # Use the estimator that was passed in. Previously the body referenced a
    # notebook-level name `model`, which only resolved when a loop variable
    # happened to leak from the calling cell.
    classifier = model_type

    classifier.fit(X_true, y_true)
    y_pred = classifier.predict(X_true)

    # optimal return is 1, 1, 1
    accuracy = metrics.accuracy_score(y_true, y_pred)
    precision = metrics.precision_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred)
    return accuracy, precision, recall

In [4]:
# returns True if bet would have hit and False otherwise

def hit(line, prediction, actual):
    """Report whether a bet on the predicted side of ``line`` would have won.

    The bet hits only when ``prediction`` and ``actual`` fall strictly on the
    same side of ``line`` (both under or both over). If either value equals
    ``line`` the result is False.

    Parameters:
        line: the betting line.
        prediction: the model's predicted value.
        actual: the observed value.

    Returns:
        True if the bet would have hit, False otherwise.
    """
    both_under = actual < line and prediction < line
    if both_under:
        return True
    # Otherwise the only winning case left is both values being over the line.
    return bool(actual > line and prediction > line)

In [5]:
# returns only selected features of X

def select_features(X, target=None, feature_names=None):
    """Keep only the columns of ``X`` that a cross-validated Lasso gives a non-zero weight.

    A ``StandardScaler`` + ``Lasso`` pipeline is grid-searched over ``alpha``
    in ``np.arange(0.1, 10, 0.1)`` with 5-fold cross-validation, scored by
    negative mean squared error. Features whose coefficient in the best model
    is exactly zero are dropped. The selected and unselected names are printed.

    Parameters:
        X: pandas DataFrame of candidate features.
        target: regression target aligned with ``X``. When omitted, the
            notebook-level ``y`` is used, so ``select_features(X)`` keeps
            working as before.
        feature_names: names matching the columns of ``X`` in order. When
            omitted they are taken from ``X.columns``, so they can never be
            out of sync with the frame being filtered.

    Returns:
        ``X`` restricted to the selected columns.
    """
    if target is None:
        # Backward-compatible fallback: the original implementation always
        # read the notebook-level `y`.
        target = y
    if feature_names is None:
        feature_names = list(X.columns)
    feature_names = np.array(feature_names)

    feature_selection_pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('model', Lasso())
    ])

    search = GridSearchCV(feature_selection_pipeline,
                          {'model__alpha': np.arange(0.1, 10, 0.1)},
                          cv=5, scoring="neg_mean_squared_error", verbose=0
                          )
    search.fit(X, target)

    coefficients = search.best_estimator_.named_steps['model'].coef_
    importance = np.abs(coefficients)
    selected_features = feature_names[importance > 0]
    print("Selected:", selected_features)

    unselected_features = feature_names[importance == 0]
    print("Unselected:", unselected_features)

    return X[selected_features]

In [18]:
# return probability of OVER given line, estimate, mean_error, and stdev_error

def regression_probability(line, estimate, mean_error, stdev_error):
    """Return the probability that the actual value finishes OVER ``line``.

    The actual outcome is modelled as normally distributed with mean
    ``estimate + mean_error`` and standard deviation ``stdev_error``. For that
    to be the right centre, ``mean_error`` must be the average of
    (actual - estimate): a positive value means the model tends to
    under-predict.

    Parameters:
        line: the betting line.
        estimate: the model's point prediction.
        mean_error: mean of the model's residuals (actual - estimate).
        stdev_error: standard deviation of the residuals; must be > 0.

    Returns:
        Probability (between 0 and 1) that the actual value exceeds ``line``.

    Raises:
        ValueError: if ``stdev_error`` is not strictly positive.
    """
    if not np.all(np.asarray(stdev_error) > 0):
        raise ValueError("stdev_error must be strictly positive")

    z_score = ((line - estimate) - mean_error) / stdev_error
    # The survival function is 1 - cdf computed directly; `1 - cdf(z)` rounds
    # to exactly 0 once cdf(z) is within floating-point error of 1.
    return scipy.stats.norm.sf(z_score)

### Load Data

#### Player Data

In [7]:
# Load Data

# NOTE(review): this section is headed "Player Data" but loads the California
# housing dataset -- presumably stand-in data until real player data is wired
# in; confirm.
from sklearn.datasets import fetch_california_housing
# return_X_y=True gives the (features, target) pair directly; as_frame=True
# requests pandas objects, which the later .head() calls rely on.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)

In [8]:
# Feature names, in column order. They are read from the frame loaded above
# rather than loading the dataset a second time just for its names.
features = list(X.columns)
features

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [9]:
# Scale Data

# Standardise every column and re-wrap the result in a DataFrame so the column
# names in `features` are kept. This rebinds X: the unscaled frame is no longer
# available after this cell.
X = pd.DataFrame(StandardScaler().fit_transform(X), columns=features)

In [10]:
# Preview the first five rows of the scaled feature frame.
X.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,2.344766,0.982143,0.628559,-0.153758,-0.974429,-0.049597,1.052548,-1.327835
1,2.332238,-0.607019,0.327041,-0.263336,0.861439,-0.092512,1.043185,-1.322844
2,1.782699,1.856182,1.15562,-0.049016,-0.820777,-0.025843,1.038503,-1.332827
3,0.932968,1.856182,0.156966,-0.049833,-0.766028,-0.050329,1.038503,-1.337818
4,-0.012881,1.856182,0.344711,-0.032906,-0.759847,-0.085616,1.038503,-1.337818


In [11]:
# Preview the first five target values (not scaled by the cell above).
y.head(5)

0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: MedHouseVal, dtype: float64

#### Betting Data

In [12]:
# Placeholder cell: no betting data is loaded yet.
pass

### Feature Selection

In [13]:
# Feature selection is currently disabled; uncomment the next line to enable it.
# X = select_features(X)

### Test Various Models

Reference: [scikit-learn supervised learning overview](https://scikit-learn.org/stable/supervised_learning.html)

In [14]:
# Test Regression Models

# The whole dataset is used as both input and target; no train/test split is
# made in this cell.
model_input = X
model_output = y

# Default hyperparameters. No random_state is passed, so the fitted weights
# (and therefore the scores printed below) can differ between runs.
MLPRegressor_model = MLPRegressor()

regression_models = [MLPRegressor_model]

print("Model (R2, MSE, MAE)")
# evaluate_regression_model fits each model in place, so MLPRegressor_model is
# trained once this loop finishes. The loop variable `model` stays bound at
# notebook level afterwards.
for model in regression_models:
    print(model, evaluate_regression_model(model, model_input, model_output))

Model (R2, MSE, MAE)
MLPRegressor() (0.7844539861651083, 0.2870103594032874, 0.36228508119641123)


In [15]:
# Test Classification Models

model_input = X
# Placeholder labels: every sample is assigned class 1, so the scores printed
# by this cell are trivially perfect and say nothing about model quality.
model_output = [1] * len(y)

# Default hyperparameters; no random_state is passed.
MLPClassifier_model = MLPClassifier()

classification_models = [MLPClassifier_model]

print("Model (Accuracy, Precision, Recall)")
# The loop variable `model` stays bound at notebook level after the loop.
for model in classification_models:
    print(model, evaluate_classification_model(model, model_input, model_output))

Model (Accuracy, Precision, Recall)
MLPClassifier() (1.0, 1.0, 1.0)


### Make Predictions

In [24]:
# Regression-Based (Estimate, Mean Error, Standard Deviation Error)

model_input = X
model_output = y

# MLPRegressor_model was fitted in place by the regression evaluation cell.
model = MLPRegressor_model
model_predictions = model.predict(model_input)

# Residuals are actual - predicted. regression_probability centres the outcome
# at estimate + mean_error, so a positive mean_error has to mean the model
# under-predicts; predicted - actual would shift the distribution the wrong way.
errors = model_output - model_predictions
mean_error = statistics.mean(errors)
stdev_error = statistics.stdev(errors)

# Sanity check with hand-picked numbers (line 25, estimate 24, no bias, sd 1):
# P(over) = P(Z > 1), about 15.87%. The fitted mean_error / stdev_error above
# are not used in this call.
round(regression_probability(25, 24, 0, 1)*100, 2)

15.87

In [20]:
# Classification-Based (Class, Probability)

model_input = X
# Placeholder labels (all ones); not used again in this cell.
model_output = [1] * len(y)

# MLPClassifier_model was fitted in place by the classification evaluation cell.
model = MLPClassifier_model
model_predictions = model.predict(model_input)
# For each row, the largest class probability as a percentage rounded to two
# decimals, i.e. the model's confidence in its predicted class.
model_probabilities = [round(max(vals)*100, 2) for vals in model.predict_proba(model_input)]

### Strategies

1. Profitability: compute each bet's expected value (EV) from the betting odds and the model's estimated probability.
2. Probability: take any bet whose estimated probability is above a chosen threshold.