In [3]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

import sys
sys.path.append('./src/misc')
import tools
import sql_tools

In [4]:
path_to_data = "./src/sql/"
data = sql_tools.read_database(path_to_data+"trailing_database.db", "5_game_trailing")
data = data[data['O/U_line']!='']
data['O/U_result'] = data.apply(lambda row: tools.OU(row['O/U_line'],float(row['total'])),axis=1)

In [5]:
X,y = tools.model_preprocessing(data,("2016-01-10","2023-12-12"))

## Random Forest

In [4]:
n = 15
n_estimators = 50
n_trials = 10
OU_results = list(y[-n:])

acc_vals = []

print(f"Backtesting for {n} games with {n_estimators} estimators with {n_trials} trials.\n")
for trial in tqdm(range(0,n_trials)):
    pred = []
    for i in range(0,n):
        rf_model = RandomForestClassifier(n_estimators=n_estimators)
        
        X_train = X[n+1-i:]
        y_train = y[n+1-i:]
        
        X_test = X[n-i]
        y_test = y[n-i]
        
        rf_model.fit(X_train,y_train)
        yhat = rf_model.predict(X_test.reshape(1, -1))
        
        pred.append(yhat[0])

    acc = tools.score_results(OU_results,pred)
    acc_vals.append(acc)

  0%|          | 0/20 [00:00<?, ?it/s]

Trial 0


  5%|▌         | 1/20 [00:16<05:22, 16.98s/it]

predicted:  [1, 1, 1, 1, 1, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 1


 10%|█         | 2/20 [00:33<05:05, 16.98s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 2


 15%|█▌        | 3/20 [00:50<04:46, 16.86s/it]

predicted:  [0, 0, 0, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   70.0
Trial 3


 20%|██        | 4/20 [01:07<04:30, 16.88s/it]

predicted:  [0, 1, 0, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   60.0
Trial 4


 25%|██▌       | 5/20 [01:24<04:12, 16.85s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 5


 30%|███       | 6/20 [01:41<03:55, 16.83s/it]

predicted:  [0, 1, 0, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   60.0
Trial 6


 35%|███▌      | 7/20 [01:58<03:39, 16.86s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 7


 40%|████      | 8/20 [02:15<03:23, 16.92s/it]

predicted:  [0, 1, 0, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   60.0
Trial 8


 45%|████▌     | 9/20 [02:32<03:05, 16.91s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 9


 50%|█████     | 10/20 [02:49<02:49, 16.95s/it]

predicted:  [0, 1, 0, 1, 1, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 10


 55%|█████▌    | 11/20 [03:06<02:32, 16.95s/it]

predicted:  [0, 0, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   60.0
Trial 11


 60%|██████    | 12/20 [03:22<02:15, 16.91s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 12


 65%|██████▌   | 13/20 [03:40<01:58, 16.99s/it]

predicted:  [0, 0, 0, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   70.0
Trial 13


 70%|███████   | 14/20 [03:57<01:42, 17.08s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 14


 75%|███████▌  | 15/20 [04:14<01:25, 17.04s/it]

predicted:  [0, 1, 1, 1, 1, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   40.0
Trial 15


 80%|████████  | 16/20 [04:31<01:07, 16.97s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 16


 85%|████████▌ | 17/20 [04:47<00:50, 16.84s/it]

predicted:  [0, 1, 1, 1, 1, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   40.0
Trial 17


 90%|█████████ | 18/20 [05:04<00:33, 16.72s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0
Trial 18


 95%|█████████▌| 19/20 [05:21<00:16, 16.86s/it]

predicted:  [1, 0, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   70.0
Trial 19


100%|██████████| 20/20 [05:38<00:00, 16.90s/it]

predicted:  [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]
true:       [1, 0, 0, 1, 0, 1, 1, 2, 0, 1]
accuracy:   50.0





In [11]:
overall_acc = sum(acc_vals)/len(acc_vals)
print(f"Random Forest Classifier achieved {overall_acc*100}% overall accruacy for the past {n} games. Accuracy is calculated from {20} trials.")

0.5399999999999999

## NN Model

In [191]:
import tensorflow as tf
from tensorflow import keras

#earling stopping
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=10)

model = tf.keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape = X[0].shape))
model.add(tf.keras.layers.Dense(128, activation="relu6"))
model.add(keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(3, activation="softmax"))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x3196292b0>

In [196]:
X,y = tools.model_preprocessing(data,("2017-01-10","2023-12-12"))

callback = keras.callbacks.EarlyStopping(monitor='loss', patience=5)

model.fit(X[301:], 
          y[301:], 
          epochs=100, 
          validation_split=0.1, 
          batch_size=100,
          verbose=0
          callbacks=[callback])

print(f"Backtesting for {n} games with {n_trials} trials.\n")

n = 300
OU_results = list(y[-n:])
pred = []
acc_vals = []

for i in tqdm(range(0,n)):
  #X_train = X[n+1-i:]
  #y_train = y[n+1-i:]
  
  if i%10:
      model.fit(X[n+1-i:], 
          y[n+1-i:], 
          epochs=100, 
          validation_split=0.1, 
          batch_size=100,
          callbacks=[callback])

  X_test = X[n-i]
  y_test = y[n-i]
  
  prediction_output = model.predict(X_test.reshape(1,-1),verbose=0)
  pred.append(tools.prediction_classifier(prediction_output)[0])

acc = tools.score_results(OU_results,pred)
acc_vals.append(acc)
  
overall_acc = sum(acc_vals)/len(acc_vals)
print(f"\nNeural network achieved {overall_acc*100}% overall accruacy for the past {n} games. Accuracy is calculated from {n_trials} trials.")

SyntaxError: invalid syntax (<ipython-input-196-37d1c855448c>, line 11)

In [188]:
pred

[0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0

In [189]:
OU_results 

[0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 2,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 2,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


1