#### Import the relevant librries

In [115]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam
import pickle
import time, datetime, os
%reload_ext tensorboard

####  Set tensorboard and import final_dataset as stats.


In [116]:
NAME = "264x32x16x1_MSE_sv{}".format(int(time.time()))
log_dir = "logs/fit/" + NAME
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
stats = pd.read_csv(r'Data\final_dataset.csv')

#### Slice stats into X and y. Split the data into training and test set (80/20) and normalize the data

In [120]:
X = stats.iloc[:,3:-4]
y = stats.iloc[:,-1:]
random_state = 12
#split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = True, random_state=random_state)
X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)
stats['Results'] = stats['Results'].astype('float32')

### Define the custom loss functions

##### BCE: Binary-cross entropy with added parameters to optimize our return on investment.

$L(y\hat, y) = -\sum\limits_{i=1}^{m} (Odds_{1i}*y_{i}-1) * log(Odds_{1i}*\hat y_{i}-1) + (Odds_{2i}(1 - y_{i})-1) * log(Odds_{2i}*(1- \hat y_{i}) -1)$

Source: https://www.vantage-ai.com/en/blog/beating-the-bookies-with-machine-learning

We will try to fit these loss function in a near future.

##### MSE: Mean Squared Error with odds decorrelation

$L(p\hat, y)= \frac{1}{N} \sum\limits_{i=1}^{N}(\hat p_{i} - y_{i})^{2} - C * (\hat p_{i} - \frac {1}{Odds_{i}})^{2}$

The first part $(\hat p_{i} - y_{i})^{2}$ is the Mean Squared Error, the difference between the predicted outcome versus the actual aoutcome. For the second part, C is a constant that determines the significance of the decorrelation effect. $(\hat p_{i} - \frac {1}{Odds_{i}})^{2}$, $\hat p_{i}$ is the predicted probability for the home team and $\frac {1}{Odds_{i}}$ is the probability that the bookmakers gives to the home team victory.

Source: Hubáček, Ondřej, Gustav Šourek, and Filip Železný. "Exploiting sports-betting market using machine learning." International Journal of Forecasting 35.2 (2019): 783-796.

Keras only allow the parameters y_pred and y_true in the loss functions, to make our custom loss function we pass a wrapper function with our extra parameter, the odds.


In [121]:
odds1 = stats.iloc[:, -3:-2].values
odds1_prob = 1/odds1

def coef_bce(y_pred,y_true, odds1, odds2):
    return -1 * K.mean((odds1 * y_true -1) * K.log(odds1 * y_pred -1) + (odds2 * (1- y_true) -1) * K.log(odds2 * (1-y_pred)-1))

def bce_custom(odds1, odds2):
    def bce(y_pred, y_true):
        return coef_bce(y_pred, y_true, odds1, odds2)
    return bce

#loss_bce = bce_custom(odds1, odds2)


def coef_mse(y_pred, y_true, odds_game ):
    mse = K.square(y_pred - y_true)  #squared difference
    odds = 0.05 * K.square(y_pred - odds_game)   
    loss = K.mean(mse - odds , axis=-1) #mean over last dimension
    return loss

def mse_custom(odds_game):
    def mse( y_pred, y_true):
        return coef_mse(y_pred, y_true, odds_game)
    return mse

loss_mse = mse_custom(odds1_prob)


def mse_simple(y_true, y_pred):
    mse = math_ops.squared_difference(y_pred, y_true) 
    loss = K.mean(mse, axis=-1)
    return loss

### Model Architecture:
After configuring GridSearch, the best parameters where:
- layers: 64x32 // Input is 264(n. of features) and output is 1. So (264 x 64 x 32 x 1)
- epochs: 5
- batch size: 3
- regularisation: l2(0.01)
- activation function: ReLU except output layer (sigmoid)
- optimizer: Adam

In [122]:
#opt = tf.keras.optimizers.Adam(clipnorm=0.3)
#bias_regularizer=l2(0.01)

model = Sequential()
#model.add(tf.keras.layers.Flatten())
model.add(Dense(264, input_dim=264, kernel_initializer='normal', activation='relu',bias_regularizer=l2(0.01)))
model.add(Dense(64, kernel_initializer='normal', activation='relu',bias_regularizer=l2(0.01)))
model.add(Dense(32, kernel_initializer='normal', activation='relu',bias_regularizer=l2(0.01)))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
model.compile(optimizer='Adam', loss=loss_mse, metrics=['accuracy'])
model.fit(X_train, y_train, batch_size= 1, epochs=5, validation_data=(X_test, y_test),callbacks=[tensorboard_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1ffe93243a0>

In [105]:
#val_loss, val_acc = model.evaluate(X_test, y_test)


### Retrieve the information attached to the predictions and  store the predictions

We attach the prediction with the outcome. We also retrieve the infomration regarding the game such as the data, the teams, the score and the odds so we can later perform an Error analysis. We finally store the dataframe in a csv file.

In [38]:
prediction = model.predict(X_test[:])
label = y_test[:]
prediction_l = prediction.tolist()
pred =pd.DataFrame(prediction_l, columns=['prediction'])

In [123]:
stats.reset_index
index = stats[['GAME_DATE', 'HOME', 'AWAY','Odds1', 'Odds2', 'Score']]
data = label.join(index)
data.reset_index(inplace=True)
pred_data = data.join(pred)
pred_data.to_csv('predicted_data.csv')
pred_data

Unnamed: 0,index,Results,GAME_DATE,HOME,AWAY,Odds1,Odds2,Score,prediction
0,8985,1,04/03/2017,SAS,MIN,1.30,3.75,97:90,0.808359
1,7320,1,03/12/2015,TOR,DEN,1.18,5.33,105:106,0.432308
2,1662,0,10/02/2010,CHI,ORL,2.67,1.50,87:107,0.863924
3,2694,0,04/02/2011,TOR,MIN,1.60,2.40,111:100,0.547415
4,3665,1,16/03/2012,ORL,BKN,1.10,7.57,86:70,0.643577
...,...,...,...,...,...,...,...,...,...
2846,3446,0,15/02/2012,CLE,IND,2.03,1.80,98:87,0.384152
2847,10005,1,23/02/2018,IND,ATL,1.31,3.67,116:93,0.491052
2848,4325,0,05/01/2013,BKN,SAC,1.33,3.52,113:93,0.683397
2849,5980,1,27/03/2014,MIL,LAL,1.72,2.19,108:105,0.674776


### GridSearchCV

We use the GridSearchCV from sklearn to find the parameters that best optimize our model.

In [91]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
def model_nn(units='8', units2='8', drop='0,1', bias = 0.01):
    model = Sequential()
    #model.add(tf.keras.layers.Flatten())
    model.add(Dense(264, input_dim=264, kernel_initializer='normal', activation='relu',bias_regularizer=l2(bias)))
    model.add(Dense(units=units, kernel_initializer='normal', activation='relu',bias_regularizer=l2(bias)))
    model.add(Dense(units=units2, kernel_initializer='normal', activation='relu',bias_regularizer=l2(bias)))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    model.compile(optimizer='adam', loss='MeanSquaredError', metrics=['accuracy'])
    #model.fit(X_train, y_train, batch_size= 5, epochs=10, validation_data=(X_test, y_test), callbacks=[tensorboard_callback])
    return model

model = KerasClassifier(build_fn=model_nn)
params={'bias':[0.01], 
        'units':[16,32, 64],
        'units2':[16, 32, 64],
        'batch_size':[3], 
        'nb_epoch':[5]

        }
gs=GridSearchCV(estimator=model, param_grid=params, cv=10)
# now fit the dataset to the GridSearchCV object. 
gs = gs.fit(X_train, y_train)







In [92]:
best_params=gs.best_params_
accuracy=gs.best_score_
accuracy
best_params

{'batch_size': 3, 'bias': 0.01, 'nb_epoch': 5, 'units': 64, 'units2': 32}

In [43]:
#%tensorboard --logdir logs/fit