In [306]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam
import pickle
import time, datetime, os
%reload_ext tensorboard

In [307]:
NAME = "264x32x16x1_MSE_sv{}".format(int(time.time()))
log_dir = "logs/fit/" + NAME
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
stats = pd.read_csv(r'Data\final_dataset.csv')
#stats = stats.drop('Score',1)


In [308]:
X = stats.iloc[:,3:-4]
y = stats.iloc[:,-1:]
random_state = 12
#split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = True, random_state=random_state)
X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)

In [309]:
"""
# Data Split chronologically
# Randomised and Normalised data
train_set, test_set= np.split(stats, [int(.80 *len(stats))])
train_set = train_set.values
np.random.shuffle(train_set)
X_train = train_set[:, 3:-4]
y_train = train_set[:, -1:]
X_test = test_set.iloc[:,3:-4]
y_test = test_set.iloc[:,-1:]

X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)

X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)
"""

'\n# Data Split chronologically\n# Randomised and Normalised data\ntrain_set, test_set= np.split(stats, [int(.80 *len(stats))])\ntrain_set = train_set.values\nnp.random.shuffle(train_set)\nX_train = train_set[:, 3:-4]\ny_train = train_set[:, -1:]\nX_test = test_set.iloc[:,3:-4]\ny_test = test_set.iloc[:,-1:]\n\nX_train = np.asarray(X_train).astype(np.float32)\nX_test = np.asarray(X_test).astype(np.float32)\ny_train = np.asarray(y_train).astype(np.float32)\ny_test = np.asarray(y_test).astype(np.float32)\n\nX_train = tf.keras.utils.normalize(X_train, axis=1)\nX_test = tf.keras.utils.normalize(X_test, axis=1)\n'

**Custom Loss Function**
Keras does not allow to give any parameter other than y_true and y_pred, to get around we encode the odds in the y_true tensor.

In [310]:
from tensorflow.python.ops import math_ops

def custom_loss_bce(y_true, y_pred):
    """
    y_true = (batch_size, output nodes)
    y_pred = (batch_size, output nodes)
    """
    odds1 = y_true[:, 0:1]
    odds2 = y_true[:, 1:2]
    y_true = y_true[:, :-1]
    loss = -1 * K.sum((odds1 * y_true -1) * K.log(odds1 * y_pred -1) + (odds2 * (1- y_true) -1) * K.log(odds2 * (1-y_pred)-1))
    
    return loss


def coef_mse(odds_game, y_true, y_pred):
    mse = math_ops.squared_difference(y_pred, y_true)  #squared difference
    odds = math_ops.squared_difference(y_pred, odds_game)   
    loss = K.mean(mse - 0.05 * odds, axis=-1) #mean over last dimension
    return loss

def mse_custom(odds_game):
    def mse(y_true, y_pred):
        return coef_mse(y_pred, y_true, odds_game )
    return mse

loss_mse = mse_custom(odds1)


def mse_simple(y_true, y_pred):
    mse = math_ops.squared_difference(y_pred, y_true) 
    loss = K.mean(mse, axis=-1)
    return loss
    

In [311]:
#opt = tf.keras.optimizers.Adam(clipnorm=0.3)

model = Sequential()
#model.add(tf.keras.layers.Flatten())
model.add(Dense(264, input_dim=264, kernel_initializer='normal', activation='relu', bias_regularizer=l2(0.01)))
model.add(Dense(32, kernel_initializer='normal', activation='relu', bias_regularizer=l2(0.01)))
model.add(Dense(32, kernel_initializer='normal', activation='relu', bias_regularizer=l2(0.01)))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
model.compile(optimizer='Adam', loss='MeanSquaredError', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size= 3, epochs=5, validation_data=(X_test, y_test),callbacks=[tensorboard_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1863660abe0>

In [312]:
val_loss, val_acc = model.evaluate(X_test, y_test)




In [313]:
prediction = model.predict(X_test[:])
label = y_test[:]
prediction_l = prediction.tolist()
pred =pd.DataFrame(prediction_l, columns=['prediction'])

stats.reset_index
index = stats[['GAME_DATE', 'HOME', 'AWAY','Odds1', 'Odds2', 'Score']]
data = label.join(index)
data.reset_index(inplace=True)
pred_data = data.join(pred)
#pred_data.to_csv('predicted_data.csv')

In [314]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 18168), started 3 days, 19:56:20 ago. (Use '!kill 18168' to kill it.)

In [132]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
def model_nn(units='8', drop='0,1'):
    model = Sequential()
    #model.add(tf.keras.layers.Flatten())
    model.add(Dense(264, input_dim=264, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=units, kernel_initializer='normal', activation='relu'))
    model.add(Dense(units=units, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(drop))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    model.compile(optimizer='adam', loss='MeanSquaredError', metrics=['accuracy'])
    #model.fit(X_train, y_train, batch_size= 5, epochs=10, validation_data=(X_test, y_test), callbacks=[tensorboard_callback])
    return model

model = KerasClassifier(build_fn=model_nn)
params={#'bias':[0.01, 0.03, 0.05, 0.07, 0.1], 
        'drop':[0.1, 0.2, 0.4, 0.5],
        'units':[8,16, 32, 64, 128],
        'batch_size':[3,5,8,10], 
        'nb_epoch':[3,5,8,10]

        }
gs=GridSearchCV(estimator=model, param_grid=params, cv=10)
# now fit the dataset to the GridSearchCV object. 
gs = gs.fit(X_train, y_train)

Using TensorFlow backend.




KeyboardInterrupt: 

In [122]:
best_params=gs.best_params_
accuracy=gs.best_score_
accuracy
best_params

NameError: name 'gs' is not defined

In [45]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
_sc = StandardScaler()
_pca = PCA(n_components = None)
_model = LogisticRegression()
log_regress_model = Pipeline([
    ('std_scaler', _sc),
    ('pca', _pca),
('regressor', _model)])

In [47]:
'''
# perform a split
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, 
                     test_size=0.3,
                     shuffle=True, 
                     random_state=random_state)
# train the model using the PCA components
'''
log_regress_model.fit(X_train,y_train)



  return f(*args, **kwargs)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Pipeline(steps=[('std_scaler', StandardScaler()), ('pca', PCA()),
                ('regressor', LogisticRegression())])

In [48]:
log_regress_model.score(X_test,y_test)

0.6262237762237762