# Model: Draw Percentage

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [2]:
# Fixed seed so the shuffle -- and therefore the membership of the
# train/validation/test sets -- is identical on every Restart & Run All.
RANDOM_SEED = 42

# Load the semicolon-separated match/odds table and discard the columns that
# are removed before the feature/target selection below.
filtered_matches = pd.read_csv('V_FILTERED_MATCH_DATA_AND_ODDS.csv', sep=';')
filtered_matches = filtered_matches.drop(
    columns=['match_id', 'avg_home_chance', 'avg_away_chance']
)

match_data = shuffle(filtered_matches, random_state=RANDOM_SEED)

# Layout used by every cell below: the last column is the target,
# everything before it is a model input.
inputs = match_data.iloc[:, :-1]
targets = match_data.iloc[:, [-1]]

# 80 % train / 10 % validation / 10 % test, cut on the shuffled row order.
train_end = int(.8 * len(match_data))
validation_end = int(.9 * len(match_data))

# Standardise the inputs with statistics estimated on the TRAINING rows only.
# Scaling the whole table before splitting (as was done previously) lets the
# validation/test means and variances leak into the training features.
scaler = preprocessing.StandardScaler().fit(inputs.iloc[:train_end])
scaled_inputs = pd.DataFrame(
    scaler.transform(inputs), index=inputs.index, columns=inputs.columns
)

# Rebuild the full table from the scaled inputs plus the untouched target
# instead of assigning floats back into the original columns through .iloc.
match_data = pd.concat([scaled_inputs, targets], axis=1)

train_data = match_data.iloc[:train_end]
validation_data = match_data.iloc[train_end:validation_end]
test_data = match_data.iloc[validation_end:]

train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]



In [3]:
# Number of passes over the training set; handed to model.fit as `epochs`.
NUM_EPOCHS: int = 10

In [4]:
# NumPy views of the split frames: the full training table (inputs + target)
# and the validation inputs/targets that model.fit receives as validation_data.
np_train_data = train_data.to_numpy()
np_val_inputs, np_val_targets = (
    split_frame.to_numpy()
    for split_frame in (validation_data_inputs, validation_data_targets)
)

In [5]:
# Network dimensions: 56 input features, 1 output value.
input_size = 56
output_size = 1

# Width shared by every hidden Dense layer.
hidden_layer_size = 50

model = Sequential([
    # Input layer: a Dense projection with no activation (i.e. linear).
    # The input width comes from `input_size` so it is declared in one place
    # instead of being hard-coded a second time here.
    # NOTE(review): a linear Dense feeding straight into another Dense adds
    # parameters but no extra non-linearity -- confirm this is intentional.
    Dense(hidden_layer_size, input_shape=(input_size,)),

    Dense(hidden_layer_size, activation='tanh'),  # 1st non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 2nd non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 3rd non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 4th non-linear hidden layer
    Dense(hidden_layer_size, activation='tanh'),  # 5th non-linear hidden layer

    # Output layer: sigmoid keeps the single prediction inside (0, 1).
    Dense(output_size, activation='sigmoid'),
])


In [6]:
# Adam optimiser; mean absolute error is both the training loss and the
# reported metric.
model.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

In [7]:
# Train for NUM_EPOCHS epochs and score the held-out validation split after
# each one.
# `validation_steps` is deliberately not passed: the validation data are
# in-memory arrays, so Keras evaluates all of them once per epoch on its own.
# The previous value (one step per *sample*) only makes sense for
# dataset/generator input and requested far more batches than exist.
# The History object is kept so the per-epoch curves can be inspected or
# plotted afterwards (this also suppresses the bare object repr in the output).
history = model.fit(
    x=train_data_inputs,
    y=train_data_targets,
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 11622 samples, validate on 1453 samples
Epoch 1/10
11622/11622 - 4s - loss: 0.0289 - mean_absolute_error: 0.0289 - val_loss: 0.0204 - val_mean_absolute_error: 0.0204
Epoch 2/10
11622/11622 - 3s - loss: 0.0203 - mean_absolute_error: 0.0203 - val_loss: 0.0201 - val_mean_absolute_error: 0.0201
Epoch 3/10
11622/11622 - 3s - loss: 0.0194 - mean_absolute_error: 0.0194 - val_loss: 0.0190 - val_mean_absolute_error: 0.0190
Epoch 4/10
11622/11622 - 3s - loss: 0.0191 - mean_absolute_error: 0.0191 - val_loss: 0.0186 - val_mean_absolute_error: 0.0186
Epoch 5/10
11622/11622 - 4s - loss: 0.0186 - mean_absolute_error: 0.0186 - val_loss: 0.0194 - val_mean_absolute_error: 0.0194
Epoch 6/10
11622/11622 - 5s - loss: 0.0183 - mean_absolute_error: 0.0183 - val_loss: 0.0187 - val_mean_absolute_error: 0.0187
Epoch 7/10
11622/11622 - 4s - loss: 0.0180 - mean_absolute_error: 0.0180 - val_loss: 0.0184 - val_mean_absolute_error: 0.0184
Epoch 8/10
11622/11622 - 4s - loss: 0.0178 - mean_absolute_error: 0.0

<tensorflow.python.keras.callbacks.History at 0x1671f6c6fc8>

In [8]:
# Fetch the first layer's parameter arrays once: entry 0 is the weight
# matrix, entry 1 the bias vector. Then print both.
weights, bias = model.layers[0].get_weights()[:2]
print('Weights:',weights,'\nBias:',bias)

Weights: [[ 0.10388693  0.24279061 -0.01504399 ... -0.0825574   0.00635989
  -0.10064153]
 [-0.22230203  0.21765487  0.1815826  ... -0.13047081  0.09688583
  -0.18554786]
 [-0.06198262 -0.02685416 -0.25947735 ...  0.18992311 -0.11955694
  -0.16059868]
 ...
 [-0.14838268 -0.19496898  0.01706151 ... -0.17905974 -0.15052901
   0.06941277]
 [ 0.09872416  0.2503076  -0.06050095 ...  0.23690121  0.14080322
   0.1947692 ]
 [-0.29799613 -0.20580654 -0.2227571  ... -0.05833258 -0.1148632
   0.06743816]] 
Bias: [-0.1683362  -0.1316827  -0.06758738 -0.00444329  0.09384669 -0.00834655
  0.13494486 -0.0692472   0.18227772  0.02011504 -0.13464421 -0.02466296
 -0.0873442  -0.0471939   0.16037095  0.08087379 -0.04079102 -0.04480609
  0.08442058 -0.03173761  0.14703475  0.0031235   0.01236831  0.04070292
 -0.00492287 -0.00047396 -0.06400999  0.01365006 -0.00592786  0.02138147
  0.09924943 -0.04445203 -0.07675245  0.04186434 -0.00199742  0.14489253
  0.10026147  0.22814535  0.0591078   0.01821437 -0.243

In [9]:
# Score the trained model on the test split; the cell's value is the
# [loss, mean_absolute_error] pair configured in model.compile.
model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)

1453/1453 - 0s - loss: 0.0172 - mean_absolute_error: 0.0172


[0.017204361362030237, 0.017204363]