# Model: Home Percentage

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [2]:
# Seed for the row shuffle so the train/validation/test membership is the same
# on every "Restart & Run All".
SPLIT_SEED = 42

# Load the semicolon-delimited match/odds table.
raw_matches = pd.read_csv('V_MATCH_DATA_AND_ODDS.csv', sep=';')

# Remove the row identifier and the draw/away odds columns.
# NOTE(review): the code below treats the LAST remaining column as the target;
# presumably that is the home-chance column -- confirm against the CSV header.
filtered_matches = raw_matches.drop(
    columns=['match_id', 'avg_draw_chance', 'avg_away_chance']
)

shuffled_matches = shuffle(filtered_matches, random_state=SPLIT_SEED)

# By position: every column except the last is a feature, the last is the target.
inputs = shuffled_matches.iloc[:, :-1]
targets = shuffled_matches.iloc[:, [-1]]

# 80 % train / 10 % validation / 10 % test, split by row position.
n_rows = len(shuffled_matches)
train_end = int(.8 * n_rows)
validation_end = int(.9 * n_rows)

# Fit the standardisation on the TRAINING rows only. Scaling the whole table
# before splitting would let validation/test statistics leak into training.
scaler = preprocessing.StandardScaler().fit(inputs.iloc[:train_end])


def _with_scaled_inputs(frame):
    """Return a copy of `frame` whose feature columns are standardised.

    The feature columns (all but the last) are transformed with the scaler
    fitted on the training rows; the target column (last) is left unchanged.
    Row index and column order are preserved.
    """
    scaled_inputs = pd.DataFrame(
        scaler.transform(frame.iloc[:, :-1]),
        index=frame.index,
        columns=frame.columns[:-1],
    )
    return pd.concat([scaled_inputs, frame.iloc[:, [-1]]], axis=1)


train_data = _with_scaled_inputs(shuffled_matches.iloc[:train_end])
validation_data = _with_scaled_inputs(shuffled_matches.iloc[train_end:validation_end])
test_data = _with_scaled_inputs(shuffled_matches.iloc[validation_end:])

# Keep `match_data` available as the full shuffled table with scaled inputs.
match_data = pd.concat([train_data, validation_data, test_data])

train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]



In [3]:
# Number of training epochs; passed as `epochs` to model.fit.
NUM_EPOCHS: int = 10

In [4]:
# NumPy-array versions of the training frame and of the validation split.
np_train_data = train_data.to_numpy()
np_val_inputs, np_val_targets = (
    validation_data_inputs.to_numpy(),
    validation_data_targets.to_numpy(),
)

In [5]:
# Network dimensions: 56 input features, 1 output value.
# input_size must equal the number of feature columns passed to model.fit.
input_size = 56
output_size = 1

hidden_layer_size = 50

model = Sequential([
    # First layer: no activation is given, so it is a purely linear projection
    # of the inputs onto hidden_layer_size units.
    # NOTE(review): a linear layer feeding straight into another Dense layer
    # adds parameters but no extra non-linearity -- confirm this is intended.
    Dense(hidden_layer_size, input_shape=(input_size,)),

    Dense(hidden_layer_size, activation='tanh'),  # 1st non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 2nd non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 3rd non-linear hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 4th non-linear hidden layer
    Dense(hidden_layer_size, activation='tanh'),  # 5th non-linear hidden layer

    # Sigmoid keeps the single prediction inside (0, 1).
    Dense(output_size, activation='sigmoid'),  # output layer
])


In [6]:
# Adam optimiser; mean absolute error is used both as the training loss and
# as the reported metric.
model.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

In [7]:
# Train for NUM_EPOCHS epochs and evaluate on the validation split after each
# one. `validation_steps` is deliberately not passed: it counts validation
# BATCHES and only applies when the validation data is a tf.data dataset; with
# in-memory arrays Keras evaluates on the full validation set on its own.
model.fit(
    x=train_data_inputs,
    y=train_data_targets,
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 17834 samples, validate on 2229 samples
Epoch 1/10
17834/17834 - 9s - loss: 0.0693 - mean_absolute_error: 0.0693 - val_loss: 0.0703 - val_mean_absolute_error: 0.0703
Epoch 2/10
17834/17834 - 7s - loss: 0.0636 - mean_absolute_error: 0.0636 - val_loss: 0.0653 - val_mean_absolute_error: 0.0653
Epoch 3/10
17834/17834 - 7s - loss: 0.0627 - mean_absolute_error: 0.0627 - val_loss: 0.0640 - val_mean_absolute_error: 0.0640
Epoch 4/10
17834/17834 - 7s - loss: 0.0617 - mean_absolute_error: 0.0617 - val_loss: 0.0620 - val_mean_absolute_error: 0.0620
Epoch 5/10
17834/17834 - 8s - loss: 0.0615 - mean_absolute_error: 0.0615 - val_loss: 0.0621 - val_mean_absolute_error: 0.0621
Epoch 6/10
17834/17834 - 8s - loss: 0.0610 - mean_absolute_error: 0.0610 - val_loss: 0.0652 - val_mean_absolute_error: 0.0652
Epoch 7/10
17834/17834 - 8s - loss: 0.0607 - mean_absolute_error: 0.0607 - val_loss: 0.0617 - val_mean_absolute_error: 0.0617
Epoch 8/10
17834/17834 - 8s - loss: 0.0602 - mean_absolute_error: 0.0

<tensorflow.python.keras.callbacks.History at 0x2345f7cb688>

In [8]:
# Inspect the parameters of the first layer: get_weights() is read once and
# its first two entries are shown as the weight matrix and the bias vector.
first_layer_params = model.layers[0].get_weights()
weights = first_layer_params[0]
bias = first_layer_params[1]
print('Weights:', weights, '\nBias:', bias)

Weights: [[-0.21996641  0.11627667  0.11595031 ...  0.12832798 -0.28834695
   0.15553707]
 [ 0.10679084 -0.13966583  0.13365592 ...  0.06349149 -0.1411187
  -0.1074095 ]
 [-0.1119759   0.02808411 -0.12869304 ... -0.17122754 -0.18191028
   0.09573498]
 ...
 [ 0.03947723 -0.20779364 -0.03335626 ... -0.22651953  0.13973345
   0.18269868]
 [ 0.09513097 -0.2500962  -0.02996685 ... -0.19513454  0.16070208
   0.49649084]
 [ 0.31121954 -0.11309698  0.09626629 ... -0.08522611  0.08487654
   0.2166311 ]] 
Bias: [ 0.05671831  0.24692705 -0.06788044 -0.11486588 -0.07205781  0.00717139
  0.00952448 -0.06020162  0.19003627 -0.04663411 -0.1226982  -0.1119312
 -0.04275553  0.08923396 -0.08954637  0.0037681   0.01397459  0.1996133
  0.10394748 -0.05582986  0.13814144  0.02066846 -0.06135906  0.1669729
 -0.05693385  0.00606518 -0.20967059  0.03453903  0.02741701  0.16466492
  0.09356448  0.08241426  0.0948251  -0.13242082 -0.10233413 -0.04765001
  0.00237435 -0.01711112 -0.1237902  -0.12403596 -0.074666

In [9]:
# Score the trained model on the held-out test split; the returned
# [loss, metric] list is the cell's displayed output.
model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)

2230/2230 - 0s - loss: 0.0622 - mean_absolute_error: 0.0622


[0.06217353489968274, 0.062173538]