# Model: Home Win Percentage

In [30]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [52]:
# Load the pre-filtered match/odds table and build standardised train/validation/test splits.
# After the drops below, the last column is used as the regression target and every other
# column as an input feature (presumably the target is the average home-win chance — confirm
# against the CSV header).
RANDOM_SEED = 42  # fixed so the shuffle, and therefore the three splits, are reproducible

filtered_matches = pd.read_csv('V_FILTERED_MATCH_DATA_AND_ODDS.csv', sep=';')

# Remove the match identifier and the draw/away chance columns; none of them is fed to the model.
filtered_matches = filtered_matches.drop(
    columns=['match_id', 'avg_draw_chance', 'avg_away_chance']
)

match_data = shuffle(filtered_matches, random_state=RANDOM_SEED)

inputs = match_data.iloc[:, :-1]
targets = match_data.iloc[:, [-1]]

# Row positions of the 80% / 10% / 10% split boundaries in the shuffled frame.
train_end = int(.8 * len(match_data))
validation_end = int(.9 * len(match_data))

# Fit the standardisation on the training rows only and reuse those statistics for the
# validation and test rows; fitting on the full frame would leak held-out information
# into the training features.
scaler = preprocessing.StandardScaler().fit(inputs.iloc[:train_end])
scaled_inputs = pd.DataFrame(
    scaler.transform(inputs), index=inputs.index, columns=inputs.columns
)

# Rebuild the frame instead of assigning through .iloc so integer feature columns are
# cleanly replaced by their float, standardised versions.
match_data = pd.concat([scaled_inputs, targets], axis=1)

# Positional slicing replaces np.split, which relies on deprecated DataFrame behaviour.
train_data = match_data.iloc[:train_end]
validation_data = match_data.iloc[train_end:validation_end]
test_data = match_data.iloc[validation_end:]

train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]



In [62]:
# Maximum number of passes over the training data.
NUM_EPOCHS: int = 10

In [56]:
# NumPy copies of the training frame and of the validation inputs/targets.
np_train_data = train_data.to_numpy()
np_val_inputs, np_val_targets = (
    frame.to_numpy() for frame in (validation_data_inputs, validation_data_targets)
)

In [81]:
# Network dimensions: 56 input features, 1 output value.
input_size = 56
output_size = 1

hidden_layer_size = 50

# Fully connected regression network.
model = Sequential([
            # Input layer; input_size is used here so the feature count is defined in one place.
            # NOTE(review): this layer has no activation, so it is a purely linear projection
            # in front of the first tanh layer — confirm that is intended.
            Dense(hidden_layer_size, input_shape=(input_size,)),

            Dense(hidden_layer_size, activation='tanh'), # 1st hidden layer
            Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer
            Dense(hidden_layer_size, activation='relu'), # 3rd hidden layer
            Dense(hidden_layer_size, activation='relu'), # 4th hidden layer
            Dense(hidden_layer_size, activation='tanh'), # 5th hidden layer

            # Sigmoid bounds the prediction to (0, 1); assumes the target is a probability
            # on that scale — confirm against the data.
            Dense(output_size, activation='sigmoid') # output layer
])


In [82]:
# Adam optimiser; mean absolute error is both the training loss and the reported metric.
model.compile(
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
    optimizer='adam',
)

In [83]:
# Train for NUM_EPOCHS epochs, evaluating on the validation arrays after every epoch.
# validation_steps is deliberately not passed: it only applies when the validation data is a
# dataset/generator, and with in-memory arrays a value of len(np_val_inputs) asks for far more
# validation batches than the arrays can provide.
# The returned History object is kept so the per-epoch losses can be inspected or plotted later.
history = model.fit(
    x=train_data_inputs,
    y=train_data_targets,
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 11622 samples, validate on 1453 samples
Epoch 1/10
11622/11622 - 3s - loss: 0.0706 - mean_absolute_error: 0.0706 - val_loss: 0.0621 - val_mean_absolute_error: 0.0621
Epoch 2/10
11622/11622 - 3s - loss: 0.0629 - mean_absolute_error: 0.0629 - val_loss: 0.0626 - val_mean_absolute_error: 0.0626
Epoch 3/10
11622/11622 - 5s - loss: 0.0615 - mean_absolute_error: 0.0615 - val_loss: 0.0652 - val_mean_absolute_error: 0.0652
Epoch 4/10
11622/11622 - 6s - loss: 0.0604 - mean_absolute_error: 0.0604 - val_loss: 0.0609 - val_mean_absolute_error: 0.0609
Epoch 5/10
11622/11622 - 5s - loss: 0.0599 - mean_absolute_error: 0.0599 - val_loss: 0.0594 - val_mean_absolute_error: 0.0594
Epoch 6/10
11622/11622 - 6s - loss: 0.0596 - mean_absolute_error: 0.0596 - val_loss: 0.0596 - val_mean_absolute_error: 0.0596
Epoch 7/10
11622/11622 - 5s - loss: 0.0588 - mean_absolute_error: 0.0588 - val_loss: 0.0602 - val_mean_absolute_error: 0.0602
Epoch 8/10
11622/11622 - 4s - loss: 0.0582 - mean_absolute_error: 0.0

<tensorflow.python.keras.callbacks.History at 0x2188d2b6888>

In [85]:
# Inspect the learned parameters of the first Dense layer (kernel matrix, then bias vector).
first_layer = model.layers[0]
weights, bias = first_layer.get_weights()
print('Weights:',weights,'\nBias:',bias)

Weights: [[ 0.10206466  0.21830001  0.0898657  ...  0.03890531  0.01779376
  -0.19366246]
 [ 0.25049007  0.03871264 -0.22089934 ... -0.10277379 -0.08752891
   0.00952068]
 [ 0.13230602 -0.23210098  0.24776262 ...  0.18382853 -0.0822441
   0.09625279]
 ...
 [-0.15147911  0.21984723 -0.0954643  ... -0.17499673 -0.1731663
   0.21045342]
 [ 0.02381097 -0.06595867  0.4001905  ... -0.01576135  0.19751753
   0.13592038]
 [-0.1356036  -0.08161564  0.02012036 ... -0.26615858  0.22189014
   0.05646576]] 
Bias: [ 1.01760402e-01 -1.65853411e-01 -4.87331785e-02 -3.48317474e-02
  1.58734843e-01  1.10200480e-01  2.88222879e-02  4.10243459e-02
  1.52023628e-01 -6.80497140e-02  1.74816176e-02 -9.38425213e-02
  1.05227314e-01 -2.41516177e-02  1.19439829e-02 -4.16721702e-02
  2.07457989e-02 -9.10397619e-02  1.17443413e-01 -2.56718118e-02
  1.42743103e-02  4.26617963e-03 -8.87356773e-02  2.99004675e-03
  2.97746975e-02  3.09709553e-02 -1.53843313e-01 -9.66278836e-02
 -1.57938397e-03 -1.38874049e-03 -1.799

In [86]:
# Score the trained model on the held-out test split; the cell displays [loss, mean_absolute_error].
model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)

1453/1453 - 0s - loss: 0.0592 - mean_absolute_error: 0.0592


[0.059178140937400044, 0.05917814]