# Model: Away Percentage

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [2]:
# Seed for the row shuffle so that train/validation/test membership is reproducible.
SEED = 42

# Load the match/odds table and drop the identifier and the draw/home chance columns.
# NOTE(review): the remaining last column is presumably the away chance (the target) - confirm.
filtered_matches = pd.read_csv('V_MATCH_DATA_AND_ODDS.csv', sep=';').drop(
    columns=['match_id', 'avg_draw_chance', 'avg_home_chance']
)

shuffled_matches = shuffle(filtered_matches, random_state=SEED)

# Every column except the last is treated as a feature; the last column is the target.
inputs = shuffled_matches.iloc[:, :-1]
targets = shuffled_matches.iloc[:, [-1]]

# Row positions of the 80% / 10% / 10% split boundaries in the shuffled frame.
n_rows = len(shuffled_matches)
train_end, validation_end = int(.8 * n_rows), int(.9 * n_rows)

# Fit the standardization on the training rows only, then apply it to all rows.
# Scaling with statistics of the whole table would leak validation/test
# information into the features the model is trained on.
scaler = preprocessing.StandardScaler().fit(inputs.iloc[:train_end])

# Standardized features plus the untouched target, in shuffled row order.
match_data = pd.concat(
    [
        pd.DataFrame(scaler.transform(inputs), index=inputs.index, columns=inputs.columns),
        targets,
    ],
    axis=1,
)

# Positional slicing replaces np.split on a DataFrame and yields the same three pieces.
train_data = match_data.iloc[:train_end]
validation_data = match_data.iloc[train_end:validation_end]
test_data = match_data.iloc[validation_end:]
assert len(train_data) + len(validation_data) + len(test_data) == n_rows

train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]



In [3]:
# Number of training epochs handed to model.fit via its `epochs` argument.
NUM_EPOCHS = 10

In [4]:
# NumPy array versions of the full training frame (features and target together)
# and of the validation features / validation targets.
np_train_data = train_data.to_numpy()
np_val_inputs, np_val_targets = (
    validation_data_inputs.to_numpy(),
    validation_data_targets.to_numpy(),
)

In [5]:
# Network dimensions: 56 input features, 1 output value.
input_size = 56
output_size = 1

# Width shared by every hidden layer.
hidden_layer_size = 50

model = Sequential([
    # 1st hidden layer: no activation is given, so it is a plain affine map.
    # input_shape uses input_size so the feature count lives in one place.
    Dense(hidden_layer_size, input_shape=(input_size,)),

    Dense(hidden_layer_size, activation='tanh'),  # 2nd hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 3rd hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 4th hidden layer
    Dense(hidden_layer_size, activation='relu'),  # 5th hidden layer
    Dense(hidden_layer_size, activation='tanh'),  # 6th hidden layer

    Dense(output_size, activation='sigmoid')  # output layer, squashed into (0, 1)
])


In [6]:
# Optimize mean absolute error with Adam; the same quantity is also reported as a metric.
model.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

In [7]:
# Train for NUM_EPOCHS epochs, evaluating on the validation arrays after each epoch.
# validation_steps is intentionally not passed: Keras documents it as relevant only
# when validation_data is a dataset, and the validation set here is a pair of
# in-memory arrays that is evaluated in full.
# The returned History object is kept so the per-epoch losses can be inspected later.
history = model.fit(
    x=train_data_inputs,
    y=train_data_targets,
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 17834 samples, validate on 2229 samples
Epoch 1/10
17834/17834 - 7s - loss: 0.0618 - mean_absolute_error: 0.0618 - val_loss: 0.0582 - val_mean_absolute_error: 0.0582
Epoch 2/10
17834/17834 - 7s - loss: 0.0562 - mean_absolute_error: 0.0562 - val_loss: 0.0547 - val_mean_absolute_error: 0.0547
Epoch 3/10
17834/17834 - 10s - loss: 0.0551 - mean_absolute_error: 0.0551 - val_loss: 0.0551 - val_mean_absolute_error: 0.0551
Epoch 4/10
17834/17834 - 7s - loss: 0.0544 - mean_absolute_error: 0.0544 - val_loss: 0.0545 - val_mean_absolute_error: 0.0545
Epoch 5/10
17834/17834 - 7s - loss: 0.0542 - mean_absolute_error: 0.0542 - val_loss: 0.0547 - val_mean_absolute_error: 0.0547
Epoch 6/10
17834/17834 - 7s - loss: 0.0535 - mean_absolute_error: 0.0535 - val_loss: 0.0557 - val_mean_absolute_error: 0.0557
Epoch 7/10
17834/17834 - 7s - loss: 0.0535 - mean_absolute_error: 0.0535 - val_loss: 0.0547 - val_mean_absolute_error: 0.0547
Epoch 8/10
17834/17834 - 8s - loss: 0.0531 - mean_absolute_error: 0.

<tensorflow.python.keras.callbacks.History at 0x13886a08b08>

In [8]:
# Fetch the first layer's parameters once: element 0 is printed as the weights,
# element 1 as the bias.
first_layer_params = model.layers[0].get_weights()
weights, bias = first_layer_params[0], first_layer_params[1]
print('Weights:', weights, '\nBias:', bias)

Weights: [[ 0.12415186 -0.10059033  0.17459673 ...  0.23551574 -0.10637433
  -0.08660821]
 [-0.04406554  0.09460229  0.00806717 ...  0.16606538 -0.16979578
  -0.13516305]
 [ 0.07165766 -0.10130985 -0.09929176 ... -0.02688134 -0.07924639
  -0.10402209]
 ...
 [ 0.11406249  0.13629973  0.00190357 ... -0.00750989 -0.04181852
   0.16349515]
 [ 0.29017445  0.18121815  0.25181982 ... -0.25391293 -0.15847446
  -0.3485326 ]
 [ 0.16480426  0.07096547  0.08549314 ...  0.02736171  0.15357596
  -0.23183767]] 
Bias: [-0.1749386  -0.02969323 -0.10143669  0.06779473 -0.01896543 -0.06016402
  0.19284761  0.16590321  0.02934716 -0.12643662 -0.10541745 -0.17237599
  0.04890103 -0.24790168  0.24032857  0.05177315 -0.13648981  0.14722107
  0.16472714  0.16779825 -0.00572857 -0.09639323  0.09646895  0.01811767
 -0.0775419  -0.00554851  0.014658    0.11623517 -0.04757046 -0.11560573
  0.11102628  0.03774563 -0.04361786  0.00364473 -0.19528031 -0.15158932
  0.01798431 -0.07920744 -0.01173909 -0.01565387 -0.15

In [9]:
# Score the trained model on the test split; the result lists the loss
# followed by the compiled metric.
model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)

2230/2230 - 0s - loss: 0.0535 - mean_absolute_error: 0.0535


[0.05353992861014845, 0.053539917]