# Model: Away Win Percentage

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [2]:
# Load the semicolon-separated match/odds table and drop three columns that
# are not fed to this model. The last remaining column is used as the
# regression target (presumably the average away-win chance -- confirm
# against the CSV schema).
filtered_matches = pd.read_csv('V_FILTERED_MATCH_DATA_AND_ODDS.csv', sep=';')
filtered_matches = filtered_matches.drop(
    columns=['match_id', 'avg_home_chance', 'avg_draw_chance']
)

# Fixed seed so the shuffle (and therefore the train/validation/test
# membership) is identical after Restart Kernel -> Run All.
SHUFFLE_SEED = 42
match_data = shuffle(filtered_matches, random_state=SHUFFLE_SEED)

# Every column but the last is a feature; the last column is the target.
inputs = match_data.iloc[:, :-1]
targets = match_data.iloc[:, [-1]]

# 80% / 10% / 10% split boundaries (row positions in the shuffled frame).
train_end = int(.8 * len(match_data))
validation_end = int(.9 * len(match_data))

# Standardise the features using mean/std estimated on the TRAINING rows only.
# Fitting on the full table would leak validation/test statistics into the
# features the model is trained on.
scaler = preprocessing.StandardScaler().fit(inputs.iloc[:train_end])
scaled_inputs = pd.DataFrame(
    scaler.transform(inputs), index=inputs.index, columns=inputs.columns
)
match_data = pd.concat([scaled_inputs, targets], axis=1)

# Positional slicing replaces np.split on a DataFrame; boundaries are the same.
train_data = match_data.iloc[:train_end]
validation_data = match_data.iloc[train_end:validation_end]
test_data = match_data.iloc[validation_end:]

train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]



In [3]:
# Number of passes over the training data requested from model.fit.
NUM_EPOCHS = 10

In [4]:
# NumPy array versions of the training frame and of the validation
# inputs/targets (the latter two are what model.fit receives as validation_data).
np_train_data = train_data.to_numpy()
np_val_inputs, np_val_targets = (
    validation_data_inputs.to_numpy(),
    validation_data_targets.to_numpy(),
)

In [5]:
# Network dimensions: 56 input features, one output value.
input_size = 56
output_size = 1

# Width shared by every layer except the output layer.
hidden_layer_size = 50

model = Sequential([
    # Input-facing layer. No activation is passed, so this layer is linear.
    # NOTE(review): a linear layer directly followed by another Dense layer
    # adds no representational power -- confirm whether an activation was
    # intended here. Left unchanged so model.layers[0] keeps its meaning.
    Dense(hidden_layer_size, input_shape=(input_size,)),

    Dense(hidden_layer_size, activation='tanh'),  # 2nd layer
    Dense(hidden_layer_size, activation='relu'),  # 3rd layer
    Dense(hidden_layer_size, activation='relu'),  # 4th layer
    Dense(hidden_layer_size, activation='relu'),  # 5th layer
    Dense(hidden_layer_size, activation='tanh'),  # 6th layer

    # Sigmoid keeps the single prediction inside (0, 1).
    Dense(output_size, activation='sigmoid'),  # output layer
])


In [6]:
# Train with Adam on mean absolute error; the same quantity is also reported
# as a metric, so evaluate() returns it twice (loss, then metric).
model.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

In [7]:
# Fit for NUM_EPOCHS epochs, scoring the validation split after each one.
# validation_steps is intentionally not passed: it is a number of *batches*
# (not samples) and is documented as relevant only for dataset inputs. With
# in-memory arrays Keras evaluates the whole validation set every epoch.
# The returned History is kept so the loss curves can be inspected later.
history = model.fit(
    x=train_data_inputs,
    y=train_data_targets,
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 11622 samples, validate on 1453 samples
Epoch 1/10
11622/11622 - 4s - loss: 0.0643 - mean_absolute_error: 0.0643 - val_loss: 0.0548 - val_mean_absolute_error: 0.0548
Epoch 2/10
11622/11622 - 4s - loss: 0.0548 - mean_absolute_error: 0.0548 - val_loss: 0.0544 - val_mean_absolute_error: 0.0544
Epoch 3/10
11622/11622 - 3s - loss: 0.0543 - mean_absolute_error: 0.0543 - val_loss: 0.0527 - val_mean_absolute_error: 0.0527
Epoch 4/10
11622/11622 - 4s - loss: 0.0527 - mean_absolute_error: 0.0527 - val_loss: 0.0530 - val_mean_absolute_error: 0.0530
Epoch 5/10
11622/11622 - 4s - loss: 0.0525 - mean_absolute_error: 0.0525 - val_loss: 0.0544 - val_mean_absolute_error: 0.0544
Epoch 6/10
11622/11622 - 5s - loss: 0.0517 - mean_absolute_error: 0.0517 - val_loss: 0.0512 - val_mean_absolute_error: 0.0512
Epoch 7/10
11622/11622 - 4s - loss: 0.0510 - mean_absolute_error: 0.0510 - val_loss: 0.0537 - val_mean_absolute_error: 0.0537
Epoch 8/10
11622/11622 - 4s - loss: 0.0508 - mean_absolute_error: 0.0

<tensorflow.python.keras.callbacks.History at 0x1f2f34986c8>

In [8]:
# Fetch the parameters of the first layer once, then show its kernel
# (element 0) and bias (element 1).
first_layer_params = model.layers[0].get_weights()
weights = first_layer_params[0]
bias = first_layer_params[1]
print('Weights:',weights,'\nBias:',bias)

Weights: [[ 0.12735386  0.08076914 -0.1517998  ...  0.0868189   0.03418408
   0.11976431]
 [ 0.00432024 -0.05226181  0.1969868  ... -0.05728327 -0.06328007
  -0.10835534]
 [-0.0157342  -0.15316749 -0.05873314 ... -0.20975208 -0.11809862
  -0.11761456]
 ...
 [-0.2509967   0.08950213  0.1793398  ...  0.07985525  0.2488518
   0.00612446]
 [ 0.09311803  0.14829226 -0.10724083 ... -0.1070584   0.12281338
  -0.25542092]
 [ 0.05832912 -0.17578146 -0.24933635 ...  0.21987855  0.33965066
  -0.27734914]] 
Bias: [ 0.02500812  0.22834527  0.04662276 -0.0231568   0.04222771  0.07133017
  0.04612348  0.1101325   0.01102189  0.20431095 -0.07013229  0.05363092
 -0.04448056 -0.0566646  -0.09817509 -0.0506188   0.11090074 -0.14473751
  0.0228925  -0.03464027  0.06122638 -0.11304903 -0.04114898  0.0231714
  0.15037096  0.03952263 -0.11226122 -0.13273135 -0.00894949 -0.0999833
 -0.01717315 -0.0077774   0.02251551  0.08131626 -0.00848938 -0.09757219
  0.11732256  0.10249638  0.06033636 -0.06576779  0.21689

In [9]:
# Score the model on the test split; the cell displays the returned
# [loss, mean_absolute_error] pair.
model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)

1453/1453 - 0s - loss: 0.0562 - mean_absolute_error: 0.0562


[0.05618946172978988, 0.056189466]