In [83]:
import numpy as np
import pandas as pd
import data_preprocessing as prep
import importlib

import train_b
importlib.reload(train_b)
from train_b import score, score_sample, predictions_to_goals, prediction_to_goals, show_predictions

importlib.reload(prep)

<module 'data_preprocessing' from 'c:\\Users\\bmk1bj\\Documents\\GIT_repositories\\AIMatch\\data_preprocessing.py'>

### Ideas

- [x] Split train and validation sets randomly
- [ ] Drop early matches at the DataFrame level
- [ ] y = [home_goals - away_goals, total_goals]
- [x] Model with two unrelated outputs - OK
- [ ] Extend sample_weight by tournament_group
- [ ] Consider the team strength trend this year (or this cup)


In [85]:
dataset = prep.Dataset()
(X_train, Y_train, X_val, Y_val, X_test,
 sample_weights_train, sample_weights_val) = dataset.get_input_data(
    label_weights=[1, 1], sample_weights_degree=2, random_split=False, keep_tail=3)
results_df = pd.read_csv("Data/results.txt")

# Reference values
# = total score for validation data if results are hard-coded and all same without any prediction
# all models should overcome those values
#
# Keys are the constant scorelines being evaluated; values are the constant
# (column 0, column 1) label pair, in the column layout of Y_val, that encodes
# that scoreline.
REFERENCE_LABELS = {
    "0:0": (0, 0),
    "1:0": (1, 1),
    "1:1": (0, 1),
    "0:1": (-1, 1),
    "2:1": (1, 2),
    "2:0": (2, 2),
}

print("    Reference values:")
# Also read as a global by fit_simple_regressor when it reports model scores.
max_score = 4 * len(Y_val)
ref_scores = {}
for result, labels in REFERENCE_LABELS.items():
    # Broadcast the constant label pair over every validation row, then apply
    # the label weights to BOTH columns so every baseline is weighted the same way.
    Y_pred = np.ones(Y_val.shape) * labels * dataset.label_weights
    ref_scores[result] = score(Y_pred, Y_val, label_weights=dataset.label_weights)
    print(result, ref_scores[result], "/", max_score, " - %s points per match" % (np.round(ref_scores[result]/len(Y_val), 2)))

Number of relevant labeled matches: 5167/14644
X shape =  (5167, 405)
Y shape =  (5167, 2)
sample weights shape =  (5167,)
X shape =  (48, 405)
Y shape =  (48, 2)
sample weights shape =  (48,)
    Reference values:
0:0 846 / 2068  - 1.64 points per match
1:0 943 / 2068  - 1.82 points per match
1:1 864 / 2068  - 1.67 points per match
0:1 394 / 2068  - 0.76 points per match
2:1 914 / 2068  - 1.77 points per match
2:0 853 / 2068  - 1.65 points per match


In [86]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer


def _fit_with_optional_weights(regressor, X, y, sample_weight):
    """Fit ``regressor`` on (X, y), using ``sample_weight`` when its ``fit`` accepts it."""
    try:
        regressor.fit(X, y, sample_weight=sample_weight)
    except TypeError:
        # Raised when fit() has no `sample_weight` keyword: fall back to an
        # unweighted fit. Any other failure is a real error and propagates
        # instead of being silently retried.
        regressor.fit(X, y)
        print("Sample weights unused")


def fit_simple_regressor(X_train, Y_train, X_val, Y_val, X_test, sample_weights_train, regressor, name: str, label_weights: list, show_predicted_indexes = [0, 1], split_models=False):
    """Fit a scikit-learn style regressor, score it on the validation set and print predictions.

    Relies on notebook-level names defined in other cells: ``score`` and
    ``show_predictions`` (imported from ``train_b``), plus the globals
    ``max_score``, ``dataset`` and ``results_df``.

    Args:
        X_train, Y_train: training features and two-column labels.
        X_val, Y_val: validation features and labels used for scoring.
        X_test: features of the matches to predict.
        sample_weights_train: per-sample weights, used only if the regressor's
            ``fit`` accepts a ``sample_weight`` keyword.
        regressor: estimator exposing ``fit`` and ``predict``. It is fitted in
            place; with ``split_models=True`` it ends up holding the fit for
            label column 1.
        name: label printed before and after fitting.
        label_weights: forwarded to ``score`` and ``show_predictions``.
        show_predicted_indexes: row indexes forwarded to ``show_predictions``.
        split_models: if True, fit the regressor separately on label column 0
            and then on label column 1; otherwise fit both columns jointly.

    Returns:
        None. Results are printed.
    """
    print(name)

    if split_models:
        val_columns, test_columns = [], []
        for label_index in (0, 1):
            # Pass a 1-D target: a column vector makes scikit-learn emit a
            # DataConversionWarning (column_or_1d) for some estimators.
            _fit_with_optional_weights(regressor, X_train, Y_train[:, label_index], sample_weights_train)
            # Predict before the next iteration refits the same estimator.
            val_columns.append(regressor.predict(X_val).reshape(-1, 1))
            test_columns.append(regressor.predict(X_test).reshape(-1, 1))
        Y_val_pred = np.hstack(val_columns)
        Y_test_pred = np.hstack(test_columns)
    else:
        _fit_with_optional_weights(regressor, X_train, Y_train, sample_weights_train)
        Y_val_pred = regressor.predict(X_val)
        Y_test_pred = regressor.predict(X_test)

    # Score on rounded predictions; the raw predictions are what gets displayed below.
    reg_score = score(np.round(Y_val_pred), Y_val, label_weights=label_weights)
    print(name)
    print(reg_score, "/", max_score, " - %s points per match" % (np.round(reg_score/len(Y_val), 2)))

    print("Val:")
    show_predictions(dataset, X_val, Y_val, Y_val_pred, show_predicted_indexes, label_weights)
    print("Test:")
    # Known results as (home_score, away_score) rows, read from results_df.
    Y_test = np.hstack((np.vstack(results_df.home_score), np.vstack(results_df.away_score)))
    show_predictions(dataset, X_test, Y_test, Y_test_pred, show_predicted_indexes, label_weights)
    

# Candidate regressors, keyed by the name printed in the report.
models = {
    #"Tree": DecisionTreeRegressor(random_state=0),
    "Linear": LinearRegression(),
    # Fixed seed so the MLP score is reproducible across kernel restarts.
    "MLP": MLPRegressor(hidden_layer_sizes = (128, 32, 8), activation="identity", random_state=0)
}

# Show a prediction for every test row.
show_predicted_indexes = list(range(len(X_test)))
# Fraction of the training rows (taken from the end of the arrays) used for fitting.
trainable = 0.1
start = int((1 - trainable) * len(X_train))
for name, model in models.items():
    fit_simple_regressor(X_train[start:], Y_train[start:], X_val, Y_val, X_test,
                         sample_weights_train[start:], model, name,
                         show_predicted_indexes = show_predicted_indexes, label_weights=dataset.label_weights,
                         split_models = True
                         )


Linear
Linear
740 / 2068  - 1.43 points per match
Val:
Uruguay  x  Chile :  [0.57666016 1.15576172] - [0. 1.]  ...................  output (weighted):  [-0.57910156  0.57666016] [0. 1.]    original:  [-0.57910156  0.57666016] [0. 1.]
Argentina  x  Paraguay :  [ 0.89697266 -0.32373047] - [1. 1.]  ...................  output (weighted):  [1.22070312 0.89697266] [1. 1.]    original:  [1.22070312 0.89697266] [1. 1.]
Scotland  x  Croatia :  [3.81201172 6.23388672] - [-2.  3.]  ...................  output (weighted):  [-2.421875    3.81201172] [-2.  3.]    original:  [-2.421875    3.81201172] [-2.  3.]
England  x  Czech Republic :  [ 1.38916016 -0.44970703] - [1. 1.]  ...................  output (weighted):  [1.83886719 1.38916016] [1. 1.]    original:  [1.83886719 1.38916016] [1. 1.]
Spain  x  Slovakia :  [2.90039062 0.44140625] - [5. 5.]  ...................  output (weighted):  [2.45898438 2.90039062] [5. 5.]    original:  [2.45898438 2.90039062] [5. 5.]
Sweden  x  Poland :  [1.50878906 2

  y = column_or_1d(y, warn=True)


Sample weights unused


  y = column_or_1d(y, warn=True)


Sample weights unused
MLP
851 / 2068  - 1.65 points per match
Val:
Uruguay  x  Chile :  [0.79829348 1.36447032] - [0. 1.]  ...................  output (weighted):  [-0.56617685  0.79829348] [0. 1.]    original:  [-0.56617685  0.79829348] [0. 1.]
Argentina  x  Paraguay :  [ 0.8729937  -0.22631425] - [1. 1.]  ...................  output (weighted):  [1.09930794 0.8729937 ] [1. 1.]    original:  [1.09930794 0.8729937 ] [1. 1.]
Scotland  x  Croatia :  [3.91547162 6.36203366] - [-2.  3.]  ...................  output (weighted):  [-2.44656204  3.91547162] [-2.  3.]    original:  [-2.44656204  3.91547162] [-2.  3.]
England  x  Czech Republic :  [ 1.20071166 -0.84525228] - [1. 1.]  ...................  output (weighted):  [2.04596394 1.20071166] [1. 1.]    original:  [2.04596394 1.20071166] [1. 1.]
Spain  x  Slovakia :  [2.69520412 0.25901056] - [5. 5.]  ...................  output (weighted):  [2.43619356 2.69520412] [5. 5.]    original:  [2.43619356 2.69520412] [5. 5.]
Sweden  x  Poland :  [

In [81]:
# Known results as (home_score, away_score) rows, one row per entry of results_df.
known_results = np.column_stack((results_df.home_score, results_df.away_score))
print(known_results)

[[ 0.  2.]
 [ 6.  2.]
 [ 0.  2.]
 [ 1.  1.]
 [ 1.  2.]
 [ 0.  0.]
 [ 0.  0.]
 [ 4.  1.]
 [ 0.  0.]
 [ 1.  2.]
 [ 7.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 0.  0.]
 [ 3.  2.]
 [ 3.  2.]
 [ 0.  2.]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]
 [nan nan]]


In [18]:
import keras
from keras.models import Model, save_model
from keras.optimizers import SGD
from keras.layers import Input, Dense
from keras.callbacks import Callback

In [19]:
class Scorer(Callback):
    """Keras callback that prints the ``score`` of the model on a fixed data set after each epoch.

    Args:
        X: features the model is evaluated on.
        Y: labels passed to ``score`` together with the predictions.
        label_weights: forwarded to ``score``.
    """

    def __init__(self, X, Y, label_weights):
        # Let the Keras base class set up its own state before adding ours.
        super().__init__()
        self.X_val, self.Y_val = X, Y
        self.label_weights = label_weights

    def on_epoch_end(self, batch, logs=None):
        """Predict on the stored data and print its score.

        ``batch`` receives whatever Keras passes as the first argument of
        ``on_epoch_end`` (the epoch index); the parameter name is kept
        unchanged. ``logs`` is unused.
        """
        # The model returns one array per output; stack them side by side so
        # the columns line up with the columns of Y_val.
        Y_pred = np.hstack(self.model.predict(self.X_val))

        print("X_val score = ", score(Y_pred, self.Y_val, label_weights=self.label_weights))

In [20]:
# Two independent branches share only the input layer; each ends in its own
# single-unit output head.
model_input = Input(shape=(X_train.shape[1],))
# First branch: 128 -> 32 -> goal_diff
a_dense_1 = Dense(128, activation = "relu")(model_input)
a_dense_2 = Dense(32, activation = "relu")(a_dense_1)
a_dense_4 = Dense(1, name = "goal_diff", activation = "linear")(a_dense_2)
# Second branch: 128 -> winner_goals
b_dense_1 = Dense(128, activation = "relu")(model_input)
b_dense_4 = Dense(1, name = "winner_goals", activation = "relu")(b_dense_1)
# NOTE(review): the previous version also built an 8-unit layer in the first
# branch and 32- and 8-unit layers in the second, but never connected them to
# an output, so they were not part of the model. They are removed here and the
# effective wiring is unchanged. Re-wire the heads if deeper branches were intended.

model = Model(model_input, outputs=[a_dense_4, b_dense_4])

# `learning_rate` replaces the deprecated `lr` argument.
optimizer = SGD(learning_rate=0.02)
model.compile(optimizer=optimizer,loss={'goal_diff': 'mse', 'winner_goals': 'mae'}, metrics={'goal_diff': 'mse', 'winner_goals': 'mae'})

  super().__init__(name, **kwargs)


In [35]:
# A multi-output model needs one target per output. Passing the whole (n, 2)
# label array as a single target does not map column 0 to `goal_diff` and
# column 1 to `winner_goals`, so split the columns explicitly by output name.
model.fit(
    X_train,
    {"goal_diff": Y_train[:, [0]], "winner_goals": Y_train[:, [1]]},
    validation_data=(X_val, {"goal_diff": Y_val[:, [0]], "winner_goals": Y_val[:, [1]]}),
    epochs=17,
    batch_size=16,
    callbacks=[Scorer(X_val, Y_val, dataset.label_weights)],
    shuffle=True,
)

Epoch 1/17
X_val score =  9174
Epoch 2/17
X_val score =  9247
Epoch 3/17
X_val score =  9324
Epoch 4/17
X_val score =  9257
Epoch 5/17
X_val score =  9376
Epoch 6/17
X_val score =  9504
Epoch 7/17
X_val score =  9340
Epoch 8/17
X_val score =  9444
Epoch 9/17
X_val score =  9458
Epoch 10/17
X_val score =  9306
Epoch 11/17
X_val score =  9176
Epoch 12/17
X_val score =  9405
Epoch 13/17
X_val score =  9251
Epoch 14/17
X_val score =  9428
Epoch 15/17
X_val score =  9351
Epoch 16/17
X_val score =  9293
Epoch 17/17
X_val score =  9412


<keras.callbacks.History at 0x1f4ce90f8e0>

In [36]:
# The model returns one array per output head; hstack joins them column-wise
# into a single prediction matrix.
test_outputs = model.predict(X_test)
Y_test_pred = np.hstack(test_outputs)
print(Y_test_pred)

val_outputs = model.predict(X_val)
Y_val_pred = np.hstack(val_outputs)

[[ 1.4983909   1.3967335 ]
 [ 0.93080544  1.4370933 ]
 [ 0.35802567  0.51986897]
 [ 0.36150658  0.98317933]
 [ 2.895749    1.6963801 ]
 [ 1.6186655   1.3270063 ]
 [ 0.29726958  1.2512233 ]
 [ 3.7903662   1.5981017 ]
 [-0.03740227  0.6660776 ]
 [ 1.7563066   1.593806  ]
 [ 1.586961    1.7046671 ]
 [ 0.7479663   1.3598689 ]
 [ 0.3257562   0.9505944 ]
 [ 1.8843226   0.7929591 ]
 [ 1.6551496   1.2634948 ]
 [ 2.0282583   1.6181508 ]
 [ 0.48179615  1.0961652 ]
 [ 0.9493679   0.9181212 ]
 [ 1.6722834   1.8470808 ]
 [ 1.2761515   1.7836837 ]
 [ 1.2313883   0.9348569 ]
 [ 2.333213    1.2558079 ]
 [ 0.8096037   1.7885025 ]
 [ 3.091448    1.5915183 ]
 [ 0.6172296   1.2838815 ]
 [ 0.55260324  0.97841   ]
 [ 1.0278314   1.2831444 ]
 [ 0.82484555  1.2056735 ]
 [ 0.7106583   0.7047482 ]
 [ 0.72409344  0.8212836 ]
 [ 1.4734733   2.305264  ]
 [ 1.2289196   1.0155398 ]
 [ 0.46421027  0.88693726]
 [ 3.6491055   1.8744656 ]
 [ 0.24436712  0.26062563]
 [ 0.66399586  1.7688018 ]
 [ 0.46509916  0.8841263 ]
 

In [49]:
# Only the first ten rows of each set are displayed.
show_predicted_indexes = list(range(10))

print("Val:")
show_predictions(dataset, X_val, Y_val, Y_val_pred, show_predicted_indexes, dataset.label_weights)

print("Test:")
# An all-zero array of the prediction's shape is passed in place of test labels.
placeholder_test_labels = np.zeros(Y_test_pred.shape)
show_predictions(dataset, X_test, placeholder_test_labels, Y_test_pred, show_predicted_indexes, dataset.label_weights)

Val:
Morocco  x  Zimbabwe :  [2.07186627 1.04846382] - [1. 0.]  ...................  output (weighted):  [1.0234025 2.0718663] [1. 1.]    original:  [1.02340245 2.07186627] [1. 1.]
Senegal  x  DR Congo :  [1.01010394 0.25383711] - [0. 0.]  ...................  output (weighted):  [0.75626683 1.010104  ] [0. 0.]    original:  [0.75626683 1.01010394] [0. 0.]
Tunisia  x  Ghana :  [ 1.02262831 -0.0704391 ] - [2. 0.]  ...................  output (weighted):  [1.0930674 1.0226283] [2. 2.]    original:  [1.09306741 1.02262831] [2. 2.]
Morocco  x  Angola :  [1.93998945 0.42680788] - [2. 2.]  ...................  output (weighted):  [1.5131816 1.9399894] [0. 2.]    original:  [1.51318157 1.93998945] [0. 2.]
Saudi Arabia  x  Sweden :  [0.9934597  0.40741765] - [1. 1.]  ...................  output (weighted):  [0.58604205 0.9934597 ] [0. 1.]    original:  [0.58604205 0.9934597 ] [0. 1.]
United Arab Emirates  x  South Korea :  [0.8671416 0.134148 ] - [1. 0.]  ...................  output (weighted)

In [44]:
import tempfile
import os

MODEL_DIR = tempfile.gettempdir()
version = 3
# The model is exported under tf_models/<version>, relative to the working directory.
export_path = os.path.join("tf_models", str(version))
print('export_path = {}\n'.format(export_path))

# Saving options collected in one place and forwarded unchanged to save_model.
save_options = dict(
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
)
save_model(model, export_path, **save_options)


export_path = tf_models\3

INFO:tensorflow:Assets written to: tf_models\3\assets


In [39]:
# Print the first test feature row as a plain Python list.
first_test_row = X_test[0]
print(first_test_row.tolist())

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [43]:
import requests
import json

# Send the first three test rows to the model's REST predict endpoint
# (presumably a locally running TensorFlow Serving instance - confirm).
headers = {"content-type": "application/json"}
data = json.dumps({"signature_name": "serving_default", "instances": X_test[0:3].tolist()})
# A timeout keeps the cell from hanging forever when the server is not reachable.
json_response = requests.post('http://localhost:8501/v1/models/aimatch/versions/1:predict', data=data, headers=headers, timeout=10)
# Fail with a clear HTTP error instead of a KeyError on an error payload.
json_response.raise_for_status()
predictions_resp = json_response.json()['predictions']
print(predictions_resp)


[{'goal_diff': [1.05069733], 'winner_goals': [0.992996514]}, {'goal_diff': [0.487721443], 'winner_goals': [1.76281738]}, {'goal_diff': [-0.405676842], 'winner_goals': [0.963412285]}]
