In [5]:
import numpy as np
import pandas as pd
import data_preprocessing as prep
import importlib

import train_b
importlib.reload(train_b)
from train_b import score, score_sample, predictions_to_goals, prediction_to_goals

importlib.reload(prep)

<module 'data_preprocessing' from 'c:\\Users\\bmk1bj\\Documents\\GIT_repositories\\AIMatch\\data_preprocessing.py'>

### Ideas

- Split train and val randomly
- y = [home_goals - away_goals, total goals]
- model with two unrelated outputs   - OK
- sample_weight extended by tournament_group
- consider team strength trend this year (or this cup)


In [2]:
dataset = prep.Dataset()
X_train, Y_train, X_val, Y_val, X_test, sample_weights_train, sample_weights_val = dataset.get_input_data(label_weights=[1, 1], sample_weights_degree=2)

# Reference values
# = total score for validation data if results are hard-coded and all same without any prediction
# all models should overcome those values
print("    Reference values:")
max_score = 4 * len(Y_val)
ref_score_1 = score(np.zeros(Y_val.shape) * dataset.label_weights, Y_val, label_weights=dataset.label_weights) # 0:0
print("0:0", ref_score_1, "/", max_score, " - %s points per match" % (np.round(ref_score_1/len(Y_val), 2)))
ref_score_2 = score(np.ones(Y_val.shape) * dataset.label_weights, Y_val, label_weights=dataset.label_weights) # 1:0
print("1:0", ref_score_2, "/", max_score, " - %s points per match" % (np.round(ref_score_2/len(Y_val), 2)))
Y_pred = np.ones(Y_val.shape) * dataset.label_weights # 1:1
Y_pred[:, 0] = 0
ref_score_3 = score(Y_pred, Y_val, label_weights=dataset.label_weights) 
print("1:1", ref_score_3, "/", max_score, " - %s points per match" % (np.round(ref_score_3/len(Y_val), 2)))
Y_pred = np.ones(Y_val.shape) * dataset.label_weights # 0:1
Y_pred[:, 0] = -1
ref_score_3 = score(Y_pred, Y_val, label_weights=dataset.label_weights) 
print("0:1", ref_score_3, "/", max_score, " - %s points per match" % (np.round(ref_score_3/len(Y_val), 2)))
Y_pred = np.ones(Y_val.shape) # 2:1
Y_pred[:, 1] = 2
ref_score_3 = score(Y_pred * dataset.label_weights, Y_val, label_weights=dataset.label_weights) 
print("2:1", ref_score_3, "/", max_score, " - %s points per match" % (np.round(ref_score_3/len(Y_val), 2)))
Y_pred = 2 * np.ones(Y_val.shape) # 2:0
ref_score_3 = score(Y_pred * dataset.label_weights, Y_val, label_weights=dataset.label_weights) 
print("2:0", ref_score_3, "/", max_score, " - %s points per match" % (np.round(ref_score_3/len(Y_val), 2)))

Number of relevant labeled matches: 18532/44060
X shape =  (18532, 457)
Y shape =  (18532, 2)
sample weights shape =  (18532,)
X shape =  (48, 457)
Y shape =  (48, 2)
sample weights shape =  (48,)
0:0 8856 / 22240  - 1.59 points per match
1:0 9656 / 22240  - 1.74 points per match
1:1 9020 / 22240  - 1.62 points per match
0:1 4560 / 22240  - 0.82 points per match
2:1 9460 / 22240  - 1.7 points per match
2:0 9033 / 22240  - 1.62 points per match


In [37]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer



def show_predictions(X, Y, Y_pred, indexes, label_weights):
    for i in indexes:
        x = X[i]
        home_team_part = x[:len(dataset.all_teams)]
        away_team_part = x[len(dataset.all_teams):2*len(dataset.all_teams)]
        home_team = dataset.all_teams[np.where(home_team_part == 1)[0][0]]
        away_team = dataset.all_teams[np.where(away_team_part == 1)[0][0]]
        print(home_team, " x ", away_team, ": ", prediction_to_goals(Y_pred[i], label_weights), "-", prediction_to_goals(Y[i], label_weights), " ...................  output (weighted): ", Y_pred[i], Y[i], "   original: ", Y_pred[i]/label_weights, Y[i]/label_weights)


def fit_simple_regressor(X_train, Y_train, X_val, Y_val, X_test, sample_weights_train, regressor, name: str, label_weights: list, show_predicted_indexes = [0, 1], split_models=False):
    print(name)
    Y_val_pred, Y_test_pred = np.zeros(Y_val.shape), np.zeros((X_test.shape[0], Y_val.shape[1]))
    
    if split_models:
        try:
            regressor.fit(X_train, Y_train[:, 0].reshape(-1, 1),  sample_weight = sample_weights_train)        
        except:
            regressor.fit(X_train, Y_train[:, 0].reshape(-1, 1))
            print("Sample weights unused")
            
        Y_val_pred = regressor.predict(X_val).reshape(-1, 1)
        Y_test_pred = regressor.predict(X_test).reshape(-1, 1)
        
        try:
            regressor.fit(X_train, Y_train[:, 1].reshape(-1, 1),  sample_weight = sample_weights_train)        
        except:
            regressor.fit(X_train, Y_train[:, 1].reshape(-1, 1))
            print("Sample weights unused")
        Y_val_pred = np.hstack((Y_val_pred, regressor.predict(X_val).reshape(-1, 1)))
        Y_test_pred = np.hstack((Y_test_pred, regressor.predict(X_test).reshape(-1, 1)))
        
    else:
        try:
            regressor.fit(X_train, Y_train,  sample_weight = sample_weights_train)        
        except:
            regressor.fit(X_train, Y_train)
            print("Sample weights unused")
        
        Y_val_pred = regressor.predict(X_val)
        Y_test_pred = regressor.predict(X_test)
        
        
    reg_score = score(np.round(Y_val_pred), Y_val, label_weights=label_weights)
    print(name)
    print(reg_score, "/", max_score, " - %s points per match" % (np.round(reg_score/len(Y_val), 2)))
    
    print("Val:")
    show_predictions(X_val, Y_val, Y_val_pred, show_predicted_indexes, label_weights)
    print("Test:")
    show_predictions(X_test, np.zeros(Y_test_pred.shape), Y_test_pred, show_predicted_indexes, label_weights)
    

models = {
    #"Tree": DecisionTreeRegressor(random_state=0),
    "Linear": LinearRegression(),
    "MLP": MLPRegressor(hidden_layer_sizes = (128, 32, 8), activation="identity")
}

show_predicted_indexes = [i for i in range(12,24)]
trainable = 0.1
start = int((1 - trainable) * len(X_train))
for name, model in models. items():
    fit_simple_regressor(X_train[start:], Y_train[start:], X_val, Y_val, X_test,
                         sample_weights_train[start:], model, name, 
                         show_predicted_indexes = show_predicted_indexes, label_weights=dataset.label_weights,
                         split_models = True
                         )


Linear
Linear
9470 / 22240  - 1.7 points per match
Val:
Nigeria  x  Ghana :  [1.77148438 1.84570312] - [1. 0.]  ...................  output (weighted):  [-0.07421875  1.77148438] [1. 1.]    original:  [-0.07421875  1.77148438] [1. 1.]
Zimbabwe  x  Senegal :  [3.58707674e+11 2.75740670e+10] - [2. 4.]  ...................  output (weighted):  [3.31133607e+11 3.58707674e+11] [-2.  2.]    original:  [3.31133607e+11 3.58707674e+11] [-2.  2.]
Egypt  x  Morocco :  [2.41992188 1.15429688] - [0. 0.]  ...................  output (weighted):  [1.265625   2.41992188] [0. 0.]    original:  [1.265625   2.41992188] [0. 0.]
Cameroon  x  Togo :  [1.05078125 1.1484375 ] - [2. 0.]  ...................  output (weighted):  [-0.09765625  1.05078125] [2. 2.]    original:  [-0.09765625  1.05078125] [2. 2.]
Ecuador  x  Honduras :  [2.5      1.390625] - [1. 0.]  ...................  output (weighted):  [1.109375 2.5     ] [1. 1.]    original:  [1.109375 2.5     ] [1. 1.]
Finland  x  South Korea :  [1.95117188 

  y = column_or_1d(y, warn=True)


Sample weights unused


  y = column_or_1d(y, warn=True)


Sample weights unused
MLP
9654 / 22240  - 1.74 points per match
Val:
Nigeria  x  Ghana :  [1.72357433 1.74139229] - [1. 0.]  ...................  output (weighted):  [-0.01781795  1.72357433] [1. 1.]    original:  [-0.01781795  1.72357433] [1. 1.]
Zimbabwe  x  Senegal :  [1.77036066 2.40186033] - [2. 4.]  ...................  output (weighted):  [-0.63149967  1.77036066] [-2.  2.]    original:  [-0.63149967  1.77036066] [-2.  2.]
Egypt  x  Morocco :  [2.45217315 1.24474674] - [0. 0.]  ...................  output (weighted):  [1.2074264  2.45217315] [0. 0.]    original:  [1.2074264  2.45217315] [0. 0.]
Cameroon  x  Togo :  [1.1531879  1.21018509] - [2. 0.]  ...................  output (weighted):  [-0.05699719  1.1531879 ] [2. 2.]    original:  [-0.05699719  1.1531879 ] [2. 2.]
Ecuador  x  Honduras :  [2.57834799 1.67122422] - [1. 0.]  ...................  output (weighted):  [0.90712376 2.57834799] [1. 1.]    original:  [0.90712376 2.57834799] [1. 1.]
Finland  x  South Korea :  [1.9701

In [58]:
import keras
from keras.models import Model, save_model
from keras.optimizers import SGD
from keras.layers import Input, Dense
from keras.callbacks import Callback

In [66]:
class Scorer(Callback):
    def __init__(self, X, Y, label_weights):
        self.X_val, self.Y_val = X, Y
        self.label_weights = label_weights
        
    def on_epoch_end(self, batch, logs={}):
        Y_pred = np.hstack(model.predict(self.X_val))

        print("X_val score = ", score(Y_pred, self.Y_val, label_weights=self.label_weights))
        return

In [67]:
model_input = Input(shape=(X_train.shape[1],)) 
# First branch
a_dense_1 = Dense(128, activation = "relu")(model_input)
a_dense_2 = Dense(32, activation = "relu")(a_dense_1)
a_dense_3 = Dense(8, activation = "relu")(a_dense_2)
a_dense_4 = Dense(1, name = "goal_diff", activation = "linear")(a_dense_2)
# Second branch
b_dense_1 = Dense(128, activation = "relu")(model_input)
b_dense_2 = Dense(32, activation = "relu")(b_dense_1)
b_dense_3 = Dense(8, activation = "relu")(b_dense_2)
b_dense_4 = Dense(1, name = "winner_goals", activation = "relu")(b_dense_1)

model = Model(model_input, outputs=[a_dense_4, b_dense_4])

optimizer = SGD(lr=0.02)
model.compile(optimizer=optimizer,loss={'goal_diff': 'mse', 'winner_goals': 'mae'}, metrics={'goal_diff': 'mse', 'winner_goals': 'mae'})

In [68]:
model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=50, batch_size=16, callbacks=[Scorer(X_val, Y_val, dataset.label_weights)], shuffle=True)

Epoch 1/50
X_val score =  9491
Epoch 2/50
X_val score =  9277
Epoch 3/50
X_val score =  9498
Epoch 4/50
X_val score =  9098
Epoch 5/50
X_val score =  9240
Epoch 6/50
X_val score =  9249
Epoch 7/50
X_val score =  9244
Epoch 8/50
X_val score =  9393
Epoch 9/50
X_val score =  9465
Epoch 10/50
X_val score =  9390
Epoch 11/50
X_val score =  9442
Epoch 12/50
X_val score =  9520
Epoch 13/50
X_val score =  9338
Epoch 14/50
X_val score =  9472
Epoch 15/50
X_val score =  9345
Epoch 16/50
X_val score =  9379
Epoch 17/50
X_val score =  9327
Epoch 18/50
X_val score =  9339
Epoch 19/50
X_val score =  9269
Epoch 20/50
X_val score =  9283
Epoch 21/50
X_val score =  9258
Epoch 22/50
X_val score =  9361
Epoch 23/50
X_val score =  9361
Epoch 24/50
X_val score =  9285
Epoch 25/50
X_val score =  9337
Epoch 26/50
X_val score =  9215
Epoch 27/50
X_val score =  9367
Epoch 28/50
X_val score =  9124
Epoch 29/50
X_val score =  9337
Epoch 30/50
X_val score =  9232
Epoch 31/50
X_val score =  9356
Epoch 32/50
X_val

<keras.callbacks.History at 0x298a1363f70>

In [69]:
Y_test_pred = np.hstack(model.predict(X_test))
print(Y_test_pred)
Y_val_pred = np.hstack(model.predict(X_val))

[[ 1.8482063   1.019687  ]
 [ 1.200696    1.1615962 ]
 [-0.80745804  0.03948702]
 [ 0.5784939   0.47459874]
 [ 2.2440252   2.0270145 ]
 [ 1.5324353   1.8348349 ]
 [ 0.4759792   0.7122674 ]
 [ 3.1477332   1.8839133 ]
 [-0.34375596  0.26776487]
 [ 1.5461541   1.0864367 ]
 [ 1.0799475   2.1335046 ]
 [ 0.8389342   1.257649  ]
 [ 0.19691443  0.6653434 ]
 [ 1.2687755   1.1026537 ]
 [ 4.667077    1.2975998 ]
 [ 2.0540388   1.4375825 ]
 [-0.21709049  0.3169538 ]
 [ 1.971118    0.20917663]
 [ 1.4676036   1.7756962 ]
 [ 0.98591673  1.6700385 ]
 [ 1.2591944   1.4132878 ]
 [ 3.6427813   1.3367293 ]
 [ 0.97793007  1.7661307 ]
 [ 2.9610825   1.7375698 ]
 [ 0.88394904  1.5971909 ]
 [ 0.69935083  0.94272864]
 [ 0.5725364   1.1468176 ]
 [ 1.0759832   1.4848827 ]
 [ 0.6535374   0.6326175 ]
 [ 0.6324643   0.4609554 ]
 [ 2.1767495   2.2886596 ]
 [ 1.7437046   1.4307841 ]
 [ 0.30518985  0.3371962 ]
 [ 5.601728    2.0803425 ]
 [-0.07189357  0.        ]
 [ 0.3264246   1.6310463 ]
 [ 0.11537647  0.7229376 ]
 

In [70]:
show_predicted_indexes = [i for i in range(0, 10)]

print("Val:")
show_predictions(X_val, Y_val, Y_val_pred, show_predicted_indexes, dataset.label_weights)
print("Test:")
show_predictions(X_test, np.zeros(Y_test_pred.shape), Y_test_pred, show_predicted_indexes, dataset.label_weights)

Val:
Morocco  x  Zimbabwe :  [1.44664299 0.501701  ] - [1. 0.]  ...................  output (weighted):  [0.944942 1.446643] [1. 1.]    original:  [0.944942   1.44664299] [1. 1.]
Senegal  x  DR Congo :  [0.87146139 0.07595283] - [0. 0.]  ...................  output (weighted):  [0.79550856 0.8714614 ] [0. 0.]    original:  [0.79550856 0.87146139] [0. 0.]
Tunisia  x  Ghana :  [1.29323637 0.34739906] - [2. 0.]  ...................  output (weighted):  [0.9458373 1.2932364] [2. 2.]    original:  [0.94583732 1.29323637] [2. 2.]
Morocco  x  Angola :  [1.73500919 0.167256  ] - [2. 2.]  ...................  output (weighted):  [1.5677532 1.7350092] [0. 2.]    original:  [1.5677532  1.73500919] [0. 2.]
Saudi Arabia  x  Sweden :  [ 0.5058071  -0.15448231] - [1. 1.]  ...................  output (weighted):  [0.6602894 0.5058071] [0. 1.]    original:  [0.66028941 0.5058071 ] [0. 1.]
United Arab Emirates  x  South Korea :  [ 0.80222273 -0.02786255] - [1. 0.]  ...................  output (weighted)

In [26]:
import tempfile
import os

MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join("", str(version))
print('export_path = {}\n'.format(export_path))


save_model(
    model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
)


export_path = 1

INFO:tensorflow:Assets written to: 1\assets


In [28]:
print(X_test[0].tolist())

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [71]:
import requests
import json

headers = {"content-type": "application/json"}
data = json.dumps({"signature_name": "serving_default", "instances": X_test[0:3].tolist()})
json_response = requests.post('http://localhost:50001/v1/models/aimatch:predict', data=data, headers=headers)
predictions_resp = json.loads(json_response.text)['predictions']
print(predictions_resp)


[{'goal_diff': [1.05069733], 'winner_goals': [0.992996514]}, {'goal_diff': [0.487721443], 'winner_goals': [1.76281738]}, {'goal_diff': [-0.405676842], 'winner_goals': [0.963412285]}]
