In [12]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.utils as utils
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pyarrow.parquet as pq
import pandas as pd
import optuna

random_seed = 0
tf.random.set_seed(random_seed) 
one_hot = True
num_eval_samples = 50

metric_name = 'Accuracy'
metric_target = 0

n_trials = 100
epochs = 80
early_stopping_epochs=35,
learning_rate_decay_factor=0.2
learning_rate_decay_epochs=10
min_layers=3
max_layers=5
min_units=32
max_units=128
units_increment=16

batch_size = 32
input_shape = (19,)
noise_range = 0.1992
num_hidden_layers = 4
hidden_units_list = [32,48,64,80,96,112,128]
hidden_layers = [32,80,112,112]
batch_normalizations = [False,False,False,False]
activations = ['relu', 'gelu','tanh','gelu']
regularizations = [ 'l1', 'l1', 'l2', 'l2']
dropouts = [0.4,0.2,0.2,0.0]
optimizer = 'yogi'
learning_rate = 0.0007
columnus = ['Season', 'Date', 'Result', 'Home Team', 'Away Team', 'HG', 'AG']

In [13]:
def preprocess_training_dataframe(matches_df: pd.DataFrame, one_hot: bool) -> (np.ndarray, np.ndarray):
    inputs = matches_df.dropna().drop(columns=columnus)
    inputs = inputs.to_numpy(dtype=np.float64)
    targets = matches_df['Season'].replace({
            'H' : 0,
            'D' : 1,
            'A' : 2}).to_numpy(dtype=np.int64)
    if one_hot:
        targets = utils.to_categorical(targets)
    return inputs, targets

def split_train_targets(
        inputs: np.ndarray,
        targets: np.ndarray,
        num_eval_samples: int) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    x_train = inputs[num_eval_samples:]
    y_train = targets[num_eval_samples:]
    x_test = inputs[: num_eval_samples]
    y_test = targets[: num_eval_samples]
    return x_train, y_train, x_test, y_test

In [18]:
matches_df = pq.read_table('England.Premier-League.parquet').to_pandas()
inputs, targets = preprocess_training_dataframe(matches_df=matches_df, one_hot=one_hot)
x_train, y_train, x_test, y_test = split_train_targets(inputs=inputs, targets=targets, num_eval_samples=num_eval_samples)

In [22]:
x_train.shape

(3009, 19)

In [23]:
y_train.shape

(3009, 2023)

In [15]:
def create_model() -> float:
    optimizer = tfa.optimizers.Yogi(learning_rate=learning_rate)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape= matches_df.shape[1:]))
    model.add(tf.keras.layers.GaussianNoise(stddev=noise_range))
    
    for i, units in enumerate(hidden_layers):
        regularizer = regularizations[i]
        batch_norm = batch_normalizations[i]
        dropout = dropouts[i]
        model.add(tf.keras.layers.Dense(
                units=units,
                activation=activations[i],
                use_bias=not batch_norm,
                kernel_regularizer=regularizer))
        
        if batch_normalizations[i]:
            model.add(tf.keras.layers.BatchNormalization())
        if dropout > 0.0:
            model.add(tf.keras.layers.Dropout(rate=dropout))

    model.add(tf.keras.layers.Dense(units=3, activation='softmax'))
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    
    return model

In [None]:

def train(metric_name: str, metric_target: str):
    match metric_name:
        case 'Accuracy':
            metric = lambda y_true, y_pred: accuracy_score(y_true=y_true, y_pred=y_pred)
        case 'F1':
            metric = lambda y_true, y_pred: f1_score(y_true=y_true, y_pred=y_pred, average=None)[metric_target]
        case 'Precision':
            metric = lambda y_true, y_pred: precision_score(y_true=y_true, y_pred=y_pred, average=None)[metric_target]
        case 'Recall':
            metric = lambda y_true, y_pred: recall_score(y_true=y_true, y_pred=y_pred, average=None)[metric_target]
        case _:
            raise NotImplementedError(f'Error: Metric "{metric_name}" has not been implemented yet')
    
    tuner = _construct_tuner(
        n_trials=n_trials_var,
        metric=metric,
        matches_df=matches_df,
        num_eval_samples=num_eval_samples_var,
        random_seed=random_seed) 
    best_params = tuner.tune()
    
    model = construct_model(input_shape=x_train.shape[1:], random_seed=random_seed)
    build_model(model=model, best_params=best_params)
    eval_metrics = model.train(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        use_over_sampling=best_params['user_over_sampling'])
    return model

In [16]:
def _objective(trial) -> float:
    model = create_model()
    use_over_sampling = bool(trial.suggest_categorical('user_over_sampling', [True, False]))
    model.train(
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        use_over_sampling=use_over_sampling)
    y_pred, _ = model.predict(x=x_test)
    return _evaluate(y_true=y_test, y_pred=y_pred)

study = optuna.create_study(direction='maximize')
study.optimize(_objective, n_trials=n_trials)
best_params = study.best_trial.params

[I 2023-08-20 17:34:24,422] A new study created in memory with name: no-name-e5ceb769-4186-423c-a5fe-0e157126b0f1
[W 2023-08-20 17:34:24,644] Trial 0 failed with parameters: {'user_over_sampling': False} because of the following error: AttributeError("'Sequential' object has no attribute 'train'").
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3218/3332776971.py", line 4, in _objective
    model.train(
AttributeError: 'Sequential' object has no attribute 'train'
[W 2023-08-20 17:34:24,645] Trial 0 failed with value None.


AttributeError: 'Sequential' object has no attribute 'train'

In [None]:
#model = _construct_model(input_shape=x_train.shape[1:], random_seed=random_seed)
#_build_model(model=model, best_params=best_params)
#eval_metrics = model.train(x_train=x_train,y_train=y_train,x_test=x_test,y_test=y_test,use_over_sampling=best_params['user_over_sampling'])