# TESTING

In [1]:
# Install each package only when `pip freeze` has no matching line: grep exits
# non-zero on no match, which lets the `||` branch run the install.
# NOTE(review): `grep tensorflow` also matches related packages such as
# tensorflow-hub, and the fallback installs are unpinned — confirm that this
# is acceptable for reproducing the run.
!pip freeze | grep tensorflow || pip install tensorflow
!pip freeze | grep google-cloud-storage || pip install google-cloud-storage

tensorflow==2.10.1
tensorflow-cloud==0.1.16
tensorflow-datasets==4.8.2
tensorflow-estimator==2.10.0
tensorflow-hub==0.13.0
tensorflow-io==0.27.0
tensorflow-io-gcs-filesystem==0.27.0
tensorflow-metadata==1.11.0
tensorflow-probability==0.19.0
tensorflow-serving-api==2.10.1
tensorflow-transform==1.11.0
google-cloud-storage==2.9.0


In [2]:
from google.cloud import storage
import logging
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import callbacks, models
from tensorflow.keras.layers import (
    Concatenate,
    Dense,
    Input,
)

2023-08-01 13:36:17.965374: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-01 13:36:22.791277: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-08-01 13:36:33.691403: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-08-01 13:36:33.691672: W tensorflow/strea

In [3]:
# Ask gcloud for the active project. The `!` capture yields the command's
# output lines, so the first line is taken as the project id.
PROJECT = !gcloud config get-value project
PROJECT = PROJECT[0]
# The bucket name is set to the project id.
BUCKET = PROJECT
REGION = "europe-west1"

# GCS prefix derived from the bucket name.
OUTDIR = f"gs://{BUCKET}/fpl/data"

# Export the settings as environment variables of the kernel process.
%env PROJECT=$PROJECT
%env BUCKET=$BUCKET
%env REGION=$REGION
%env OUTDIR=$OUTDIR
# NOTE(review): the pip listing captured in this notebook shows tensorflow
# 2.10.1 while TFVERSION is 2.8 — confirm which version is intended.
%env TFVERSION=2.8

env: PROJECT=bf-fpl-pred-080723
env: BUCKET=bf-fpl-pred-080723
env: REGION=europe-west1
env: OUTDIR=gs://bf-fpl-pred-080723/fpl/data
env: TFVERSION=2.8


In [7]:
# Record the TensorFlow version at INFO level.
# NOTE(review): nothing is displayed unless logging is configured to emit INFO
# records — confirm whether the version is meant to be visible in the notebook.
logging.info(tf.version.VERSION)

# TODO: Parametrise the column list to use the same package for all positions.
# Name of the column the model is trained to predict.
LABEL_COLUMN = "next_season_points"

# Columns that are read from the CSV files but dropped before training.
UNWANTED_COLS = ["hash_id", "element_code", "season_name"]

# Column names, in order, handed to the CSV reader in load_dataset.
CSV_COLUMNS = [
    "hash_id",
    "element_code",
    "season_name",
    "next_season_points",
    "minutes",
    "goals_scored",
    "assists",
    "clean_sheets",
    "penalties_missed",
    "bps",
    "yellow_threshold",
    "red_cards",
    "own_goals",
    "influence",
    "creativity",
    "threat",
    "start_cost",
    "end_cost",
]

# One default per CSV column: "" for the two text columns, [0.0] for all others.
DEFAULTS = [
    "" if column in ("hash_id", "season_name") else [0.0]
    for column in CSV_COLUMNS
]

# Model inputs: every CSV column except the label and the unwanted columns.
INPUT_COLS = [
    column
    for column in CSV_COLUMNS
    if column not in [LABEL_COLUMN] + UNWANTED_COLS
]


In [5]:
def features_and_labels(row_data):
    """Split a parsed row mapping into a ``(features, label)`` pair.

    Every key listed in ``UNWANTED_COLS`` and the ``LABEL_COLUMN`` key are
    removed from ``row_data`` itself, so the features returned are the same
    mapping object that was passed in.

    Args:
        row_data: Mapping of column name to value; it is modified in place.

    Returns:
        Tuple of the remaining mapping and the value removed under
        ``LABEL_COLUMN``.
    """
    for dropped in UNWANTED_COLS:
        row_data.pop(dropped)
    target = row_data.pop(LABEL_COLUMN)
    features = row_data
    return features, target


def load_dataset(pattern, batch_size, num_repeat, shuffle_buffer_size=1000000):
    """Read CSV files into a batched dataset of ``(features, label)`` pairs.

    Args:
        pattern: File pattern of the CSV files to read.
        batch_size: Number of records per batch.
        num_repeat: Forwarded to the CSV reader as ``num_epochs``.
        shuffle_buffer_size: Forwarded to the CSV reader as its shuffle buffer
            size. The default keeps the value this function previously
            hard-coded, so existing callers are unaffected.

    Returns:
        The dataset produced by ``make_csv_dataset`` with every element mapped
        through ``features_and_labels``.
    """
    dataset = tf.data.experimental.make_csv_dataset(
        file_pattern=pattern,
        batch_size=batch_size,
        column_names=CSV_COLUMNS,
        column_defaults=DEFAULTS,
        num_epochs=num_repeat,
        shuffle_buffer_size=shuffle_buffer_size,
    )
    return dataset.map(features_and_labels)


def create_train_dataset(pattern, batch_size):
    """Return the training dataset for the files matching ``pattern``.

    The data is loaded with ``num_repeat=None`` (no epoch count is given to the
    loader) and one batch is prefetched.
    """
    return load_dataset(pattern, batch_size, num_repeat=None).prefetch(1)


def create_eval_dataset(pattern, batch_size):
    """Return the evaluation dataset for the files matching ``pattern``.

    The data is loaded with ``num_repeat=1`` and one batch is prefetched.
    """
    return load_dataset(pattern, batch_size, num_repeat=1).prefetch(1)


def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    The mean is taken over all elements of the squared difference.
    """
    error = y_pred - y_true
    mean_squared_error = tf.reduce_mean(tf.square(error))
    return tf.sqrt(mean_squared_error)


def build_dnn_model(nnsize, lr):
    """Build and compile a fully connected regression model.

    Args:
        nnsize: Iterable of hidden-layer widths; one ReLU ``Dense`` layer named
            ``h<index>`` is created per entry, in order.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras model that takes one ``(1,)``-shaped float32 input per
        name in ``INPUT_COLS`` and produces a single output unit. It is
        compiled with MSE loss and reports ``rmse`` and MSE as metrics.
    """
    inputs = {
        colname: Input(name=colname, shape=(1,), dtype="float32")
        for colname in INPUT_COLS
    }

    # Concatenate numeric inputs
    dnn_inputs = Concatenate()(list(inputs.values()))

    x = dnn_inputs
    for layer, nodes in enumerate(nnsize):
        x = Dense(nodes, activation="relu", name=f"h{layer}")(x)
    # The output layer carries the label column's name, spelled correctly; the
    # previous "next_sason_points" was a typo.
    output = Dense(1, name="next_season_points")(x)

    model = models.Model(inputs, output)

    lr_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=lr_optimizer, loss="mse", metrics=[rmse, "mse"])

    return model


def train_and_evaluate(
    batch_size,
    lr,
    nnsize,
    eval_data_path,
    num_evals,
    num_examples_to_train_on,
    output_dir,
    train_data_path,
):
    """Train the DNN, validate it after every epoch and export it.

    Any existing ``output_dir`` is deleted first. Checkpoints, TensorBoard logs
    and the exported model are then written to the ``checkpoints``,
    ``tensorboard`` and ``savedmodel`` sub-paths of ``output_dir``.

    Args:
        batch_size: Records per batch for both training and evaluation.
        lr: Learning rate passed to ``build_dnn_model``.
        nnsize: Hidden-layer widths, either as a whitespace-separated string
            such as ``"32 8"`` or as an iterable of values convertible to int.
        eval_data_path: File pattern of the evaluation CSV files.
        num_evals: Number of epochs to run; validation runs once per epoch.
        num_examples_to_train_on: Total number of training examples to consume
            across all epochs.
        output_dir: Directory for all artefacts (removed if it already exists).
        train_data_path: File pattern of the training CSV files.

    Returns:
        The history object returned by ``model.fit``.
    """
    # Accept both the "32 8" string form and an already-split iterable.
    if isinstance(nnsize, str):
        nnsize = nnsize.split()
    nnsize = [int(s) for s in nnsize]

    model_export_path = os.path.join(output_dir, "savedmodel")
    checkpoint_path = os.path.join(output_dir, "checkpoints")
    tensorboard_path = os.path.join(output_dir, "tensorboard")

    # Start from an empty output directory so artefacts of earlier runs do not
    # mix with this one.
    if tf.io.gfile.exists(output_dir):
        tf.io.gfile.rmtree(output_dir)

    model = build_dnn_model(nnsize, lr)
    # summary() prints the table itself and returns None, so wrapping it in a
    # logging call would only log "None".
    model.summary()

    trainds = create_train_dataset(train_data_path, batch_size)
    evalds = create_eval_dataset(eval_data_path, batch_size)

    # Spread the requested number of examples evenly over the epochs, with at
    # least one step per epoch.
    steps_per_epoch = max(
        1, num_examples_to_train_on // (batch_size * num_evals)
    )

    checkpoint_cb = callbacks.ModelCheckpoint(
        checkpoint_path, save_weights_only=True, verbose=1
    )
    tensorboard_cb = callbacks.TensorBoard(tensorboard_path, histogram_freq=1)

    history = model.fit(
        trainds,
        validation_data=evalds,
        epochs=num_evals,
        steps_per_epoch=steps_per_epoch,
        verbose=2,  # 0=silent, 1=progress bar, 2=one line per epoch
        callbacks=[checkpoint_cb, tensorboard_cb],
    )

    # Exporting the model with default serving function.
    model.save(model_export_path)
    return history


In [8]:

# Run with 1 eval
# The data paths are built from OUTDIR (set in the configuration cell) rather
# than a hard-coded bucket, so the run follows the active gcloud project.
history = train_and_evaluate(
    batch_size=5,
    lr=0.001,
    nnsize="32 8",
    eval_data_path=f"{OUTDIR}/mid-test*",
    num_evals=1,
    num_examples_to_train_on=100,
    output_dir="./fpl-model",
    train_data_path=f"{OUTDIR}/mid-train*",
)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 minutes (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 goals_scored (InputLayer)      [(None, 1)]          0           []                               
                                                                                                  
 assists (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 clean_sheets (InputLayer)      [(None, 1)]          0           []                               
                                                                                            

INFO:tensorflow:Assets written to: ./fpl-model/savedmodel/assets
