# FPL Model Building
We'll build a model using Keras and set it up as a package so we can train both locally and on the cloud.

We need to create a model as a Python package, so we'll need an `__init__.py` to identify the directory as a package, a `model.py` to hold the model code, and a `task.py` to pass command line parameters to our model.

In [1]:
# Install each dependency only when `pip freeze` does not already list it:
# grep exits non-zero when nothing matches, which triggers the `||` fallback.
!pip freeze | grep tensorflow || pip install tensorflow
!pip freeze | grep google-cloud-storage || pip install google-cloud-storage

tensorflow==2.10.1
tensorflow-cloud==0.1.16
tensorflow-datasets==4.8.2
tensorflow-estimator==2.10.0
tensorflow-hub==0.13.0
tensorflow-io==0.27.0
tensorflow-io-gcs-filesystem==0.27.0
tensorflow-metadata==1.11.0
tensorflow-probability==0.19.0
tensorflow-serving-api==2.10.1
tensorflow-transform==1.11.0
google-cloud-storage==2.9.0


In [2]:
from google.cloud import storage

In [3]:
# Ask gcloud for the active project. `!cmd` captures the command output as a
# list of lines, so the project id is the first element.
PROJECT = !gcloud config get-value project
PROJECT = PROJECT[0]
# The bucket shares its name with the project.
BUCKET = PROJECT
REGION = "europe-west1"

# GCS location gs://<bucket>/fpl/data.
OUTDIR = f"gs://{BUCKET}/fpl/data"

# Export the settings as environment variables so `%%bash` cells can read them.
%env PROJECT=$PROJECT
%env BUCKET=$BUCKET
%env REGION=$REGION
%env OUTDIR=$OUTDIR
# NOTE(review): TFVERSION is not referenced by any cell visible here - confirm it is still needed.
%env TFVERSION=2.8

env: PROJECT=bf-fpl-pred-080723
env: BUCKET=bf-fpl-pred-080723
env: REGION=europe-west1
env: OUTDIR=gs://bf-fpl-pred-080723/fpl/data
env: TFVERSION=2.8


In [4]:
%%writefile ./models/trainers/mid/__init__.py
# Empty init.

Overwriting ./models/trainers/mid/__init__.py


In [5]:
%%writefile ./models/trainers/mid/model.py
"""Data prep, train and evaluate DNN model."""

import logging
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import callbacks, models
from tensorflow.keras.layers import (
    Concatenate,
    Dense,
    Input,
)

# Record the TensorFlow version at import time (INFO level, so it only shows
# up when logging is configured to emit INFO records).
logging.info(tf.version.VERSION)

# TODO: Parametrise the column list to use the same package for all positions.
# Column names handed to `make_csv_dataset` as `column_names`
# (presumably in the same order as the columns of the CSV files - confirm).
CSV_COLUMNS = [
    "hash_id",
    "element_code",
    "season_name",
    "next_season_points",
    "minutes",
    "goals_scored",
    "assists",
    "clean_sheets",
    "penalties_missed",
    "bps",
    "yellow_threshold",
    "red_cards",
    "own_goals",
    "influence",
    "creativity",
    "threat",
    "start_cost",
    "end_cost"
]

# Column that `features_and_labels` pops out of each row as the training target.
LABEL_COLUMN = "next_season_points"
# Per-column defaults handed to `make_csv_dataset` as `column_defaults`:
# "" for a string column, [0.0] for a float column. The list is positional,
# so it must keep one entry per name in CSV_COLUMNS, in the same order.
DEFAULTS = ["", [0.0], "", [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]
# Columns that are read from the CSV but removed from every row before training.
UNWANTED_COLS = ["hash_id", "element_code", "season_name"]

# Model input features: every CSV column except the label and the unwanted ones.
INPUT_COLS = [
    c for c in CSV_COLUMNS if c != LABEL_COLUMN and c not in UNWANTED_COLS
]

def features_and_labels(row_data):
    """Split one parsed CSV record (or batch) into features and label.

    The label and every column named in UNWANTED_COLS are removed from
    `row_data` in place; whatever remains is used as the feature mapping.

    Args:
        row_data: mutable mapping from column name to value.

    Returns:
        A `(features, label)` tuple, where `features` is the same mapping
        object that was passed in, minus the removed columns.
    """
    target = row_data.pop(LABEL_COLUMN)
    for dropped_name in UNWANTED_COLS:
        row_data.pop(dropped_name)
    return row_data, target


def load_dataset(pattern, batch_size, num_repeat):
    """Read CSV files into a batched dataset of `(features, label)` pairs.

    Args:
        pattern: file pattern of the CSV files to read.
        batch_size: number of records per batch.
        num_repeat: forwarded as `num_epochs` to `make_csv_dataset`.

    Returns:
        The dataset produced by `make_csv_dataset`, mapped through
        `features_and_labels`.
    """
    reader_options = {
        "file_pattern": pattern,
        "batch_size": batch_size,
        "column_names": CSV_COLUMNS,
        "column_defaults": DEFAULTS,
        "num_epochs": num_repeat,
        "shuffle_buffer_size": 1000000,
    }
    csv_batches = tf.data.experimental.make_csv_dataset(**reader_options)
    return csv_batches.map(features_and_labels)


def create_train_dataset(pattern, batch_size):
    """Build the training dataset.

    Calls `load_dataset` with `num_repeat=None` and prefetches one batch.
    """
    return load_dataset(pattern, batch_size, num_repeat=None).prefetch(1)


def create_eval_dataset(pattern, batch_size):
    """Build the evaluation dataset.

    Calls `load_dataset` with `num_repeat=1` (a single pass over the files)
    and prefetches one batch.
    """
    return load_dataset(pattern, batch_size, num_repeat=1).prefetch(1)


def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        The square root of the mean of the squared differences.
    """
    squared_error = tf.square(y_pred - y_true)
    return tf.sqrt(tf.reduce_mean(squared_error))


def build_dnn_model(nnsize, lr):
    """Build and compile a fully connected regression network.

    One float32 input of shape `(1,)` is created per entry of INPUT_COLS.
    The inputs are concatenated and passed through a stack of ReLU `Dense`
    layers, followed by a single-unit `Dense` output with no activation.

    Args:
        nnsize: iterable of ints giving the number of units of each hidden
            layer, in order. Hidden layers are named `h0`, `h1`, ...
        lr: learning rate for the Adam optimizer.

    Returns:
        A compiled Keras model using MSE loss and reporting the `rmse` and
        `mse` metrics.
    """
    inputs = {
        colname: Input(name=colname, shape=(1,), dtype="float32")
        for colname in INPUT_COLS
    }

    # Concatenate numeric inputs
    dnn_inputs = Concatenate()(list(inputs.values()))

    x = dnn_inputs
    for layer, nodes in enumerate(nnsize):
        x = Dense(nodes, activation="relu", name=f"h{layer}")(x)
    # The output layer carries the name of the label column.
    # NOTE(review): this name presumably surfaces as the output key of the
    # exported model - verify anything that reads previously exported models.
    output = Dense(1, name="next_season_points")(x)

    model = models.Model(inputs, output)

    lr_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=lr_optimizer, loss="mse", metrics=[rmse, "mse"])

    return model


def train_and_evaluate(hparams):
    """Train the DNN model, validate it after every epoch, and save it.

    Any existing content under `output_dir` is deleted before training
    starts. Checkpoints, TensorBoard logs and the saved model are written to
    the `checkpoints`, `tensorboard` and `savedmodel` sub-paths of
    `output_dir`.

    Args:
        hparams: dict with the keys
            "batch_size": int, records per batch.
            "lr": learning rate for the optimizer.
            "nnsize": whitespace-separated hidden layer sizes, e.g. "32 8".
            "train_data_path": file pattern of the training CSV files.
            "eval_data_path": file pattern of the evaluation CSV files.
            "num_evals": int, number of epochs; the evaluation dataset is
                used as validation data for each of them.
            "num_examples_to_train_on": int, total number of training
                examples to consume across all epochs.
            "output_dir": directory for checkpoints, logs and the model.

    Returns:
        The history object returned by `model.fit`.
    """
    batch_size = hparams["batch_size"]
    lr = hparams["lr"]
    nnsize = [int(s) for s in hparams["nnsize"].split()]
    eval_data_path = hparams["eval_data_path"]
    num_evals = hparams["num_evals"]
    num_examples_to_train_on = hparams["num_examples_to_train_on"]
    output_dir = hparams["output_dir"]
    train_data_path = hparams["train_data_path"]

    model_export_path = os.path.join(output_dir, "savedmodel")
    checkpoint_path = os.path.join(output_dir, "checkpoints")
    tensorboard_path = os.path.join(output_dir, "tensorboard")

    # Start from a clean output directory so artefacts of earlier runs do not
    # get mixed with the new ones.
    if tf.io.gfile.exists(output_dir):
        tf.io.gfile.rmtree(output_dir)

    model = build_dnn_model(nnsize, lr)
    # `summary()` prints and returns None, so route its lines through the
    # logger via `print_fn` instead of logging its return value.
    model.summary(print_fn=logging.info)

    trainds = create_train_dataset(train_data_path, batch_size)
    evalds = create_eval_dataset(eval_data_path, batch_size)

    # The training dataset is created with num_repeat=None, so the length of
    # an epoch has to be given explicitly: the requested number of examples
    # is split evenly over `num_evals` epochs.
    steps_per_epoch = num_examples_to_train_on // (batch_size * num_evals)

    checkpoint_cb = callbacks.ModelCheckpoint(
        checkpoint_path, save_weights_only=True, verbose=1
    )
    tensorboard_cb = callbacks.TensorBoard(tensorboard_path, histogram_freq=1)

    history = model.fit(
        trainds,
        validation_data=evalds,
        epochs=num_evals,
        # Guard against the integer division above rounding down to zero.
        steps_per_epoch=max(1, steps_per_epoch),
        verbose=2,  # 0=silent, 1=progress bar, 2=one line per epoch
        callbacks=[checkpoint_cb, tensorboard_cb],
    )

    # Exporting the model with default serving function.
    model.save(model_export_path)
    return history


Overwriting ./models/trainers/mid/model.py


In [11]:
%%writefile ./models/trainers/mid/task.py
"""Argument definitions for model training code in `mid.model`."""
# TODO: Add CSV_COLUMNS.

import argparse

from mid import model

if __name__ == "__main__":
    # One (flag, add_argument keyword arguments) pair per command line
    # option, listed in the order they are registered with the parser.
    argument_specs = [
        (
            "--batch_size",
            {"help": "Batch size for training steps", "type": int, "default": 32},
        ),
        (
            "--eval_data_path",
            {"help": "GCS location pattern of eval files", "required": True},
        ),
        (
            "--nnsize",
            {
                "help": "Hidden layer sizes (provide space-separated sizes)",
                "default": "32 8",
            },
        ),
        (
            "--lr",
            {"help": "learning rate for optimizer", "type": float, "default": 0.001},
        ),
        (
            "--num_evals",
            {
                "help": "Number of times to evaluate model on eval data training.",
                "type": int,
                "default": 5,
            },
        ),
        (
            "--num_examples_to_train_on",
            {"help": "Number of examples to train on.", "type": int, "default": 100},
        ),
        (
            "--output_dir",
            {
                "help": "GCS location to write checkpoints and export models",
                "required": True,
            },
        ),
        (
            "--train_data_path",
            {
                "help": "GCS location pattern of train files containing eval URLs",
                "required": True,
            },
        ),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)

    # Pass the parsed options to the trainer as a plain dict keyed by option name.
    model.train_and_evaluate(vars(parser.parse_args()))

Overwriting ./models/trainers/mid/task.py


Next we package up the code as a source distribution to be able to run it on Vertex AI.

In [18]:
%%writefile ./models/trainers/setup.py
"""Using `setuptools` to create a source distribution."""

from setuptools import find_packages, setup

# Metadata of the source distribution; only the `mid` package is shipped.
DISTRIBUTION_OPTIONS = {
    "name": "mid-trainer",
    "version": "0.1",
    "packages": ["mid"],
    "include_package_data": True,
    "description": "Midfielder model training application.",
}

setup(**DISTRIBUTION_OPTIONS)

Overwriting ./models/trainers/setup.py


In [19]:
%%bash
# Build the gzipped source distribution from the directory holding setup.py.
# `&&` keeps setup.py from being run in the wrong directory if `cd` fails.
cd models/trainers && python setup.py sdist --formats=gztar

running sdist
running egg_info
writing mid_trainer.egg-info/PKG-INFO
writing dependency_links to mid_trainer.egg-info/dependency_links.txt
writing top-level names to mid_trainer.egg-info/top_level.txt
reading manifest file 'mid_trainer.egg-info/SOURCES.txt'
writing manifest file 'mid_trainer.egg-info/SOURCES.txt'
running check
creating mid-trainer-0.1
creating mid-trainer-0.1/mid
creating mid-trainer-0.1/mid_trainer.egg-info
copying files to mid-trainer-0.1...
copying setup.py -> mid-trainer-0.1
copying mid/__init__.py -> mid-trainer-0.1/mid
copying mid/model.py -> mid-trainer-0.1/mid
copying mid/task.py -> mid-trainer-0.1/mid
copying mid_trainer.egg-info/PKG-INFO -> mid-trainer-0.1/mid_trainer.egg-info
copying mid_trainer.egg-info/SOURCES.txt -> mid-trainer-0.1/mid_trainer.egg-info
copying mid_trainer.egg-info/dependency_links.txt -> mid-trainer-0.1/mid_trainer.egg-info
copying mid_trainer.egg-info/top_level.txt -> mid-trainer-0.1/mid_trainer.egg-info
Writing mid-trainer-0.1/setup.cfg




Move files over to a GCS bucket.

In [20]:
# Upload the built package; `$BUCKET` is expanded by IPython from the Python
# variable of the same name. The destination must match the package URI that
# the job submission cell passes to `--python-package-uris`.
!gsutil cp models/trainers/dist/mid-trainer-0.1.tar.gz gs://$BUCKET/fpl/trainers/

Copying file://models/trainers/dist/mid-trainer-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  2.6 KiB/  2.6 KiB]                                                
Operation completed over 1 objects/2.6 KiB.                                      


In [21]:
%%bash

# Fail fast: stop on the first error and on any unset variable. BUCKET and
# REGION are expected in the environment; without them the GCS paths below
# would silently be malformed.
set -euo pipefail

# Output directory and job name, made unique with a UTC timestamp
TIMESTAMP=$(date -u +%Y%m%d_%H%M%S)
OUTDIR="gs://${BUCKET}/fpl/trained-models/mid_model_${TIMESTAMP}"
JOB_NAME="mid_${TIMESTAMP}"
echo "${OUTDIR}" "${REGION}" "${JOB_NAME}"

# Training package and worker configuration
PYTHON_PACKAGE_URIS="gs://${BUCKET}/fpl/trainers/mid-trainer-0.1.tar.gz"
MACHINE_TYPE=n1-standard-4
REPLICA_COUNT=1
PYTHON_PACKAGE_EXECUTOR_IMAGE_URI="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-8:latest"
PYTHON_MODULE=mid.task

# Model and training hyperparameters
BATCH_SIZE=50
NUM_EXAMPLES_TO_TRAIN_ON=5000
NUM_EVALS=100
LR=0.001
NNSIZE="32 8"

# GCS paths. The trailing `*` is passed on literally as a file pattern for
# the trainer, so it is quoted to keep the shell from touching it.
GCS_PROJECT_PATH="gs://${BUCKET}"
DATA_PATH="${GCS_PROJECT_PATH}/fpl/data"
TRAIN_DATA_PATH="${DATA_PATH}/mid-train*"
EVAL_DATA_PATH="${DATA_PATH}/mid-test*"

# Comma-separated key=value pairs describing the single worker pool.
WORKER_POOL_SPEC="machine-type=${MACHINE_TYPE},\
replica-count=${REPLICA_COUNT},\
executor-image-uri=${PYTHON_PACKAGE_EXECUTOR_IMAGE_URI},\
python-module=${PYTHON_MODULE}"

# Comma-separated command line arguments forwarded to the Python module.
ARGS="--eval_data_path=${EVAL_DATA_PATH},\
--output_dir=${OUTDIR},\
--train_data_path=${TRAIN_DATA_PATH},\
--batch_size=${BATCH_SIZE},\
--num_examples_to_train_on=${NUM_EXAMPLES_TO_TRAIN_ON},\
--num_evals=${NUM_EVALS},\
--lr=${LR},\
--nnsize=${NNSIZE}"

# Create a custom job

gcloud ai custom-jobs create \
  --region="${REGION}" \
  --display-name="${JOB_NAME}" \
  --python-package-uris="${PYTHON_PACKAGE_URIS}" \
  --worker-pool-spec="${WORKER_POOL_SPEC}" \
  --args="${ARGS}"

gs://bf-fpl-pred-080723/fpl/trained-models/mid_model_20230805_164422 europe-west1 mid_20230805_164422


Using endpoint [https://europe-west1-aiplatform.googleapis.com/]
CustomJob [projects/343566520815/locations/europe-west1/customJobs/8750713153672708096] is submitted successfully.

Your job is still active. You may view the status of your job with the command

  $ gcloud ai custom-jobs describe projects/343566520815/locations/europe-west1/customJobs/8750713153672708096

or continue streaming the logs with the command

  $ gcloud ai custom-jobs stream-logs projects/343566520815/locations/europe-west1/customJobs/8750713153672708096
