In [None]:
'''

This notebook provides a quick example of how to train a TF model.

For our framework, we needed to train an ML model in order to showcase 
various ways to deploy that model with GKE as well as on Vertex AI for
low-latency serving. 

'''

In [107]:
import numpy as np
import pandas as pd
import pathlib
import random

# Developed with TF 2.8.3
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [108]:

# Cloud Storage URI that the trained model is written to by model.save() further down.
GCS_DEST_PATH = 'gs://stealth-air-23412-bucket/game_telemetry/model'


In [109]:
# Generate Dataset used for Demo

simulations = 10000

# Number of distinct synthetic player ids to draw from. This name was used
# below without ever being defined, so a fresh-kernel run failed with a
# NameError. The exact value does not influence training because "playerid"
# is removed from the frame before the train/test split.
player_count = 500

# Seed the stdlib RNG so that Restart & Run All regenerates the same dataset.
random.seed(0)


def _make_event():
    """Return one synthetic game-telemetry event as a plain dict."""
    return {
        "playerid": f'player_{random.randint(100000,100000+player_count)}',
        # Coordinates: uniform magnitude in [0, 1) with a random sign.
        "xcoord": random.random() * random.choice([-1,1]),
        "ycoord": random.random() * random.choice([-1,1]),
        "zcoord": random.random() * random.choice([-1,1]),
        "dow": random.randint(0,6),
        "hour": random.randint(0,23),
        "score": random.randint(1,100),
        "minutesPlayed": random.randint(0,60),
        "timeInStore": random.randint(0,30),
        # Roughly 35% of events carry a purchase (triangular on [0, 100], mode 0);
        # the rest have a purchase amount of 0.
        "purchaseAmount": random.triangular(0,100,0) if random.random() >= 0.65 else 0,
    }


data = [_make_event() for _ in range(simulations)]

dataset = pd.DataFrame.from_records(data)


In [110]:
# Discard every row that contains at least one missing value, then preview the top rows.
dataset = dataset.dropna(axis=0, how="any")
dataset.head(5)

Unnamed: 0,playerid,xcoord,ycoord,zcoord,score,bonus,offensePct,defensePct,minutesPlayed,timeInStore,purchaseAmount
0,player_100220,-0.769363,0.30795,-0.896356,92,8,0.427718,0.574016,10,28,2.239007
1,player_100439,-0.159957,-0.619474,0.672355,93,4,0.989199,0.856421,55,15,42.09267
2,player_100390,0.60961,-0.67259,-0.735435,31,8,0.79905,0.112738,1,11,55.655362
3,player_100135,-0.06492,0.551692,0.171363,85,7,0.856214,0.762807,16,26,77.661039
4,player_100328,-0.431839,0.320315,-0.949538,83,9,0.674749,0.453796,9,5,47.119691


In [111]:
# Per-column count of missing values.
dataset.isna().sum()

playerid          0
xcoord            0
ycoord            0
zcoord            0
score             0
bonus             0
offensePct        0
defensePct        0
minutesPlayed     0
timeInStore       0
purchaseAmount    0
dtype: int64

In [112]:
# Remove playerid from training data.
# drop(..., errors="ignore") rebinds `dataset` rather than mutating it in place
# with pop(), so re-running this cell after the column is already gone no
# longer raises a KeyError.
dataset = dataset.drop(columns=["playerid"], errors="ignore")

# Test and Train Dataset Split: a fixed-seed 80% random sample is used for
# training; every row not in that sample becomes the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset  = dataset.drop(train_dataset.index)

In [113]:
# "purchaseAmount" is the value the model predicts: detach it from the
# training frame first, then from the test frame, leaving only feature columns.
train_target, test_target = (
    frame.pop('purchaseAmount') for frame in (train_dataset, test_dataset)
)


In [114]:
# Summary statistics of the training features, flipped so that each feature
# is a row and each statistic (count, mean, std, ...) is a column.
train_stats = train_dataset.describe().T
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
xcoord,8000.0,0.001518,0.577702,-0.999903,-0.502598,0.00989,0.507541,0.99961
ycoord,8000.0,0.006933,0.576455,-0.999444,-0.48859,0.000206,0.512011,0.999739
zcoord,8000.0,0.004736,0.581705,-0.999793,-0.502685,0.003495,0.510919,0.999934
score,8000.0,50.68075,28.940936,1.0,26.0,51.0,76.0,100.0
bonus,8000.0,5.50825,2.872667,1.0,3.0,5.0,8.0,10.0
offensePct,8000.0,0.501828,0.288245,0.000172,0.252939,0.506451,0.748673,0.999815
defensePct,8000.0,0.496789,0.287349,3.9e-05,0.248307,0.488592,0.748061,0.999952
minutesPlayed,8000.0,29.798875,17.657595,0.0,15.0,30.0,45.0,60.0
timeInStore,8000.0,14.9215,8.972633,0.0,7.0,15.0,23.0,30.0


In [115]:
# Normalize Columns
def norm(x, stats=None):
  """Standardize the columns of ``x`` to z-scores.

  Args:
    x: DataFrame whose column labels match the row labels of ``stats``.
    stats: Optional table with one row per column of ``x`` and ``mean`` and
      ``std`` columns. Defaults to the notebook-level ``train_stats`` so
      existing ``norm(x)`` calls behave as before.

  Returns:
    ``(x - mean) / std`` computed column-wise. A column whose std is 0 is
    only centered (divided by 1) instead of producing NaN/inf values.
  """
  if stats is None:
    stats = train_stats
  # A zero std would turn the whole column into NaN/inf; use 1 as the divisor there.
  std = stats['std'].replace(0, 1)
  return (x - stats['mean']) / std

# Standardize the training frame and then the test frame with the same norm() helper.
normed_train_data, normed_test_data = map(norm, (train_dataset, test_dataset))

In [116]:
# Preview the first rows of the normalized training features.
normed_train_data.head()

Unnamed: 0,xcoord,ycoord,zcoord,score,bonus,offensePct,defensePct,minutesPlayed,timeInStore
9394,-1.64971,0.592593,1.299057,1.013072,-0.176926,0.692416,1.141016,-0.271774,0.008749
898,1.083135,-0.1899,0.739297,0.460222,1.215508,0.211144,1.001982,-0.668204,-1.105751
2398,-1.603806,-1.567376,-1.651491,-0.88735,-1.569361,1.513848,-1.712861,0.690985,0.788899
5906,-1.352705,0.621741,1.343909,-1.232882,0.519291,1.255782,-0.038352,0.974149,1.346149
2343,-0.718523,0.449775,-0.322439,-0.576372,-1.569361,-1.183543,1.54946,-1.404431,-0.437051


In [117]:
# Build our demo Model

def build_model(input_dim=None):
  """Build and compile the demo regression network.

  Args:
    input_dim: Number of input features. When omitted, the column count of
      the notebook-level ``train_dataset`` is used, which is what the
      zero-argument call has always done.

  Returns:
    A compiled ``keras.Sequential`` model: two 64-unit ReLU Dense layers
    followed by a single-unit Dense output with no activation. It is compiled
    with MSE loss and RMSprop (0.001), and reports MAE and MSE as metrics.
  """
  if input_dim is None:
    input_dim = len(train_dataset.keys())

  model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[input_dim]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.RMSprop(0.001)

  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
  return model

model = build_model()

model.summary()

# `patience` is the number of epochs without val_loss improvement to wait
# before training is stopped.
# restore_best_weights=True rolls the model back to the epoch with the best
# val_loss when early stopping triggers. Without it the model keeps the
# weights of the final epoch (`patience` epochs past the best one), and those
# are the weights that would be saved afterwards.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                           patience=15,
                                           restore_best_weights=True)

# 20% of the training rows are held out by Keras for validation (val_loss).
early_history = model.fit(normed_train_data, train_target,
                    epochs=1000, validation_split=0.2,
                    callbacks=[early_stop])


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 64)                640       
                                                                 
 dense_16 (Dense)            (None, 64)                4160      
                                                                 
 dense_17 (Dense)            (None, 1)                 65        
                                                                 
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epo

In [118]:
# Save the trained model to GCS (the destination is GCS_DEST_PATH).

model.save(GCS_DEST_PATH)

2023-02-27 16:29:03.175718: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: gs://stealth-air-23412-bucket/game_telemetry/model/assets
