In [7]:
import tensorflow.compat.v1 as tf
import tensorflow.lite as tflite
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, concatenate, Input, Embedding
from tensorflow.keras.layers import Reshape, Concatenate, BatchNormalization, Dropout
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
import numpy as np
from sklearn.preprocessing import StandardScaler, RobustScaler
import glob
import wandb
from wandb.keras import WandbCallback
import plotly
from google.cloud import storage
from datetime import datetime

%matplotlib inline

client=storage.Client()

pd.set_option('display.max_columns', None)

In [66]:
# TODO: REPLACE WITH YOUR OWN FILE. 
# This example demonstrates how to use files from GCS. You can use local files as well.

# Get file lists from GCS
training_files = list(filter(lambda s: '.csv' in s, [blob.name for blob in client.list_blobs('iap-optimization-codelab', prefix='training-data', )]))
validation_files = list(filter(lambda s: '.csv' in s, [blob.name for blob in client.list_blobs('iap-optimization-codelab', prefix='validation-data', )]))
test_files = list(filter(lambda s: '.csv' in s, [blob.name for blob in client.list_blobs('iap-optimization-codelab', prefix='test-data', )]))


# Add full path to it
training_files = ['gs://iap-optimization-codelab/' + f for f in training_files]
validation_files = ['gs://iap-optimization-codelab/' + f for f in validation_files]
test_files = ['gs://iap-optimization-codelab/' + f for f in test_files]

print("Training files: {}, validation files: {}, test files: {}".format(len(training_files), len(validation_files), len(test_files)))



Training files: 1, validation files: 1, test files: 1


## Data preprocessing

In [67]:
# List of categorical columns.
CAT_COLUMNS = [
 'geo_country', 
 'device_os',
 'last_run_end_reason',
]

## Load sample data to initialize scaler and actions mapping

We need to understand the data so we can prepare it for preprocessing.

In [68]:
def read_files_into_df(file_list):
    li = []
    for filename in file_list:
        print("reading {}".format(filename))
        df = pd.read_csv(filename, index_col=None, header=0)
        df.sample(frac=1) # Shuffle
        li.append(df)
    return pd.concat(li, axis=0, ignore_index=True)

sample_data = read_files_into_df(training_files)
sample_data

reading gs://iap-optimization-codelab/training-data/training.csv


Unnamed: 0,distance_avg,coins_spent,game_day,geo_country,device_os,last_run_end_reason,presented_powerup,is_powerup_clicked
0,194,670,29,China,Android,wall,parachute,False
1,135,526,127,UK,iOS,wall,extra_life,True
2,85,1515,0,UK,iOS,laser,time_machine,False
3,60,470,102,Russia,iOS,laser,head_start,False
4,174,1010,49,China,Android,laser,sparky_armor,True
...,...,...,...,...,...,...,...,...
699996,103,1375,268,US,Android,wall,extra_life,False
699997,81,1401,53,South Korea,Android,laser,nuclear_missle,False
699998,24,894,158,Russia,Android,wall,nuclear_missle,False
699999,102,3043,162,Italy,iOS,wall,head_start,False


In [69]:
# Go through all data to determine the category space for each column.
category_space_mapping = {}
for c in CAT_COLUMNS:
    category_space_mapping[c] = list(map(lambda v: c+'_'+str(v), list(sample_data[c].astype('category').cat.categories)))

category_space_mapping

{'geo_country': ['geo_country_Canada',
  'geo_country_China',
  'geo_country_France',
  'geo_country_Germany',
  'geo_country_India',
  'geo_country_Italy',
  'geo_country_Japan',
  'geo_country_Russia',
  'geo_country_South Korea',
  'geo_country_UK',
  'geo_country_US'],
 'device_os': ['device_os_Android', 'device_os_iOS'],
 'last_run_end_reason': ['last_run_end_reason_laser',
  'last_run_end_reason_wall']}

In [70]:
def one_hot(df, cols):
  """Returns one-hot encoding of DataFrame df including columns in cols."""
  for col in cols:
    dummies = pd.get_dummies(df[col], prefix=col, drop_first=False)
    dummies = dummies.T.reindex(category_space_mapping[col]).T.fillna(0)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(col, axis=1)
  return df

In [71]:
# Preprocess a dataframe read from CSV into state action reward triples that can be used in model training.
def pre_process(df):
    # List of all numerical columns
    numerical_columns = list(filter(lambda x: x not in (CAT_COLUMNS + ['is_powerup_clicked', 'presented_powerup']), df.columns.values.tolist()))

    # Drop row if action or reward is na
    df = df[df['presented_powerup'].notna()]
    
    # Find all NAs 
    nans = df.isna().sum()
    nans= nans[nans!=0]
    columns_needs_filling = nans.index.tolist()

    # Determine which columns to fill
    numerical_columns_to_fill = list(set(columns_needs_filling) & set(numerical_columns))
    cat_columns_to_fill = list(set(columns_needs_filling) & set(CAT_COLUMNS))

    # Fill NAs
    df[cat_columns_to_fill] = df[cat_columns_to_fill].fillna(method='bfill').fillna(method='ffill')
    df[numerical_columns_to_fill] = df[numerical_columns_to_fill].fillna(value=0)

    # Verify all NA is filled
    assert df.isna().sum().sum() == 0
    
    # actions for on all data
    actions = df['presented_powerup'].astype('category').cat.codes
    # action space - list of all actions to choose from
    action_space = df['presented_powerup'].astype('category').cat.categories
    # mapping from category code back to string
    actions_mapping = dict(enumerate(df['presented_powerup'].astype('category').cat.categories))
    
    rewards = df['is_powerup_clicked']
    states = df.drop(['presented_powerup', 'is_powerup_clicked'], axis=1)
    
    states = one_hot(states, CAT_COLUMNS)
    
    # Assert all categorical state has been found from previously established mapping
    assert states.isna().sum().sum() == 0
    
    return (states, actions, rewards)

In [72]:
sample_states, sample_actions, sample_rewards = pre_process(sample_data)

# List of all numerical columns
NUM_COLUMNS = list(filter(lambda x: x not in (CAT_COLUMNS + ['is_powerup_clicked', 'presented_powerup']), sample_data.columns.values.tolist()))

In [73]:
sample_states

Unnamed: 0,distance_avg,coins_spent,game_day,geo_country_Canada,geo_country_China,geo_country_France,geo_country_Germany,geo_country_India,geo_country_Italy,geo_country_Japan,geo_country_Russia,geo_country_South Korea,geo_country_UK,geo_country_US,device_os_Android,device_os_iOS,last_run_end_reason_laser,last_run_end_reason_wall
0,194,670,29,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1
1,135,526,127,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1
2,85,1515,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0
3,60,470,102,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0
4,174,1010,49,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699996,103,1375,268,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1
699997,81,1401,53,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0
699998,24,894,158,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1
699999,102,3043,162,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1


In [74]:
STATE_SIZE = sample_states.shape[1]
ACTION_SPACE = sample_data['presented_powerup'].astype('category').cat.categories
ACTIONS_MAPPING = dict(enumerate(sample_data['presented_powerup'].astype('category').cat.categories))
ACTION_SPACE_SIZE = len(ACTION_SPACE)

In [75]:
## scikit-learn scaler object to scale the states
scaler = StandardScaler()
scaler.fit(sample_states[NUM_COLUMNS])

def scale_transform(df):
    df[NUM_COLUMNS] = scaler.transform(df[NUM_COLUMNS])

## Computing the sample weights

In [76]:
# Calculate the sample weight based on frequency of reward showing up.
actions_distribution = sample_data['presented_powerup'].value_counts(normalize=True)

# Sample with reward of 1 is 2x the weight of those with reward of 0.
# This is decided based on the distribution of rewards.
# Then the samples with reward of 1 is scaled based on their distribution.
actions_weight = dict((2/actions_distribution) ** 0.5)

# Sample weight is 1 if reward is 0. Otherwise it's weighted by action's distribution frequency
def get_sample_weight(rewards, actions):
    weight_from_action = actions.map(lambda a: actions_weight[ACTIONS_MAPPING[a]])
    weight = weight_from_action.combine(rewards, (lambda w_a, r : 1 if r == 0 else w_a))
    return weight

## Custom callback to evaluate model

After each epoch, we will test our model against validation data to see how it's performing relative to the benchmark. We will use random selection of actions as the benchmark.

To test our data, we will first **filter for the samples that yield a positive reward**, and **see if our model can predict the action that generated that positive reward**.

Because our dataset might be biased (one action appearing more frequently than others), we need to **downsample all actions so they are evenly distributed**, otherwise the test result will be biased. For example, if `action_1` is appears twice as frequently as the other actions, a model that only predicts `action_1` will be "twice as good as the benchmark", where in reality this will not be true.

![downsample_diagram](./graphics/downsample_test_data.png)

In [77]:
# Softmax to convert model prediction to action index.
def get_action(model_prediction):
    return np.argmax(model_prediction)

class ValidationCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        test_data = read_files_into_df(validation_files)
        test_states, test_actions, test_rewards = pre_process(test_data)
        scale_transform(test_states)
        
        test_actions.rename('action', inplace=True)
        test_data = pd.concat([test_states, test_actions, test_rewards], axis=1)

        # Filter only the ones clicked on
        positive_test_data = test_data[test_data['is_powerup_clicked']==1]
        # Find the count of the action with the smallest sample size
        down_sample_size = positive_test_data.groupby('action').count()['is_powerup_clicked'].min()
        # Downsample
        down_sampled_positive_test_data = positive_test_data.groupby('action').apply(lambda x: x.sample(down_sample_size))

        # Now we have the test dataset ready, run it through the model
        prediction_result = self.model.predict({
            'states': tf.convert_to_tensor(down_sampled_positive_test_data.iloc[:, :-2].values)
        })
        prediction_result = np.array(list(map(get_action, prediction_result)))

        # Construct all result into dataframe
        test_df = pd.DataFrame({'pred':prediction_result, 'real': down_sampled_positive_test_data['action']})
        test_df['is_match'] = test_df['pred'] == test_df['real']

        # Count number of matches
        test_counts = test_df['is_match'].value_counts()

        # Compute benchmark by randomly selecting across 8 actions
        benchmarking_result = test_counts.append(pd.Series([test_df.shape[0] / ACTION_SPACE_SIZE], index=['benchmark']))
        print("Model performance comparing to random: {}".format(benchmarking_result[1]/benchmarking_result[2]))

        # Log the distribution of all actions
        test_action_count = test_df['pred'].value_counts()
        
        log_for_current_epoch = {}
        
        for i in range(ACTION_SPACE_SIZE):
            log_for_current_epoch = {
                **log_for_current_epoch,
                ('action {}'.format(i)): test_action_count.get(i, 0),
            }
        optimizer = self.model.optimizer
        
        # To calculate statistical significance of the improvements,
        # try using the Binomial distribution calculator:
        # https://www.socscistatistics.com/tests/binomial/default2.aspx
        wandb.log({
            **log_for_current_epoch,
            'False prediction': benchmarking_result[0],
            'Correct prediction': benchmarking_result[1],
            'Benchmark': benchmarking_result[2],
                  })
        

## Defining the data generator

In [78]:
TRAIN_BATCH_SIZE = 2048

def train_generator():
    batch_size = TRAIN_BATCH_SIZE
    for filename in training_files * 100:
        print("reading {}".format(filename))
        df = pd.read_csv(filename, index_col=None, header=0)
        df.sample(frac=1) # Shuffle
        states, actions, rewards = pre_process(df)
        scale_transform(states)
        i = 0
        while i * batch_size < states.shape[0]:
            s = states[i * batch_size : (i + 1) * batch_size]
            a = actions[i * batch_size : (i + 1) * batch_size]
            r = rewards[i * batch_size : (i + 1) * batch_size]
            yield ({ 'states': tf.convert_to_tensor(s.values), 'actions': tf.convert_to_tensor(a.values)},
                   tf.convert_to_tensor(r.values),
                   tf.convert_to_tensor(get_sample_weight(r, a))
                  )
            i += 1


## Model Definition

### Loss computation
The loss computation for our model is slightly different from how it's computed in a classification model.

Say performing action `a3` given state `s`, gives the reward `r` of `1.0`. This can only tells us `a3` is a good option, but it doesn't tell us anything about the other actions. Therefore, we can only learn about action `a3`.

Putting this in the code means **only the output node for `a3` will have a non-zero loss**.

![loss computation](./graphics/loss_compuation_diagram.png)

To achieve this with keras, we subclassed `keras.Model` and wrote a customized `train_step` to use as a wrapper of our `keras.Sequential` model, for further details about this, you can read this tutorial [here](https://keras.io/guides/customizing_what_happens_in_fit/).

In [79]:
class NeuralBanditModel(keras.Model):
    def __init__(self, input_dim, output_dim):
        super(NeuralBanditModel, self).__init__()
        self.nn = tf.keras.Sequential(
            [
                Input(shape=(input_dim,)),
                Dense(256, activation='relu'),
                Dense(512, activation='relu'),
                Dense(512, activation='relu'),
                Dense(256, activation='relu'),
                Dropout(0.2),
                Dense(128, activation='relu'),
                Dense(64, activation='relu'),
                Dropout(0.2),
                Dense(32, activation='relu'),
                Dense(output_dim, activation='relu'),
            ],
            name="neural_greedy",
        )
    
    def call(self, inputs):
        return self.nn(inputs['states'])
    
    # Override train_step to allow custom training logic
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y, sample_weight = data
        
        states = x['states']
        actions = x['actions']
        
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass

            # Start target from prediction result
            target = y_pred.numpy()
            # Find the ones that are affected by the actions and update them accordingly
            target[np.arange(states.shape[0]), actions] = y
            # Convert back to tensor
            target_tensor = tf.convert_to_tensor(target)
            
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(target_tensor, y_pred, regularization_losses=self.losses, sample_weight=sample_weight,)

        # Compute gradients
        gradients = tape.gradient(loss, self.nn.trainable_weights)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, self.nn.trainable_weights))

        # Update metrics
        self.compiled_metrics.update_state(target_tensor, y_pred)

        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        # Unpack the data
        x, y = data
        
        states = x['states']
        actions = x['actions']
        
        y_pred = self(x, training=True)  # Forward pass

        # Start target from prediction result
        target = y_pred.numpy()
        # Find the ones that are affected by the actions and update them accordingly
        target[np.arange(states.shape[0]), actions] = y
        # Convert back to tensor
        target_tensor = tf.convert_to_tensor(target)
        
        self.compiled_loss(target_tensor, y_pred, regularization_losses=self.losses)
        # Update the metrics.
        self.compiled_metrics.update_state(target_tensor, y_pred)
        
        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {m.name: m.result() for m in self.metrics}


In [81]:
# TODO: (Optional) Replace with your own project ID on https://wandb.ai
# Alternatively you can remove wandb logging.
wandb.init(project="iap-optimization-codelab")

EPOCHS = 10
STEPS_PER_EPOCH = 256

initial_learning_rate = 0.0002
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True)

model = NeuralBanditModel(STATE_SIZE, ACTION_SPACE.shape[0])
model.compile(optimizer=Adam(learning_rate = lr_schedule), 
              loss="mse", 
              run_eagerly=True)

wandb.config.update({"n_train": EPOCHS * STEPS_PER_EPOCH * TRAIN_BATCH_SIZE, 
                     "batch_size": TRAIN_BATCH_SIZE,
                     "epochs": EPOCHS,
                     "activation": 'relu',
                     'learning_rate': tf.keras.optimizers.schedules.serialize(lr_schedule),
                     'architecture': list(map(lambda l : l.get_output_at(0).get_shape().as_list(), model.nn.layers))
                    })

print("Training starts")
history = model.fit(x=train_generator(),
                    verbose=1, 
                    epochs=EPOCHS, 
                    steps_per_epoch=STEPS_PER_EPOCH,
                    shuffle=True, 
                    callbacks=[
                        WandbCallback(),
                        ValidationCallback(),
                    ]
                   )

ERROR! Session/line number was not unique in database. History logging moved to new session 145


[34m[1mwandb[0m: Wandb version 0.10.26 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training starts
reading gs://iap-optimization-codelab/training-data/training.csv


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Epoch 1/10
Model performance comparing to random: 1.2419174102510895
Epoch 2/10
Model performance comparing to random: 1.2719651379954346
Epoch 3/10
Model performance comparing to random: 1.2664038182195476
Epoch 4/10
Model performance comparing to random: 1.2797675866362317
Epoch 5/10
Model performance comparing to random: 1.2809296534550736
Epoch 6/10
Model performance comparing to random: 1.2656152728781904
Epoch 7/10
Model performance comparing to random: 1.3057480805146295
Epoch 8/10
Model performance comparing to random: 1.2481427682091721
Epoch 9/10
Model performance comparing to random: 1.266694334924258
Epoch 10/10
Model performance comparing to random: 1.2871965137995436


## Model testing

Use data in the test set to test the model performance

In [82]:
def test_model():
    test_data = read_files_into_df(test_files)
    test_states, test_actions, test_rewards = pre_process(test_data)
    scale_transform(test_states)

    test_actions.rename('action', inplace=True)
    test_data = pd.concat([test_states, test_actions, test_rewards], axis=1)

    # Filter only the ones clicked on
    positive_test_data = test_data[test_data['is_powerup_clicked']==1]
    # Find the count of the action with the smallest sample size
    down_sample_size = positive_test_data.groupby('action').count()['is_powerup_clicked'].min()
    # Downsample
    down_sampled_positive_test_data = positive_test_data.groupby('action').apply(lambda x: x.sample(down_sample_size))

    # Now we have the test dataset ready, run it through the model
    prediction_result = model.predict({
        'states': tf.convert_to_tensor(down_sampled_positive_test_data.iloc[:, :-2].values)
    })
    prediction_result = np.array(list(map(get_action, prediction_result)))

    # Construct all result into dataframe
    test_df = pd.DataFrame({'pred':prediction_result, 'real': down_sampled_positive_test_data['action']})
    test_df['is_match'] = test_df['pred'] == test_df['real']

    # Count number of matches
    test_counts = test_df['is_match'].value_counts()

    # Compute benchmark by randomly selecting across 13 actions
    benchmarking_result = test_counts.append(pd.Series([test_df.shape[0] / ACTION_SPACE_SIZE], index=['benchmark']))
    print("Model performance comparing to random: {}".format(benchmarking_result[1]/benchmarking_result[2]))

    # Log the distribution of all actions
    test_action_count = test_df['pred'].value_counts()

    log_for_current_epoch = {}

    for i in range(ACTION_SPACE_SIZE):
        log_for_current_epoch = {
            **log_for_current_epoch,
            ('Test - action {}'.format(i)): test_action_count.get(i, 0),
        }

    wandb.log({
        **log_for_current_epoch,
        'Test - False prediction': benchmarking_result[0],
        'Test - Correct prediction': benchmarking_result[1],
        'Test - Benchmark': benchmarking_result[2],
    })

test_model()

reading gs://iap-optimization-codelab/test-data/test.csv
Model performance comparing to random: 1.2815172297011261


[34m[1mwandb[0m: Wandb version 0.10.26 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


## Save model weights

In [83]:
model.save('./model_checkpoint ' + str(datetime.now()))

INFO:tensorflow:Assets written to: ./model_checkpoint 2021-04-14 18:27:49.853364/assets


INFO:tensorflow:Assets written to: ./model_checkpoint 2021-04-14 18:27:49.853364/assets


## Convert model to TFLite

In [84]:
converter = tflite.TFLiteConverter.from_keras_model(model.nn)
tflite_model = converter.convert()

with tf.io.gfile.GFile('iap-optimizer.tflite', 'wb') as f:
  f.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpxmak97qa/assets


INFO:tensorflow:Assets written to: /tmp/tmpxmak97qa/assets


### Collecting model metadata

https://www.tensorflow.org/lite/convert/metadata

### Serializing numerical columns

In [85]:
import json

data = {}

for i, col in enumerate(NUM_COLUMNS):
    data[col] = {}
    data[col]['type'] = 'numerical'
    data[col]['mean'] = scaler.mean_[i]
    data[col]['std'] = scaler.scale_[i]
    

### Serializing categorical columns

In [86]:
for col in CAT_COLUMNS:
    dummies = pd.get_dummies(sample_data[col], drop_first=False)    
    data[col] = {}
    data[col]['type'] = 'categorical'
    data[col]['all_values'] = list(dummies.columns)

data['output_mapping'] = list(ACTIONS_MAPPING.values())

with open('preprocess.json', 'w') as f:
    json.dump(data, f)

print(json.dumps(data, indent=4, sort_keys=True))

{
    "coins_spent": {
        "mean": 1012.2316639547657,
        "std": 576.6266132193391,
        "type": "numerical"
    },
    "device_os": {
        "all_values": [
            "Android",
            "iOS"
        ],
        "type": "categorical"
    },
    "distance_avg": {
        "mean": 99.49789786014591,
        "std": 39.65095273506135,
        "type": "numerical"
    },
    "game_day": {
        "mean": 108.33904808707416,
        "std": 86.79340575071703,
        "type": "numerical"
    },
    "geo_country": {
        "all_values": [
            "Canada",
            "China",
            "France",
            "Germany",
            "India",
            "Italy",
            "Japan",
            "Russia",
            "South Korea",
            "UK",
            "US"
        ],
        "type": "categorical"
    },
    "last_run_end_reason": {
        "all_values": [
            "laser",
            "wall"
        ],
        "type": "categorical"
    },
    "output_mapping": 

### Writing metadata to TFLite

In [87]:
from tflite_support import flatbuffers
from tflite_support import metadata as _metadata
from tflite_support import metadata_schema_py_generated as _metadata_fb

# Creates model info.
model_meta = _metadata_fb.ModelMetadataT()
model_meta.name = "IAP optimizer"
model_meta.description = ("Determines the expected reward for each action given the state of a user")
model_meta.version = "v1"

b = flatbuffers.Builder(0)
b.Finish(
    model_meta.Pack(b),
    _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
metadata_buf = b.Output()

populator = _metadata.MetadataPopulator.with_model_file('./iap-optimizer.tflite')
populator.load_associated_files(["./preprocess.json"])
populator.populate()

  "tflite model is still allowed.".format(f))


### Run inference on model

Run inference on 10 rows to see what results we get.

We will run inference again on the TF-lite model on client side for comparison.

In [88]:
raw_test_data = read_files_into_df(test_files[:1])
test_data = raw_test_data.sample(frac=0.05) 
test_states, test_actions, test_rewards = pre_process(test_data)
scale_transform(test_states)

# Same input to the model to a csv for valiation
test_states[:10].to_csv('model_input_samples.csv')
test_actions.rename('action', inplace=True)
test_data = pd.concat([test_states, test_actions, test_rewards], axis=1)
# Now we have the test dataset ready, run it through the model
prediction_result = model.predict({
    'states': tf.convert_to_tensor(test_data
                                .iloc[:, :-2].values)
})

reading gs://iap-optimization-codelab/test-data/test.csv


In [89]:
# Build raw data and output into one CSV for validation
s = raw_test_data[:10]
v = pd.DataFrame(prediction_result[:10], columns=list(ACTIONS_MAPPING.values()))
r = df_concat = pd.concat([s, v], axis=1)
r.to_csv('./integration_validation_samples.csv')