# Set up the Colab environment

In [1]:
# Mount Google Drive so files stored there are reachable from this Colab runtime.
from google.colab import drive # Colab-only module; this cell will not import outside Colab

ROOT = "/content/drive"     # mount point used for the drive
print(ROOT)                 # show the mount point (optional)

drive.mount(ROOT)           # mount Google Drive at ROOT

/content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Locate the project folder on the mounted drive and compose the repository URL.
from os.path import join

# Repository name on GitHub (replace with your own repository).
GIT_REPOSITORY = "promo-recommendation"
# Folder on Google Drive, relative to the mount point, that holds the project.
MY_GOOGLE_DRIVE_PATH = 'MyDrive/Colab Notebooks/'
PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)

# Print the resolved path as a sanity check.
print("PROJECT_PATH: ", PROJECT_PATH)

# If the folder does not exist yet, it can be created with:
# !mkdir "{PROJECT_PATH}"

# NOTE(review): the '@' directly after 'https://' leaves an empty user-info
# segment in the URL — presumably a credential was removed here; confirm
# whether the '@' should go as well.
GIT_PATH = f"https://@github.com/fellowship/{GIT_REPOSITORY}.git"
print("GIT_PATH: ", GIT_PATH)

PROJECT_PATH:  /content/drive/MyDrive/Colab Notebooks/
GIT_PATH:  https://@github.com/fellowship/promo-recommendation.git


In [3]:
%cd "{PROJECT_PATH}{"promo-recommendation/"}"

/content/drive/MyDrive/Colab Notebooks/promo-recommendation


# Neural Network Training

In [None]:
# Run the 00.generate_data.py script through the shell (relative to the current
# working directory). What it writes is not visible from this notebook.
!python ./00.generate_data.py

In [4]:
import gc
import importlib
import os
from routine.utilities import generate_CSV, df_to_dataloader, generate_feature_columns
from routine.data_generation import generate_data
from routine.models import build_wide_model, build_deep_model, build_wide_and_deep_model, \
    build_bayesian_model, evaluate_bandit
from os.path import exists
from pprint import pprint
import tensorflow as tf
import sys
import numpy as np
import pandas as pd

In [5]:
# Obtain the observation table: either simulate a fresh data set or reuse the
# CSV that was saved earlier.
data_regenerate = False
if data_regenerate:
    obs_df, user_df, camp_df = generate_data(
        num_users=1000,
        num_campaigns=100,
        samples_per_campaign=10000,
        num_cohort=10,
        cohort_variances=np.linspace(0.05, 0.6, 10),
        fh_cohort=True,
        response_sig_a=10,
        even_cohort=True,
        cross_response=False,
        magnify_hf=1
    )
else:
    # Path is relative to the current working directory.
    obs_df = pd.read_csv('observation_even.csv')


# Directory that holds the train/val/test CSV splits fed to the networks.
INPUT_DATA_PATH = './deep_and_wide/NN_Inputs/input_data'
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(INPUT_DATA_PATH, exist_ok=True)

In [6]:
# Locations of the train / validation / test CSV splits used by the models.
train_path = f"{INPUT_DATA_PATH}/train.csv"
val_path = f"{INPUT_DATA_PATH}/val.csv"
test_path = f"{INPUT_DATA_PATH}/test.csv"

# Flip to True to rewrite the three split files from obs_df.
re_create = False
if re_create:
    generate_CSV(obs_df, train_path, val_path, test_path, verbose=True)

In [7]:
# Preview the first rows of the observation table.
obs_df.head()

Unnamed: 0.1,Unnamed: 0,user_id,camp_id,cohort,user_f0,user_f1,user_fh,camp_f0,camp_f1,camp_fh,response
0,0,917,30,9,-0.252,-0.257,0.339,0.753,-0.555,-0.953,0
1,1,129,59,1,0.088,-0.093,-0.237,-0.484,-0.785,-0.749,1
2,2,60,84,0,-0.179,-0.339,-0.267,0.126,0.44,-0.698,0
3,3,494,12,4,-0.207,0.21,-0.402,-0.755,0.506,-0.926,1
4,4,152,97,1,0.079,-0.129,-0.14,0.999,0.713,-0.221,1


In [8]:
# Build the dataloaders used for both training and evaluation.
batch_size = 500
n_epochs = 300

# Label column, followed by the feature columns handed to the dataloader.
target_column = "response"
feature_columns = [
    "user_id", "camp_id", "cohort",
    "user_f0", "user_f1", "user_fh",
    "camp_f0", "camp_f1", "camp_fh",
]

train_dl = df_to_dataloader(
    train_path, feature_columns, target_column, batch_size=batch_size)
val_dl = df_to_dataloader(
    val_path, feature_columns, target_column, batch_size=batch_size)
# Only the test loader is requested with shuffle=False.
test_dl = df_to_dataloader(
    test_path, feature_columns, target_column,
    shuffle=False, batch_size=batch_size)

# Show the element spec of each loader.
print("[INFO] Train dataloader:")
pprint(train_dl)
print("[INFO] Val dataloader:")
pprint(val_dl)
print("[INFO] Test dataloader:")
pprint(test_dl)


[INFO] Train dataloader:
<BatchDataset element_spec=({'user_id': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'camp_id': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'cohort': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'user_f0': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'user_f1': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'user_fh': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'camp_f0': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'camp_f1': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'camp_fh': TensorSpec(shape=(None,), dtype=tf.float64, name=None)}, TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>
[INFO] Val dataloader:
<BatchDataset element_spec=({'user_id': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'camp_id': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'cohort': TensorSpec(shape=(None,), dtype=tf.int64, name=None), 'user_f0': TensorSpec(shape=(None,), dtype=

In [9]:
# Create the TF feature columns together with their matching input dicts.
feature_column_dict, feature_column_input_dict = generate_feature_columns()
# Merge the numeric and embedding inputs into the single mapping passed to
# every model builder (embedding entries win on a key clash).
inputs = dict(feature_column_input_dict["numeric"])
inputs.update(feature_column_input_dict["embedding"])


In [10]:
# Dump the feature-column structures, one per line, for inspection.
print(feature_column_dict, feature_column_input_dict, inputs, sep="\n")

{'numeric': [NumericColumn(key='user_f0', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='user_f1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='camp_f0', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='camp_f1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)], 'embedding': [EmbeddingColumn(categorical_column=HashedCategoricalColumn(key='user_id', hash_bucket_size=1000, dtype=tf.int64), dimension=16, combiner='mean', initializer=<tensorflow.python.ops.init_ops.TruncatedNormal object at 0x7fe5000bb950>, ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None, trainable=True, use_safe_embedding_lookup=True), EmbeddingColumn(categorical_column=HashedCategoricalColumn(key='camp_id', hash_bucket_size=100, dtype=tf.int64), dimension=7, combiner='mean', initializer=<tensorflow.python.ops.init_ops.TruncatedNormal object at 0x7fe5000f6ed

In [11]:
# Checkpoint directories: one root folder plus one sub-folder per model family.
models_dir = './deep_and_wide/NN_checkpoint'
# exist_ok=True replaces the explicit isdir() guard and matches the calls below.
os.makedirs(models_dir, exist_ok=True)
wmodel_dir = models_dir + '/Wide'
dmodel_dir = models_dir + '/Deep'
wdmodel_dir = models_dir + '/W&D'
bayesian_dir = models_dir + '/Bayesian'
os.makedirs(wmodel_dir, exist_ok=True)
os.makedirs(dmodel_dir, exist_ok=True)
os.makedirs(wdmodel_dir, exist_ok=True)
os.makedirs(bayesian_dir, exist_ok=True)
# Learning rate. NOTE(review): not passed to any builder call visible in this
# notebook — confirm where it is consumed.
lr = 1e-3
# Run a garbage-collection pass; as the last expression, its return value is
# shown as the cell output.
gc.collect()

283

# WIDE MODEL ONLY

In [None]:
# Build the wide model. The builder returns four values; judging by how they
# are used below, these are the model, its save path and two Keras callbacks —
# TODO confirm against routine.models.
wmodel, wmodel_path, w_es, w_mc = build_wide_model(
    feature_column_dict,
    inputs,
    wmodel_dir=wmodel_dir,
)
# Print the layer-by-layer architecture.
wmodel.summary()

In [None]:
# Train the wide model, or reload the previously saved one from wmodel_path.
again_training = True
if again_training:
    # Save the model to wmodel_path each time val_loss reaches a new minimum.
    w_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=wmodel_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # train_dl / val_dl are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H = wmodel.fit(train_dl,
                   epochs=n_epochs,
                   validation_data=val_dl,
                   callbacks=[w_es, w_mc, w_m])
else:
    wmodel = tf.keras.models.load_model(wmodel_path)

In [None]:
# Score the wide model on each of the three splits.
eval_wmodel_train = wmodel.evaluate(train_dl)
eval_wmodel_val = wmodel.evaluate(val_dl)
eval_wmodel_test = wmodel.evaluate(test_dl)

# Report the results, one split per heading.
print("\n[INFO] On Training Set:", eval_wmodel_train, sep="\n")
print("\n[INFO] On Validation Set:", eval_wmodel_val, sep="\n")
print("\n[INFO] On Test Set:", eval_wmodel_test, sep="\n")

# DEEP MODEL ONLY

## 1. With only embeddings

In [15]:
# Deep model variant 1: only the embedding feature columns are handed to the
# builder. The full `inputs` mapping is still passed.
dmodel_1_emb, dmodel_1_emb_path, d1_es, d1_mc = build_deep_model(
    feature_column_dict["embedding"],
    inputs,
    dmodel_dir,
    name="dmodel_1_emb.h5",
    ckpt_name="dmodel_1_emb_checkpoint.h5",
)
# Print the layer-by-layer architecture.
dmodel_1_emb.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 camp_f0 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 camp_f1 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 camp_id (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 cohort (InputLayer)            [(None,)]            0           []                               
                                                                                            

In [16]:
# Train the embeddings-only deep model, or reload the saved one from disk.
again_training = True
if again_training:
    # Save the model to dmodel_1_emb_path each time val_loss reaches a new minimum.
    d1_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=dmodel_1_emb_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H1 = dmodel_1_emb.fit(train_dl,
                          epochs=n_epochs,
                          validation_data=val_dl,
                          callbacks=[d1_es, d1_mc, d1_m])
else:
    dmodel_1_emb = tf.keras.models.load_model(dmodel_1_emb_path)

Epoch 1/300


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 1: val_accuracy improved from -inf to 0.71434, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb_checkpoint.h5

Epoch 1: val_loss improved from inf to 0.54790, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb.h5
Epoch 2/300
Epoch 2: val_accuracy improved from 0.71434 to 0.74348, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb_checkpoint.h5

Epoch 2: val_loss improved from 0.54790 to 0.50504, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb.h5
Epoch 3/300
Epoch 3: val_accuracy improved from 0.74348 to 0.74473, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb_checkpoint.h5

Epoch 3: val_loss improved from 0.50504 to 0.49763, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb.h5
Epoch 4/300
Epoch 4: val_accuracy improved from 0.74473 to 0.74742, saving model to ./deep_and_wide/NN_checkpoint/Deep/dmodel_1_emb_checkpoint.h5

Epoch 4: val_loss improved from 0.49763 to 0.49582, saving model to ./deep

In [17]:
# Score the embeddings-only deep model on each split. The dataloaders are
# already batched, so no batch_size is passed to evaluate().
eval_dmodel_1_emb_train = dmodel_1_emb.evaluate(train_dl)
eval_dmodel_1_emb_val = dmodel_1_emb.evaluate(val_dl)
eval_dmodel_1_emb_test = dmodel_1_emb.evaluate(test_dl)
# Print the results
print("\n[INFO] On Training Set:")
print(eval_dmodel_1_emb_train)
print("\n[INFO] On Validation Set:")
print(eval_dmodel_1_emb_val)
print("\n[INFO] On Test Set:")
print(eval_dmodel_1_emb_test)


[INFO] On Training Set:
[0.4571966230869293, 0.7663237452507019, 0.9842695593833923]

[INFO] On Validation Set:
[0.49976927042007446, 0.7479562759399414, 0.9810992479324341]

[INFO] On Test Set:
[0.5014410614967346, 0.7478200197219849, 0.9810281991958618]


## 2. With only numerical features

In [None]:
# Deep model variant 2: only the numeric feature columns are handed to the
# builder. The full `inputs` mapping is still passed.
dmodel_2_num, dmodel_2_num_path, d2_es, d2_mc = build_deep_model(
    feature_column_dict["numeric"],
    inputs,
    dmodel_dir,
    name="dmodel_2_num.h5",
    ckpt_name="dmodel_2_num_checkpoint.h5",
)
# Print the layer-by-layer architecture.
dmodel_2_num.summary()


In [None]:
# Train the numeric-only deep model, or reload the saved one from disk.
again_training = True
if again_training:
    # Save the model to dmodel_2_num_path each time val_loss reaches a new minimum.
    d2_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=dmodel_2_num_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H2 = dmodel_2_num.fit(train_dl,
                          epochs=n_epochs,
                          validation_data=val_dl,
                          callbacks=[d2_es, d2_mc, d2_m])
else:
    dmodel_2_num = tf.keras.models.load_model(dmodel_2_num_path)

In [None]:
# Score the numeric-only deep model on each split. The dataloaders are
# already batched, so no batch_size is passed to evaluate().
eval_dmodel_2_num_train = dmodel_2_num.evaluate(train_dl)
eval_dmodel_2_num_val = dmodel_2_num.evaluate(val_dl)
eval_dmodel_2_num_test = dmodel_2_num.evaluate(test_dl)
# Print the results
print("\n[INFO] On Training Set:")
print(eval_dmodel_2_num_train)
print("\n[INFO] On Validation Set:")
print(eval_dmodel_2_num_val)
print("\n[INFO] On Test Set:")
print(eval_dmodel_2_num_test)

## 3. With embeddings and numerical features

In [None]:
# Deep model variant 3: the whole feature-column dict (numeric and embedding
# entries) is handed to the builder.
dmodel_3_num_emb, dmodel_3_num_emb_path, d3_es, d3_mc = build_deep_model(
    feature_column_dict,
    inputs,
    dmodel_dir,
    name="dmodel_3_num_emb.h5",
    ckpt_name="dmodel_3_num_emb_checkpoint.h5",
)
# Print the layer-by-layer architecture.
dmodel_3_num_emb.summary()

In [None]:
# Train the numeric+embedding deep model, or reload the saved one from disk.
again_training = True
if again_training:
    # Save the model to dmodel_3_num_emb_path each time val_loss reaches a new minimum.
    d3_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=dmodel_3_num_emb_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H3 = dmodel_3_num_emb.fit(train_dl,
                              epochs=n_epochs,
                              validation_data=val_dl,
                              callbacks=[d3_es, d3_mc, d3_m])
else:
    dmodel_3_num_emb = tf.keras.models.load_model(dmodel_3_num_emb_path)

In [None]:
# Score the numeric+embedding deep model on each split. The dataloaders are
# already batched, so no batch_size is passed to evaluate().
eval_dmodel_3_num_emb_train = dmodel_3_num_emb.evaluate(train_dl)
eval_dmodel_3_num_emb_val = dmodel_3_num_emb.evaluate(val_dl)
eval_dmodel_3_num_emb_test = dmodel_3_num_emb.evaluate(test_dl)
# Print the results
print("\n[INFO] On Training Set:")
print(eval_dmodel_3_num_emb_train)
print("\n[INFO] On Validation Set:")
print(eval_dmodel_3_num_emb_val)
print("\n[INFO] On Test Set:")
print(eval_dmodel_3_num_emb_test)

## 4. With normal and hidden numeric features

In [None]:
# Deep model variant 4: regenerate the feature columns with hidden_include=True
# and build the model from that extended set.
feature_column_dict_hidden, feature_column_input_dict_hidden = generate_feature_columns(
    hidden_include=True)
# Merge numeric and embedding inputs (embedding entries win on a key clash).
inputs_hidden = dict(feature_column_input_dict_hidden["numeric"])
inputs_hidden.update(feature_column_input_dict_hidden["embedding"])
dmodel_4_hid, dmodel_4_hid_path, d4_es, d4_mc = build_deep_model(
    feature_column_dict_hidden,
    inputs_hidden,
    dmodel_dir,
    name="dmodel_4_hid.h5",
    ckpt_name="dmodel_4_hid_checkpoint.h5",
)
# Print the layer-by-layer architecture.
dmodel_4_hid.summary()

In [None]:
# Train the hidden-feature deep model, or reload the saved one from disk.
again_training = True
if again_training:
    # Save the model to dmodel_4_hid_path each time val_loss reaches a new minimum.
    d4_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=dmodel_4_hid_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H4 = dmodel_4_hid.fit(train_dl,
                          epochs=n_epochs,
                          validation_data=val_dl,
                          callbacks=[d4_es, d4_mc, d4_m])
else:
    dmodel_4_hid = tf.keras.models.load_model(dmodel_4_hid_path)

In [None]:
# Score the hidden-feature deep model on each split. The dataloaders are
# already batched, so no batch_size is passed to evaluate().
eval_dmodel_4_hid_train = dmodel_4_hid.evaluate(train_dl)
eval_dmodel_4_hid_val = dmodel_4_hid.evaluate(val_dl)
eval_dmodel_4_hid_test = dmodel_4_hid.evaluate(test_dl)
# Print the results
print("\n[INFO] On Training Set:")
print(eval_dmodel_4_hid_train)
print("\n[INFO] On Validation Set:")
print(eval_dmodel_4_hid_val)
print("\n[INFO] On Test Set:")
print(eval_dmodel_4_hid_test)

# WIDE & DEEP MODEL

In [12]:
# Build the combined wide & deep model from the full feature-column dict.
wdmodel, wdmodel_path, wd_es, wd_mc = build_wide_and_deep_model(
    feature_column_dict,
    inputs,
    wdmodel_dir=wdmodel_dir,
)
# Print the layer-by-layer architecture.
wdmodel.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 camp_f0 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 camp_f1 (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 camp_id (InputLayer)           [(None,)]            0           []                               
                                                                                                  
 cohort (InputLayer)            [(None,)]            0           []                               
                                                                                              

In [13]:
# Train the wide & deep model, or reload the saved one from wdmodel_path.
again_training = True
if again_training:
    # Save the model to wdmodel_path each time val_loss reaches a new minimum.
    wd_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=wdmodel_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H = wdmodel.fit(train_dl,
                    epochs=n_epochs,
                    validation_data=val_dl,
                    callbacks=[wd_es, wd_mc, wd_m])
else:
    wdmodel = tf.keras.models.load_model(wdmodel_path)

Epoch 1/300


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 1: val_accuracy improved from -inf to 0.76569, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel_checkpoint.h5

Epoch 1: val_loss improved from inf to 0.49668, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel.h5
Epoch 2/300
Epoch 2: val_accuracy improved from 0.76569 to 0.82674, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel_checkpoint.h5

Epoch 2: val_loss improved from 0.49668 to 0.37814, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel.h5
Epoch 3/300
Epoch 3: val_accuracy improved from 0.82674 to 0.83591, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel_checkpoint.h5

Epoch 3: val_loss improved from 0.37814 to 0.36089, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel.h5
Epoch 4/300
Epoch 4: val_accuracy improved from 0.83591 to 0.83741, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel_checkpoint.h5

Epoch 4: val_loss improved from 0.36089 to 0.35924, saving model to ./deep_and_wide/NN_checkpoint/W&D/wdmodel.h5
Epo

In [14]:
# Score the wide & deep model on each split. The dataloaders are already
# batched, so no batch_size is passed to evaluate().
eval_wdmodel_train = wdmodel.evaluate(train_dl)
eval_wdmodel_val = wdmodel.evaluate(val_dl)
eval_wdmodel_test = wdmodel.evaluate(test_dl)
# Print the results
print("\n[INFO] On Training Set:")
print(eval_wdmodel_train)
print("\n[INFO] On Validation Set:")
print(eval_wdmodel_val)
print("\n[INFO] On Test Set:")
print(eval_wdmodel_test)


[INFO] On Training Set:
[0.2960934042930603, 0.8612719178199768, 0.9938235878944397]

[INFO] On Validation Set:
[0.40061330795288086, 0.8328808546066284, 0.9872100949287415]

[INFO] On Test Set:
[0.4036564528942108, 0.8314800262451172, 0.9869565963745117]


# BAYESIAN WIDE & DEEP MODEL

In [None]:
# Build the Bayesian wide & deep model; artifacts go under bayesian_dir.
bmodel, bmodel_path, b_es, b_mc = build_bayesian_model(
    feature_column_dict,
    inputs,
    bayesian_dir,
)
# Print the layer-by-layer architecture.
bmodel.summary()

In [None]:
# Train the Bayesian model, or reload the saved one from bmodel_path.
again_training = True
if again_training:
    # Save the model to bmodel_path each time val_loss reaches a new minimum.
    b_m = tf.keras.callbacks.ModelCheckpoint(
        filepath=bmodel_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    # The dataloaders are already-batched tf.data datasets, so batch_size,
    # validation_batch_size and shuffle are not passed: Keras documents them
    # as not applicable to dataset inputs.
    H = bmodel.fit(train_dl,
                   epochs=n_epochs,
                   validation_data=val_dl,
                   callbacks=[b_es, b_mc, b_m])
else:
    bmodel = tf.keras.models.load_model(bmodel_path)

In [None]:
# Run the bandit evaluation on each split. Each call yields a pair that is
# unpacked as (Thompson-sampling result, UCB result).
ts_train, ucb_train = evaluate_bandit(bmodel, train_dl)
ts_val, ucb_val = evaluate_bandit(bmodel, val_dl)
ts_test, ucb_test = evaluate_bandit(bmodel, test_dl)

# UCB results, one split per heading.
print("\nUCB\n[INFO] On Training Set:", ucb_train, sep="\n")
print("\n[INFO] On Validation Set:", ucb_val, sep="\n")
print("\n[INFO] On Test Set:", ucb_test, sep="\n")

# Thompson-sampling results, one split per heading.
print("\nThompson Sampling\n[INFO] On Training Set:", ts_train, sep="\n")
print("\n[INFO] On Validation Set:", ts_val, sep="\n")
print("\n[INFO] On Test Set:", ts_test, sep="\n")
