In [2]:
import sys

In [25]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import layers

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess

print('TensorFlow version: {}'.format(tf.__version__))

TensorFlow version: 2.9.1


In [9]:
import pandas as pd

In [11]:
whole_set = pd.read_csv("table_november.csv")

whole_set.head()

Unnamed: 0,event_wday,user_id,session_id,session_duration,is_new_visitor,account_sign_in,saw_search_results_n,product_list_pageview_n,product_pageviews_n,add_to_cart_n,reached_cart_n,start_checkout_n,shipping_stage_completed_n,payment_stage_completed,transactions
0,Sunday,82023970.0,1090192382,16075196,1,0,0,1,0,0,0,0,0,0,0
1,Sunday,50214260.0,9011333152,15868556,1,0,0,0,0,0,0,0,0,0,0
2,Sunday,7659669.0,9613106892,27746287,1,0,0,1,0,0,0,0,0,0,0
3,Sunday,46173770.0,5207106256,33435383,1,1,0,0,0,0,0,0,0,0,0
4,Sunday,36571040.0,2516603404,77100232,1,0,0,1,0,0,0,0,0,0,0


In [80]:
# Number of rows
len(whole_set.index)

360974

In [81]:
train, val, test = np.split(whole_set.sample(frac=1), [int(0.8*len(whole_set)), int(0.9*len(whole_set))])

In [82]:
print(len(train), 'training examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

288779 training examples
36097 validation examples
36098 test examples


In [83]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    df = dataframe.copy()
    labels = df.pop('transactions')
    df = {key: np.asarray(value)[:,tf.newaxis] for key, value in dataframe.items()}
    ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds

In [84]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)

In [85]:
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of account sign-in:', train_features['account_sign_in'])
print('A batch of targets:', label_batch)

Every feature: ['event_wday', 'user_id', 'session_id', 'session_duration', 'is_new_visitor', 'account_sign_in', 'saw_search_results_n', 'product_list_pageview_n', 'product_pageviews_n', 'add_to_cart_n', 'reached_cart_n', 'start_checkout_n', 'shipping_stage_completed_n', 'payment_stage_completed', 'transactions']
A batch of account sign-in: tf.Tensor(
[[0]
 [0]
 [0]
 [0]
 [1]], shape=(5, 1), dtype=int64)
A batch of targets: tf.Tensor([0 0 0 0 0], shape=(5,), dtype=int64)


In [86]:
def get_normalization_layer(name, dataset):
    # Create a Normalization layer for the feature.
    normalizer = layers.Normalization(axis=None)

    # Prepare a Dataset that only yields the feature.
    feature_ds = dataset.map(lambda x, y: x[name])

    # Learn the statistics of the data.
    normalizer.adapt(feature_ds)

    return normalizer

In [87]:
duration = train_features['session_duration']
layer = get_normalization_layer('session_duration', train_ds)
layer(duration)

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[-0.361432  ],
       [-0.37743   ],
       [-0.35623235],
       [-0.3613375 ],
       [ 0.08979747]], dtype=float32)>

In [88]:
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
    # Create a layer that turns strings into integer indices.
    if dtype == 'string':
        index = layers.StringLookup(max_tokens=max_tokens)
    # Otherwise, create a layer that turns integer values into integer indices.
    else:
        index = layers.IntegerLookup(max_tokens=max_tokens)

    # Prepare a `tf.data.Dataset` that only yields the feature.
    feature_ds = dataset.map(lambda x, y: x[name])

    # Learn the set of possible values and assign them a fixed integer index.
    index.adapt(feature_ds)

    # Encode the integer indices.
    encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())

    # Apply multi-hot encoding to the indices. The lambda function captures the
    # layer, so you can use them, or include them in the Keras Functional model later.
    return lambda feature: encoder(index(feature))

In [89]:
test_event_wday = train_features['event_wday']
test_event_wday_layer = get_category_encoding_layer(name='event_wday',
                                              dataset=train_ds,
                                              dtype='string')
test_event_wday_layer(test_event_wday)

<tf.Tensor: shape=(5, 8), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.]], dtype=float32)>

In [144]:
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [145]:
all_inputs = []
encoded_features = []

# Numerical features.
for header in ['session_duration']:
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, train_ds)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_inputs.append(numeric_col)
    encoded_features.append(encoded_numeric_col)

In [146]:
categorical_cols = ['event_wday']

for header in categorical_cols:
    categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
    encoding_layer = get_category_encoding_layer(name=header,
                                               dataset=train_ds,
                                               dtype='string',
                                               max_tokens=7)
    encoded_categorical_col = encoding_layer(categorical_col)
    all_inputs.append(categorical_col)
    encoded_features.append(encoded_categorical_col)

In [148]:
binary_cols = ['is_new_visitor', 'account_sign_in', 'saw_search_results_n', 'product_list_pageview_n', 'product_pageviews_n', 'add_to_cart_n', 'reached_cart_n', 'start_checkout_n', 'shipping_stage_completed_n', 'payment_stage_completed']

for header in binary_cols:
    binary_cols = tf.keras.Input(shape=(1,), name=header, dtype='int32')
    encoding_layer = get_category_encoding_layer(name=header,
                                               dataset=train_ds,
                                               dtype='int32',
                                               max_tokens=2)
    encoded_categorical_col = encoding_layer(binary_cols)
    all_inputs.append(binary_cols)
    encoded_features.append(encoded_categorical_col)

In [149]:
encoded_features

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'normalization_9')>,
 <KerasTensor: shape=(None, 7) dtype=float32 (created by layer 'category_encoding_62')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_63')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_64')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_65')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_66')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_67')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_68')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_69')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_70')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'category_encoding_71')>,
 <KerasTensor: shape=(None

In [150]:
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(all_inputs, output)

In [151]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

In [152]:
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [153]:
model.fit(train_ds, epochs=10, validation_data=val_ds)

Epoch 1/10


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x26198edb550>

In [154]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.9939054846763611


In [174]:
a = model.predict(test_ds)



In [175]:
a

array([[-15.088604],
       [-16.739521],
       [-16.46595 ],
       ...,
       [-19.319647],
       [-17.611403],
       [-18.234976]], dtype=float32)

In [176]:
a = [0 if a_ <= 0 else 1 for a_ in a]

In [177]:
a

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [178]:
a = tf.convert_to_tensor(a)

In [179]:
a

<tf.Tensor: shape=(36098,), dtype=int32, numpy=array([0, 0, 0, ..., 0, 0, 0])>

In [180]:
test_ds

<PrefetchDataset element_spec=({'event_wday': TensorSpec(shape=(None, 1), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(None, 1), dtype=tf.float64, name=None), 'session_id': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'session_duration': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'is_new_visitor': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'account_sign_in': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'saw_search_results_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'product_list_pageview_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'product_pageviews_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'add_to_cart_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'reached_cart_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'start_checkout_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'shipping_stage_completed_n': TensorSpec(shape=(None, 1), dtype=tf.i

In [184]:
test_ds

<PrefetchDataset element_spec=({'event_wday': TensorSpec(shape=(None, 1), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(None, 1), dtype=tf.float64, name=None), 'session_id': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'session_duration': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'is_new_visitor': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'account_sign_in': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'saw_search_results_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'product_list_pageview_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'product_pageviews_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'add_to_cart_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'reached_cart_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'start_checkout_n': TensorSpec(shape=(None, 1), dtype=tf.int64, name=None), 'shipping_stage_completed_n': TensorSpec(shape=(None, 1), dtype=tf.i

In [183]:
y_test = tf.data.Dataset.from_tensor_slices(list(test_ds))

AttributeError: type object 'DatasetV2' has no attribute 'from_tensor'

In [182]:
y_test

<tf.Tensor: shape=(256, 1), dtype=int64, numpy=
array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [159]:
model.save('my_classifier')

Cause: Unable to locate the source code of <function canonicalize_signatures.<locals>.signature_wrapper at 0x0000026198F86040>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function canonicalize_signatures.<locals>.signature_wrapper at 0x0000026198F86040>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_sav

Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32C10>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32C10>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_re

Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32CA0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32CA0>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_re

Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32E50>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_restore_functions.<locals>.restore_fn at 0x000002626AA32E50>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Cause: Unable to locate the source code of <function trace_save_re

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse th

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse th

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

Cause: could not parse the source code of <function _trace_resource_initializers.<locals>._wrap_obj_initializer.<locals>.<lambda> at 0x000002619874C5E0>: no matching AST found among candidates:

INFO:tensorflow:Assets wr

In [161]:
reloaded_model = tf.keras.models.load_model('my_classifier')

In [164]:
sample = {
    "event_wday": "Saturday",
  "user_id": "36638696.4",
  "session_id": "6944996097",
  "session_duration": 417760792,
  "is_new_visitor": 1,
  "account_sign_in": 1,
  "saw_search_results_n": 1,
  "product_list_pageview_n": 1,
  "product_pageviews_n": 1,
  "add_to_cart_n": 1,
  "reached_cart_n": 1,
  "start_checkout_n": 1,
  "shipping_stage_completed_n": 1,
  "payment_stage_completed": 1
}

input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
predictions = reloaded_model.predict(input_dict)
prob = tf.nn.sigmoid(predictions[0])

print(
    "This person has a %.1f percent probability "
    "of buying something." % (100 * prob)
)

This person has a 79.0 percent probability of buying something.


In [56]:
print("Evaluate on test data")
results = model.evaluate(test_ds)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.042620085179805756, 0.9854562282562256]


In [None]:
model.compile(optimizer='sgd',
              loss='mse',
              metrics=[tf.keras.metrics.Recall()])