In [1]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd
from data_frame import DataFrame
import tabnet_model

In [2]:
data_dir = '../data/interim'
data_cols = [
            'user_id',
            'product_id',
            'aisle_id',
            'department_id',
            'is_ordered_history',
            'index_in_order_history',
            'order_dow_history',
            'order_hour_history',
            'days_since_prior_order_history',
            'order_size_history',
            'reorder_size_history',
            'order_number_history',
            'history_length',
            'product_name',
            'product_name_length',
            'eval_set',
            'label'
        ]

data = [np.load(os.path.join(data_dir, '{}.npy'.format(i)), mmap_mode='r') for i in data_cols]
test_df = DataFrame(columns=data_cols, data=data)
                        
print(test_df.shapes())
print('loaded data')

train_df, val_df = test_df.train_test_split(train_size=0.8)

print('train size', len(train_df))
print('val size', len(val_df))
print('test size', len(test_df))

user_id                               (13514162,)
product_id                            (13514162,)
aisle_id                              (13514162,)
department_id                         (13514162,)
is_ordered_history                (13514162, 100)
index_in_order_history            (13514162, 100)
order_dow_history                 (13514162, 100)
order_hour_history                (13514162, 100)
days_since_prior_order_history    (13514162, 100)
order_size_history                (13514162, 100)
reorder_size_history              (13514162, 100)
order_number_history              (13514162, 100)
history_length                        (13514162,)
product_name                       (13514162, 30)
product_name_length                   (13514162,)
eval_set                              (13514162,)
label                                 (13514162,)
dtype: object
loaded data
train size 10811329
val size 2702833
test size 13514162


In [16]:
# Input layers
user_id = Input(shape=(), dtype=tf.int32, name='user_id')
product_id = Input(shape=(), dtype=tf.int32, name='product_id')
aisle_id = Input(shape=(), dtype=tf.int32, name='aisle_id')
department_id = Input(shape=(), dtype=tf.int32, name='department_id')
is_none = Input(shape=(), dtype=tf.int32, name='is_none')
history_length = Input(shape=(), dtype=tf.int32, name='history_length')

is_ordered_history = Input(shape=(100,), dtype=tf.int32, name='is_ordered_history')
index_in_order_history = Input(shape=(100,), dtype=tf.int32, name='index_in_order_history')
order_dow_history = Input(shape=(100,), dtype=tf.int32, name='order_dow_history')
order_hour_history = Input(shape=(100,), dtype=tf.int32, name='order_hour_history')
days_since_prior_order_history = Input(shape=(100,), dtype=tf.int32, name='days_since_prior_order_history')
order_size_history = Input(shape=(100,), dtype=tf.int32, name='order_size_history')
reorder_size_history = Input(shape=(100,), dtype=tf.int32, name='reorder_size_history')
order_number_history = Input(shape=(100,), dtype=tf.int32, name='order_number_history')
product_name = Input(shape=(30,), dtype=tf.int32, name='product_name')
product_name_length = Input(shape=(), dtype=tf.int32, name='product_name_length')
next_is_ordered = Input(shape=(100,), dtype=tf.int32, name='next_is_ordered')

# Product data
product_embeddings = Embedding(input_dim=50000, output_dim=lstm_size, name='product_embeddings')(product_id)
aisle_embeddings = Embedding(input_dim=250, output_dim=50, name='aisle_embeddings')(aisle_id)
department_embeddings = Embedding(input_dim=50, output_dim=10, name='department_embeddings')(department_id)

# One-hot encoding and reduction
product_names = tf.one_hot(product_name, 2532)
product_names = tf.reduce_max(product_names, 1)
product_names = dense_layer(product_names, 100, activation=tf.nn.relu)

# Cast and expand dimensions
is_none = tf.cast(tf.expand_dims(is_none, 1), tf.float32)

In [17]:
# Concatenate all the inputs
x_product = Concatenate(axis=1)([
    product_embeddings,
    aisle_embeddings,
    department_embeddings,
    is_none,
    product_names
])

# Tile the tensor
x_product = tf.tile(tf.expand_dims(x_product, 1), (1, 100, 1))

In [18]:
# User data
user_embeddings = Embedding(input_dim=207000, output_dim=lstm_size, name='user_embeddings')(user_id)
x_user = tf.expand_dims(user_embeddings, 1)
x_user = tf.tile(x_user, (1, 100, 1))

In [19]:
# Sequence data
is_ordered_history = tf.one_hot(is_ordered_history, depth=2)
index_in_order_history = tf.one_hot(index_in_order_history, depth=20)
order_dow_history = tf.one_hot(order_dow_history, depth=8)
order_hour_history = tf.one_hot(order_hour_history, depth=25)
days_since_prior_order_history = tf.one_hot(days_since_prior_order_history, depth=31)
order_size_history = tf.one_hot(order_size_history, depth=60)
reorder_size_history = tf.one_hot(reorder_size_history, depth=50)
order_number_history = tf.one_hot(order_number_history, depth=101)

index_in_order_history_scalar = tf.cast(index_in_order_history, tf.float32) / 20.0
order_dow_history_scalar = tf.cast(order_dow_history, tf.float32) / 8.0
order_hour_history_scalar = tf.cast(order_hour_history, tf.float32) / 25.0
days_since_prior_order_history_scalar = tf.cast(days_since_prior_order_history, tf.float32) / 31.0
order_size_history_scalar = tf.cast(order_size_history, tf.float32) / 60.0
reorder_size_history_scalar = tf.cast(reorder_size_history, tf.float32) / 50.0
order_number_history_scalar = tf.cast(order_number_history, tf.float32) / 100.0

In [20]:
# Concatenating all histories
x_history = Concatenate(axis=2)([
    is_ordered_history,
    index_in_order_history,
    order_dow_history,
    order_hour_history,
    days_since_prior_order_history,
    order_size_history,
    reorder_size_history,
    order_number_history,
    index_in_order_history_scalar,
    order_dow_history_scalar,
    order_hour_history_scalar,
    days_since_prior_order_history_scalar,
    order_size_history_scalar,
    reorder_size_history_scalar,
    order_number_history_scalar
])

In [21]:
# Concatenate all inputs
x = Concatenate(axis=2)([x_history, x_product, x_user])

In [22]:
x.shape

TensorShape([None, 100, 1353])

In [23]:
lstm = LSTM(
    units=lstm_size,
    return_sequences=True,
)   

In [24]:
# LSTM layer
y_hat = lstm(x)

# Define the model
model = Model(inputs=x, outputs=y_hat)

In [25]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 100, 1353)]       0         
                                                                 
 lstm (LSTM)                 (None, 100, 300)          1984800   
                                                                 
Total params: 1984800 (7.57 MB)
Trainable params: 1984800 (7.57 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy(),
                       tf.keras.metrics.FalseNegatives()])



In [38]:
def batch_generator(batch_size, df, shuffle=True, num_epochs=10000, is_test=False):
    batch_gen = df.batch_generator(batch_size, shuffle=shuffle, num_epochs=num_epochs, allow_smaller_final_batch=is_test)
    for batch in batch_gen:
        batch['order_dow_history'] = np.roll(batch['order_dow_history'], -1, axis=1)
        batch['order_hour_history'] = np.roll(batch['order_hour_history'], -1, axis=1)
        batch['days_since_prior_order_history'] = np.roll(batch['days_since_prior_order_history'], -1, axis=1)
        batch['order_number_history'] = np.roll(batch['order_number_history'], -1, axis=1)
        batch['next_is_ordered'] = np.roll(batch['is_ordered_history'], -1, axis=1)
        batch['is_none'] = batch['product_id'] == 0
        if not is_test:
            batch['history_length'] = batch['history_length'] - 1
        yield batch
        
def train_batch_generator(batch_size):
    return batch_generator(
        batch_size=batch_size,
        df=train_df,
        shuffle=True,
        num_epochs=10000,
        is_test=False
    )

def val_batch_generator(batch_size):
    return batch_generator(
        batch_size=batch_size,
        df=val_df,
        shuffle=True,
        num_epochs=10000,
        is_test=False
    )

def test_batch_generator(batch_size):
    return batch_generator(
        batch_size=batch_size,
        df=test_df,
        shuffle=False,
        num_epochs=1,
        is_test=True
    )

In [27]:
num_training_steps = 10
batch_size = 128
num_validation_batches = 4

In [33]:
len(train_df.data)

16

In [29]:
# Train the model
history = model.fit(train_df.data, Y, epochs=num_training_steps, batch_size=batch_size)

Epoch 1/10


ValueError: in user code:

    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/engine/training.py", line 1126, in train_step
        y_pred = self(x, training=True)
    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/homebrew/anaconda3/lib/python3.11/site-packages/keras/src/engine/input_spec.py", line 219, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model" expects 1 input(s), but it received 16 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=int32>, <tf.Tensor 'IteratorGetNext:1' shape=(None,) dtype=int32>, <tf.Tensor 'IteratorGetNext:2' shape=(None,) dtype=int16>, <tf.Tensor 'IteratorGetNext:3' shape=(None,) dtype=int8>, <tf.Tensor 'IteratorGetNext:4' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:5' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:6' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:7' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:8' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:9' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:10' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:11' shape=(None, 100) dtype=int8>, <tf.Tensor 'IteratorGetNext:12' shape=(None,) dtype=int8>, <tf.Tensor 'IteratorGetNext:13' shape=(None, 30) dtype=int32>, <tf.Tensor 'IteratorGetNext:14' shape=(None,) dtype=int8>, <tf.Tensor 'IteratorGetNext:15' shape=(None,) dtype=string>]


In [70]:
val_batch_df = next(val_generator)
print(val_batch_df['label'])

[ 1 -1 -1  0 -1  0  0  0  0 -1  0  0 -1  0 -1  0  0 -1 -1  0  0 -1  0 -1
 -1  0  0  0 -1 -1 -1  0 -1 -1 -1  0  0 -1  0 -1  0  0  0 -1 -1 -1 -1 -1
  0 -1  0  0  0  0  0 -1  0  0  0 -1 -1 -1  0  0  0 -1  1  0 -1 -1  0  0
  0  1 -1  1  0 -1 -1  0  1 -1  0 -1 -1  0 -1  0  0  0 -1  0  0 -1  0  0
  0 -1  0 -1 -1  0  0  0  0 -1  0  0  0 -1  0  1  0  0 -1 -1  0 -1  0  1
  0  0 -1 -1 -1  0  0  0 -1  0  0 -1  0 -1  0 -1 -1 -1  0  0  0  1  1  0
  0  0  0 -1 -1  0  0  0  0  0 -1 -1  0 -1 -1  0  0 -1 -1  0 -1  0 -1  0
  0  0  0 -1  0  0 -1 -1 -1  0  0  0  0  0 -1  1  1  0  0  0  0  0  0 -1
 -1  0 -1  0  1 -1  1 -1 -1 -1  1  0  0 -1  0  0 -1  0  0 -1 -1 -1  0  0
  0 -1 -1 -1 -1  0 -1  0  0  0 -1 -1  0  0 -1  0  0  1  1  0 -1  1  0  0
 -1  0  0  0  0 -1  0  0  0  0  0 -1 -1 -1  0  0  0 -1  1  0  0  0  0  0
  0  0  0 -1 -1  0  0  0 -1 -1  0 -1  0  0  0  0 -1  0  0  0 -1 -1  0 -1
  0  0 -1  0 -1 -1 -1  0 -1 -1  0  0 -1  1  0  0 -1  0  0 -1  0  0 -1 -1
  1  0  0  0 -1  0  0  0  0  0  0  0  0 -1  1  0  0

In [68]:
step = 0

best_validation_loss, best_validation_tstep = float('inf'), 0
restarts = 0

for step in range(num_training_steps):
    # Validation evaluation
    val_batch_df = next(val_generator)
    val_feed_dict = {
        is_ordered_history: val_batch_df['is_ordered_history'],
        index_in_order_history: val_batch_df['index_in_order_history'],
        order_dow_history: val_batch_df['order_dow_history'],
        order_hour_history: val_batch_df['order_hour_history'],
        days_since_prior_order_history: val_batch_df['days_since_prior_order_history'],
        order_size_history: val_batch_df['order_size_history'],
        reorder_size_history: val_batch_df['reorder_size_history'],
        order_number_history: val_batch_df['order_number_history']
        # Add other inputs here
    }
    val_loss = model.evaluate(val_feed_dict, verbose=0)
    val_loss_history.append(val_loss)
    print(val_loss)
    
    step += 1

#     # Train step
#     train_batch_df = next(train_generator)
#     train_feed_dict = {
#         is_ordered_history: train_batch_df['is_ordered_history'],
#         index_in_order_history: train_batch_df['index_in_order_history'],
#         order_dow_history: train_batch_df['order_dow_history'],
#         order_hour_history: train_batch_df['order_hour_history'],
#         days_since_prior_order_history: train_batch_df['days_since_prior_order_history'],
#         order_size_history: train_batch_df['order_size_history'],
#         reorder_size_history: train_batch_df['reorder_size_history'],
#         order_number_history: train_batch_df['order_number_history']
#         # Add other inputs here
#     }
#     train_loss = model.train_on_batch(train_feed_dict, verbose=0)
#     train_loss_history.append(train_loss)

#     if step % log_interval == 0:
#         avg_train_loss = sum(train_loss_history) / len(train_loss_history)
#         avg_val_loss = sum(val_loss_history) / len(val_loss_history)
#         metric_log = (
#             "[[step {:>8}]]     "
#             "[[train]]     loss: {:<12}     "
#             "[[val]]     loss: {:<12}     "
#         ).format(step, round(avg_train_loss, 8), round(avg_val_loss, 8))
#         logging.info(metric_log)

#         if avg_val_loss < best_validation_loss:
#             best_validation_loss = avg_val_loss
#             best_validation_tstep = step
#             if step > min_steps_to_checkpoint:
#                 model.save_weights('model_checkpoint.h5')
#                 if enable_parameter_averaging:
#                     # Save averaged weights if needed
#                     pass

#     if step - best_validation_tstep > early_stopping_steps:
#         if num_restarts is None or restarts >= num_restarts:
#             logging.info('best validation loss of {} at training step {}'.format(
#                 best_validation_loss, best_validation_tstep))
#             logging.info('early stopping - ending training.')
#             break

#         if restarts < num_restarts:
#             model.load_weights('model_checkpoint.h5')
#             logging.info('halving learning rate')
#             new_lr = model.optimizer.learning_rate / 2.0
#             tf.keras.backend.set_value(model.optimizer.learning_rate, new_lr)
#             early_stopping_steps /= 2
#             step = best_validation_tstep
#             restarts += 1

# if step <= min_steps_to_checkpoint:
#     best_validation_tstep = step
#     model.save_weights('final_model.h5')
#     if enable_parameter_averaging:
#         # Save averaged weights if needed
#         pass

# logging.info('num_training_steps reached - ending training')

TypeError: Tensors are unhashable (this tensor: KerasTensor(type_spec=TensorSpec(shape=(None, 100, 2), dtype=tf.float32, name=None), name='tf.one_hot_1/one_hot:0', description="created by layer 'tf.one_hot_1'")). Instead, use tensor.ref() as the key.