In [1]:
import os

import numpy as np

import os
import sys
sys.path.append('..')
#from layers import LSTMLayer, WaveNet, TimeDistributedDense, TemporalConvolution

In [2]:
import pandas as pd
from pandas import DataFrame

In [6]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Colab-only: mount Google Drive at /content/drive so the archive and
# checkpoint paths used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# List the Drive folder that the unzip cell below reads data.zip from.
!ls -a /content/drive/MyDrive/recsys_data/rnn_product_data

In [7]:
# -p keeps the cell idempotent: no error when `data` already exists on re-run.
!mkdir -p data

In [None]:
# -o overwrites without prompting, so re-running the cell does not block on
# unzip's interactive "replace?" question.
!unzip -o /content/drive/MyDrive/recsys_data/rnn_product_data/data.zip -d data

In [None]:
# Detect the TPU, connect to it and build the distribution strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print(f'Running on a TPU w/{tpu.num_accelerators()["TPU"]} cores')
except ValueError as err:
    # RuntimeError rather than BaseException: BaseException bypasses ordinary
    # `except Exception` handling. `from err` keeps the resolver's own message
    # in the traceback.
    raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [3]:
class TFDataReader:
    """Builds batched ``tf.data`` pipelines from per-column ``.npy`` files.

    Every column in ``feature_cols`` and ``label_cols`` is read from
    ``<data_dir>/<column>.npy``. The first 90% of the loaded rows form the
    training split, the rest the validation split; the test dataset iterates
    over all loaded rows.

    Features are processed *after* batching, so history tensors are rolled
    along axis 1.
    """

    def __init__(self, data_dir, max_rows=10000):
        """Load the column arrays and compute the train/val index split.

        Args:
            data_dir: Directory holding one ``<column>.npy`` file per column.
            max_rows: Number of leading rows kept from every array. Defaults
                to 10000 (the value that used to be hard-coded); ``None``
                keeps every row.
        """
        # Define feature columns and label columns
        self.feature_cols = [
            'user_id', 'product_id', 'aisle_id', 'department_id',
            'is_ordered_history', 'index_in_order_history',
            'order_dow_history', 'order_hour_history',
            'days_since_prior_order_history', 'order_size_history',
            'reorder_size_history', 'order_number_history',
            'history_length', 'product_name', 'product_name_length',
        ]
        # Not read anywhere in this class; kept because it is a public attribute.
        self.expand_cols = ['user_id', 'product_id', 'aisle_id', 'department_id',
            'history_length', 'product_name_length','label']
        self.label_cols = ['label']

        # Memory-map each file and keep only the leading `max_rows` rows.
        self.data = {}
        for col in self.feature_cols + self.label_cols:
            path = os.path.join(data_dir, f'{col}.npy')
            self.data[col] = np.load(path, mmap_mode='r')[:max_rows]

        # Create train/val split (sized from the first loaded column).
        total_size = len(next(iter(self.data.values())))
        train_size = int(0.9 * total_size)

        self.train_indices = np.arange(train_size)
        self.val_indices = np.arange(train_size, total_size)
        self.all_indices = np.arange(total_size)

    def _process_features(self, original_features, is_test):
        """Derive model inputs and the per-step label from one *batched* dict.

        Args:
            original_features: Dict mapping feature name to a batched tensor.
            is_test: When False, ``history_length`` is reduced by one.

        Returns:
            ``(features, labels)``. ``features`` copies the input dict, replaces
            four history tensors with versions rolled one step to the left
            along axis 1 (``tf.roll`` wraps the first element round to the
            end), and adds ``is_none`` (1 where ``product_id == 0``).
            ``labels`` is ``{'next_is_ordered': ...}``: ``is_ordered_history``
            rolled the same way and cast to float32.
        """
        rolled_cols = (
            'order_dow_history', 'order_hour_history',
            'days_since_prior_order_history', 'order_number_history',
        )
        features = dict(original_features)
        for col in rolled_cols:
            features[col] = tf.roll(original_features[col], -1, axis=1)
        features['is_none'] = tf.cast(
            tf.equal(original_features['product_id'], tf.constant(0, dtype=tf.int32)),
            tf.int32,
        )

        # Adjust history length for non-test data
        if not is_test:
            features['history_length'] = original_features['history_length'] - 1
        else:
            features['history_length'] = original_features['history_length']

        labels = {
            'next_is_ordered': tf.cast(
                tf.roll(original_features['is_ordered_history'], -1, axis=1),
                dtype=tf.float32,
            )
        }
        return features, labels

    def _create_dataset(self, indices, shuffle=True, is_test=False):
        """Return an unbatched dataset of int32 feature dicts for ``indices``.

        Args:
            indices: Row indices to select from every feature column.
            shuffle: Whether to shuffle the examples.
            is_test: Unused here; kept so the signature stays unchanged.
        """
        features = {
            col: tf.cast(self.data[col][indices], tf.int32)
            for col in self.feature_cols
        }
        dataset = tf.data.Dataset.from_tensor_slices(features)
        if shuffle:
            # Buffer covers the whole split so the shuffle stays uniform even
            # when max_rows is raised above the old fixed buffer of 10000.
            dataset = dataset.shuffle(buffer_size=max(len(indices), 1))
        return dataset

    def _batched(self, indices, batch_size, shuffle, is_test):
        """Shared pipeline: select rows -> batch -> process features -> prefetch."""
        dataset = self._create_dataset(indices, shuffle=shuffle, is_test=is_test)
        dataset = dataset.batch(batch_size)
        dataset = dataset.map(
            lambda batch: self._process_features(batch, is_test=is_test),
            num_parallel_calls=tf.data.AUTOTUNE,
        )
        return dataset.prefetch(tf.data.AUTOTUNE)

    def get_train_dataset(self, batch_size):
        """Shuffled, batched training split as ``(features, labels)`` pairs."""
        return self._batched(self.train_indices, batch_size, shuffle=True, is_test=False)

    def get_val_dataset(self, batch_size):
        """Batched validation split; not shuffled, so evaluation is repeatable."""
        return self._batched(self.val_indices, batch_size, shuffle=False, is_test=False)

    def get_test_dataset(self, batch_size):
        """Batched dataset over all loaded rows, in order, with ``is_test=True``."""
        return self._batched(self.all_indices, batch_size, shuffle=False, is_test=True)

In [37]:
class TFDataReader2:
    """Data reader that processes features per example, before batching.

    Every column in ``feature_cols`` and ``label_cols`` is read from
    ``<data_dir>/<column>.npy``. The first 90% of the loaded rows form the
    training split, the rest the validation split; the test dataset iterates
    over all loaded rows.

    ``_process_features`` is mapped over single examples, so history tensors
    are rolled along axis 0.
    """

    def __init__(self, data_dir, max_rows=10000):
        """Load the column arrays and compute the train/val index split.

        Args:
            data_dir: Directory holding one ``<column>.npy`` file per column.
            max_rows: Number of leading rows kept from every array. Defaults
                to 10000 (the value that used to be hard-coded); ``None``
                keeps every row.
        """
        # Define feature columns and label columns
        self.feature_cols = [
            'user_id', 'product_id', 'aisle_id', 'department_id',
            'is_ordered_history', 'index_in_order_history',
            'order_dow_history', 'order_hour_history',
            'days_since_prior_order_history', 'order_size_history',
            'reorder_size_history', 'order_number_history',
            'history_length', 'product_name', 'product_name_length',
        ]
        # Not read anywhere in this class; kept because it is a public attribute.
        self.expand_cols = ['user_id', 'product_id', 'aisle_id', 'department_id',
            'history_length', 'product_name_length','label']
        self.label_cols = ['label']

        # Memory-map each file and keep only the leading `max_rows` rows.
        self.data = {}
        for col in self.feature_cols + self.label_cols:
            path = os.path.join(data_dir, f'{col}.npy')
            self.data[col] = np.load(path, mmap_mode='r')[:max_rows]

        # Create train/val split (sized from the first loaded column).
        total_size = len(next(iter(self.data.values())))
        train_size = int(0.9 * total_size)

        self.train_indices = np.arange(train_size)
        self.val_indices = np.arange(train_size, total_size)
        self.all_indices = np.arange(total_size)

    def _process_features(self, original_features, is_test):
        """Derive model inputs and labels from ONE (unbatched) example.

        Args:
            original_features: Dict mapping feature name to a per-example
                tensor (this method is mapped before ``batch``).
            is_test: When False, ``history_length`` is reduced by one.

        Returns:
            ``(features, labels)``. ``features`` copies the input dict, replaces
            four history tensors with versions rolled one step to the left
            along axis 0, and adds ``is_none`` (1 where ``product_id == 0``).
            ``labels`` holds:

            * ``'next_is_ordered'``: ``is_ordered_history`` rolled one step to
              the left, float32. This is the key ``CustomModel.train_step`` and
              ``test_step`` read.
            * ``'in_next_order'``: ``is_ordered_history`` at index
              ``history_length - 1`` (using the adjusted length), float32.
        """
        rolled_cols = (
            'order_dow_history', 'order_hour_history',
            'days_since_prior_order_history', 'order_number_history',
        )
        features = dict(original_features)
        for col in rolled_cols:
            features[col] = tf.roll(original_features[col], -1, axis=0)
        features['is_none'] = tf.cast(
            tf.equal(original_features['product_id'], tf.constant(0, dtype=tf.int32)),
            tf.int32,
        )

        # Adjust history length for non-test data
        if not is_test:
            features['history_length'] = original_features['history_length'] - 1
        else:
            features['history_length'] = original_features['history_length']

        is_ordered = original_features['is_ordered_history']
        # tf.gather replaces the NumPy-style fancy indexing, which tf.Tensor
        # does not support. The index is clamped so a history_length below 1
        # cannot produce an out-of-range gather.
        # NOTE(review): this reads the un-rolled history at the step the model
        # gathers its final prediction from. If the target is meant to be the
        # *following* order, the index should be `history_length` -- confirm.
        last_step = tf.maximum(features['history_length'] - 1, 0)
        labels = {
            'next_is_ordered': tf.cast(tf.roll(is_ordered, -1, axis=0), dtype=tf.float32),
            'in_next_order': tf.cast(tf.gather(is_ordered, last_step), dtype=tf.float32),
        }
        return features, labels

    def _create_dataset(self, indices, shuffle=True, is_test=False):
        """Return an unbatched dataset of processed ``(features, labels)`` pairs.

        Args:
            indices: Row indices to select from every feature column.
            shuffle: Whether to shuffle the examples.
            is_test: Forwarded to ``_process_features``.
        """
        features_dict = {
            col: tf.cast(self.data[col][indices], tf.int32)
            for col in self.feature_cols
        }
        dataset = tf.data.Dataset.from_tensor_slices(features_dict)
        # Apply processing before batching
        dataset = dataset.map(
            lambda example: self._process_features(example, is_test),
            num_parallel_calls=tf.data.AUTOTUNE,
        )
        if shuffle:
            # Buffer covers the whole split so the shuffle stays uniform even
            # when max_rows is raised above the old fixed buffer of 10000.
            dataset = dataset.shuffle(buffer_size=max(len(indices), 1))
        return dataset

    def _batched(self, indices, batch_size, shuffle, is_test, drop_remainder):
        """Shared pipeline: processed examples -> batch -> prefetch."""
        dataset = self._create_dataset(indices, shuffle=shuffle, is_test=is_test)
        dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
        # Prefetch comes last so whole batches are prepared ahead of the consumer.
        return dataset.prefetch(tf.data.AUTOTUNE)

    def get_train_dataset(self, batch_size):
        """Shuffled training split in fixed-size batches (partial batch dropped)."""
        return self._batched(self.train_indices, batch_size,
                             shuffle=True, is_test=False, drop_remainder=True)

    def get_val_dataset(self, batch_size):
        """Validation split in fixed-size batches (partial batch dropped).

        Not shuffled: with ``drop_remainder=True`` a shuffle would score a
        different random subset on every evaluation.
        """
        return self._batched(self.val_indices, batch_size,
                             shuffle=False, is_test=False, drop_remainder=True)

    def get_test_dataset(self, batch_size):
        """All loaded rows, in order, with ``is_test=True``; partial batch kept."""
        return self._batched(self.all_indices, batch_size,
                             shuffle=False, is_test=True, drop_remainder=False)

In [8]:
class WaveNetLayer(tf.keras.layers.Layer):
    """Stack of gated, causal, dilated 1-D convolutions with skip outputs.

    The input is projected to ``residual_channels`` by a width-1 tanh
    convolution. Each stage applies a causal dilated convolution, splits the
    result into a filter half and a gate half, combines them as
    ``tanh(filter) * sigmoid(gate)``, then projects to ``skip_channels`` skip
    features plus ``residual_channels`` residual features. Residuals are added
    to the running input of the next stage; the skips of all stages are
    concatenated on the last axis and passed through ReLU.

    Args:
        dilations: Dilation rate of each stage.
        filter_widths: Kernel size of each stage; same length as ``dilations``.
        skip_channels: Skip features emitted per stage.
        residual_channels: Width of the residual stream.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.

    Raises:
        ValueError: If ``dilations`` and ``filter_widths`` differ in length or
            are empty.
    """

    def __init__(self, dilations, filter_widths, skip_channels, residual_channels, **kwargs):
        super().__init__(**kwargs)
        dilations = list(dilations)
        filter_widths = list(filter_widths)
        # zip() below would silently drop the surplus entries of the longer list.
        if len(dilations) != len(filter_widths):
            raise ValueError(
                'dilations and filter_widths must have the same length, got '
                f'{len(dilations)} and {len(filter_widths)}'
            )
        # With no stages, call() would fail later when concatenating an empty list.
        if not dilations:
            raise ValueError('WaveNetLayer needs at least one dilated convolution stage')

        self.dilations = dilations
        self.filter_widths = filter_widths
        self.skip_channels = skip_channels
        self.residual_channels = residual_channels

        # Initial projection
        self.input_proj = tf.keras.layers.Conv1D(
            filters=residual_channels,
            kernel_size=1,
            activation='tanh',
            name='x-proj'
        )

        # One dilated convolution and one 1x1 output projection per stage.
        self.dilated_conv_layers = []
        self.output_projs = []
        for i, (dilation, filter_width) in enumerate(zip(dilations, filter_widths)):
            self.dilated_conv_layers.append(
                tf.keras.layers.Conv1D(
                    filters=2 * residual_channels,  # Double for gate and filter
                    kernel_size=filter_width,
                    padding='causal',
                    dilation_rate=dilation,
                    name=f'cnn-{i}'
                )
            )
            self.output_projs.append(
                tf.keras.layers.Conv1D(
                    filters=skip_channels + residual_channels,
                    kernel_size=1,
                    name=f'cnn-{i}-proj'
                )
            )

    def call(self, inputs):
        """Return the ReLU of the concatenated skip outputs of all stages.

        The last axis of the result has ``skip_channels * len(dilations)``
        features.
        """
        # Initial projection
        inputs_plus_residuals = self.input_proj(inputs)

        skip_outputs = []
        for dilated_conv, output_proj in zip(self.dilated_conv_layers, self.output_projs):
            # Gated activation unit: tanh(filter) * sigmoid(gate).
            conv_filter, conv_gate = tf.split(dilated_conv(inputs_plus_residuals), 2, axis=-1)
            gated = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

            # Project to skip and residual
            skips, residuals = tf.split(
                output_proj(gated),
                [self.skip_channels, self.residual_channels],
                axis=-1
            )

            # Add residual connection
            inputs_plus_residuals = inputs_plus_residuals + residuals
            skip_outputs.append(skips)

        # Combine skip connections
        return tf.nn.relu(tf.concat(skip_outputs, axis=-1))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'dilations': list(self.dilations),
            'filter_widths': list(self.filter_widths),
            'skip_channels': self.skip_channels,
            'residual_channels': self.residual_channels,
        })
        return config

In [9]:
@tf.function
def sequence_log_loss(y, y_hat, sequence_lengths, max_sequence_length=100, eps=1e-7):
    """Mean binary log loss over the valid steps of padded sequences.

    Args:
        y: Targets; cast to float32, so integer labels are accepted.
        y_hat: Predicted probabilities, same shape as ``y``.
        sequence_lengths: Number of valid leading steps per sequence.
        max_sequence_length: Width of the sequence mask; must equal the time
            dimension of ``y`` / ``y_hat``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the log.

    Returns:
        Scalar: the negated sum of masked log-likelihoods divided by the number
        of unmasked steps, or 0 when no step is unmasked.
    """
    y = tf.cast(y, tf.float32)
    # Clip predictions to avoid NaNs in log calculations
    y_hat = tf.clip_by_value(tf.cast(y_hat, tf.float32), eps, 1.0 - eps)

    log_losses = y * tf.math.log(y_hat) + (1.0 - y) * tf.math.log(1.0 - y_hat)

    # 1.0 for the first `sequence_lengths[i]` steps of each row, else 0.0.
    sequence_mask = tf.sequence_mask(
        sequence_lengths, maxlen=max_sequence_length, dtype=tf.float32
    )
    total_log_loss = -tf.reduce_sum(log_losses * sequence_mask)

    # Normalise by the steps actually counted. This matches
    # sum(sequence_lengths) whenever 0 <= length <= max_sequence_length, stays
    # correct for lengths outside that range, and divide_no_nan returns 0
    # instead of NaN when every length is 0.
    return tf.math.divide_no_nan(total_log_loss, tf.reduce_sum(sequence_mask))

In [30]:
class CustomModel(tf.keras.Model):
    """Per-step reorder-probability model combining a GRU and a WaveNet stack.

    Static product and user features are tiled along the time axis and
    concatenated with one-hot and scaled-scalar encodings of the history
    features. The sequence is fed to a GRU and to a ``WaveNetLayer``; both
    outputs plus the raw sequence are concatenated and passed through two
    dense layers, giving one sigmoid probability per time step.

    ``call`` returns a dict with:
        * ``'next_is_ordered'``: probabilities for every time step.
        * ``'final_states'``: hidden features (output of the first dense layer)
          at step ``history_length - 1``.
        * ``'in_next_order'``: probability at step ``history_length - 1``.

    Training uses ``sequence_log_loss`` on ``'next_is_ordered'`` masked by
    ``history_length``. No loss needs to be passed to ``compile``.
    """

    def __init__(self, lstm_size, dilations, filter_widths, skip_channels, residual_channels, **kwargs):
        """Create the sub-layers.

        Args:
            lstm_size: Units of the recurrent layer; also the width of the
                product and user embeddings.
            dilations, filter_widths, skip_channels, residual_channels:
                Forwarded to ``WaveNetLayer``.
            **kwargs: Forwarded to ``tf.keras.Model``.
        """
        super().__init__(**kwargs)
        self.lstm_size = lstm_size
        # Despite the attribute name, the recurrent layer is a GRU.
        self.lstm_layer = tf.keras.layers.GRU(lstm_size, return_sequences=True, dropout=0.0)
        self.wavenet = WaveNetLayer(dilations, filter_widths, skip_channels, residual_channels)
        self.dense1 = tf.keras.layers.Dense(50, activation='relu')
        self.dense2 = tf.keras.layers.Dense(1, activation='sigmoid')
        self.product_embeddings = tf.keras.layers.Embedding(50000, lstm_size, name='product_embeddings')
        self.aisle_embeddings = tf.keras.layers.Embedding(250, 50, name='aisle_embeddings')
        self.department_embeddings = tf.keras.layers.Embedding(50, 10, name='department_embeddings')
        self.user_embeddings = tf.keras.layers.Embedding(207000, lstm_size, name='user_embeddings')
        self.product_name_dense = tf.keras.layers.Dense(100, activation='relu')
        # Not referenced by the methods of this class; kept as a public attribute.
        self.bce_loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        # Running mean of the batch losses, reported as `loss` / `val_loss`.
        # Keras tracks Metric attributes, so it is reset with the other metrics.
        self.loss_tracker = tf.keras.metrics.Mean(name='loss')

    def call(self, inputs, training=None):
        """Compute per-step probabilities and the final-step state/prediction.

        Args:
            inputs: Dict of batched int tensors as produced by the data
                readers. The concatenations below imply rank-1 id features and
                ``history_length``, and rank-2 (batch, time) history features
                and ``product_name``.
            training: Forwarded to the recurrent layer.
        """
        user_id = inputs['user_id']
        product_id = inputs['product_id']
        aisle_id = inputs['aisle_id']
        department_id = inputs['department_id']
        is_none = inputs['is_none']
        history_length = inputs['history_length']

        is_ordered_history = inputs['is_ordered_history']
        index_in_order_history = inputs['index_in_order_history']
        order_dow_history = inputs['order_dow_history']
        order_hour_history = inputs['order_hour_history']
        days_since_prior_order_history = inputs['days_since_prior_order_history']
        order_size_history = inputs['order_size_history']
        reorder_size_history = inputs['reorder_size_history']
        order_number_history = inputs['order_number_history']
        product_name = inputs['product_name']

        # Static features must be tiled to the time dimension of the history
        # tensors for the axis-2 concat below. Use the static size when known.
        seq_len = is_ordered_history.shape[1]
        if seq_len is None:
            seq_len = tf.shape(is_ordered_history)[1]

        # Product name: one-hot every id, then max over axis 1 -> multi-hot vector.
        product_names = tf.reduce_max(tf.one_hot(product_name, 2532), axis=1)
        product_names = self.product_name_dense(product_names)

        is_none_float = tf.cast(tf.expand_dims(is_none, 1), tf.float32)

        # Product data
        x_product = tf.concat([
            self.product_embeddings(product_id),
            self.aisle_embeddings(aisle_id),
            self.department_embeddings(department_id),
            is_none_float,
            product_names
        ], axis=1)
        x_product = tf.tile(tf.expand_dims(x_product, 1), (1, seq_len, 1))

        # User data
        user_embeddings = self.user_embeddings(user_id)
        x_user = tf.tile(tf.expand_dims(user_embeddings, 1), (1, seq_len, 1))

        # Sequence data: each history feature as a one-hot vector ...
        is_ordered_history_onehot = tf.one_hot(is_ordered_history, 2)
        index_in_order_history_onehot = tf.one_hot(index_in_order_history, 20)
        order_dow_history_onehot = tf.one_hot(order_dow_history, 8)
        order_hour_history_onehot = tf.one_hot(order_hour_history, 25)
        days_since_prior_order_history_onehot = tf.one_hot(days_since_prior_order_history, 31)
        order_size_history_onehot = tf.one_hot(order_size_history, 60)
        reorder_size_history_onehot = tf.one_hot(reorder_size_history, 50)
        order_number_history_onehot = tf.one_hot(order_number_history, 101)

        # ... and as a scalar divided by a fixed constant.
        index_in_order_history_scalar = tf.expand_dims(tf.cast(index_in_order_history, tf.float32) / 20.0, 2)
        order_dow_history_scalar = tf.expand_dims(tf.cast(order_dow_history, tf.float32) / 8.0, 2)
        order_hour_history_scalar = tf.expand_dims(tf.cast(order_hour_history, tf.float32) / 25.0, 2)
        days_since_prior_order_history_scalar = tf.expand_dims(tf.cast(days_since_prior_order_history, tf.float32) / 31.0, 2)
        order_size_history_scalar = tf.expand_dims(tf.cast(order_size_history, tf.float32) / 60.0, 2)
        reorder_size_history_scalar = tf.expand_dims(tf.cast(reorder_size_history, tf.float32) / 50.0, 2)
        order_number_history_scalar = tf.expand_dims(tf.cast(order_number_history, tf.float32) / 100.0, 2)

        x_history = tf.concat([
            is_ordered_history_onehot,
            index_in_order_history_onehot,
            order_dow_history_onehot,
            order_hour_history_onehot,
            days_since_prior_order_history_onehot,
            order_size_history_onehot,
            reorder_size_history_onehot,
            order_number_history_onehot,
            index_in_order_history_scalar,
            order_dow_history_scalar,
            order_hour_history_scalar,
            days_since_prior_order_history_scalar,
            order_size_history_scalar,
            reorder_size_history_scalar,
            order_number_history_scalar,
        ], axis=2)

        outputs = tf.concat([x_history, x_product, x_user], axis=2)
        h = self.lstm_layer(outputs, training=training)
        c = self.wavenet(outputs)
        h = tf.concat([h, c, outputs], axis=-1)

        # Dense acts on the last axis of a rank-3 input, i.e. per time step.
        # That matches the TimeDistributed wrappers previously created on every
        # call, without instantiating new layers inside call().
        h = self.dense1(h)
        y_hat = tf.squeeze(self.dense2(h), axis=-1)

        # (row, last valid step) pairs. The step is clamped at 0 so a
        # history_length below 1 cannot index out of range.
        last_step = tf.maximum(history_length - 1, 0)
        final_temporal_idx = tf.stack(
            [tf.range(tf.shape(history_length)[0]), last_step], axis=1
        )
        final_states = tf.gather_nd(h, final_temporal_idx)
        final_predictions = tf.gather_nd(y_hat, final_temporal_idx)
        return {'next_is_ordered': y_hat, 'final_states': final_states, 'in_next_order': final_predictions}

    def _sequence_loss(self, x, y, y_pred):
        """Masked log loss between ``y['next_is_ordered']`` and the per-step predictions."""
        predictions = y_pred['next_is_ordered']
        # Mask width follows the predictions' time dimension instead of a fixed 100.
        max_len = predictions.shape[1]
        if max_len is None:
            max_len = tf.shape(predictions)[1]
        return sequence_log_loss(y['next_is_ordered'], predictions, x['history_length'], max_len)

    def _update_metrics(self, loss, y, y_pred):
        """Update the loss tracker and compiled metrics; return the current results."""
        self.loss_tracker.update_state(loss)
        # NOTE(review): compiled metrics see every time step, including the
        # positions beyond history_length that the loss masks out.
        self.compiled_metrics.update_state(y['next_is_ordered'], y_pred['next_is_ordered'])
        results = {m.name: m.result() for m in self.metrics}
        results['loss'] = self.loss_tracker.result()
        return results

    # train_step/test_step carry no @tf.function: Keras compiles them itself,
    # and leaving them undecorated lets compile(run_eagerly=True) work.
    def train_step(self, data):
        """Run one optimisation step on ``(features, labels)``; return metric results including ``loss``."""
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self._sequence_loss(x, y, y_pred)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        return self._update_metrics(loss, y, y_pred)

    def test_step(self, data):
        """Evaluate one batch; return metric results including the running mean ``loss``."""
        x, y = data
        y_pred = self(x, training=False)
        loss = self._sequence_loss(x, y, y_pred)
        return self._update_metrics(loss, y, y_pred)




In [31]:
# Load the arrays unpacked into ./data and build the input pipelines:
# training batches of 128 examples, validation batches of 512.
reader = TFDataReader2('data')
train_dataset = reader.get_train_dataset(128)
val_dataset = reader.get_val_dataset(512)

In [32]:
# Per-epoch checkpoint location on Drive; `{epoch:04d}` is left as a
# placeholder for the ModelCheckpoint callback that receives this path.
checkpoint_path = "/content/drive/MyDrive/recsys_data/checkpoints/rnn_products/cp-{epoch:04d}.ckpt"


In [33]:
# Training callbacks, in the order Keras will invoke them.
callbacks = [
    # Stop once val_loss has not improved for 5 epochs and roll back to the
    # best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True, verbose=1,
    ),
    # Keep only the weights of the best epoch (lowest val_loss).
    tf.keras.callbacks.ModelCheckpoint(
        filepath='models/best_model.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        verbose=1,
    ),
    # Halve the learning rate after 3 epochs without val_loss improvement,
    # never going below 1e-6.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1,
    ),
    # Write per-epoch metrics to a fresh CSV file.
    tf.keras.callbacks.CSVLogger('training_log.csv', separator=',', append=False),
    # Save the weights after every epoch to the Drive checkpoint path.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_freq='epoch',
        save_weights_only=True,
        verbose=1,
    ),
]


In [34]:
# Feature dict of the first validation batch (None if the dataset is empty);
# used below to build the model's weights with a single forward pass.
el = next(iter(val_dataset.take(1)), None)
element = el[0] if el is not None else None

In [None]:


model = CustomModel(
    lstm_size=300,
    dilations=[2**i for i in range(6)],
    filter_widths=[2]*6,
    skip_channels=64,
    residual_channels=128,
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=[
        # BinaryAccuracy thresholds the predicted probabilities. Plain Accuracy
        # tests exact equality between probabilities and 0/1 labels.
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        # threshold=0.5 binarises each probability (the default takes an argmax
        # across the last axis); average='micro' pools all positions into one
        # scalar instead of one score per time step.
        tf.keras.metrics.F1Score(average='micro', threshold=0.5, name='f1'),
    ]
)
# One forward pass builds the weights; the trailing `;` stops the notebook
# from echoing the full output tensors.
model(element);



In [None]:
# `shuffle` and `max_queue_size` are dropped: Keras ignores both for
# tf.data.Dataset input (shuffling is done by the dataset pipeline itself).
# steps_per_epoch / validation_steps were None, which is their default.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=callbacks,
    verbose=1,  # 0: silent, 1: progress bar, 2: one line per epoch
)

In [32]:
# Writing an HDF5 file does not create missing parent directories, so make
# sure `models/` exists before saving.
os.makedirs('models', exist_ok=True)
model.save_weights('models/epoch_1.h5')

In [None]:
# Score the trained model on the validation dataset.
eval_results = model.evaluate(val_dataset, verbose=1)
