In [2]:
import os

import numpy as np

import os
import sys
sys.path.append('..')
#from layers import LSTMLayer, WaveNet, TimeDistributedDense, TemporalConvolution

In [2]:
import pandas as pd
from pandas import DataFrame

In [3]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Colab-only: mount Google Drive so the zipped dataset under
# /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Sanity check: list the contents of the dataset folder on the mounted drive.
!ls -a /content/drive/MyDrive/recsys_data/rnn_product_data

In [None]:
!mkdir data

In [None]:
!unzip /content/drive/MyDrive/recsys_data/rnn_product_data/data.zip -d data

In [None]:
# Detect the TPU runtime, connect to it and build a distribution strategy.
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
  print(f'Running on a TPU w/{tpu.num_accelerators()["TPU"]} cores')
except ValueError as err:
  # RuntimeError (not bare BaseException) so ordinary `except Exception` handlers
  # and tooling see it; `from err` keeps the resolver's original message.
  raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# NOTE(review): no visible cell builds the model inside `tpu_strategy.scope()`,
# so the strategy looks unused — confirm whether that is intended.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [4]:
class TFDataReader2:
    """Load per-column ``.npy`` arrays and expose them as ``tf.data`` datasets.

    One ``<column>.npy`` file is read from ``data_dir`` for every name in
    ``feature_cols``. The first 90% of rows are used for training and the
    remaining rows for validation. Every column is additionally zero-padded so
    that the row count is a multiple of ``pad_multiple``; the padded rows are
    only reachable through ``all_indices`` (i.e. the test dataset).
    """

    def __init__(self, data_dir, pad_multiple=512):
        """Read the feature arrays and compute the train/val/all index ranges.

        Args:
            data_dir: Directory containing one ``<column>.npy`` file per feature.
            pad_multiple: Rows are zero-padded up to the next multiple of this
                value. Defaults to 512, the value previously hard-coded.

        Raises:
            ValueError: If ``pad_multiple`` is smaller than 1.
        """
        if pad_multiple < 1:
            raise ValueError(f'pad_multiple must be >= 1, got {pad_multiple}')

        # Define feature columns and label columns
        self.feature_cols = [
            'user_id',
            'history_length',
            'order_size_history',
            'reorder_size_history',
            'order_number_history',
            'order_dow_history',
            'order_hour_history',
            'days_since_prior_order_history',
        ]
        # Not read anywhere in this class; kept because it is a public attribute.
        self.expand_cols = ['user_id', 'product_id', 'aisle_id', 'department_id',
            'history_length', 'product_name_length','label']

        # Load all numpy arrays (memory-mapped, read-only)
        self.data = {}
        for col in self.feature_cols:
            self.data[col] = np.load(os.path.join(data_dir, f'{col}.npy'), mmap_mode='r')

        total_size = len(next(iter(self.data.values())))

        # Number of zero rows needed to reach the next multiple of pad_multiple.
        # `(-n) % m` is 0 when n is already a multiple, whereas the previous
        # `m - n % m` appended a full extra block of m rows in that case.
        pad_rows = (-total_size) % pad_multiple
        if pad_rows:
            for col in self.feature_cols:
                column = self.data[col]
                padding = np.zeros((pad_rows, *column.shape[1:]), dtype=column.dtype)
                self.data[col] = np.concatenate([column, padding], axis=0)

        # Create train/val split over the real (unpadded) rows only
        train_size = int(0.9 * total_size)

        self.train_indices = np.arange(train_size)
        self.val_indices = np.arange(train_size, total_size)
        self.all_indices = np.arange(total_size + pad_rows)

    def _process_features(self, original_features, is_test):
        """Turn one unbatched example into a ``(features, labels)`` pair.

        The calendar/order-number histories are rolled by -1 along axis 0, so
        position ``t`` holds the value originally at ``t + 1`` (``tf.roll``
        wraps, so the last position receives the original first value). The
        label ``next_reorder_size`` is ``reorder_size_history`` rolled the same
        way and cast to float32.

        Args:
            original_features: Dict of tensors for a single example, keyed by
                column name. Presumably the history columns are 1-D over time —
                TODO confirm against the saved arrays.
            is_test: When False, ``history_length`` is reduced by one; when
                True it is passed through unchanged.

        Returns:
            Tuple ``(features, {'next_reorder_size': labels})``.
        """
        # Create new features dictionary with augmented features
        features = {
            # Copy original features
            **original_features,

            # Override the histories that are shifted to the next step
            'order_dow_history': tf.roll(original_features['order_dow_history'], -1, axis=0),
            'order_hour_history': tf.roll(original_features['order_hour_history'], -1, axis=0),
            'days_since_prior_order_history': tf.roll(original_features['days_since_prior_order_history'], -1, axis=0),
            'order_number_history': tf.roll(original_features['order_number_history'], -1, axis=0),
        }
        # Adjust history length for non-test data
        if not is_test:
            features['history_length'] = original_features['history_length'] - 1
        else:
            features['history_length'] = original_features['history_length']
        return features, {'next_reorder_size': tf.cast(tf.roll(original_features['reorder_size_history'], -1, axis=0), dtype=tf.float32)}

    def _create_dataset(self, indices, shuffle=True, is_test=False):
        """Build an unbatched dataset of processed examples for ``indices``.

        Args:
            indices: Row indices to select from every feature column.
            shuffle: Whether to shuffle examples (buffer of 10000).
            is_test: Forwarded to ``_process_features``.

        Returns:
            A ``tf.data.Dataset`` yielding ``(features, labels)`` per example.
        """
        # Select the rows and cast every column to int32
        features_dict = {col: tf.cast(self.data[col][indices], tf.int32) for col in self.feature_cols}

        dataset = tf.data.Dataset.from_tensor_slices(features_dict)
        # Apply processing before batching
        dataset = dataset.map(
            lambda x: self._process_features(x, is_test),
            num_parallel_calls=tf.data.AUTOTUNE
        )
        if shuffle:
            dataset = dataset.shuffle(buffer_size=10000)

        return dataset

    def get_train_dataset(self, batch_size):
        """Return the shuffled training dataset in fixed-size batches.

        The final partial batch is dropped (``drop_remainder=True``).
        """
        dataset = self._create_dataset(self.train_indices, shuffle=True)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset

    def get_val_dataset(self, batch_size):
        """Return the validation dataset in fixed-size batches.

        Validation is not shuffled: combined with ``drop_remainder=True``,
        shuffling dropped a different set of examples on every pass, making
        the validation loss vary between evaluations of the same weights.
        """
        dataset = self._create_dataset(self.val_indices, shuffle=False)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset

    def get_test_dataset(self, batch_size):
        """Return every row (including zero padding) in order, for prediction.

        ``history_length`` is left unchanged (``is_test=True``) and no batch is
        dropped.
        """
        dataset = self._create_dataset(self.all_indices, shuffle=False, is_test=True)
        dataset = dataset.batch(batch_size, drop_remainder=False)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset

In [27]:
@tf.function
def sequence_rmse_loss(y, y_hat, sequence_lengths, max_sequence_length=100, eps=1e-7):
    """Root-mean-squared error over the valid time steps of a padded batch.

    Args:
        y: Targets; cast to float32. Multiplied element-wise with a
            ``(batch, max_sequence_length)`` mask, so it must be broadcastable
            to that shape.
        y_hat: Predictions with the same shape as ``y``.
        sequence_lengths: Integer tensor with the number of valid steps per
            example; positions at or beyond this length are ignored.
        max_sequence_length: Width of the sequence mask.
        eps: Lower bound for the denominator, so a batch with no valid steps
            yields 0 instead of NaN.

    Returns:
        Scalar float32 tensor: ``sqrt(sum(mask * (y - y_hat)^2) / sum(mask))``.
    """
    y = tf.cast(y, tf.float32)
    y_hat = tf.cast(y_hat, tf.float32)
    squared_error = tf.square(y - y_hat)

    # 1.0 for valid steps, 0.0 for padding
    sequence_mask = tf.cast(tf.sequence_mask(sequence_lengths, maxlen=max_sequence_length), tf.float32)

    # Normalise by the number of steps the mask actually keeps. This equals
    # sum(sequence_lengths) whenever 0 <= length <= max_sequence_length, but
    # stays consistent with the numerator for negative or over-long lengths.
    valid_steps = tf.reduce_sum(sequence_mask)
    mean_squared_error = tf.reduce_sum(squared_error * sequence_mask) / tf.maximum(valid_steps, eps)
    return tf.sqrt(mean_squared_error)

In [28]:
class CustomModel(tf.keras.Model):
    """GRU sequence model trained to predict ``next_reorder_size`` per time step.

    Each integer history feature is encoded twice — as a one-hot vector and as
    a scaled scalar — and the concatenation is fed to a GRU. The GRU output is
    concatenated with its input, passed through a 50-unit ReLU layer and then a
    single sigmoid unit applied at every time step.
    """

    # (input key, one-hot depth, scalar divisor), in concatenation order.
    # The order and the constants determine the input layout the trained
    # weights expect, so they are kept exactly as before.
    _SEQUENCE_FEATURES = (
        ('order_dow_history', 8, 8.0),
        ('order_hour_history', 25, 25.0),
        ('days_since_prior_order_history', 31, 31.0),
        ('order_size_history', 60, 60.0),
        ('reorder_size_history', 50, 50.0),
        ('order_number_history', 101, 100.0),
    )

    def __init__(self, lstm_size, **kwargs):
        """Create the layers.

        Args:
            lstm_size: Number of units of the recurrent layer.
            **kwargs: Forwarded to ``tf.keras.Model``.
        """
        super().__init__(**kwargs)
        self.lstm_size = lstm_size
        # The attribute is called lstm_layer, but the layer is a GRU.
        self.lstm_layer = tf.keras.layers.GRU(lstm_size, return_sequences=True, dropout=0.0)
        self.dense1 = tf.keras.layers.Dense(50, activation='relu')
        # NOTE(review): a sigmoid bounds predictions to (0, 1) while the target is
        # `reorder_size_history` cast to float — confirm the target scale matches.
        self.dense2 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs, training=None):
        """Run the model on a batch of features.

        Args:
            inputs: Dict containing ``history_length`` and the six history
                features listed in ``_SEQUENCE_FEATURES``. The history tensors
                are expanded/concatenated on axis 2, so they are expected to be
                rank-2 integer tensors — presumably (batch, time); TODO confirm.
            training: Standard Keras training flag, forwarded to the GRU.

        Returns:
            Dict with
            ``next_reorder_size``: per-step predictions (last axis squeezed),
            ``final_states``: hidden features at step ``history_length - 1``,
            ``final_predictions``: prediction at step ``history_length - 1``.
        """
        # Kept local: storing a per-call tensor on `self` leaks graph tensors
        # out of the traced function.
        history_length = tf.cast(inputs['history_length'], tf.int32)

        # Sequence data: one-hot encodings first, then the scaled scalars
        one_hot_parts = [
            tf.one_hot(inputs[key], depth)
            for key, depth, _ in self._SEQUENCE_FEATURES
        ]
        scalar_parts = [
            tf.expand_dims(tf.cast(inputs[key], tf.float32) / divisor, 2)
            for key, _, divisor in self._SEQUENCE_FEATURES
        ]
        outputs = tf.concat(one_hot_parts + scalar_parts, axis=2)

        h = self.lstm_layer(outputs, training=training)
        h = tf.concat([h, outputs], axis=-1)
        # Dense acts on the last axis of a rank-3 tensor, i.e. independently at
        # each time step, so no TimeDistributed wrapper is needed (and no new
        # wrapper layers are instantiated on every call).
        h = self.dense1(h)
        y_hat = tf.squeeze(self.dense2(h), axis=-1)

        # Index of the last valid step per example. Clipped so that rows with
        # history_length == 0 (e.g. zero padding) do not produce index -1,
        # which is out of bounds for gather_nd.
        last_step = tf.clip_by_value(history_length - 1, 0, tf.shape(h)[1] - 1)
        final_temporal_idx = tf.stack([tf.range(tf.shape(history_length)[0]), last_step], axis=1)
        final_states = tf.gather_nd(h, final_temporal_idx)
        final_predictions = tf.gather_nd(y_hat, final_temporal_idx)
        return {'next_reorder_size': y_hat, 'final_states': final_states, 'final_predictions': final_predictions}

    # train_step/test_step are not decorated with @tf.function: Keras already
    # compiles them when building its train/test functions.
    def train_step(self, data):
        """One optimisation step using the masked sequence RMSE loss.

        Args:
            data: ``(features, labels)`` as produced by the dataset; the loss
                mask comes from ``features['history_length']``.

        Returns:
            Dict of compiled metric results plus ``loss``.
        """
        x, y = data
        history_length = x['history_length']

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            # Pass both prediction and history_length to loss
            loss = sequence_rmse_loss(y['next_reorder_size'], y_pred['next_reorder_size'], history_length, 100)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y['next_reorder_size'], y_pred['next_reorder_size'])
        results = {m.name: m.result() for m in self.metrics}
        # NOTE(review): this is the current batch's loss, not a running mean.
        results['loss'] = loss
        return results

    def test_step(self, data):
        """One evaluation step using the masked sequence RMSE loss.

        Args:
            data: ``(features, labels)`` as produced by the dataset.

        Returns:
            Dict of compiled metric results plus ``loss``.
        """
        x, y = data
        history_length = x['history_length']

        y_pred = self(x, training=False)
        # Pass both prediction and history_length to loss
        loss = sequence_rmse_loss(y['next_reorder_size'], y_pred['next_reorder_size'], history_length, 100)

        self.compiled_metrics.update_state(y['next_reorder_size'], y_pred['next_reorder_size'])

        results = {m.name: m.result() for m in self.metrics}
        # NOTE(review): `loss` is the per-batch value, so the `val_loss` reported at
        # the end of evaluation looks like it reflects only the last batch —
        # confirm. A Mean tracker would fix that, but adds tracked variables
        # and may affect loading of existing weight files.
        results['loss'] = loss
        return results




In [33]:
# Read the unzipped arrays and build the batched train / validation pipelines.
reader = TFDataReader2(data_dir='data')
train_dataset = reader.get_train_dataset(batch_size=128)
val_dataset = reader.get_val_dataset(batch_size=256)

In [40]:
# Filename template for the per-epoch weight checkpoints; `{epoch:04d}` is
# filled in by the ModelCheckpoint callback that receives this path.
checkpoint_path = "checkpoints/cp-{epoch:04d}.ckpt"


In [41]:
# Stop when val_loss stops improving and restore the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Keep only the weights of the epoch with the best val_loss.
best_weights_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='models/best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Halve the learning rate when val_loss plateaus, down to 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=1,
    min_lr=1e-6,
    verbose=1,
)

# Write per-epoch metrics to a fresh CSV file.
csv_logger = tf.keras.callbacks.CSVLogger(
    'training_log.csv',
    separator=',',
    append=False,
)

# Additionally save the weights after every epoch.
epoch_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

callbacks = [
    early_stopping,
    best_weights_checkpoint,
    reduce_lr,
    csv_logger,
    epoch_checkpoint,
]


In [44]:
# Take the features of one training batch; it is used to build the model's
# variables. Unpacking directly avoids rebinding the builtin `input` (and a
# stray global `output`) as the previous for-loop did. `element` stays None
# if the dataset yields nothing.
element, _ = next(iter(train_dataset.take(1)), (None, None))

In [45]:

# Build and compile the model.
model = CustomModel(
    lstm_size=300,
)
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(0.001),
    # NOTE(review): these are classification metrics, while the training loss
    # is an RMSE on `next_reorder_size` — confirm they are meaningful here.
    metrics=[
        tf.keras.metrics.Accuracy(name='accuracy'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.F1Score(name='f1'),
    ],
)

# Run one batch through the model so its variables exist before loading weights.
model(element)

# Resume from previously saved weights when they exist. On a fresh runtime the
# file is absent, and loading unconditionally would abort the notebook before
# any training could happen.
best_weights_path = 'models/best_model.h5'
if os.path.exists(best_weights_path):
    model.load_weights(best_weights_path)
else:
    print(f'No saved weights found at {best_weights_path}; starting from freshly initialised weights.')



In [None]:
# Train for up to 10 epochs. Shuffling is done by the tf.data pipeline:
# `shuffle` and `max_queue_size` are ignored by `fit` for Dataset input, so
# they are no longer passed (they suggested fit itself shuffled the data).
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=callbacks,
    verbose=1,  # 0: silent, 1: progress bar, 2: one line per epoch
)

In [None]:
# Unshuffled pass over every row, in batches of 512, for prediction.
test_dataset = reader.get_test_dataset(batch_size=512)

In [None]:
# Run inference over the test dataset; the result is a dict keyed like the
# model's output ('next_reorder_size', 'final_states', 'final_predictions').
outputs = model.predict(x=test_dataset, verbose=1)

In [49]:
# np.save does not create missing parent directories, so make sure the output
# folder exists before writing the per-row final states and predictions.
os.makedirs('pred_data', exist_ok=True)
np.save('pred_data/final_states.npy', outputs['final_states'])
np.save('pred_data/final_predictions.npy', outputs['final_predictions'])

In [None]:
# Persist the current weights to an HDF5 file.
model.save_weights('models/epoch_1.h5')

In [None]:
# Evaluate the current weights on the validation dataset.
eval_results = model.evaluate(x=val_dataset, verbose=1)
