# Preprocessing and Modeling

## Import required modules

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
from keras.api.layers import Dense, Embedding, GRU, LeakyReLU, Concatenate, Masking, Layer
from keras.api import Input
from keras.api.models import Model
from keras.api.losses import SparseCategoricalCrossentropy
from keras.api.metrics import SparseCategoricalAccuracy, Mean, TopKCategoricalAccuracy
from transformers.models.bert import TFBertTokenizer, TFBertEmbeddings  # embedding and tokenizer for description/nlp related stuff
import matplotlib.pyplot as plt
import pandas as pd

## Preprocessing

### Load CSV

In [None]:
# Load the raw data into a DataFrame.
# NOTE(review): the path is an empty string, presumably a placeholder; set it
# to the real CSV location before running this cell.
df = pd.read_csv("")
# Bare expression: displays the DataFrame when run as the last line of a notebook cell.
df

### Prepare Tensorflow Datasets

In [None]:
# Parallel containers for the training examples. All three are empty here,
# presumably placeholders to be filled from `df` (TODO confirm).
item_sequences = []
item_feature_sequences = []
next_item_sequences = [] # targets: the next item for each sequence


# Slice the three containers in lockstep along their first axis so that each
# dataset element is an (item_sequence, item_feature_sequence, next_item) triple.
dataset = tf.data.Dataset.from_tensor_slices((item_sequences, item_feature_sequences, next_item_sequences))

def preprocesses_data(sequence):
    """Placeholder for expanding ``sequence`` into sliding windows.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    # TODO: add sliding windows of sequence
    return None

## Define Model class

In [None]:
class ItemFeatureEmbedding(Layer):
    """Embed item ids and feature ids with two independent lookup tables.

    Args:
        num_features: Vocabulary size (``input_dim``) of the feature embedding.
        num_items: Vocabulary size (``input_dim``) of the item embedding.
        item_embed_dim: Output dimension of the item embedding.
        feature_embed_dim: Output dimension of the feature embedding.
    """

    def __init__(self, num_features, num_items, item_embed_dim, feature_embed_dim):
        super().__init__()
        # Creation order is kept (feature table first) so that automatic
        # layer naming and weight ordering stay stable.
        self.feature_embedding = Embedding(
            input_dim=num_features,
            output_dim=feature_embed_dim,
        )
        # mask_zero=True: item id 0 is treated as padding and masked downstream.
        self.item_embedding = Embedding(
            input_dim=num_items,
            output_dim=item_embed_dim,
            mask_zero=True,
        )

    def call(self, items, features):
        """Return ``(item_embeddings, feature_embeddings)`` for the given ids."""
        return self.item_embedding(items), self.feature_embedding(features)

class GRU4REC(Model):
    """GRU-based next-item model over embedded item sequences and item features.

    Item ids are embedded and passed through a stack of GRU layers. Every GRU
    after the first receives the item embeddings concatenated with the output
    of the previous GRU. The output of the last GRU is concatenated with a
    dense projection of the feature embeddings, passed through a LeakyReLU
    feed-forward layer and finally through a softmax layer with ``k`` units.

    Args:
        k: Number of units of the softmax output layer.
        num_features: Vocabulary size of the feature embedding.
        num_items: Vocabulary size of the item embedding.
        rnn_params: Non-empty sequence of keyword-argument dicts, one per GRU
            layer (e.g. ``[{"units": 64}, {"units": 32}]``). Exactly one GRU
            is built per entry. ``return_sequences`` is set by this class and
            must not appear in the dicts.
        item_embed_dim: Output dimension of the item embedding.
        feature_embed_dim: Output dimension of the feature embedding.
        ffn1_units: Units of the feed-forward layer before the output.
        feature_dense_units: Units of the dense layer applied to the feature
            embeddings.

    Raises:
        ValueError: If ``rnn_params`` is empty.
    """

    def __init__(self, k, num_features, num_items, rnn_params, item_embed_dim, feature_embed_dim, ffn1_units, feature_dense_units):
        super().__init__()
        if len(rnn_params) == 0:
            raise ValueError("rnn_params must contain at least one GRU configuration")

        self.k = k
        self.embedding = ItemFeatureEmbedding(num_features, num_items, item_embed_dim, feature_embed_dim)

        # RNN layers: one GRU per configuration. Only the last layer collapses
        # the time axis; all earlier ones must emit full sequences so they can
        # be concatenated with the item embeddings and fed to the next GRU.
        # (Building from enumerate() also avoids creating two layers when a
        # single configuration is given.)
        last_index = len(rnn_params) - 1
        self.rnn = [
            GRU(**params, return_sequences=index < last_index)
            for index, params in enumerate(rnn_params)
        ]

        self.concat = Concatenate(axis=-1)

        # Feed-forward layers
        self.feature_dense = Dense(feature_dense_units, activation='relu')
        self.ffn1 = Dense(ffn1_units)
        # Keras 3 names this argument `negative_slope`; `alpha` is deprecated.
        self.activation1 = LeakyReLU(negative_slope=0.2)
        # NOTE(review): the softmax width is `k`, not `num_items` — confirm that
        # `k` is meant to be the number of output classes, not a top-k cutoff.
        self.out = Dense(k, activation='softmax')

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Pair ``(item_sequences, item_features)`` of id tensors.
            training: Accepted for Keras API compatibility; it is not
                forwarded explicitly to the sub-layers here.

        Returns:
            The softmax output of the final dense layer.
        """
        item_sequences, item_features = inputs

        # Embed items and features
        item_embedded, feature_embedded = self.embedding(item_sequences, item_features)

        feature_transformed = self.feature_dense(feature_embedded)

        x = self.rnn[0](item_embedded)
        for gru_layer in self.rnn[1:]:
            # Deeper GRUs see the raw item embeddings next to the previous
            # layer's sequence output.
            x = self.concat([item_embedded, x])
            x = gru_layer(x)

        # NOTE(review): x comes from a GRU with return_sequences=False and so
        # has no time axis; this concat requires feature_transformed to have
        # the same rank — confirm the expected shape of item_features.
        x = self.concat([x, feature_transformed])
        x = self.ffn1(x)
        x = self.activation1(x)
        return self.out(x)

## Training Loop

In [None]:
def custom_train_gru4rec(model, dataset, optimizer, loss_fn, num_epochs, top_k=5, *, sparse_labels=False):
    """Train ``model`` with a custom gradient-tape loop and plot the history.

    Args:
        model: Callable Keras model invoked as
            ``model((item_sequences, item_features), training=True)``.
        dataset: Iterable yielding ``(item_sequences, item_features, labels)``
            batches.
        optimizer: Optimizer whose ``apply_gradients`` is called every batch.
        loss_fn: Callable ``loss_fn(labels, predictions)`` returning the loss.
        num_epochs: Number of passes over ``dataset``.
        top_k: ``k`` used for the top-k accuracy metric.
        sparse_labels: Set to ``True`` when ``labels`` are integer class ids
            (as used with ``SparseCategoricalCrossentropy``); the metric then
            becomes ``SparseTopKCategoricalAccuracy``. The default ``False``
            keeps ``TopKCategoricalAccuracy``, which expects one-hot labels.

    Returns:
        Tuple ``(loss_history, metrics_history)`` holding the per-epoch mean
        loss and top-k accuracy.
    """
    # Metrics to track loss and top-k accuracy
    train_loss = Mean(name='train_loss')

    if sparse_labels:
        # Imported locally so the module-level imports stay untouched.
        from keras.api.metrics import SparseTopKCategoricalAccuracy

        train_top_k_precision = SparseTopKCategoricalAccuracy(k=top_k, name='train_top_k_precision')
    else:
        train_top_k_precision = TopKCategoricalAccuracy(k=top_k, name='train_top_k_precision')

    loss_history = []
    metrics_history = []

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")

        # Reset metrics at the start of each epoch so results are per-epoch
        train_loss.reset_state()
        train_top_k_precision.reset_state()

        # Iterate over the dataset
        for batch, (item_sequences, item_features, labels) in enumerate(dataset):

            with tf.GradientTape() as tape:
                # Forward pass
                predictions = model((item_sequences, item_features), training=True)
                loss = loss_fn(labels, predictions)

            # Backward pass and optimization
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

            # Update metrics (running averages over the current epoch)
            train_loss.update_state(loss)
            train_top_k_precision.update_state(labels, predictions)

            print(f"Batch {batch}, Loss: {train_loss.result().numpy():.4f}, "
                f"Accuracy: {train_top_k_precision.result().numpy():.4f}")

        epoch_loss = train_loss.result().numpy()
        epoch_top_k_precision = train_top_k_precision.result().numpy()
        loss_history.append(epoch_loss)
        metrics_history.append(epoch_top_k_precision)

        # Print epoch summary
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss:.4f}, "
              f"Accuracy: {epoch_top_k_precision:.4f}")

    plot_training_history(loss_history, metrics_history, f'Precision@{top_k}', top_k)

    # Hand the histories back so callers can inspect or persist them.
    return loss_history, metrics_history

def plot_training_history(loss_history, metric_history, metric_name, top_k):
    """Draw the per-epoch loss and metric curves on two stacked panels.

    Args:
        loss_history: Per-epoch loss values; its length defines the x axis.
        metric_history: Per-epoch metric values.
        metric_name: Label used for the metric curve, title and y axis.
        top_k: Accepted for interface compatibility; not used by this function.
    """
    epoch_numbers = range(1, len(loss_history) + 1)

    # One panel for the loss (top) and one for the metric (bottom)
    _, (loss_axis, metric_axis) = plt.subplots(2, 1, figsize=(10, 8))

    # (axis, series, legend label, colour, title, y-axis label) for each panel
    panels = (
        (loss_axis, loss_history, 'Loss', 'blue', 'Training Loss', 'Loss'),
        (metric_axis, metric_history, metric_name, 'green',
         f'Training {metric_name}', f'{metric_name}'),
    )

    for axis, series, legend_label, colour, title, y_label in panels:
        axis.plot(epoch_numbers, series, label=legend_label, color=colour, linestyle='-', marker='o')
        axis.set_title(title)
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.tight_layout()
    plt.show()
        

## Run the training process

In [None]:


# NOTE(review): GRU4REC.__init__ requires k, num_features, num_items, rnn_params,
# item_embed_dim, feature_embed_dim, ffn1_units and feature_dense_units; called
# with no arguments this raises TypeError. TODO: pass the real hyperparameters.
gru_4_rec = GRU4REC()