In [1]:
import tensorflow as tf


class UserAutoEncoder(tf.keras.Model):
    """Symmetric dense autoencoder used to embed user rows.

    Encoder: three ``Dense`` layers of width ``100 * latent_dim``,
    ``10 * latent_dim`` and ``latent_dim`` (all ReLU), each followed by
    ``BatchNormalization`` and ``Dropout``.
    Decoder: ``Dense(10 * latent_dim)`` and ``Dense(100 * latent_dim)`` (ReLU,
    each followed by ``BatchNormalization`` and ``Dropout``) and a final
    ``Dense(input_feature_len)`` output layer.

    Args:
        input_feature_len: Number of units of the reconstruction layer, i.e.
            the width of the vectors the decoder emits.
        latent_dim: Width of the bottleneck; also scales the hidden layers.
        dropout_rate: Rate used by every ``Dropout`` layer. Defaults to the
            previously hard-coded ``0.2``.
        output_activation: Activation of the reconstruction layer. Defaults to
            the previously hard-coded ``'softmax'``.

    NOTE(review): ``'softmax'`` forces every reconstructed row to sum to 1.
    ``XMY.fit`` in this file trains this model with binary cross-entropy
    against 0/1 rows that contain many ones; ``'sigmoid'`` is the activation
    that matches that loss. The default is left unchanged so that weights
    cached by earlier runs keep their meaning - confirm before switching.
    """

    def __init__(self, input_feature_len, latent_dim, dropout_rate=0.2,
                 output_activation='softmax'):
        super().__init__()
        self.latent_dim = latent_dim
        self.input_feature_len = input_feature_len
        self.dropout_rate = dropout_rate
        self.output_activation = output_activation

        # Layers are instantiated in the same order as before (encoder first,
        # then decoder) so auto-generated layer names and the weight order
        # used by topology-based weight files are unchanged.
        self.encoder = tf.keras.Sequential(
            self._hidden_stack(
                [100 * latent_dim, 10 * latent_dim, latent_dim], dropout_rate
            )
        )
        self.decoder = tf.keras.Sequential(
            self._hidden_stack([10 * latent_dim, 100 * latent_dim], dropout_rate)
            + [tf.keras.layers.Dense(input_feature_len, activation=output_activation)]
        )

    @staticmethod
    def _hidden_stack(widths, dropout_rate):
        """Return ``[Dense(relu), BatchNormalization, Dropout]`` for each width."""
        layers = []
        for width in widths:
            layers.append(tf.keras.layers.Dense(width, activation='relu'))
            layers.append(tf.keras.layers.BatchNormalization())
            layers.append(tf.keras.layers.Dropout(dropout_rate))
        return layers

    def call(self, inputs, training=None):
        """Encode then decode ``inputs`` and return the reconstruction.

        Args:
            inputs: Batch of feature vectors fed to the encoder.
            training: Forwarded to the encoder and decoder so that dropout and
                batch normalisation follow the caller's mode. ``None`` (the
                default) lets Keras decide, which is what happened implicitly
                when this method had no ``training`` parameter.
        """
        encoded = self.encoder(inputs, training=training)
        return self.decoder(encoded, training=training)

    def encode(self, inputs, training=False):
        """Return the bottleneck representation of ``inputs``.

        Runs only the encoder; ``training`` defaults to ``False`` (inference
        mode for dropout / batch normalisation).
        """
        return self.encoder(inputs, training=training)

class ItemAutoEncoder(tf.keras.Model):
    """Symmetric dense autoencoder used to embed item rows.

    The architecture is the same as ``UserAutoEncoder``'s:
    encoder ``Dense(100 * latent_dim) -> Dense(10 * latent_dim) ->
    Dense(latent_dim)`` (ReLU, each followed by ``BatchNormalization`` and
    ``Dropout``), decoder ``Dense(10 * latent_dim) -> Dense(100 * latent_dim)``
    (same pattern) and a final ``Dense(input_feature_len)`` output layer.

    Args:
        input_feature_len: Number of units of the reconstruction layer, i.e.
            the width of the vectors the decoder emits.
        latent_dim: Width of the bottleneck; also scales the hidden layers.
        dropout_rate: Rate used by every ``Dropout`` layer. Defaults to the
            previously hard-coded ``0.2``.
        output_activation: Activation of the reconstruction layer. Defaults to
            the previously hard-coded ``'softmax'``.

    NOTE(review): ``'softmax'`` forces every reconstructed row to sum to 1.
    ``XMY.fit`` in this file trains this model with binary cross-entropy
    against 0/1 rows that contain many ones; ``'sigmoid'`` is the activation
    that matches that loss. The default is left unchanged so that weights
    cached by earlier runs keep their meaning - confirm before switching.
    """

    def __init__(self, input_feature_len, latent_dim, dropout_rate=0.2,
                 output_activation='softmax'):
        super().__init__()
        self.latent_dim = latent_dim
        self.input_feature_len = input_feature_len
        self.dropout_rate = dropout_rate
        self.output_activation = output_activation

        # Layers are instantiated in the same order as before (encoder first,
        # then decoder) so auto-generated layer names and the weight order
        # used by topology-based weight files are unchanged.
        self.encoder = tf.keras.Sequential(
            self._hidden_stack(
                [100 * latent_dim, 10 * latent_dim, latent_dim], dropout_rate
            )
        )
        self.decoder = tf.keras.Sequential(
            self._hidden_stack([10 * latent_dim, 100 * latent_dim], dropout_rate)
            + [tf.keras.layers.Dense(input_feature_len, activation=output_activation)]
        )

    @staticmethod
    def _hidden_stack(widths, dropout_rate):
        """Return ``[Dense(relu), BatchNormalization, Dropout]`` for each width."""
        layers = []
        for width in widths:
            layers.append(tf.keras.layers.Dense(width, activation='relu'))
            layers.append(tf.keras.layers.BatchNormalization())
            layers.append(tf.keras.layers.Dropout(dropout_rate))
        return layers

    def call(self, inputs, training=None):
        """Encode then decode ``inputs`` and return the reconstruction.

        Args:
            inputs: Batch of feature vectors fed to the encoder.
            training: Forwarded to the encoder and decoder so that dropout and
                batch normalisation follow the caller's mode. ``None`` (the
                default) lets Keras decide, which is what happened implicitly
                when this method had no ``training`` parameter.
        """
        encoded = self.encoder(inputs, training=training)
        return self.decoder(encoded, training=training)

    def encode(self, inputs, training=False):
        """Return the bottleneck representation of ``inputs``.

        Runs only the encoder; ``training`` defaults to ``False`` (inference
        mode for dropout / batch normalisation).
        """
        return self.encoder(inputs, training=training)





In [2]:
import argparse
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
import pickle

import tensorflow as tf
# from sklearn.decomposition import PCA
from scipy.linalg import svd
from tensorflow.keras import Model
# from tensorflow.keras.optimizers.legacy import Adam

class XMY:
    """Rating model ``R ~ X (U V^T) Y^T`` with autoencoder-derived X and Y.

    ``X`` is the bottleneck output of a ``UserAutoEncoder`` fed with the rows
    of ``R_bar_train``; ``Y`` is the bottleneck output of an
    ``ItemAutoEncoder`` fed with its columns. ``U`` and ``V`` are trainable
    ``(latent_side_info, M_max_rank)`` factors of the core matrix.

    Args:
        latent_side_info: Bottleneck width of both autoencoders and number of
            rows of ``U`` and ``V``.
        M_max_rank: Number of columns of ``U`` and ``V``.
        lambda_M: Weight of the squared-norm penalty on ``U`` and ``V``.
        step: Maximum number of joint optimisation steps.
        dataset_name: Used only to build the weight-cache paths.
        p_value: Used only to build the weight-cache paths.
        dtype: Stored on the instance.
            NOTE(review): nothing in this class reads ``self.dtype``; ``U``,
            ``V`` and the regulariser are always ``tf.float32``.
    """

    # Hyper-parameters that used to be literals inside ``fit``.
    SEED = 42
    AE_BATCH_SIZE = 32
    AE_SHUFFLE_BUFFER = 100
    AE_PRETRAIN_EPOCHS = 300
    AE_LEARNING_RATE = 0.01
    M_LEARNING_RATE = 0.01
    PATIENCE = 200  # non-improving steps tolerated before early stopping

    def __init__(self, latent_side_info, M_max_rank, lambda_M, step, dataset_name, p_value, dtype=tf.float32):
        self.latent_side_info = latent_side_info
        self.M_max_rank = M_max_rank
        self.lambda_M = lambda_M
        self.step = step
        self.dataset_name = dataset_name
        self.p_value = p_value
        self.dtype = dtype

        # Path to save/load weights
        self.autoencoder_X_weights_path = f'real_datasets/{dataset_name}/Weighted_XMY/p={p_value}/latent_dim={latent_side_info}/autoencoder_X_weights.h5'
        self.autoencoder_Y_weights_path = f'real_datasets/{dataset_name}/Weighted_XMY/p={p_value}/latent_dim={latent_side_info}/autoencoder_Y_weights.h5'

    def _check_weights(self, path):
        """Check if weights file exists at the given path."""
        return os.path.exists(path)

    def fit(self, R_bar_train, R_bar_val, R_bar_test, R_train, R_val, R_test, tensorboard_dir=None):
        """Pre-train (or load) both autoencoders, then jointly optimise everything.

        Args:
            R_bar_train: 2-D matrix fed to the autoencoders (rows to the user
                model, columns to the item model) and used as their
                binary-cross-entropy target.
            R_bar_val, R_bar_test: Accepted for interface compatibility; not used.
            R_train, R_val, R_test: Rating matrices of the same shape as
                ``R_bar_train``; entries equal to 0 are treated as unobserved
                and excluded from the corresponding loss.
            tensorboard_dir: Accepted for interface compatibility; not used.

        Returns:
            ``(train_rmse, val_rmse, test_rmse)`` measured at the step with the
            lowest validation RMSE (first such step on ties). The values are
            scalar tensors; if ``self.step <= 0`` no step runs and
            ``(inf, inf, inf)`` is returned instead of raising.

        Side effects:
            Sets ``self.U``, ``self.V``, ``self.autoencoder_X`` and
            ``self.autoencoder_Y``; may write the two weight files; prints the
            training RMSE at every step.
        """
        tf.random.set_seed(self.SEED)

        # Random ops are created in the same order as before (U, V, user
        # dataset, user model, item dataset, item model) so seeded runs keep
        # drawing the same values.
        self.U, self.V = self._init_low_rank_matrices()

        R_bar_train_T = tf.transpose(R_bar_train)  # hoisted: reused every step
        n_users, n_items = R_bar_train.shape[0], R_bar_train.shape[1]

        user_dataset = self._make_pretrain_dataset(R_bar_train)
        self.autoencoder_X = self._new_autoencoder(UserAutoEncoder, n_items)

        item_dataset = self._make_pretrain_dataset(R_bar_train_T)
        self.autoencoder_Y = self._new_autoencoder(ItemAutoEncoder, n_users)

        # Dummy forward pass to build the models (ensures weights are created
        # before they are loaded or saved).
        _ = self.autoencoder_X(R_bar_train)
        _ = self.autoencoder_Y(R_bar_train_T)

        self._load_or_pretrain(self.autoencoder_X, user_dataset, self.autoencoder_X_weights_path)
        self._load_or_pretrain(self.autoencoder_Y, item_dataset, self.autoencoder_Y_weights_path)

        reg_normalizer = tf.math.sqrt(tf.cast(n_users * n_items, tf.float32))

        train_mask = self._create_mask(R_train)
        val_mask = self._create_mask(R_val)
        test_mask = self._create_mask(R_test)

        m_variables = [self.U, self.V]
        ae_variables = self.autoencoder_X.trainable_variables + self.autoencoder_Y.trainable_variables

        # Dedicated optimizers for the joint phase. Each optimizer instance in
        # this method only ever sees one fixed set of variables: recent Keras
        # optimizers bind to the first variable list they are applied to and
        # reject any other, so the instance used for pre-training a single
        # autoencoder must not be reused here.
        M_optimizer = tf.keras.optimizers.Adam(learning_rate=self.M_LEARNING_RATE)
        ae_optimizer = tf.keras.optimizers.Adam(learning_rate=self.AE_LEARNING_RATE)

        best_train_rmse = float('inf')
        best_val_rmse = float('inf')
        best_test_rmse = float('inf')
        patience_counter = 0

        for i in range(self.step):
            with tf.GradientTape() as tape:
                # Latent vectors; encode() defaults to training=False.
                X = self.autoencoder_X.encode(R_bar_train)
                Y = self.autoencoder_Y.encode(R_bar_train_T)

                # Prediction term X (U V^T) Y^T
                XMY = tf.matmul(tf.matmul(X, tf.matmul(self.U, tf.transpose(self.V))), tf.transpose(Y))

                train_loss = self._masked_mse(R_train, XMY, train_mask)

                # Squared-norm penalty on the factors of M
                loss_U = self.lambda_M * tf.reduce_sum(tf.math.square(self.U)) / reg_normalizer
                loss_V = self.lambda_M * tf.reduce_sum(tf.math.square(self.V)) / reg_normalizer

                # Reconstruction losses (no training flag is passed to the models)
                loss_autoencoder_X = tf.reduce_mean(
                    tf.keras.losses.binary_crossentropy(R_bar_train, self.autoencoder_X(R_bar_train)))
                loss_autoencoder_Y = tf.reduce_mean(
                    tf.keras.losses.binary_crossentropy(R_bar_train_T, self.autoencoder_Y(R_bar_train_T)))

                total_loss = train_loss + loss_U + loss_V + loss_autoencoder_X + loss_autoencoder_Y

            # One backward pass yields the gradients for both variable groups.
            M_grads, ae_grads = tape.gradient(total_loss, [m_variables, ae_variables])
            M_optimizer.apply_gradients(zip(M_grads, m_variables))
            ae_optimizer.apply_gradients(zip(ae_grads, ae_variables))

            # Evaluation-only losses do not need to be recorded on the tape.
            # They use the predictions computed before this step's update.
            val_loss = self._masked_mse(R_val, XMY, val_mask)
            test_loss = self._masked_mse(R_test, XMY, test_mask)

            train_rmse = tf.math.sqrt(train_loss)
            val_rmse = tf.math.sqrt(val_loss)
            test_rmse = tf.math.sqrt(test_loss)

            print('train_rmse :', train_rmse)

            # The very first step never resets the counter (there is nothing
            # to compare against yet); afterwards only a strict improvement
            # of the validation RMSE does.
            improved = i != 0 and bool(val_rmse < best_val_rmse)
            patience_counter = 0 if improved else patience_counter + 1

            # Stop training if patience exceeded
            if patience_counter > self.PATIENCE:
                break

            # Track the best step incrementally instead of rescanning the
            # whole RMSE history at every step.
            if i == 0 or improved:
                best_train_rmse = train_rmse
                best_val_rmse = val_rmse
                best_test_rmse = test_rmse

        return best_train_rmse, best_val_rmse, best_test_rmse

    def _make_pretrain_dataset(self, matrix):
        """Return a shuffled, batched ``(row, row)`` dataset over the rows of ``matrix``."""
        dataset = tf.data.Dataset.from_tensor_slices((matrix, matrix))
        return dataset.shuffle(buffer_size=self.AE_SHUFFLE_BUFFER).batch(self.AE_BATCH_SIZE)

    def _new_autoencoder(self, autoencoder_cls, n_features):
        """Instantiate ``autoencoder_cls`` and compile it with its own Adam optimizer."""
        autoencoder = autoencoder_cls(n_features, self.latent_side_info)
        autoencoder.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.AE_LEARNING_RATE),
            loss='binary_crossentropy',
        )
        return autoencoder

    def _load_or_pretrain(self, autoencoder, dataset, weights_path):
        """Load cached weights from ``weights_path``; otherwise pre-train and cache them."""
        if self._check_weights(weights_path):
            autoencoder.load_weights(weights_path)
            return

        # Create the target directory first so a filesystem problem surfaces
        # before the pre-training run rather than after it.
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)

        # The dataset is already batched, so ``batch_size`` is not passed.
        autoencoder.fit(dataset, epochs=self.AE_PRETRAIN_EPOCHS, verbose=0)
        autoencoder.save_weights(weights_path)

    @staticmethod
    def _masked_mse(target, prediction, mask):
        """Mean squared error over the entries where ``mask`` is non-zero.

        The result is NaN when ``mask`` sums to zero (no observed entry).
        """
        return tf.reduce_sum(mask * tf.math.squared_difference(target, prediction)) / tf.reduce_sum(mask)

    def _create_mask(self, X):
        """Return a tensor of ``X``'s dtype holding 1 where ``X != 0`` and 0 elsewhere."""
        return tf.cast(tf.not_equal(X, 0), X.dtype)

    def _init_low_rank_matrices(self):
        """Return randomly initialised float32 variables ``U`` and ``V``.

        Both have shape ``(latent_side_info, M_max_rank)`` and are drawn from
        a standard normal distribution; ``U`` is drawn first.
        """
        shape = [self.latent_side_info, self.M_max_rank]
        U = tf.Variable(tf.random.normal(shape), dtype=tf.float32)
        V = tf.Variable(tf.random.normal(shape), dtype=tf.float32)
        return U, V

## Dataset Preparation

In [3]:
# prepare_data is a project-local helper; its implementation is not part of this notebook.
from utils.data_utils import prepare_data

# Which dataset / split variant to load. Both values are reused later to
# build the model configuration and the weight-cache paths.
p_value = 0.0
dataset_name = 'ml_100k'

# Six matrices: the R_bar_* triple and the R_* triple, each as train / val / test.
(
    R_bar_train, R_bar_val, R_bar_test,
    R_train, R_val, R_test,
) = prepare_data(dataset_name, p_value)

2024-01-14 11:52:46.550354: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Inspect the training matrix that is fed to the autoencoders (the output below shows 0/1 entries).
R_bar_train

<tf.Tensor: shape=(943, 1682), dtype=float32, numpy=
array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)>

In [5]:
# Inspect the training rating matrix; XMY._create_mask treats entries equal to 0 as unobserved.
R_train

<tf.Tensor: shape=(943, 1682), dtype=float32, numpy=
array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]], dtype=float32)>

In [9]:
# ---- Experiment hyper-parameters -------------------------------------------
model_name = 'JointModel'
is_weighted_implicit = False

latent_side_info = 8   # bottleneck width of the autoencoders
latent_M = 4           # rank of the core matrix M (passed as M_max_rank)
lambda_ = 10           # regularisation weight (passed as lambda_M)
step = 10000           # maximum number of optimisation steps

lr_M = 0.01            # learning rate, core-matrix factors
lr_ae = 0.01           # learning rate, autoencoders

# ---- Configuration dict handed to JointModel --------------------------------
# Key order is kept as-is because the dict is echoed when the model is built.
config = {
    'dataset': dataset_name,
    'p_value': p_value,
    'latent_dim': latent_side_info,
    'M_max_rank': latent_M,
    'lambda_M': lambda_,
    'step': step,
    # 'num_layer' is intentionally not set (no num_layer variable is defined).
    'lr_M': lr_M,
    'lr_ae': lr_ae,
    'embeddings_epochs': 100,
    'is_weighted_implicit': is_weighted_implicit,
    # NOTE(review): 'droputout' looks like a misspelling of 'dropout'. The key
    # is left untouched because the code that reads this dict is not visible
    # here - confirm which spelling JointModel expects.
    'droputout': 0.2,
    'batch_norm': True,
}

In [10]:
# JointModel lives in the project's models package; its implementation is not shown in this notebook.
from models.joint_model import JointModel

# Build the model from the config dict defined in the previous cell.
# NOTE(review): the meaning of the third argument ('Weighted_XMY') is not visible here;
# the same string appears in the weight-cache paths used by the XMY class above.
model = JointModel(config, dataset_name, 'Weighted_XMY')

Configuration: {'dataset': 'ml_100k', 'p_value': 0.0, 'latent_dim': 8, 'M_max_rank': 4, 'lambda_M': 10, 'step': 10000, 'lr_M': 0.01, 'lr_ae': 0.01, 'embeddings_epochs': 100, 'is_weighted_implicit': False, 'droputout': 0.2, 'batch_norm': True}


In [11]:
# Train the JointModel built in the previous cell.
# NOTE(review): the recorded run of this cell was interrupted (KeyboardInterrupt in the output),
# so these three names were not assigned by it.
best_train_rmse, best_val_rmse, best_test_rmse = model.fit(R_bar_train, R_bar_val, R_bar_test, R_train, R_val, R_test)

(943, 8)
(1682, 8)
Epoch 1 train_loss : 38.77296447753906
Epoch 2 train_loss : 25.995100021362305
Epoch 3 train_loss : 17.991588592529297
Epoch 4 train_loss : 15.652216911315918
Epoch 5 train_loss : 12.384057998657227
Epoch 6 train_loss : 11.95546817779541
Epoch 7 train_loss : 10.168505668640137
Epoch 8 train_loss : 8.919242858886719
Epoch 9 train_loss : 9.494385719299316
Epoch 10 train_loss : 8.015703201293945
Epoch 11 train_loss : 7.2010416984558105
Epoch 12 train_loss : 6.681835174560547
Epoch 13 train_loss : 6.403016090393066
Epoch 14 train_loss : 6.087214469909668
Epoch 15 train_loss : 5.964330196380615
Epoch 16 train_loss : 5.9408721923828125
Epoch 17 train_loss : 5.4376749992370605
Epoch 18 train_loss : 5.495115280151367
Epoch 19 train_loss : 5.595522403717041
Epoch 20 train_loss : 5.484116554260254
Epoch 21 train_loss : 5.092400074005127
Epoch 22 train_loss : 4.721324443817139
Epoch 23 train_loss : 4.60207462310791
Epoch 24 train_loss : 4.589034557342529
Epoch 25 train_loss : 4

KeyboardInterrupt: 

In [21]:
from models.joint_model import JointModel

# Use the notebook-local XMY class here instead of JointModel
# (the JointModel(config, dataset_name, 'Weighted_XMY') variant is disabled).
# Note the shorter run: 1000 steps rather than the `step` value configured earlier.
model = XMY(
    latent_side_info=latent_side_info,
    M_max_rank=latent_M,
    lambda_M=lambda_,
    step=1000,
    dataset_name=dataset_name,
    p_value=p_value,
)

In [22]:
# Train the XMY model built in the previous cell; the result is the RMSE
# triple measured at the step with the lowest validation RMSE.
best_train_rmse, best_val_rmse, best_test_rmse = model.fit(
    R_bar_train=R_bar_train,
    R_bar_val=R_bar_val,
    R_bar_test=R_bar_test,
    R_train=R_train,
    R_val=R_val,
    R_test=R_test,
)

train_rmse : tf.Tensor(134.82095, shape=(), dtype=float32)
train_rmse : tf.Tensor(25.278225, shape=(), dtype=float32)
train_rmse : tf.Tensor(15.830902, shape=(), dtype=float32)
train_rmse : tf.Tensor(11.510918, shape=(), dtype=float32)
train_rmse : tf.Tensor(8.729328, shape=(), dtype=float32)
train_rmse : tf.Tensor(7.0511446, shape=(), dtype=float32)
train_rmse : tf.Tensor(7.0346913, shape=(), dtype=float32)
train_rmse : tf.Tensor(9.223146, shape=(), dtype=float32)
train_rmse : tf.Tensor(51.757206, shape=(), dtype=float32)
train_rmse : tf.Tensor(4.8253813, shape=(), dtype=float32)
train_rmse : tf.Tensor(6.5064387, shape=(), dtype=float32)
train_rmse : tf.Tensor(7.334402, shape=(), dtype=float32)
train_rmse : tf.Tensor(6.8647223, shape=(), dtype=float32)
train_rmse : tf.Tensor(5.7995777, shape=(), dtype=float32)
train_rmse : tf.Tensor(5.9679575, shape=(), dtype=float32)
train_rmse : tf.Tensor(4.2167177, shape=(), dtype=float32)
train_rmse : tf.Tensor(3.6763873, shape=(), dtype=float32)


KeyboardInterrupt: 