# **DNMF**

In [None]:
#final deepnmf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model


# Seed NumPy's global RNG so any NumPy-driven randomness in this cell repeats.
np.random.seed(42)

# Books act as the source domain and DVDs as the target domain.
source_data, target_data = user_item_matrix_books, user_item_matrix_dvds

# Hold out 20% of the target-domain rows for the final evaluation.
train_data, test_data = train_test_split(target_data, random_state=42, test_size=0.2)

# One scaler object is reused: it is fitted on the source matrix first and
# then re-fitted on the target training rows, so the test rows below are
# scaled with statistics taken from the target training split only.
scaler = StandardScaler()
source_data = scaler.fit_transform(source_data)
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

def build_deepNMF_model(input_dim, latent_dim):
    """Build a small dense autoencoder plus a model exposing its latent code.

    Architecture: Dense(latent_dim, relu) -> Dropout(0.5) ->
    Dense(latent_dim, relu) -> Dense(input_dim, sigmoid).

    Args:
        input_dim: Number of input (and reconstructed) features.
        latent_dim: Width of both hidden layers.

    Returns:
        A tuple ``(autoencoder, encoder)``. Both models share the same input
        and hidden layers; ``autoencoder`` is compiled with Adam and mean
        squared error, ``encoder`` is returned uncompiled and outputs the
        second hidden layer.

    NOTE(review): the sigmoid output confines reconstructions to (0, 1),
    while this notebook feeds StandardScaler output (not confined to that
    range) as the reconstruction target -- confirm this is intended.
    """
    features_in = Input(shape=(input_dim,))

    hidden = Dense(latent_dim, activation='relu')(features_in)
    hidden = Dropout(0.5)(hidden)
    latent_code = Dense(latent_dim, activation='relu')(hidden)
    reconstruction = Dense(input_dim, activation='sigmoid')(latent_code)

    autoencoder = Model(inputs=features_in, outputs=reconstruction)
    encoder = Model(inputs=features_in, outputs=latent_code)

    # Only the autoencoder is trained directly, so only it needs compiling.
    autoencoder.compile(loss='mean_squared_error', optimizer='adam')
    return autoencoder, encoder

input_dim_source = source_data.shape[1]
input_dim_target = train_data.shape[1]
latent_dim = 30  # width of both hidden layers in each autoencoder

autoencoder_source, encoder_source = build_deepNMF_model(input_dim_source, latent_dim)
autoencoder_target, encoder_target = build_deepNMF_model(input_dim_target, latent_dim)

# Pretrain the source-domain autoencoder to reconstruct its own input.
autoencoder_source.fit(source_data, source_data, epochs=100, batch_size=20, shuffle=True)

# Transfer learning: seed the target encoder's first Dense layer with the
# corresponding weights learned on the source domain.
source_weights = encoder_source.get_weights()
target_weights = encoder_target.get_weights()

# Entries 0 and 1 are used here as the first Dense layer's kernel (one row per
# input feature) and bias. The two domains can have different numbers of input
# features, so only the rows both kernels have are copied; any remaining target
# rows keep their fresh initialisation. The previous slicing mixed the two
# dimensions up and was only correct because NumPy clips out-of-range slices.
shared_rows = min(input_dim_source, input_dim_target)
target_weights[0][:shared_rows] = source_weights[0][:shared_rows]
target_weights[1] = source_weights[1]
# Entries 2 and 3 (second hidden layer) are left untouched.

encoder_target.set_weights(target_weights)

# Fine-tune on the target domain. The encoder layers are shared with
# autoencoder_target, so the transferred weights are the starting point.
autoencoder_target.fit(train_data, train_data, epochs=100, batch_size=20, shuffle=True)

test_data_pred = autoencoder_target.predict(test_data)

# Reconstruction error on the held-out target rows (in scaled units).
rmse = np.sqrt(mean_squared_error(test_data, test_data_pred))
mae = mean_absolute_error(test_data, test_data_pred)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')

# Binarise truth and prediction at the same cut-off for classification metrics.
threshold = 0.5
test_data_bin = (test_data > threshold).astype(int)
test_data_pred_bin = (test_data_pred > threshold).astype(int)

# Flatten once instead of once per metric.
true_labels_flat = test_data_bin.flatten()
pred_labels_flat = test_data_pred_bin.flatten()

# zero_division=0 matches the cross-validation cell and avoids a warning when
# a class is never predicted.
precision = precision_score(true_labels_flat, pred_labels_flat, average='macro', zero_division=0)
recall = recall_score(true_labels_flat, pred_labels_flat, average='macro', zero_division=0)
f1 = f1_score(true_labels_flat, pred_labels_flat, average='macro', zero_division=0)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')


In [None]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
import pandas as pd

# Seed NumPy's global RNG for this cell.
np.random.seed(42)

# Books are the source domain and DVDs the target domain; both user-item
# matrices must already be defined by an earlier cell.
source_data = user_item_matrix_books
target_data = user_item_matrix_dvds

# Standardise each matrix. The same scaler object is re-fitted for every
# matrix, so after this line it holds the target-domain statistics.
# NOTE(review): the target matrix is scaled as a whole before it is split into
# cross-validation folds below, so validation rows contribute to the scaling
# statistics -- confirm this is acceptable.
scaler = StandardScaler()
source_data, target_data = (
    scaler.fit_transform(matrix) for matrix in (source_data, target_data)
)

# Function to build the DeepNMF model
def build_deepNMF_model(input_dim, latent_dim):
    """Create a dense autoencoder and a companion encoder model.

    The hidden stack is Dense(latent_dim, relu), Dropout(0.5),
    Dense(latent_dim, relu); a Dense(input_dim, sigmoid) layer reconstructs
    the input from the last hidden layer.

    Args:
        input_dim: Number of input features (also the output width).
        latent_dim: Number of units in each hidden layer.

    Returns:
        ``(autoencoder, encoder)``: the full model, compiled with the Adam
        optimizer and mean-squared-error loss, and an uncompiled model that
        maps the same input to the last hidden layer.
    """
    network_input = Input(shape=(input_dim,))

    # Apply the hidden stack in order; the final tensor is the latent code.
    latent = network_input
    hidden_stack = (
        Dense(latent_dim, activation='relu'),
        Dropout(0.5),
        Dense(latent_dim, activation='relu'),
    )
    for layer in hidden_stack:
        latent = layer(latent)

    output = Dense(input_dim, activation='sigmoid')(latent)

    autoencoder = Model(inputs=network_input, outputs=output)
    encoder = Model(inputs=network_input, outputs=latent)

    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder, encoder

# Function to perform 5-fold cross-validation with verbose outputs
def cross_validate_deepNMF_verbose(train_data, k=5, latent_dim=30, epochs=100, batch_size=20):
    """Run k-fold cross-validation of the autoencoder and report the metrics.

    For every fold a fresh model is built with ``build_deepNMF_model``,
    trained to reconstruct the training rows, and scored on the validation
    rows. Per-fold results and the mean/standard deviation over all folds
    are printed.

    Args:
        train_data: 2-D array or ``pandas.DataFrame`` of shape
            (n_samples, n_features); rows are split across folds.
        k: Number of folds.
        latent_dim: Hidden-layer width passed to ``build_deepNMF_model``.
        epochs: Training epochs per fold.
        batch_size: Mini-batch size per fold.

    Returns:
        dict mapping ``'rmse'``, ``'mae'``, ``'precision'``, ``'recall'`` and
        ``'f1'`` to a list holding one value per fold. (Earlier versions
        returned nothing, so the computed metrics could only be read from
        the printed output.)
    """
    # KFold indices are positional, so work on a plain NumPy array.
    if isinstance(train_data, pd.DataFrame):
        train_data = train_data.to_numpy()

    kfold = KFold(n_splits=k, shuffle=True, random_state=42)
    metrics = {'rmse': [], 'mae': [], 'precision': [], 'recall': [], 'f1': []}

    # Loop invariants: feature count and the binarisation cut-off.
    input_dim = train_data.shape[1]
    threshold = 0.5

    for fold, (train_idx, val_idx) in enumerate(kfold.split(train_data)):
        print(f"\n===== Starting Fold {fold + 1} =====")

        train_fold = train_data[train_idx]
        val_fold = train_data[val_idx]

        # A fresh model per fold so no weights leak between folds; the
        # encoder half is not needed for scoring.
        autoencoder, _ = build_deepNMF_model(input_dim, latent_dim)
        autoencoder.fit(train_fold, train_fold, epochs=epochs, batch_size=batch_size, shuffle=True, verbose=0)

        val_pred = autoencoder.predict(val_fold)

        # Reconstruction error on the validation rows.
        rmse = np.sqrt(mean_squared_error(val_fold, val_pred))
        mae = mean_absolute_error(val_fold, val_pred)

        # Binarise truth and prediction at the same cut-off; flatten once
        # and reuse the flat labels for all three classification metrics.
        true_labels = (val_fold > threshold).astype(int).flatten()
        pred_labels = (val_pred > threshold).astype(int).flatten()

        precision = precision_score(true_labels, pred_labels, average='macro', zero_division=0)
        recall = recall_score(true_labels, pred_labels, average='macro', zero_division=0)
        f1 = f1_score(true_labels, pred_labels, average='macro', zero_division=0)

        fold_scores = {'rmse': rmse, 'mae': mae, 'precision': precision, 'recall': recall, 'f1': f1}
        for metric_name, score in fold_scores.items():
            metrics[metric_name].append(score)

        print(f"Fold {fold + 1} Results:")
        print(f"  RMSE: {rmse:.4f}")
        print(f"  MAE: {mae:.4f}")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall: {recall:.4f}")
        print(f"  F1-Score: {f1:.4f}")

    print("\n===== Cross-Validation Results =====")
    for metric, values in metrics.items():
        print(f"{metric.upper()} (average over {k} folds): {np.mean(values):.4f} ± {np.std(values):.4f}")

    return metrics

# Run 5-fold cross-validation on the scaled target-domain (DVDs) matrix.
# Each fold trains a fresh autoencoder from scratch; the source-domain (Books)
# data is not used by this call.
cross_validate_deepNMF_verbose(target_data, k=5, latent_dim=30, epochs=100, batch_size=20)


# **ONMF**

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

# Define Orthogonal NMF model with modifications
class OrthogonalNMF(tf.keras.Model):
    """Matrix-factorisation rating model with an orthogonality penalty.

    Holds one latent vector per user and per item and predicts a rating as
    their dot product. The loss adds L2 regularisation and a penalty that
    pushes the Gram matrix of each factor matrix towards the identity.

    No non-negativity constraint is applied inside this class: both factor
    matrices use the 'random_normal' initializer and are never clipped.

    NOTE(review): recent Keras versions define their own
    ``Model.compute_loss`` with a different signature -- confirm this class
    is only driven by custom training loops, as it is in this file.

    Args:
        num_users: Number of rows in the user factor matrix.
        num_items: Number of rows in the item factor matrix.
        num_features: Latent dimensionality shared by users and items.
        reg_lambda: Weight of the L2 regularisation term.
        ortho_lambda: Weight of the orthogonality penalty.
    """

    def __init__(self, num_users, num_items, num_features, reg_lambda=0.001, ortho_lambda=0.01):
        super().__init__()
        self.user_features = self.add_weight(
            name='user_features',
            shape=(num_users, num_features),
            initializer='random_normal',
            trainable=True,
        )
        self.item_features = self.add_weight(
            name='item_features',
            shape=(num_items, num_features),
            initializer='random_normal',
            trainable=True,
        )
        self.reg_lambda = reg_lambda
        self.ortho_lambda = ortho_lambda

    def call(self, inputs):
        """Predict one rating per (user index, item index) pair.

        Args:
            inputs: Pair ``(user_idx, item_idx)`` of equally long index
                sequences.

        Returns:
            1-D tensor with the dot product of each selected user/item pair.
        """
        user_idx, item_idx = inputs
        user_rows = tf.gather(self.user_features, user_idx)
        item_rows = tf.gather(self.item_features, item_idx)
        return tf.reduce_sum(user_rows * item_rows, axis=1)

    def compute_loss(self, true_ratings, pred_ratings):
        """Return MSE plus L2 regularisation plus the orthogonality penalty.

        The regularisation and orthogonality terms are computed over the full
        factor matrices, not only over the rows used in the current batch.
        """

        def gram_deviation(factors):
            # Squared Frobenius distance between F^T F and the identity.
            gram = tf.matmul(factors, factors, transpose_a=True)
            return tf.reduce_sum(tf.square(gram - tf.eye(factors.shape[1])))

        mse_loss = tf.reduce_mean(tf.square(true_ratings - pred_ratings))

        l2_total = tf.nn.l2_loss(self.user_features) + tf.nn.l2_loss(self.item_features)
        reg_loss = self.reg_lambda * l2_total

        ortho_total = gram_deviation(self.user_features) + gram_deviation(self.item_features)
        ortho_loss = self.ortho_lambda * ortho_total

        return mse_loss + reg_loss + ortho_loss

# Normalize ratings
def normalize_ratings(ratings):
    """Z-score the observed (strictly positive) ratings; leave the rest at 0.

    Args:
        ratings: Array-like rating matrix in which values <= 0 mean
            "not rated".

    Returns:
        Tuple ``(normalized_ratings, mean_rating, std_rating)``:
        ``normalized_ratings`` has the shape of ``ratings`` and a floating
        dtype (the input's dtype if it is already floating, otherwise
        float64); ``mean_rating`` and ``std_rating`` are the mean and
        standard deviation of the observed ratings.
    """
    ratings = np.asarray(ratings)
    observed = ratings > 0
    observed_values = ratings[observed]

    mean_rating = np.mean(observed_values)
    std_rating = np.std(observed_values)

    # The output must be floating point: with np.zeros_like() an integer
    # rating matrix produced an integer output and the z-scores were silently
    # truncated towards zero.
    if np.issubdtype(ratings.dtype, np.floating):
        out_dtype = ratings.dtype
    else:
        out_dtype = np.float64
    normalized_ratings = np.zeros(ratings.shape, dtype=out_dtype)

    # When every observed rating is identical the deviation is zero; divide
    # by 1 instead so the result is 0 rather than NaN. The true (zero)
    # standard deviation is still what gets returned.
    divisor = std_rating if std_rating > 0 else 1.0
    normalized_ratings[observed] = (observed_values - mean_rating) / divisor
    return normalized_ratings, mean_rating, std_rating

def denormalize_ratings(norm_ratings, mean_rating, std_rating):
    """Map z-scored ratings back to the original scale.

    Computes ``norm_ratings * std_rating + mean_rating``.

    Args:
        norm_ratings: Normalised rating(s), scalar or array.
        mean_rating: Mean that was subtracted during normalisation.
        std_rating: Standard deviation that was divided out.

    Returns:
        Rating(s) on the original scale, same shape as ``norm_ratings``.
    """
    rescaled = norm_ratings * std_rating
    return rescaled + mean_rating

# Evaluate the model
def evaluate_model(model, data, mean_rating, std_rating):
    """Score a rating model on the non-zero entries of ``data``.

    Args:
        model: Callable taking ``(user_idx, item_idx)`` and returning an
            object with ``.numpy()`` that yields normalised predictions.
        data: 2-D NumPy rating matrix; zero entries are treated as
            unobserved and ignored. Row and column positions are passed to
            ``model`` as user and item indices.
        mean_rating: Mean used to normalise the training ratings.
        std_rating: Standard deviation used to normalise the training
            ratings.

    Returns:
        Tuple ``(rmse, mae, precision, recall, f1)``. RMSE and MAE are on the
        original rating scale; the classification metrics treat a rating at
        or above the mean of the true ratings as positive.

    Raises:
        ValueError: If ``data`` has no non-zero entries.
    """
    user_idx, item_idx = np.nonzero(data)
    if user_idx.size == 0:
        # Fail early with a clear message; previously this only surfaced
        # later as an opaque ValueError from np.min on an empty array.
        raise ValueError("evaluate_model needs at least one non-zero rating in `data`.")

    true_ratings = data[user_idx, item_idx].astype(np.float32)

    # The model predicts in normalised space; bring predictions back to the
    # original rating scale before comparing.
    pred_ratings_norm = model((user_idx, item_idx)).numpy()
    pred_ratings = denormalize_ratings(pred_ratings_norm, mean_rating, std_rating)

    # Clip predictions to the range spanned by the true ratings being scored.
    min_rating = np.min(true_ratings)
    max_rating = np.max(true_ratings)
    pred_ratings = np.clip(pred_ratings, min_rating, max_rating)

    rmse = np.sqrt(mean_squared_error(true_ratings, pred_ratings))
    mae = mean_absolute_error(true_ratings, pred_ratings)

    # Binarise both sides at the mean true rating for classification metrics.
    threshold = np.mean(true_ratings)
    true_binary = (true_ratings >= threshold).astype(int)
    pred_binary = (pred_ratings >= threshold).astype(int)

    precision = precision_score(true_binary, pred_binary, zero_division=0)
    recall = recall_score(true_binary, pred_binary, zero_division=0)
    f1 = f1_score(true_binary, pred_binary, zero_division=0)

    return rmse, mae, precision, recall, f1

# Cross-validation training
def train_model_cross_val(ratings, num_folds=5, epochs=100, learning_rate=0.001, num_features=50, reg_lambda=0.001, ortho_lambda=0.01, init_user_features=None):
    """Cross-validate ``OrthogonalNMF`` with a user-wise K-fold split.

    Users (rows of ``ratings``) are split into folds. For each fold a fresh
    model is trained on the observed ratings of the training users and
    scored with ``evaluate_model`` on the observed ratings of the held-out
    users.

    Held-out users contribute no rating error during training; their
    embeddings are only touched by the regularisation and orthogonality
    terms, which are computed over the whole factor matrix. Supplying
    ``init_user_features`` gives those users a starting point other than the
    random initialisation.

    Args:
        ratings: 2-D rating matrix (users x items); zeros mean "not rated".
        num_folds: Number of folds.
        epochs: Training epochs per fold.
        learning_rate: Adam learning rate.
        num_features: Latent dimensionality.
        reg_lambda: L2 regularisation weight.
        ortho_lambda: Orthogonality penalty weight.
        init_user_features: Optional array of shape
            (num_users, num_features) assigned to each fold's user factor
            matrix before training, e.g. factors pretrained on another
            domain. ``None`` keeps the model's own initialisation.

    Returns:
        Tuple ``(avg_metrics, std_metrics)`` of NumPy arrays ordered as
        (RMSE, MAE, precision, recall, F1).
    """
    ratings = np.asarray(ratings)
    num_users, num_items = ratings.shape
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    fold_results = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(ratings)):
        print(f"\nStarting Fold {fold + 1}...")

        model = OrthogonalNMF(num_users, num_items, num_features, reg_lambda, ortho_lambda)
        if init_user_features is not None:
            model.user_features.assign(init_user_features)
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        # np.nonzero on the row subset yields positions *within the subset*.
        # Map them back to global user ids, otherwise row k of the fold would
        # train (and later be evaluated with) the embedding of user k rather
        # than the embedding of the user it actually belongs to.
        local_rows, item_idx = np.nonzero(ratings[train_idx])
        user_idx = train_idx[local_rows]
        true_ratings = ratings[user_idx, item_idx].astype(np.float32)

        # Normalise with statistics from the training ratings only.
        mean_rating = np.mean(true_ratings)
        std_rating = np.std(true_ratings)
        if std_rating == 0:
            # All training ratings identical: avoid dividing by zero.
            std_rating = np.float32(1.0)
        true_ratings_norm = (true_ratings - mean_rating) / std_rating

        dataset = tf.data.Dataset.from_tensor_slices((user_idx, item_idx, true_ratings_norm))
        dataset = dataset.shuffle(buffer_size=100000).batch(512)

        for epoch in range(epochs):
            total_loss = 0
            num_batches = 0
            for batch_user_idx, batch_item_idx, batch_ratings in dataset:
                with tf.GradientTape() as tape:
                    pred_ratings = model((batch_user_idx, batch_item_idx))
                    loss = model.compute_loss(batch_ratings, pred_ratings)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                total_loss += loss.numpy()
                num_batches += 1
            avg_loss = total_loss / num_batches
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {avg_loss:.4f}")

        # evaluate_model reads user ids from row positions, so hand it a
        # full-size matrix in which only the held-out users' rows are filled.
        test_data = np.zeros_like(ratings)
        test_data[test_idx] = ratings[test_idx]

        rmse, mae, precision, recall, f1 = evaluate_model(model, test_data, mean_rating, std_rating)
        print(f"Fold {fold + 1} Results -> RMSE: {rmse:.4f}, MAE: {mae:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")
        fold_results.append((rmse, mae, precision, recall, f1))

    # Aggregate the per-fold metrics column-wise.
    fold_results = np.array(fold_results)
    avg_metrics = np.mean(fold_results, axis=0)
    std_metrics = np.std(fold_results, axis=0)

    print("\nCross-Validation Results:")
    print(f"RMSE: {avg_metrics[0]:.4f} ± {std_metrics[0]:.4f}")
    print(f"MAE: {avg_metrics[1]:.4f} ± {std_metrics[1]:.4f}")
    print(f"Precision: {avg_metrics[2]:.4f} ± {std_metrics[2]:.4f}")
    print(f"Recall: {avg_metrics[3]:.4f} ± {std_metrics[3]:.4f}")
    print(f"F1-score: {avg_metrics[4]:.4f} ± {std_metrics[4]:.4f}")

    return avg_metrics, std_metrics

# Convert both rating matrices to plain NumPy arrays so positional indexing
# works below.
# NOTE(review): `pd` is not imported in this cell; it relies on an earlier
# cell having imported pandas.
if isinstance(user_item_matrix_books, pd.DataFrame):
    user_item_matrix_books = user_item_matrix_books.values
if isinstance(user_item_matrix_dvds, pd.DataFrame):
    user_item_matrix_dvds = user_item_matrix_dvds.values

num_users_books, num_items_books = user_item_matrix_books.shape
num_users_dvds, num_items_dvds = user_item_matrix_dvds.shape

# Make both matrices the same height by keeping only the first
# `common_users` rows of each.
# NOTE(review): this presumably assumes row i is the same user in both
# matrices; nothing here checks that -- verify against how they were built.
if num_users_books != num_users_dvds:
    print("Aligning users between Books and DVDs datasets...")
    common_users = min(num_users_books, num_users_dvds)
    user_item_matrix_books = user_item_matrix_books[:common_users, :]
    user_item_matrix_dvds = user_item_matrix_dvds[:common_users, :]

# Re-read the shapes after the optional truncation.
num_users, num_items_books = user_item_matrix_books.shape
_, num_items_dvds = user_item_matrix_dvds.shape
num_features = 50

# Pretrain on Books: learn user factors on the source domain.
print("Training on Books dataset for transfer learning...")
model_books = OrthogonalNMF(num_users, num_items_books, num_features, reg_lambda=0.001, ortho_lambda=0.01)
optimizer_books = tf.keras.optimizers.Adam(learning_rate=0.001)

# Train only on observed (non-zero) ratings, z-scored with their own
# mean and standard deviation.
user_idx_books, item_idx_books = np.nonzero(user_item_matrix_books)
true_ratings_books = user_item_matrix_books[user_idx_books, item_idx_books].astype(np.float32)
mean_rating_books, std_rating_books = np.mean(true_ratings_books), np.std(true_ratings_books)
true_ratings_books_norm = (true_ratings_books - mean_rating_books) / std_rating_books

dataset_books = tf.data.Dataset.from_tensor_slices((user_idx_books, item_idx_books, true_ratings_books_norm))
dataset_books = dataset_books.shuffle(buffer_size=100000).batch(512)

for epoch in range(50):
    total_loss = 0
    for batch_user_idx, batch_item_idx, batch_ratings in dataset_books:
        with tf.GradientTape() as tape:
            pred_ratings = model_books((batch_user_idx, batch_item_idx))
            loss = model_books.compute_loss(batch_ratings, pred_ratings)
        gradients = tape.gradient(loss, model_books.trainable_variables)
        optimizer_books.apply_gradients(zip(gradients, model_books.trainable_variables))
        total_loss += loss.numpy()
    # The value printed here is the sum of the batch losses for the epoch,
    # not the per-batch average.
    print(f"Books Pretraining - Epoch {epoch + 1}, Loss: {total_loss:.4f}")

# Snapshot the user factors learned on Books.
pretrained_user_features = model_books.user_features.numpy()

# Build a DVDs model whose user factors start from the Books factors.
print("\nStarting training on DVDs dataset using transfer learning...")
model_dvds = OrthogonalNMF(num_users, num_items_dvds, num_features, reg_lambda=0.001, ortho_lambda=0.01)
model_dvds.user_features.assign(pretrained_user_features)

# Perform cross-validation on DVDs.
# NOTE(review): `model_dvds` is not passed to train_model_cross_val, and that
# function is not given `pretrained_user_features` either, so as written the
# cross-validated models do not start from the Books factors -- confirm
# whether the transfer step is meant to feed into this evaluation.
avg_metrics, std_metrics = train_model_cross_val(
    user_item_matrix_dvds,
    num_folds=5,
    epochs=100,
    learning_rate=0.001,
    num_features=num_features,
    reg_lambda=0.001,
    ortho_lambda=0.01
)


# **SNMF**

In [None]:
# Define NMF model with biases
class StandardNMF(tf.keras.Model):
    """Biased matrix-factorisation rating model.

    A rating is predicted as the dot product of a user and an item latent
    vector plus a user bias, an item bias and a global bias. Latent factors
    use the 'random_uniform' initializer; all biases start at zero.

    Args:
        num_users: Number of users (rows of the user factor matrix).
        num_items: Number of items (rows of the item factor matrix).
        num_features: Latent dimensionality.
        regularization: Weight of the L2 penalty on factors and on the user
            and item biases (the global bias is not penalised).
    """

    def __init__(self, num_users, num_items, num_features, regularization=0.01):
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.num_features = num_features
        self.regularization = regularization

        # Latent factor matrices.
        self.user_features = self.add_weight(
            name='user_features',
            shape=(num_users, num_features),
            initializer='random_uniform',
            trainable=True,
        )
        self.item_features = self.add_weight(
            name='item_features',
            shape=(num_items, num_features),
            initializer='random_uniform',
            trainable=True,
        )

        # Bias terms: one per user, one per item, and a single global scalar.
        self.user_bias = self.add_weight(
            name='user_bias',
            shape=(num_users,),
            initializer='zeros',
            trainable=True,
        )
        self.item_bias = self.add_weight(
            name='item_bias',
            shape=(num_items,),
            initializer='zeros',
            trainable=True,
        )
        self.global_bias = self.add_weight(
            name='global_bias',
            shape=(),
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        """Predict one rating per (user index, item index) pair.

        Args:
            inputs: Pair ``(user_idx, item_idx)`` of equally long index
                sequences.

        Returns:
            1-D tensor of predicted ratings.
        """
        user_idx, item_idx = inputs
        user_rows = tf.gather(self.user_features, user_idx)
        item_rows = tf.gather(self.item_features, item_idx)
        interaction = tf.reduce_sum(user_rows * item_rows, axis=1)

        per_user = tf.gather(self.user_bias, user_idx)
        per_item = tf.gather(self.item_bias, item_idx)
        return interaction + per_user + per_item + self.global_bias

    def compute_loss(self, true_ratings, pred_ratings):
        """Return mean squared error plus the weighted L2 penalty."""
        base_loss = tf.reduce_mean(tf.square(true_ratings - pred_ratings))

        # Penalise the full factor matrices and bias vectors, not just the
        # rows used by the current batch.
        l2_total = (
            tf.nn.l2_loss(self.user_features) + tf.nn.l2_loss(self.item_features) +
            tf.nn.l2_loss(self.user_bias) + tf.nn.l2_loss(self.item_bias)
        )
        reg_loss = self.regularization * l2_total
        return base_loss + reg_loss


# **NSNMF**

In [None]:
# Function to apply non-smooth NMF
def apply_non_smooth_nmf(data, n_components=50, max_iter=1000, alpha=0.0, l1_ratio=0.0, init_W=None):
    """Fit a scikit-learn ``NMF`` model, optionally warm-started from ``init_W``.

    The model uses the coordinate-descent solver with Frobenius loss and
    ``random_state=42``.

    Args:
        data: Non-negative matrix of shape (n_samples, n_features).
        n_components: Number of latent components.
        max_iter: Maximum number of solver iterations.
        alpha: Regularisation strength, applied to both W and H.
        l1_ratio: Mix between L1 and L2 regularisation.
        init_W: Optional initial W of shape (n_samples, n_components). When
            given, ``init='custom'`` is used together with a random positive
            H; otherwise ``'nndsvda'`` initialisation is used. The array is
            copied, so the caller's ``init_W`` is not modified.

    Returns:
        The fitted ``NMF`` estimator.
    """
    use_custom_init = init_W is not None

    model = NMF(
        n_components=n_components,
        init='custom' if use_custom_init else 'nndsvda',
        solver='cd',
        beta_loss='frobenius',
        max_iter=max_iter,
        alpha_W=alpha,
        alpha_H=alpha,
        l1_ratio=l1_ratio,
        random_state=42
    )

    if not use_custom_init:
        model.fit(data)
        return model

    # scikit-learn requires custom W and H to have the same floating dtype
    # as the validated input (float32 stays float32, everything else
    # becomes float64).
    factor_dtype = np.float32 if getattr(data, "dtype", None) == np.float32 else np.float64

    # Copy W so the solver cannot update the caller's array in place.
    W_init = np.array(init_W, dtype=factor_dtype, order="C")

    # Draw H from a locally seeded generator (same seed as the estimator) so
    # the result does not depend on, or disturb, NumPy's global RNG state.
    # The small offset keeps every entry strictly positive.
    rng = np.random.RandomState(42)
    H_init = rng.random_sample((n_components, data.shape[1])).astype(factor_dtype) + 1e-6

    model.fit(data, W=W_init, H=H_init)
    return model

# Function to evaluate metrics
def evaluate_metrics(target, prediction, threshold=None):
    """Compare predictions with targets on the observed (target > 0) entries.

    Args:
        target: Array of true ratings; entries <= 0 are ignored.
        prediction: Array of predicted ratings, same shape as ``target``.
        threshold: Cut-off used to binarise both true and predicted ratings
            (``>= threshold`` is the positive class). When ``None``, the mean
            of the predictions on the observed entries is used.

    Returns:
        Tuple ``(rmse, mae, precision, recall, f1)``.
    """
    rated = target > 0
    actual = target[rated]
    estimated = prediction[rated]

    # Regression-style error on the observed entries.
    rmse_value = sqrt(mean_squared_error(actual, estimated))
    mae_value = mean_absolute_error(actual, estimated)

    cutoff = np.mean(estimated) if threshold is None else threshold

    # Binary classification view: is the rating at or above the cut-off?
    actual_positive = (actual >= cutoff).astype(int)
    estimated_positive = (estimated >= cutoff).astype(int)
    precision, recall, f_score, _support = precision_recall_fscore_support(
        actual_positive, estimated_positive, average='binary', zero_division=0
    )
    return rmse_value, mae_value, precision, recall, f_score

# Function to filter sparse data
def filter_sparse_data(ratings, min_user_ratings=5, min_item_ratings=5):
    """Drop users, then items, that have too few positive ratings.

    Rows are filtered first; item counts are then taken on the remaining
    rows. The filter is applied once in that order and is not repeated
    after items are removed.

    Args:
        ratings: 2-D NumPy rating matrix (users x items); values > 0 count
            as ratings.
        min_user_ratings: Minimum number of ratings a user (row) must have.
        min_item_ratings: Minimum number of ratings an item (column) must
            have among the kept users.

    Returns:
        The sub-matrix containing only the kept rows and columns.
    """
    keep_users = np.count_nonzero(ratings > 0, axis=1) >= min_user_ratings
    kept_rows = ratings[keep_users, :]

    keep_items = np.count_nonzero(kept_rows > 0, axis=0) >= min_item_ratings
    return kept_rows[:, keep_items]
