In [None]:
import tensorflow as tf

In [1]:
# Import the necessary libraries
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler

In [None]:
# Read the three input tables from CSV, in the same order as before
compat_df, items_df, recs_df = [
    pd.read_csv(csv_path)
    for csv_path in ('/data/compat.csv', '/data/items.csv', '/data/recs.csv')
]

In [None]:
# Preprocessing
def _pivot_first_three(frame):
    """Pivot `frame` using its 1st column as index, 2nd as columns, 3rd as values."""
    cols = frame.columns
    return frame.pivot(index=cols[0], columns=cols[1], values=cols[2])


# User-feature matrix built from the compatibility table
user_features = _pivot_first_three(compat_df)

# Item-feature matrix built from the items table
item_features = _pivot_first_three(items_df)

# Scale each user-feature column by its maximum absolute value;
# missing cells are treated as 0 before scaling
user_features_filled = user_features.fillna(0)
scaler = MaxAbsScaler()
user_features_scaled = scaler.fit_transform(user_features_filled)

In [None]:
# Hold out part of the recommendation records for evaluation
TEST_FRACTION = 0.2  # share of rows placed in the test split
SPLIT_SEED = 42      # fixed seed so the shuffle is repeatable
train_recs, test_recs = train_test_split(
    recs_df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [None]:
# Create user-item matrices for training and testing.
# The first three columns of the split frames are used as
# (row key, column key, cell value) for the pivot.
user_col, item_col, rating_col = train_recs.columns[:3]

train_matrix = train_recs.pivot(index=user_col, columns=item_col, values=rating_col).fillna(0)

# The test pivot is built independently, so its item columns can differ from
# the training ones (different set and/or order). A fitted NMF can only
# transform input with exactly the columns it was fitted on, so the test
# matrix is aligned to the training columns: items never seen in training are
# dropped (the model has no factors for them) and training items absent from
# the test split become all-zero columns.
test_matrix = (
    test_recs.pivot(index=user_col, columns=item_col, values=rating_col)
    .reindex(columns=train_matrix.columns)
    .fillna(0)
)

In [None]:
# Factorize the training matrix with NMF (Non-negative Matrix Factorization)
nmf = NMF(
    n_components=15,
    init='random',
    random_state=0,
)
# Fit on the training matrix and keep the transformed training rows
user_matrix = nmf.fit_transform(train_matrix)
# The fitted components act as the second factor in the reconstruction
item_matrix = nmf.components_

In [None]:
# Make predictions
# Reconstruct the training matrix from its two factors
train_predictions = user_matrix @ item_matrix
# Project the test rows with the already-fitted model, then reconstruct
test_user_matrix = nmf.transform(test_matrix)
test_predictions = test_user_matrix @ item_matrix


In [None]:
# Compute MSE over the non-zero entries of the test matrix only;
# boolean-mask indexing flattens both arrays to the selected cells
test_values = test_matrix.values
non_zero_mask = test_values != 0
mse = mean_squared_error(
    y_true=test_values[non_zero_mask],
    y_pred=test_predictions[non_zero_mask],
)
print(f'Mean Squared Error (MSE): {mse}')

In [None]:
# Function to compute nDCG at k for a set of predictions and true values
def ndcg_at_k(y_true, y_score, k=10):
    """Normalized Discounted Cumulative Gain at rank ``k`` (linear gain).

    Items are ranked by ``y_score`` (highest first); the true relevance of the
    top ``k`` items is summed with a ``1 / log2(rank + 1)`` discount and divided
    by the same sum for the ideal ordering (items sorted by ``y_true``).

    Parameters
    ----------
    y_true : array-like, 1-D
        True relevance (e.g. rating) of each item.
    y_score : array-like, 1-D
        Predicted score of each item, same length as ``y_true``.
    k : int, default 10
        Cut-off rank. Clamped to the number of items, so shorter inputs work.

    Returns
    -------
    float
        nDCG@k. Returns 0.0 when there are no items, ``k <= 0``, or the ideal
        DCG is not positive (no relevant item to find).

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_score`` do not have the same number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_score = np.asarray(y_score, dtype=float).ravel()
    if y_true.shape != y_score.shape:
        raise ValueError("y_true and y_score must have the same length")

    # Never rank deeper than the number of available items
    k = min(int(k), y_true.size)
    if k <= 0:
        return 0.0

    # Discount for ranks 1..k: 1 / log2(rank + 1)
    discounts = 1.0 / np.log2(np.arange(2, k + 2))

    # True relevance of the k items the model ranks highest
    predicted = np.argsort(y_score, kind="stable")[::-1][:k]
    dcg = np.sum(y_true[predicted] * discounts)

    # Best achievable DCG: the k largest true relevances in descending order
    ideal_gains = np.sort(y_true)[::-1][:k]
    idcg = np.sum(ideal_gains * discounts)

    if idcg <= 0:
        return 0.0
    return float(dcg / idcg)

In [None]:
# Calculate nDCG for the test set
# Score every test row whose true ratings sum to a positive value
# (i.e. the user has rated at least one item), pairing it with its predictions
ndcg_scores = [
    ndcg_at_k(true_row, predicted_row)
    for true_row, predicted_row in zip(test_matrix.values, test_predictions)
    if true_row.sum() > 0
]

average_ndcg = np.mean(ndcg_scores)
print(f'Average nDCG: {average_ndcg}')