In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Create pivot table for user-item ratings matrix
def create_pivot_table(df):
    """Build a min-max scaled user-by-game rating matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format ratings; the columns ``user_id``, ``game_name`` and
        ``rating`` are read.

    Returns
    -------
    tuple
        ``(pv_sparse, pv_scaled)`` where ``pv_scaled`` is a DataFrame with
        one row per user and one column per game, and ``pv_sparse`` is the
        same values stored as a SciPy CSR matrix.

    Notes
    -----
    Missing user/game pairs are replaced by 0 *before* scaling, so those
    zeros take part in the min/max computed by ``MinMaxScaler``.
    NOTE(review): with column-wise scaling, a game rated by every user would
    have its lowest real rating mapped to 0, the same value used for
    "no rating" -- confirm this is acceptable for the data at hand.
    """
    ratings_wide = df.pivot_table(
        values='rating',
        index=['user_id'],
        columns=['game_name'],
    )

    # NMF requires non-negative input: fill the gaps, then squash into [0, 1].
    filled = ratings_wide.fillna(0)
    scaled_values = MinMaxScaler().fit_transform(filled)
    pv_scaled = pd.DataFrame(
        scaled_values,
        index=ratings_wide.index,
        columns=ratings_wide.columns,
    )

    # Kept from the original as a safeguard; the input was already filled
    # above, so this is presumably a no-op.
    pv_scaled = pv_scaled.fillna(0)

    # CSR copy of the same values for the factorization step.
    return sp.sparse.csr_matrix(pv_scaled.values), pv_scaled

# Function to calculate RMSE
def calculate_rmse(actual, predicted):
    """Root-mean-squared error between two rating matrices.

    Parameters
    ----------
    actual, predicted : array-like or SciPy sparse matrix
        Sparse inputs are converted to dense arrays before scoring.

    Returns
    -------
    float
        Square root of ``mean_squared_error(actual, predicted)``. Every
        cell of the matrices is scored, including cells equal to 0.
    """
    # Densify each operand only when it is actually sparse.
    dense_actual, dense_predicted = (
        matrix.toarray() if sp.sparse.issparse(matrix) else matrix
        for matrix in (actual, predicted)
    )
    mse = mean_squared_error(dense_actual, dense_predicted)
    return np.sqrt(mse)

# Function to calculate MAE
def calculate_mae(actual, predicted):
    """Mean absolute error between two rating matrices.

    Parameters
    ----------
    actual, predicted : array-like or SciPy sparse matrix
        Sparse inputs are converted to dense arrays before scoring.

    Returns
    -------
    float
        Result of ``mean_absolute_error(actual, predicted)``. Every cell of
        the matrices is scored, including cells equal to 0.
    """
    # Rebind the arguments to dense arrays when they arrive as sparse.
    if sp.sparse.issparse(actual):
        actual = actual.toarray()
    if sp.sparse.issparse(predicted):
        predicted = predicted.toarray()
    return mean_absolute_error(actual, predicted)

# Function to calculate Recall@k
def recall_at_k(y_true, y_pred, k):
    """Mean Recall@k over the rows of a rating matrix.

    For every row, the ``k`` highest-scoring columns of ``y_pred`` are
    compared with the columns where ``y_true`` is strictly positive
    (treated as the relevant items). The row's recall is the fraction of
    relevant items that appear among those top-``k`` columns.

    Parameters
    ----------
    y_true : SciPy sparse matrix or 2-D array-like
        Ground-truth ratings; values > 0 mark relevant items.
    y_pred : SciPy sparse matrix or 2-D array-like
        Predicted scores, row-aligned with ``y_true``.
    k : int
        Number of top-scoring predictions considered per row.

    Returns
    -------
    float
        Average of the per-row recalls. A row without any relevant item
        contributes 0 to the average. Returns 0.0 when there are no rows.
    """
    # Accept dense or sparse input, matching calculate_rmse / calculate_mae;
    # the previous unconditional .toarray() failed on dense arrays.
    true_rows = y_true.toarray() if sp.sparse.issparse(y_true) else np.asarray(y_true)
    pred_rows = y_pred.toarray() if sp.sparse.issparse(y_pred) else np.asarray(y_pred)

    recall_scores = []
    for true, pred in zip(true_rows, pred_rows):
        # Column indices of the k largest predicted scores, best first.
        top_k_preds = np.argsort(pred)[::-1][:k]
        relevant_items = np.where(true > 0)[0]
        if len(relevant_items) > 0:
            hits = len(np.intersect1d(top_k_preds, relevant_items))
            recall = hits / len(relevant_items)
        else:
            # Nothing to retrieve for this row; scored as 0 (not skipped).
            recall = 0
        recall_scores.append(recall)

    # np.mean([]) would return nan and emit a RuntimeWarning.
    if not recall_scores:
        return 0.0
    return np.mean(recall_scores)

# Function to fit NMF model and evaluate metrics
def nmf_model_evaluation(pv_sparse, n_components=450, max_iter=100, tol=1e-1, k=5):
    """Fit an NMF factorization and score its reconstruction.

    Parameters
    ----------
    pv_sparse : SciPy sparse matrix
        Non-negative rating matrix to factorize (one row per user).
    n_components : int, default 450
        Number of latent factors passed to ``NMF``.
    max_iter : int, default 100
        Iteration cap passed to ``NMF``.
    tol : float, default 1e-1
        Stopping tolerance passed to ``NMF``.
    k : int, default 5
        Cut-off used for Recall@k.

    Returns
    -------
    tuple
        ``(rmse, mae, recall_k, predicted_ratings)`` where
        ``predicted_ratings`` is the dense product of the two factor
        matrices.

    Notes
    -----
    All three metrics are computed against ``pv_sparse`` itself, i.e. the
    same matrix the model was fitted on.
    """
    factorizer = NMF(
        n_components=n_components,
        init='random',
        random_state=42,  # fixed seed so the random init is repeatable
        max_iter=max_iter,
        tol=tol,
    )

    # Row factors come from fit_transform; column factors live on the model.
    user_factors = factorizer.fit_transform(pv_sparse)
    item_factors = factorizer.components_
    predicted_ratings = np.dot(user_factors, item_factors)

    # Reconstruction error followed by the ranking metric.
    rmse = calculate_rmse(pv_sparse, predicted_ratings)
    mae = calculate_mae(pv_sparse, predicted_ratings)
    recall_k = recall_at_k(pv_sparse, predicted_ratings, k)

    return rmse, mae, recall_k, predicted_ratings