In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
from scipy.linalg import svd
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
# Path to the dataset
ratings_path = '/content/drive/My Drive/MatrixCompletion/ratings.csv'
movies_path = '/content/drive/My Drive/MatrixCompletion/movies.csv'
tags_path = '/content/drive/My Drive/MatrixCompletion/tags.csv'
links_path = '/content/drive/My Drive/MatrixCompletion/links.csv'

# Load the dataset
ratings = pd.read_csv(ratings_path)
movies = pd.read_csv(movies_path)
tags = pd.read_csv(tags_path)
links = pd.read_csv(links_path)

In [5]:
# Create the ratings matrix (rows: users, columns: movies)
ratings_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')
ratings_array = ratings_matrix.values

In [6]:
# M# Mask for observed entries
observed_mask = ~np.isnan(ratings_array)

In [7]:
# Fill NaN values with zeros for initialization
ratings_array[np.isnan(ratings_array)] = 0

In [8]:
# ADMM Parameters
lambda_reg = 0.1  # Regularization parameter
rho = 1.0  # Augmented Lagrangian parameter
max_iter = 100  # Maximum iterations
tol = 1e-4  # Convergence tolerance

In [9]:
# Initialize variables
X = np.copy(ratings_array)  # The completed matrix
Y = np.zeros_like(ratings_array)  # Dual variable
Z = np.zeros_like(ratings_array)  # Auxiliary variable

In [10]:
# ADMM iterations
for iteration in range(max_iter):
    # Update X (minimization step for the observed entries)
    X = np.where(observed_mask, (ratings_array + rho * (Z - Y)) / (1 + rho), Z - Y)

    # Update Z (proximal step for nuclear norm minimization)
    U, S, Vt = np.linalg.svd(X + Y, full_matrices=False)
    S_thresholded = np.maximum(S - lambda_reg / rho, 0)  # Shrinkage
    Z = U @ np.diag(S_thresholded) @ Vt

    # Update Y (dual variable update)
    Y += X - Z

    # Check convergence
    error = np.linalg.norm(X - Z, ord='fro') / np.linalg.norm(X, ord='fro')
    if error < tol:
        print(f"Converged in {iteration + 1} iterations")
        break

Converged in 2 iterations


In [11]:
# Clip predictions to the valid range [0.5, 5.0]
completed_matrix = np.clip(Z, 0.5, 5.0)

# Test mask (20% of observed entries)
np.random.seed(42)
train_mask = observed_mask & (np.random.rand(*ratings_array.shape) > 0.2)
test_mask = observed_mask & ~train_mask

# Extract predictions and actual values for the test set
test_predictions = completed_matrix[test_mask]
actual_test_ratings = ratings_array[test_mask]

# Compute RMSE and MAE
rmse = np.sqrt(mean_squared_error(actual_test_ratings, test_predictions))
mae = mean_absolute_error(actual_test_ratings, test_predictions)

print(f"ADMM Nuclear Norm Minimization RMSE: {rmse:.4f}")
print(f"ADMM Nuclear Norm Minimization MAE: {mae:.4f}")

ADMM Nuclear Norm Minimization RMSE: 0.9211
ADMM Nuclear Norm Minimization MAE: 0.8818
