# 1. Load Trained SGLD Model


In [None]:
from google.colab import drive
import os

# Attach Google Drive so files stored there are visible under /content/drive
drive.mount('/content/drive')

# Drive folder that the rest of the notebook reads from and writes to
dataset_path = "/content/drive/MyDrive/Netflix_Dataset/"

# Show what is in the folder as a quick check that the mount succeeded
dataset_files = os.listdir(dataset_path)
print("📂 Files in dataset directory:", dataset_files)

Mounted at /content/drive
📂 Files in dataset directory: ['README', 'combined_data_3.txt', 'combined_data_4.txt', 'probe.txt', 'qualifying.txt', 'combined_data_1.txt', 'movie_titles.csv', 'combined_data_2.txt', 'combined_data_1_fixed.csv', 'movies_data_fixed.csv', 'sparse_matrix.npz', 'U_sgld.npy', 'V_sgld.npy', 'U_sgd.npy', 'V_sgd.npy', 'U_als.npy', 'V_als.npy']


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
from scipy.sparse import load_npz

# Load the ground-truth ratings (sparse matrix; rows are indexed as users and
# columns as movies below)
sparse_matrix_path = "/content/drive/MyDrive/Netflix_Dataset/sparse_matrix.npz"
rating_matrix_sparse = load_npz(sparse_matrix_path)

# Load the SGLD-trained factor matrices
U_sgld = np.load("/content/drive/MyDrive/Netflix_Dataset/U_sgld.npy")
V_sgld = np.load("/content/drive/MyDrive/Netflix_Dataset/V_sgld.npy")

# Take indices AND values from the same COO view so they are guaranteed to line
# up entry by entry. `.nonzero()` drops explicitly stored zeros while `.data`
# keeps them, so mixing the two can misalign (or mismatch in length).
ratings_coo = rating_matrix_sparse.tocoo()
observed = ratings_coo.data != 0  # only compare known (non-zero) ratings
user_indices = ratings_coo.row[observed]
movie_indices = ratings_coo.col[observed]
actual_ratings = ratings_coo.data[observed]  # actual ratings

# Predicted rating for each observed pair = dot product of its U row and V row
predicted_ratings = np.sum(U_sgld[user_indices] * V_sgld[movie_indices], axis=1)

# RMSE (Root Mean Squared Error)
rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))

# MAE (Mean Absolute Error)
mae = mean_absolute_error(actual_ratings, predicted_ratings)

# Display evaluation metrics
print(f" SGLD RMSE: {rmse:.4f}")
print(f" SGLD MAE: {mae:.4f}")


 SGLD RMSE: 0.9671
 SGLD MAE: 0.7289


In [None]:
def precision_at_k(U, V, rating_matrix_sparse, k=10, relevance_threshold=4):
    """
    Mean Precision@K over all users that have at least one non-zero rating.

    For each such user, every movie is scored with ``U[user] @ V.T``, the ``k``
    highest-scoring movies form the recommendation list, and the precision is
    the fraction of that list the user actually rated at or above
    ``relevance_threshold``.

    Note that the recommendation list is drawn from all movies, including ones
    the user never rated; those can never count as hits.

    Parameters
    ----------
    U : ndarray, shape (num_users, latent_dim)
        User factor matrix.
    V : ndarray, shape (num_movies, latent_dim)
        Movie factor matrix.
    rating_matrix_sparse : scipy sparse matrix, shape (num_users, num_movies)
        Observed ratings; zero means "not rated".
    k : int, default 10
        Length of the recommendation list. Must be positive.
    relevance_threshold : number, default 4
        Minimum rating for a movie to count as relevant.

    Returns
    -------
    float
        Mean precision over the evaluated users, or NaN when the matrix holds
        no non-zero rating at all.

    Raises
    ------
    ValueError
        If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    # Work on compressed rows instead of densifying the whole matrix:
    # toarray() would allocate num_users * num_movies cells.
    ratings_csr = rating_matrix_sparse.tocsr()
    if not ratings_csr.has_canonical_format:
        # Merge duplicate entries on a copy so the caller's matrix is untouched.
        ratings_csr = ratings_csr.copy()
        ratings_csr.sum_duplicates()
    row_ptr = ratings_csr.indptr
    col_idx = ratings_csr.indices
    values = ratings_csr.data

    users_with_ratings = np.unique(rating_matrix_sparse.nonzero()[0])
    if users_with_ratings.size == 0:
        return float("nan")

    precision_scores = []
    for user_id in users_with_ratings:
        # Predicted score of every movie for this user
        predicted_scores = np.dot(U[user_id], V.T)

        # Movies this user rated at or above the relevance threshold
        row = slice(row_ptr[user_id], row_ptr[user_id + 1])
        relevant = col_idx[row][values[row] >= relevance_threshold]
        actual_top_movies = set(relevant.tolist())

        # The K movies with the highest predicted score
        recommended_top_movies = set(np.argsort(predicted_scores)[-k:].tolist())

        precision_scores.append(len(actual_top_movies & recommended_top_movies) / k)

    return float(np.mean(precision_scores))


# Score the SGLD factors with Precision@10 against the observed ratings
precision_sgld = precision_at_k(
    U_sgld,
    V_sgld,
    rating_matrix_sparse,
    k=10,
)
print(f"SGLD Precision@10: {precision_sgld:.4f}")


SGLD Precision@10: 0.0250


# ✅ Other Models (In Progress)

## 1. SGD (Stochastic Gradient Descent)


## 2. ALS (Alternating Least Squares)


In [None]:
# 1. Load Required Libraries
import numpy as np
import pandas as pd
import time
from scipy.sparse import load_npz, csr_matrix
from sklearn.metrics import mean_squared_error, mean_absolute_error

# 2. Mount Google Drive and Load Data
from google.colab import drive
drive.mount('/content/drive')

dataset_path = "/content/drive/MyDrive/Netflix_Dataset/"
sparse_matrix_path = f"{dataset_path}sparse_matrix.npz"

# 3. Load Sparse Matrix
# Wrapping the loaded matrix in csr_matrix yields CSR whatever format was stored.
rating_matrix_sparse = csr_matrix(load_npz(sparse_matrix_path))

print(f"✅ Sparse matrix loaded! Shape: {rating_matrix_sparse.shape}")

# 4. Check Dataset Information
latent_dim = 50  # Number of latent dimensions
num_users, num_movies = rating_matrix_sparse.shape

# 5. SGD Training Function
def sgd_train(U, V, rating_matrix_sparse, learning_rate=0.01, epochs=10, batch_size=5000):
    """
    Mini-batch Stochastic Gradient Descent (SGD) for Matrix Factorization.

    Minimises the squared error between each observed rating and the dot
    product of the matching rows of ``U`` and ``V``. No regularisation term is
    applied.

    Within a mini-batch the same user or movie usually appears several times.
    Each affected row is moved by ``learning_rate`` times the MEAN of its
    per-sample gradients in that batch, so every sample contributes and the
    step size does not grow with the number of repeats. For a batch without
    repeats this is exactly the plain per-sample update.

    Parameters
    ----------
    U : ndarray, shape (num_users, latent_dim)
        User factors. Updated IN PLACE.
    V : ndarray, shape (num_movies, latent_dim)
        Movie factors. Updated IN PLACE.
    rating_matrix_sparse : scipy sparse matrix, shape (num_users, num_movies)
        Observed ratings; zero means "not rated".
    learning_rate : float, default 0.01
    epochs : int, default 10
        Number of full passes over the observed ratings.
    batch_size : int, default 5000

    Returns
    -------
    (U, V) : the same array objects that were passed in.
    """
    start_time = time.time()

    # Read indices and values from one COO view so they stay aligned even if
    # the matrix stores explicit zeros (nonzero() drops them, .data keeps them).
    coo = rating_matrix_sparse.tocoo()
    observed = coo.data != 0
    user_indices = coo.row[observed]
    movie_indices = coo.col[observed]
    ratings = coo.data[observed]

    for epoch in range(epochs):
        shuffled_indices = np.random.permutation(len(ratings))

        for i in range(0, len(shuffled_indices), batch_size):
            batch_indices = shuffled_indices[i : i + batch_size]

            # Prepare Mini-batch Data
            user_batch = user_indices[batch_indices]
            movie_batch = movie_indices[batch_indices]
            rating_batch = ratings[batch_indices]

            # Compute Predictions and Errors
            pred_batch = np.sum(U[user_batch] * V[movie_batch], axis=1)
            error_batch = rating_batch - pred_batch

            # Both gradients are taken at the pre-update parameters.
            grad_U = -error_batch[:, np.newaxis] * V[movie_batch]
            grad_V = -error_batch[:, np.newaxis] * U[user_batch]

            # `U[user_batch] -= ...` would keep only ONE update per repeated
            # index (NumPy buffers fancy-index assignment), silently dropping
            # the rest, so gradients are aggregated per row first.
            _apply_mean_step(U, user_batch, grad_U, learning_rate)
            _apply_mean_step(V, movie_batch, grad_V, learning_rate)

        print(f"✅ SGD Epoch {epoch+1}/{epochs} completed.")

    elapsed_time = time.time() - start_time
    print(f"🎯 SGD Training Completed in {elapsed_time:.2f} seconds.")
    return U, V


def _apply_mean_step(factors, rows, grads, learning_rate):
    """Move each distinct row in `rows` by learning_rate * mean of its gradients (in place)."""
    order = np.argsort(rows, kind="stable")
    sorted_rows = rows[order]
    # Position where each run of identical row ids begins in the sorted order.
    run_starts = np.flatnonzero(np.r_[True, sorted_rows[1:] != sorted_rows[:-1]])
    run_lengths = np.diff(np.r_[run_starts, len(sorted_rows)])
    summed = np.add.reduceat(grads[order], run_starts, axis=0)
    # Row ids are unique here, so fancy-index assignment is safe.
    factors[sorted_rows[run_starts]] -= learning_rate * summed / run_lengths[:, np.newaxis]

# 6. ALS Training Function
def als_train(rating_matrix_sparse, latent_dim=50, epochs=10, reg_lambda=0.1):
    """
    Alternating Least Squares (ALS) for Matrix Factorization.

    Each epoch first re-solves every user row of ``U`` with ``V`` held fixed,
    then every movie row of ``V`` with ``U`` held fixed. Each solve is a ridge
    regression over that user's (or movie's) observed ratings:
    ``(F.T @ F + reg_lambda * I) x = F.T @ r``.
    Users or movies without any observed rating keep their random initial row.

    Parameters
    ----------
    rating_matrix_sparse : scipy sparse matrix, shape (num_users, num_movies)
        Observed ratings; zero means "not rated". Not modified.
    latent_dim : int, default 50
        Number of latent factors.
    epochs : int, default 10
        Number of alternating U/V sweeps.
    reg_lambda : float, default 0.1
        Ridge penalty added to the diagonal of every normal-equation system.

    Returns
    -------
    (U, V) : ndarrays of shape (num_users, latent_dim) and (num_movies, latent_dim).
    """
    start_time = time.time()
    num_users, num_movies = rating_matrix_sparse.shape

    # Random Initialization (U is drawn before V)
    U = np.random.normal(0, 0.1, (num_users, latent_dim))
    V = np.random.normal(0, 0.1, (num_movies, latent_dim))

    # Build row-wise (CSR) and column-wise (CSC) views once, on a private copy,
    # instead of slicing the sparse matrix inside the hot loops.
    by_user = rating_matrix_sparse.tocsr(copy=True)
    by_user.sum_duplicates()
    by_user.eliminate_zeros()  # stored zeros are "not rated", same as absent
    by_movie = by_user.tocsc()

    # The ridge term is identical for every solve.
    ridge = reg_lambda * np.eye(latent_dim)

    # ALS Iterative Optimization
    for epoch in range(epochs):
        _als_half_step(U, V, by_user, ridge)   # Update User Matrix U
        _als_half_step(V, U, by_movie, ridge)  # Update Movie Matrix V

        print(f" ALS Epoch {epoch+1}/{epochs} completed.")

    elapsed_time = time.time() - start_time
    print(f" ALS Training Completed in {elapsed_time:.2f} seconds.")
    return U, V


def _als_half_step(target, fixed, compressed, ridge):
    """Re-solve every row of `target` in place against `fixed`, using the major axis of `compressed`."""
    indptr, indices, data = compressed.indptr, compressed.indices, compressed.data
    for i in range(target.shape[0]):
        start, stop = indptr[i], indptr[i + 1]
        if start == stop:
            # No observed ratings: leave the row as it is.
            continue
        fixed_sub = fixed[indices[start:stop]]
        observed = data[start:stop]
        target[i] = np.linalg.solve(fixed_sub.T @ fixed_sub + ridge, fixed_sub.T @ observed)

# 7. Execute Model Training
# Seed NumPy's global RNG so the run is reproducible: the factor initialisation
# below, the shuffling in sgd_train and the initialisation in als_train all
# draw from np.random.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

#  Run SGD (factors start as small Gaussian noise and are refined in place)
U_sgd = np.random.normal(0, 0.1, (num_users, latent_dim))
V_sgd = np.random.normal(0, 0.1, (num_movies, latent_dim))
U_sgd, V_sgd = sgd_train(U_sgd, V_sgd, rating_matrix_sparse)

#  Run ALS (creates its own random initial factors; default latent_dim is 50)
U_als, V_als = als_train(rating_matrix_sparse)

# 8. RMSE and MAE Evaluation Function
def evaluate_model(U, V, rating_matrix_sparse):
    """
    RMSE and MAE of the factorisation ``U @ V.T`` on the observed ratings.

    Only non-zero entries of ``rating_matrix_sparse`` are compared; the
    prediction for an entry (u, m) is the dot product of ``U[u]`` and ``V[m]``.

    Parameters
    ----------
    U : ndarray, shape (num_users, latent_dim)
    V : ndarray, shape (num_movies, latent_dim)
    rating_matrix_sparse : scipy sparse matrix, shape (num_users, num_movies)

    Returns
    -------
    (rmse, mae) : tuple of float

    Raises
    ------
    ValueError
        If the matrix contains no non-zero rating.
    """
    # Indices and values come from the same COO view so they stay aligned even
    # when explicit zeros are stored (nonzero() drops them, .data keeps them).
    ratings_coo = rating_matrix_sparse.tocoo()
    observed = ratings_coo.data != 0
    if not observed.any():
        raise ValueError("rating_matrix_sparse contains no observed (non-zero) ratings")

    user_indices = ratings_coo.row[observed]
    movie_indices = ratings_coo.col[observed]
    ratings = ratings_coo.data[observed]

    pred_ratings = np.sum(U[user_indices] * V[movie_indices], axis=1)
    residuals = ratings - pred_ratings

    # Plain NumPy versions of mean-squared / mean-absolute error on 1-D inputs.
    rmse = float(np.sqrt(np.mean(residuals ** 2)))
    mae = float(np.mean(np.abs(residuals)))

    return rmse, mae

# 9. Evaluate Model Performance
# NOTE: both models are scored on rating_matrix_sparse, the same matrix that was
# passed to the training calls, so these figures are training-set errors.
rmse_sgd, mae_sgd = evaluate_model(
    U_sgd, V_sgd, rating_matrix_sparse
)
rmse_als, mae_als = evaluate_model(
    U_als, V_als, rating_matrix_sparse
)

print(f" SGD RMSE: {rmse_sgd:.4f}, MAE: {mae_sgd:.4f}")
print(f" ALS RMSE: {rmse_als:.4f}, MAE: {mae_als:.4f}")

# 10. Save Trained Models
# Written as .npy files into the dataset folder, in the order SGD U/V then ALS U/V.
for file_name, factors in (
    ("U_sgd.npy", U_sgd),
    ("V_sgd.npy", V_sgd),
    ("U_als.npy", U_als),
    ("V_als.npy", V_als),
):
    np.save(dataset_path + file_name, factors)

print("Models saved successfully!")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Sparse matrix loaded! Shape: (470758, 4499)
✅ SGD Epoch 1/10 completed.
✅ SGD Epoch 2/10 completed.
✅ SGD Epoch 3/10 completed.
✅ SGD Epoch 4/10 completed.
✅ SGD Epoch 5/10 completed.
✅ SGD Epoch 6/10 completed.
✅ SGD Epoch 7/10 completed.
✅ SGD Epoch 8/10 completed.
✅ SGD Epoch 9/10 completed.
✅ SGD Epoch 10/10 completed.
🎯 SGD Training Completed in 201.45 seconds.
✅ ALS Epoch 1/10 completed.
✅ ALS Epoch 2/10 completed.
✅ ALS Epoch 3/10 completed.
✅ ALS Epoch 4/10 completed.
✅ ALS Epoch 5/10 completed.
✅ ALS Epoch 6/10 completed.
✅ ALS Epoch 7/10 completed.
✅ ALS Epoch 8/10 completed.
✅ ALS Epoch 9/10 completed.
✅ ALS Epoch 10/10 completed.
🎯 ALS Training Completed in 5807.02 seconds.
📊 SGD RMSE: 0.6496, MAE: 0.4992
📊 ALS RMSE: 0.5274, MAE: 0.3781
Models saved successfully!
