# SVD

In [6]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader
import torch
import yaml

import dataset
import utils

# Resolve the experiment configuration (the path is relative to the working directory)
config_path = '../config/config.yaml'
config = utils.load_config(config_path)

# Dataset locations and rating-matrix dimensions all come from the 'data_config' section
data_config = config['data_config']
train_path, test_path = data_config['train_path'], data_config['test_path']
num_users, num_items = data_config['num_users'], data_config['num_items']


def _to_scalar(value):
    """Return ``value.item()`` for tensor/NumPy scalars, or ``value`` unchanged otherwise."""
    return value.item() if hasattr(value, 'item') else value


def load_data_to_matrix(dataset, shape=None):
    """Convert a MovieLens-style dataset to a sparse user x item ratings matrix.

    Args:
        dataset: Indexable, sized collection whose elements unpack to
            ``(user_idx, item_idx, rating)``. Each element may be a single-value
            tensor / NumPy scalar (anything exposing ``.item()``) or a plain
            Python number. NOTE: inside this function the parameter shadows the
            imported ``dataset`` module.
        shape: Optional ``(n_rows, n_cols)`` of the resulting matrix. When
            omitted, the module-level ``(num_users, num_items)`` read from the
            config is used, which matches the original behaviour.

    Returns:
        scipy.sparse.csr_matrix holding ``rating`` at ``[user_idx - 1, item_idx - 1]``.

    Notes:
        * 1 is subtracted from both indices, i.e. the reindexed IDs are assumed
          to be 1-based -- TODO confirm against the dataset class.
        * ``csr_matrix`` sums the values of duplicate ``(row, col)`` pairs, so a
          dataset containing the same user/item pair twice yields the sum of
          its ratings in that cell.
    """
    if shape is None:
        # Fall back to the dimensions loaded from the config at module level.
        shape = (num_users, num_items)

    # Coordinate lists (COO triplets) from which the sparse matrix is built
    rows = []
    cols = []
    data = []

    # Index-based access on purpose: only __len__/__getitem__ are required of `dataset`.
    for idx in range(len(dataset)):
        user_idx, item_idx, rating = dataset[idx]

        # Shift IDs down by one to obtain 0-based matrix coordinates
        rows.append(_to_scalar(user_idx) - 1)
        cols.append(_to_scalar(item_idx) - 1)
        data.append(_to_scalar(rating))

    # Build the ratings matrix as a CSR matrix
    return csr_matrix((data, (rows, cols)), shape=shape)


# Wrap both splits as datasets; each element unpacks to (user_idx, item_idx, rating)
train_dataset = dataset.MovieLensDataset(data_path=train_path)
test_dataset = dataset.MovieLensDataset(data_path=test_path)

# batch_size=None turns off automatic batching, so the loaders yield individual samples
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=None)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=None)

# Materialise each split as a sparse user x item ratings matrix
train_matrix = load_data_to_matrix(train_dataset)
test_matrix = load_data_to_matrix(test_dataset)

# Truncated SVD of the training matrix; k is the number of latent factors.
# NOTE(review): unrated pairs are implicit zeros in train_matrix, so the
# factorization is fit to those zeros as well -- consider mean-centering the
# observed ratings before the SVD and adding the mean back to the predictions.
U, sigma, Vt = svds(train_matrix, k=4)
for factor_summary in (U.shape, Vt.shape, sigma):
    print(factor_summary)
sigma = np.diag(sigma)  # promote the singular values to a (k, k) diagonal matrix

# Dense rank-k reconstruction: predicted rating for every (user, item) pair
all_user_predicted_ratings = U.dot(sigma).dot(Vt)

# Score only the cells that actually hold a (non-zero) rating in the test split
test_nonzero = test_matrix.nonzero()
test_actual = np.asarray(test_matrix[test_nonzero]).ravel()
test_predictions = all_user_predicted_ratings[test_nonzero].ravel()

mse = mean_squared_error(test_actual, test_predictions)
rmse = np.sqrt(mse)
print(f'Test RMSE: {rmse}')


(943, 4)
(4, 1682)
[137.38630633 182.96028336 207.15151185 525.77314725]
Test RMSE: 2.971723570289332
