In [11]:
import pandas as pd
import numpy as np
import torch
from scipy.sparse import coo_matrix
import os


input_dir = '/mnt/workspace/Book-Rec-Sys/input/dataset'

def load_data(data_dir=None):
    """Read ``ratings.csv`` into a DataFrame and print a short preview.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory that contains ``ratings.csv``. When omitted, the
        module-level ``input_dir`` is used, so existing ``load_data()``
        calls keep working unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``ratings.csv``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when the file does not exist.
    """
    # Resolve the default at call time so a later reassignment of
    # ``input_dir`` in the notebook is picked up.
    if data_dir is None:
        data_dir = input_dir
    ratings = pd.read_csv(os.path.join(data_dir, 'ratings.csv'))
    print("Ratings Data Loaded")
    print(ratings.head())
    return ratings

ratings = load_data()

# Coordinates and payload of the sparse user x book matrix. IDs are shifted
# down by one to become zero-based indices (assumes IDs start at 1 — TODO confirm
# against the dataset).
rows = ratings['user_id'].sub(1)
cols = ratings['book_id'].sub(1)
values = ratings['rating']

# The matrix extent is taken from the largest ID present in each column.
num_users = ratings['user_id'].max()
num_books = ratings['book_id'].max()

rating_matrix = coo_matrix(
    (values, (rows, cols)),
    shape=(num_users, num_books),
)

Ratings Data Loaded
   user_id  book_id  rating
0        1      258       5
1        2     4081       4
2        2      260       5
3        2     9296       5
4        2     2318       3


In [9]:
def load_matrices(step, output_dir, map_location=None):
    """Load the saved ``P`` and ``Q`` objects for one training step.

    The files are expected at ``<output_dir>/P_step_<step>.pt`` and
    ``<output_dir>/Q_step_<step>.pt``.

    Parameters
    ----------
    step : int
        Step number embedded in the checkpoint file names.
    output_dir : str or path-like
        Directory that holds the checkpoint files.
    map_location : optional
        Forwarded to ``torch.load``. Pass e.g. ``'cpu'`` to load checkpoints
        that were saved from a GPU on a machine without one. The default
        (``None``) keeps ``torch.load``'s own behaviour.

    Returns
    -------
    tuple
        ``(P, Q.T)`` — note that ``Q`` is returned *transposed* relative to
        what is stored on disk. ``(None, None)`` if either file is missing.
    """
    P_path = os.path.join(output_dir, f'P_step_{step}.pt')
    Q_path = os.path.join(output_dir, f'Q_step_{step}.pt')

    # Both halves are required; a lone P or Q is treated as "not found".
    if not (os.path.exists(P_path) and os.path.exists(Q_path)):
        print(f"Files for step {step} not found.")
        return None, None

    # NOTE(security): torch.load unpickles the file contents; only point this
    # at checkpoints you produced yourself or otherwise trust.
    P = torch.load(P_path, map_location=map_location)
    Q = torch.load(Q_path, map_location=map_location)
    return P, Q.T


# Checkpoint location and the training steps at which P/Q were saved.
output_dir = '/mnt/workspace/Book-Rec-Sys/output'  # Update with your directory path
steps = range(0, 2251, 250)

# Smoke check: confirm that every expected checkpoint pair can be loaded.
for step in steps:
    P, Q = load_matrices(step, output_dir)
    if P is None or Q is None:
        # load_matrices has already reported the missing files.
        continue
    print(f"Matrices on step {step} loaded successfully.")

Matrices on step 0 loaded successfully.
Matrices on step 250 loaded successfully.
Matrices on step 500 loaded successfully.
Matrices on step 750 loaded successfully.
Matrices on step 1000 loaded successfully.
Matrices on step 1250 loaded successfully.
Matrices on step 1500 loaded successfully.
Matrices on step 1750 loaded successfully.
Matrices on step 2000 loaded successfully.
Matrices on step 2250 loaded successfully.


In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The dense rating matrix and its mask are the same for every checkpoint, so
# build them once instead of re-densifying inside the loop.
actual_ratings = torch.as_tensor(rating_matrix.toarray(), dtype=torch.float32, device=device)
mask = actual_ratings > 0  # cells that hold a rating; zeros are treated as unrated

for step in steps:
    P, Q = load_matrices(step, output_dir)
    if P is None or Q is None:
        # Missing checkpoint: load_matrices already printed a notice. Skip it
        # rather than crashing in matmul on None.
        continue
    print(f"Matrices on step {step} loaded successfully.")

    # Put the factors on the same device as the ratings so the masked
    # indexing and subtraction below are valid.
    P, Q = P.to(device), Q.to(device)

    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        # load_matrices returns Q already transposed, hence the second .T here.
        predicted_ratings = torch.matmul(P, Q.T)
        # RMSE over the rated cells only.
        error = torch.sqrt(torch.mean((actual_ratings[mask] - predicted_ratings[mask]) ** 2))

    print("Prediction Error:", error.item())  # Convert to Python scalar for printing

Matrices on step 0 loaded successfully.
Prediction Error: 3.0257887840270996
Matrices on step 250 loaded successfully.
Prediction Error: 0.9021819233894348
Matrices on step 500 loaded successfully.
Prediction Error: 0.836444079875946
Matrices on step 750 loaded successfully.
Prediction Error: 0.8191856741905212
Matrices on step 1000 loaded successfully.
Prediction Error: 0.8147655129432678
Matrices on step 1250 loaded successfully.
Prediction Error: 0.8132274150848389
Matrices on step 1500 loaded successfully.
Prediction Error: 0.8125643134117126
Matrices on step 1750 loaded successfully.
Prediction Error: 0.8122180700302124
Matrices on step 2000 loaded successfully.
Prediction Error: 0.8119957447052002
Matrices on step 2250 loaded successfully.
Prediction Error: 0.8118148446083069
