In [1]:
import os
import numpy as np
import pandas as pd
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
from RBMAlgorithm import RBMAlgorithm
from RecommenderMetrics import RecommenderMetrics

from surprise.model_selection import LeaveOneOut
from surprise import KNNBaseline

2025-05-25 18:09:11.671666: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-25 18:09:11.680406: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748196551.689264   52059 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748196551.692127   52059 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748196551.699119   52059 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
def Evaluated(algo, data, n=5):

    fullTrainSet = data.build_full_trainset()
    fullAntiTestSet = fullTrainSet.build_anti_testset()
    
    #Build a "leave one out" train/test split for evaluating top-N recommenders
    #And build an anti-test-set for building predictions
    LOOCV = LeaveOneOut(n_splits=1, random_state=1)
    for train, test in LOOCV.split(data):
        LOOCVTrain = train
        LOOCVTest = test
        
    LOOCVAntiTestSet = LOOCVTrain.build_anti_testset()
    sim_options = {'name': 'cosine', 'user_based': False}
    simsAlgo = KNNBaseline(sim_options=sim_options)
    simsAlgo.fit(fullTrainSet)

    metrics = {}
    trainset, testset = train_test_split(data, test_size=0.25, random_state=1)
    algo.fit(trainset)
    predictions = algo.test(testset)
    # rmse = accuracy.rmse(predictions, verbose=True)
    # mae = accuracy.mae(predictions, verbose=True)
    metrics["RMSE"] = RecommenderMetrics.RMSE(predictions)
    metrics["MAE"] = RecommenderMetrics.MAE(predictions)

    algo.fit(LOOCVTrain)
    leftOutPredictions = algo.test(LOOCVTest)        
    # Build predictions for all ratings not in the training set
    allPredictions = algo.test(LOOCVAntiTestSet)
    # Compute top 10 recs for each user
    topNPredicted = RecommenderMetrics.GetTopN(allPredictions, n)
    # See how often we recommended a movie the user actually rated
    metrics["HR"] = RecommenderMetrics.HitRate(topNPredicted, leftOutPredictions)   
    # See how often we recommended a movie the user actually liked
    metrics["cHR"] = RecommenderMetrics.CumulativeHitRate(topNPredicted, leftOutPredictions)
    # Compute ARHR
    metrics["ARHR"] = RecommenderMetrics.AverageReciprocalHitRank(topNPredicted, leftOutPredictions)

    
    print("\n{:<10} {:<10} {:<10} {:<10} {:<10}".format(
            "RMSE", "MAE", "HR", "cHR", "ARHR"))
    print("{:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}".format(
            metrics["RMSE"], metrics["MAE"], metrics["HR"], metrics["cHR"], metrics["ARHR"]))
    print("\nLegend:\n")
    print("RMSE:      Root Mean Squared Error. Lower values mean better accuracy.")
    print("MAE:       Mean Absolute Error. Lower values mean better accuracy.")
    print("HR:        Hit Rate; how often we are able to recommend a left-out rating. Higher is better.")
    print("cHR:       Cumulative Hit Rate; hit rate, confined to ratings above a certain threshold. Higher is better.")
    print("ARHR:      Average Reciprocal Hit Rank - Hit rate that takes the ranking into account. Higher is better.\n" )

In [3]:

def recommend_for_user(algo, data, hotel_map, testSubject=85, k=10):
    
    trainSet = data.build_full_trainset()
    algo.fit(trainSet)
    
    testSet = GetAntiTestSetForUser(data, testSubject)

    predictions = algo.test(testSet)
    
    recommendations = []
    
    print ("\nI recommend for user "+str(testSubject)+": ")
    for userID, hotelID, actualRating, estimatedRating, _ in predictions:
        intHotelID = int(hotelID)
        recommendations.append((intHotelID, estimatedRating))
    
    recommendations.sort(key=lambda x: x[1], reverse=True)
    
    for ratings in recommendations[:k]:
        print(hotel_map[ratings[0]], ratings[1])

In [4]:
def GetAntiTestSetForUser(data, testSubject):
    trainset = data.build_full_trainset()
    fill = trainset.global_mean
    anti_testset = []
    u = trainset.to_inner_uid(str(testSubject))
    user_items = set([j for (j, _) in trainset.ur[u]])
    anti_testset += [(trainset.to_raw_uid(u), trainset.to_raw_iid(i), fill) for
                                i in trainset.all_items() if
                                i not in user_items]
    return anti_testset

In [5]:
testSubject = 1284  # Example user ID for testing

df = pd.read_csv("output.csv")
hotel_map = dict(zip(df["HotelID"], df["Name_Hotel"]))

reader = Reader(line_format='user item rating', sep=',', skip_lines=1)
data = Dataset.load_from_file("ratings.csv", reader=reader)

algo = RBMAlgorithm(epochs=20, hiddenDim=10, learningRate=0.01, batchSize=10)

# Evaluate the RBMAlgorithm
Evaluated(algo, data)
# Recommend hotels for the specified user
recommend_for_user(algo, data, hotel_map, testSubject=testSubject, k=5)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


I0000 00:00:1748196554.219189   52059 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1767 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Trained epoch  0
Trained epoch  1
Trained epoch  2
Trained epoch  3
Trained epoch  4
Trained epoch  5
Trained epoch  6
Trained epoch  7
Trained epoch  8
Trained epoch  9
Trained epoch  10
Trained epoch  11
Trained epoch  12
Trained epoch  13
Trained epoch  14
Trained epoch  15
Trained epoch  16
Trained epoch  17
Trained epoch  18
Trained epoch  19
Processing user  0
Processing user  50
Trained epoch  0
Trained epoch  1
Trained epoch  2
Trained epoch  3
Trained epoch  4
Trained epoch  5
Trained epoch  6
Trained epoch  7
Trained epoch  8
Trained epoch  9
Trained epoch  10
Trained epoch  11
Trained epoch  12
Trained epoch  13
Trained epoch  14
Trained epoch  15
Trained epoch  16
Trained epoch  17
Trained epoch  18
Trained epoch  19
Processing user  0

RMSE       MAE        HR         cHR        ARHR      
1.0600     0.9309     0.0431     0.0431     0.0193    

Legend:

RMSE:      Root Mean Squared Error. Lower values mean better accuracy.
MAE:       Mean Absolute Error. Lower values mean 