In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
from dataset import YelpReviewsDataset
from recommendation_engine import CFARecommendationEngine, RandomRecommendationEngine

In [3]:
# Instantiate the Yelp reviews dataset with no arguments; the evaluation cells
# below read split ids, individual records and `dataset_train` from this object.
dataset = YelpReviewsDataset()

## Root Mean Squared Error (RMSE)

Given a dataset split, the RMSE is the root mean squared prediction error of a recommendation engine, estimated on a random sample of user ratings drawn from that split. Ratings for which the prediction error is NaN are excluded from the average.

In [4]:
def rmse(model, dataset, mode='val', n_samples=64):
    """Estimate a model's root mean squared rating-prediction error on a split.

    Args:
        model: Object exposing ``predict_score(user, item_id)``.
        dataset: Object exposing ``get_ids(mode)`` and ``dataset[id]``; each
            record must provide the ``'user'``, ``'item_id'`` and ``'stars'``
            keys.
        mode: Name of the split, forwarded to ``dataset.get_ids``.
        n_samples: Number of ids to draw (without replacement) from the split.
            ``None`` evaluates every id. Values larger than the split are
            clamped to the split size instead of raising ``ValueError``.

    Returns:
        The RMSE over the evaluated records whose error is not NaN, or NaN
        when no such record exists.
    """
    ids = dataset.get_ids(mode)
    if n_samples is not None:
        # Sampling without replacement cannot return more ids than exist.
        n_samples = min(n_samples, len(ids))
        ids = np.random.choice(ids, n_samples, replace=False)

    errors = []
    for sample_id in tqdm(ids):
        # Fetch the record once and reuse it for both the inputs and the
        # ground-truth rating.
        data_point = dataset[sample_id]
        predicted = model.predict_score(data_point['user'], data_point['item_id'])
        errors.append(predicted - data_point['stars'])

    errors = np.asarray(errors, dtype=float)
    errors = errors[~np.isnan(errors)]  # ignore NaN errors (e.g. NaN predictions)
    if errors.size == 0:
        # Nothing left to average: return NaN explicitly rather than through
        # a mean over an empty array (which also emits RuntimeWarnings).
        return np.nan
    return np.sqrt(np.mean(np.square(errors)))

In [5]:
# Baseline: score the random recommender on a sample of the validation split.
random_engine = RandomRecommendationEngine()
# Seed NumPy's global RNG right before the evaluation so the ids drawn inside
# `rmse` (via np.random.choice) are reproducible on Restart & Run All.
np.random.seed(0)
random_model_rmse = rmse(random_engine, dataset, mode='val')
print("RMSE of Random Model: {}".format(random_model_rmse))

100%|██████████| 64/64 [00:29<00:00,  2.19it/s]

RMSE of Random Model: 2.0288399406493864





In [6]:
# Evaluate the aspect-based collaborative filtering engine, constructed from
# the dataset's `dataset_train` reviews, on a sample of the validation split.
cfa_engine = CFARecommendationEngine(df_review=dataset.dataset_train)
# Seed NumPy's global RNG after the engine is built and right before the
# evaluation so the ids drawn inside `rmse` are reproducible across runs.
np.random.seed(0)
# Store the score under its own name so it does not overwrite the
# random-baseline result held in `random_model_rmse`.
cfa_model_rmse = rmse(cfa_engine, dataset, mode='val')
print("RMSE of Aspect-based Collaborative Filtering Model: {}".format(cfa_model_rmse))

Initializing recommendation engine...


  user_vectors = user_vectors / np.linalg.norm(user_vectors, axis=1, keepdims=True)
  0%|          | 0/64 [00:00<?, ?it/s]

Recommendation engine initialization finished.


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  return w_uk / np.linalg.norm(w_uk)
  item_sim = np.dot(item_vector, review_vector)/(np.linalg.norm(item_vector) * np.linalg.norm(review_vector))
100%|██████████| 64/64 [08:01<00:00,  7.53s/it]


RMSE of Aspect-based Collaborative Filtering Model: 1.3068726595090912
