### Loading Data

In [1]:
from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

# Build the product registry from the rows read by the two CSV loaders
# (ProductRow first, ProductMappingRow second).
product_registry = ProductRegistry(
    CSVLoader(ProductRow).read(),
    CSVLoader(ProductMappingRow).read(),
)

In [2]:
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

# Build the user registry from the rows read by the two CSV loaders
# (UserRow first, UserMappingRow second).
user_registry = UserRegistry(
    CSVLoader(UserRow).read(),
    CSVLoader(UserMappingRow).read(),
)

In [3]:
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

# Build the rating registry from the loaded rating rows; it also receives the
# user and product registries created in the previous cells.
rating_registry = RatingRegistry(
    CSVLoader(RatingRow).read(),
    user_registry,
    product_registry,
)

### Loading Recommender

In [4]:
from recommendation.recommenders.random_recommender import RandomRecommender
# Instantiate the recommender with the product, user and rating registries.
# NOTE(review): no random seed is set anywhere in this notebook; if
# RandomRecommender draws random numbers, results will differ between runs -- confirm.
recommender = RandomRecommender(
    product_registry,
    user_registry,
    rating_registry,
)

### Generating top-10 recommendation paths for all users

In [5]:
from tqdm import tqdm

def recommendation_for_all_users(
    user_registry,
    recommender,
    last_user: int = 6039,
    k: int = 10,
) -> dict:
    """Generate the top-k recommended products for users 1 up to 'last_user'.

    For every uid in range(1, last_user + 1), the user is looked up with
    'user_registry.find_by_uid(uid)' and passed to
    'recommender.recommend(user, k=k)'. For each returned path, the
    'entity_id' of the element at index 1 of 'path.recommendation' is cast
    to int and collected.

    Note that 6039 is the last uid you can use for 'last_user' if there are no
    additional users.
    NOTE(review): the reference file evaluated later in this notebook reports
    6040 users -- confirm that 6039 really is the last valid uid.

    param:
        user_registry: an instantiated UserRegistry.
        recommender: an instance of a class derived from AbstractRecommender.
        last_user: the last user uid (inclusive) to generate recommendations
            for; a value below 1 yields an empty dictionary.
        k: the number of recommendations requested per user (default: 10,
            the value that was previously hard-coded).

    return:
        users_recommendation: Dict[int, list[int]]; a dictionary whose keys are
        the user uids from 1 to 'last_user' and whose values are the
        recommended product ids (as ints) for that uid.
    """
    users_recommendation = {}

    for uid in tqdm(range(1, last_user + 1)):
        target_user = user_registry.find_by_uid(uid)
        recommendation_paths = recommender.recommend(target_user, k=k)
        # Index 1 of each path's 'recommendation' holds the node whose
        # entity_id is stored as the recommended item.
        users_recommendation[uid] = [
            int(reco_path.recommendation[1].entity_id)
            for reco_path in recommendation_paths
        ]

    return users_recommendation


## Timing the execution (disabled: commented-out profiling variant of the function above)
# import time

# def recommendation_for_all_users(user_registry, recommender, nbr=):
#     users_recommendation = {}
    
#     start_time = time.time()
    
#     for uid in tqdm(range(1, 10)):
#         loop_start_time = time.time()

#         # Measure time for finding the target user
#         find_user_start = time.time()
#         target_user = user_registry.find_by_uid(uid)
#         find_user_end = time.time()
#         print(f"Time to find user {uid}: {find_user_end - find_user_start:.4f} seconds")

#         # Measure time for generating recommendations
#         recommend_start = time.time()
#         recommendation_paths = recommender.recommend(target_user, k=10)
#         recommend_end = time.time()
#         print(f"Time to recommend for user {uid}: {recommend_end - recommend_start:.4f} seconds")

#         # Measure time for processing recommendation paths
#         process_start = time.time()
#         recommended_nodes = [int(reco_path.recommendation[1].entity_id) for reco_path in recommendation_paths]
#         users_recommendation[uid] = recommended_nodes
#         process_end = time.time()
#         print(f"Time to process recommendations for user {uid}: {process_end - process_start:.4f} seconds")
        
#         loop_end_time = time.time()
#         print(f"Total time for user {uid}: {loop_end_time - loop_start_time:.4f} seconds")

#     total_time = time.time() - start_time
#     print(f"Total time for all users: {total_time:.4f} seconds")
    
#     return users_recommendation

In [6]:
# Quick run on a small subset: only user uids 1 through 4 are processed.
topk_items = recommendation_for_all_users(
    user_registry=user_registry,
    recommender=recommender,
    last_user=4,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [01:04<00:00, 16.06s/it]


In [7]:
# Bare last expression: the notebook renders the dict itself, as the `out` cells do.
topk_items

{1: [1218, 2165, 2613, 1151, 821, 492, 2585, 546, 2147, 1230], 2: [2396, 694, 773, 1880, 2689, 209, 2752, 465, 2261, 1189], 3: [2418, 1994, 2099, 611, 411, 2338, 1030, 1266, 1861, 200], 4: [2738, 2200, 2662, 2727, 2491, 1231, 1476, 466, 86, 631]}


### Evaluation

In [11]:
from metrics_alone.eval_all_metrics import evaluate_rec_quality

# Score the generated top-k lists; `out` keeps whatever evaluate_rec_quality returns.
out = evaluate_rec_quality(
    dataset_name="ml1m",
    topk_items=topk_items,
)

Evaluating rec quality for None: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<?, ?it/s]

Number of users: 4, average topk size: 10.00
ndcg: 0.0, mrr: 0.0, precision: 0.0, recall: 0.0, serendipity: 1.0, diversity: 0.5, novelty: 0.96, coverage: 0.01





In [12]:
# Display the value returned by evaluate_rec_quality for the generated recommendations.
out

({'ndcg': [0.0, 0.0, 0.0, 0.0],
  'mrr': [0.0, 0.0, 0.0, 0.0],
  'precision': [0.0, 0.0, 0.0, 0.0],
  'recall': [0.0, 0.0, 0.0, 0.0],
  'serendipity': [1.0, 1.0, 1.0, 1.0],
  'diversity': [0.5, 0.5, 0.4, 0.6],
  'novelty': [0.9390083217753121,
   0.9592233009708739,
   0.9765603328710126,
   0.9643550624133148]},
 {'ndcg': 0.0,
  'mrr': 0.0,
  'precision': 0.0,
  'recall': 0.0,
  'serendipity': 1.0,
  'diversity': 0.5,
  'novelty': 0.9597867545076284,
  'coverage': 0.01340931947703654})

In [9]:
# Sanity check: verify that we reproduce the results of the original GitHub repository.
# Using their PGPR recommendation file "top10_items.pkl",
# we should obtain the following:
# ndcg: 0.25, mrr: 0.18, precision: 0.09, recall: 0.04, serendipity: 0.72, diversity: 0.85, novelty: 0.92, coverage: 0.39
# NOTE(review): the saved output of this cell reports diversity: 0.4, not 0.85 -- confirm which value is correct.

import pickle
# Load the reference top-10 lists stored in the pickle file.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# "top10_items.pkl" when it comes from a trusted source.
with open("top10_items.pkl", "rb") as pred_top_items_file:
    pgpr_item_topks = pickle.load(pred_top_items_file)

# Evaluate the reference lists with evaluate_rec_quality on the "ml1m" dataset.
out = evaluate_rec_quality(
    dataset_name="ml1m",
    topk_items=pgpr_item_topks,
)

Evaluating rec quality for None: 100%|██████████████████████████████████████████| 6040/6040 [00:00<00:00, 11104.97it/s]

Number of users: 6040, average topk size: 10.00
ndcg: 0.25, mrr: 0.18, precision: 0.09, recall: 0.04, serendipity: 0.72, diversity: 0.4, novelty: 0.92, coverage: 0.39





In [None]:
# Display the evaluation result assigned to `out` in the cell above.
out