In [18]:
import sys
import os

sys.path.append(os.path.abspath('..'))

from modules.adaptive.filters.rule_based import RuleBasedRecommender
from modules.adaptive.filters.collaborative import CollaborativeFiltering
from modules.adaptive.filters.content_based import ContentBasedFiltering
from pipeline import Pipeline

In [19]:
pipeline = Pipeline(
    ratings_path='../storage/u.data',
    items_path='../storage/u.item',
    users_path='../storage/u.user'
)
pipeline.load_dataset()

In [20]:
rule_recommender = RuleBasedRecommender(pipeline.ratings_df, pipeline.items_df)

print("Top Movies Overall:")
print(rule_recommender.recommend_top_movies(n=10))

genre = "Drama"
print(f"Top Movies in Genre: {genre}")
print(rule_recommender.recommend_by_genre(genre=genre, n=10))

Top Movies Overall:
   item_id  rating                                              title
0      814     5.0                      Great Day in Harlem, A (1994)
1     1599     5.0                      Someone Else's America (1995)
2     1201     5.0         Marlene Dietrich: Shadow and Light (1996) 
3     1122     5.0                     They Made Me a Criminal (1939)
4     1653     5.0  Entertaining Angels: The Dorothy Day Story (1996)
5     1293     5.0                                    Star Kid (1997)
6     1500     5.0                          Santa with Muscles (1996)
7     1189     5.0                                 Prefontaine (1997)
8     1536     5.0                               Aiqing wansui (1994)
9     1467     5.0               Saint of Fort Washington, The (1993)
Top Movies in Genre: Drama
   item_id    rating                                              title
0     1122  5.000000                     They Made Me a Criminal (1939)
1     1536  5.000000                   

In [21]:
ratings_file = '../storage/u.data'
metadata_file = '../storage/u.item'

In [22]:
cf_recommender = CollaborativeFiltering(
    ratings_file=ratings_file, metadata_file=metadata_file
)
cf_recommender.fit()
print("Collaborative Filtering RMSE:", cf_recommender.evaluate())

RMSE: 0.9384
Collaborative Filtering RMSE: 0.9383590945877811


In [23]:
cb_recommender = ContentBasedFiltering(ratings_file, metadata_file)
print("Content-Based Filtering RMSE:", cb_recommender.evaluate())

Content-Based Filtering RMSE: 1.7903511577811684


In [24]:
pipeline = Pipeline(
    ratings_path='../storage/u.data',
    items_path='../storage/u.item',
    users_path='../storage/u.user'
)
pipeline.load_dataset()
_, test_df = pipeline.partition_data(partition_type='user_stratified')

In [27]:
import numpy as np

def calculate_ndcg(recommended_items, relevant_items):
    dcg = 0
    for i, item in enumerate(recommended_items, start=1):
        if item in relevant_items:
            dcg += 1 / (np.log2(i + 1))

    ideal_dcg = sum(1 / (np.log2(i + 1)) for i in range(1, len(relevant_items) + 1))
    return dcg / ideal_dcg if ideal_dcg > 0 else 0.0

In [41]:
n = 10
user_precision_list = []
user_recall_list = []
user_ndcg_list = []

for user_id in test_df['user_id'].unique():
    relevant_items = set(
        test_df[(test_df['user_id'] == user_id) & (test_df['rating'] >= 4)]['item_id']
    )
    
    if not relevant_items:
        continue

    top_movies = rule_recommender.recommend_top_movies(n=n)['item_id'].tolist()

    precision = len([item for item in top_movies if item in relevant_items]) / n
    recall = len([item for item in top_movies if item in relevant_items]) / len(relevant_items)
    ndcg = calculate_ndcg(top_movies, relevant_items)

    user_precision_list.append(precision)
    user_recall_list.append(recall)
    user_ndcg_list.append(ndcg)

# Aggregate user-specific metrics
rule_precision = np.mean(user_precision_list)
rule_recall = np.mean(user_recall_list)
rule_ndcg = np.mean(user_ndcg_list)

print("Rule-Based Evaluation (User-Level):")
print(f"Average Precision@{n}: {rule_precision}")
print(f"Average Recall@{n}: {rule_recall}")
print(f"Average nDCG@{n}: {rule_ndcg}")

Rule-Based Evaluation (User-Level):
Average Precision@10: 0.0021164021164021165
Average Recall@10: 0.00030784265654227444
Average nDCG@10: 0.0004482383226558441


In [42]:
n = 10
cf_precision, cf_recall, cf_ndcg = [], [], []

for user_id in test_df['user_id'].unique():
    item_scores = [
        (item_id, cf_recommender.predict(user_id, item_id))
        for item_id in test_df['item_id'].unique()
    ]
    top_items = [item for item, _ in sorted(item_scores, key=lambda x: x[1], reverse=True)[:n]]
    relevant_items = set(test_df[(test_df['user_id'] == user_id) & (test_df['rating'] >= 4)]['item_id'])

    precision = len([item for item in top_items if item in relevant_items]) / n
    recall = len([item for item in top_items if item in relevant_items]) / len(relevant_items) if relevant_items else 0
    ndcg = calculate_ndcg(top_items, relevant_items)

    cf_precision.append(precision)
    cf_recall.append(recall)
    cf_ndcg.append(ndcg)

print("Collaborative Filtering Evaluation:")
print(f"Average Precision@{n}: {np.mean(cf_precision)}")
print(f"Average Recall@{n}: {np.mean(cf_recall)}")
print(f"Average nDCG@{n}: {np.mean(cf_ndcg)}")

Collaborative Filtering Evaluation:
Average Precision@10: 0.3380952380952381
Average Recall@10: 0.06583799998481352
Average nDCG@10: 0.11219658274849809


In [43]:
n = 10
cb_precision, cb_recall, cb_ndcg = [], [], []

for user_id in test_df['user_id'].unique():
    user_profile = cb_recommender._get_user_profile(user_id)
    item_scores = cb_recommender.item_profiles.dot(user_profile)
    top_items = item_scores.nlargest(n).index.tolist()
    relevant_items = set(test_df[(test_df['user_id'] == user_id) & (test_df['rating'] >= 4)]['item_id'])

    precision = len([item for item in top_items if item in relevant_items]) / n
    recall = len([item for item in top_items if item in relevant_items]) / len(relevant_items) if relevant_items else 0
    ndcg = calculate_ndcg(top_items, relevant_items)

    cb_precision.append(precision)
    cb_recall.append(recall)
    cb_ndcg.append(ndcg)

print("Content-Based Filtering Evaluation:")
print(f"Average Precision@{n}: {np.mean(cb_precision)}")
print(f"Average Recall@{n}: {np.mean(cb_recall)}")
print(f"Average nDCG@{n}: {np.mean(cb_ndcg)}")

Content-Based Filtering Evaluation:
Average Precision@10: 0.11322751322751322
Average Recall@10: 0.02298696361241716
Average nDCG@10: 0.04146808075971509


In [48]:
models = ['Rule-Based', 'Collaborative Filtering', 'Content-Based']
precision_scores = [user_precision_list, cf_precision, cb_precision]
recall_scores = [user_recall_list, cf_recall, cb_recall]
ndcg_scores = [user_ndcg_list, cf_ndcg, cb_ndcg]
print("Precision Scores:", precision_scores)
print("Recall Scores:", recall_scores)
print("nDCG Scores:", ndcg_scores)

Precision Scores: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.6, 0.6, 0.9, 1.0, 0.0, 0.3