<a href="https://colab.research.google.com/github/backlashblitz/Bangla-Book-Recommendation-Dataset/blob/main/colabnotebooks/global_popularity_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Global Popularity Recommender
**Bangla Book Recommendation Dataset**

‚ñ∂Ô∏è **Just click `Runtime ‚Üí Run all` to get started!**

In [1]:
import os, shutil
from huggingface_hub import hf_hub_download

# Install dependencies
os.system("pip install -q huggingface_hub tqdm")

# Setup Directory
REPO_ID = "DevnilMaster1/Bangla-Book-Recommendation-Dataset"
DATA_FOLDER = "RokomariBG_Dataset"
os.makedirs(DATA_FOLDER, exist_ok=True)

FILES_NEEDED = ['user_to_review.json', 'book_to_review.json', 'book_to_category.json']

for filename in FILES_NEEDED:
    dest = os.path.join(DATA_FOLDER, filename)
    if not os.path.exists(dest):
        try:
            downloaded_path = hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset")
            shutil.copy(downloaded_path, dest)
            print(f"‚úÖ Saved: {filename}")
        except Exception as e:
            print(f"‚ö†Ô∏è Error downloading {filename}: {e}")
    else:
        print(f"‚úÖ Already present: {filename}")

print("\nüéâ Environment Ready!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


user_to_review.json:   0%|          | 0.00/16.1M [00:00<?, ?B/s]

‚úÖ Saved: user_to_review.json


book_to_review.json:   0%|          | 0.00/15.0M [00:00<?, ?B/s]

‚úÖ Saved: book_to_review.json


book_to_category.json:   0%|          | 0.00/17.6M [00:00<?, ?B/s]

‚úÖ Saved: book_to_category.json

üéâ Environment Ready!


In [2]:
import json
from collections import defaultdict, Counter
from typing import Dict, List, Set
import random
import numpy as np
from tqdm import tqdm

def load_json(path: str):
    if not os.path.exists(path):
        return []
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

class GlobalPopularityRecommender():
    def __init__(self, book_to_review_path: str):
        self.book_to_review_path = book_to_review_path
        self.popular_books = []

    def fit(self):
        book_to_review = load_json(self.book_to_review_path)
        book_counter = Counter()
        for x in book_to_review:
            book_id = str(x["book_id"])
            book_counter[book_id] += 1
        self.popular_books = [book_id for book_id, _ in book_counter.most_common()]

    def recommend(self, user_id: str = None, k: int = 10) -> List[str]:
        return self.popular_books[:k]

In [3]:
class RankingEvaluator:
    def hit_at_k(self, predictions, ground_truth, k):
        top_k = predictions[:k]
        return 1.0 if any(item in ground_truth for item in top_k) else 0.0

    def mrr_at_k(self, predictions, ground_truth, k):
        top_k = predictions[:k]
        for rank, item in enumerate(top_k, start=1):
            if item in ground_truth: return 1.0 / rank
        return 0.0

    def ndcg_at_k(self, predictions, ground_truth, k):
        dcg = 0.0
        top_k = predictions[:k]
        for rank, item in enumerate(top_k, start=1):
            if item in ground_truth: dcg += 1.0 / np.log2(rank + 1)
        ideal_k = min(len(ground_truth), k)
        idcg = sum(1.0 / np.log2(rank + 1) for rank in range(1, ideal_k + 1))
        return dcg / idcg if idcg > 0 else 0.0

    def evaluate(self, predictions, ground_truth):
        metrics = {
            'Hit@5': [], 'Hit@10': [], 'Hit@50': [],
            'MRR@10': [], 'NDCG@10': [], 'NDCG@50': []
        }
        common_users = set(predictions.keys()) & set(ground_truth.keys())
        for u in common_users:
            p, gt = predictions[u], ground_truth[u]
            if not gt: continue
            metrics['Hit@5'].append(self.hit_at_k(p, gt, 5))
            metrics['Hit@10'].append(self.hit_at_k(p, gt, 10))
            metrics['Hit@50'].append(self.hit_at_k(p, gt, 50))
            metrics['MRR@10'].append(self.mrr_at_k(p, gt, 10))
            metrics['NDCG@10'].append(self.ndcg_at_k(p, gt, 10))
            metrics['NDCG@50'].append(self.ndcg_at_k(p, gt, 50))
        return {m: np.mean(v) for m, v in metrics.items()}

def split_user_interactions(user_book, seed=42):
    random.seed(seed)
    train, val, test = {}, {}, {}
    for user, books in user_book.items():
        books = list(set(books))
        random.shuffle(books)
        n = len(books)
        if n < 3:
            train[user], val[user], test[user] = books, [], []
            continue
        n_train, n_val = int(0.7 * n), int(0.15 * n)
        train[user] = books[:n_train]
        val[user] = books[n_train : n_train + n_val]
        test[user] = books[n_train + n_val :]
    return train, val, test

In [4]:
def build_user_book_interactions(u_path, b_path):
    u_data, b_data = load_json(u_path), load_json(b_path)
    r_to_u = {str(x["review_id"]): str(x["user_id"]) for x in u_data}
    r_to_b = {str(x["review_id"]): str(x["book_id"]) for x in b_data}
    user_book = defaultdict(list)
    for rid, uid in r_to_u.items():
        if rid in r_to_b: user_book[uid].append(r_to_b[rid])
    return user_book

# Configuration
DATA_FOLDER = "RokomariBG_Dataset/"
U_PATH = DATA_FOLDER + "user_to_review.json"
B_PATH = DATA_FOLDER + "book_to_review.json"

# Process and Split
user_book = build_user_book_interactions(U_PATH, B_PATH)
_, _, test_user_book = split_user_interactions(user_book)
ground_truth = {u: set(books) for u, books in test_user_book.items() if books}

# Train and Predict
model = GlobalPopularityRecommender(B_PATH)
model.fit()

# IMPORTANT: To get valid @50 metrics, we must recommend 50 books
test_users = list(ground_truth.keys())
preds = {u: model.recommend(u, k=10) for u in tqdm(test_users, desc="Recommending")}

# Results
results = RankingEvaluator().evaluate(preds, ground_truth)
print("\n===== Global Popularity Results =====")
for m, v in results.items():
    print(f"{m}: {v:.4f}")

Recommending: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15427/15427 [00:00<00:00, 499656.59it/s]



===== Global Popularity Results =====
Hit@5: 0.0941
Hit@10: 0.1521
Hit@50: 0.1521
MRR@10: 0.0546
NDCG@10: 0.0556
NDCG@50: 0.0555
