In [2]:
import pandas as pd 
import numpy as np
from sklearn.metrics.pairwise import cosine_distances
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
import requests
import zipfile
import os

In [3]:
# Load the three BX CSV files; the first column of every file becomes the index.
ratings = pd.read_csv('./data/01_BX-Book-Ratings.csv', index_col=0)
# Path normalised: it previously contained a stray double slash ('./data//...').
users = pd.read_csv('./data/02_BX-Users.csv', index_col=0)
# NOTE(review): the recorded output shows a warning pointing at this call —
# presumably pandas' mixed-dtype warning; confirm and pass an explicit `dtype`.
books = pd.read_csv('./data/03_BX-Books.csv', index_col=0)

  books = pd.read_csv('./data/03_BX-Books.csv', index_col=0)


In [4]:
# Preprocess: attach each rating's book title through the shared ISBN key,
# then discard the ISBN column since titles identify books from here on.
ratings = (
    ratings
    .merge(books[['ISBN', 'Book-Title']], on='ISBN')
    .drop(columns=['ISBN'])
)
ratings

Unnamed: 0,User-ID,Book-Rating,Book-Title
0,276725,0,Flesh Tones: A Novel
1,276726,5,Rites of Passage
2,276727,0,The Notebook
3,276729,3,Help!: Level 1
4,276729,6,The Amsterdam Connection : Level 4 (Cambridge ...
...,...,...,...
1031131,276704,0,Edgar Cayce on the Akashic Records: The Book o...
1031132,276704,9,Get Clark Smart : The Ultimate Guide for the S...
1031133,276706,0,Eight Weeks to Optimum Health: A Proven Progra...
1031134,276709,10,The Sherbrooke Bride (Bride Trilogy (Paperback))


In [5]:
# Keep only the books that have at least min_book_ratings rating rows.
min_book_ratings = 10
# Number of rating rows per title.
book_rating_counts = ratings['Book-Title'].value_counts()
# Titles whose count reaches the threshold.
filtered_books = book_rating_counts.loc[book_rating_counts.ge(min_book_ratings)].index
ratings = ratings.loc[ratings['Book-Title'].isin(filtered_books)]
ratings

Unnamed: 0,User-ID,Book-Rating,Book-Title
0,276725,0,Flesh Tones: A Novel
1,276726,5,Rites of Passage
2,276727,0,The Notebook
5,276733,0,Les Particules Elementaires
6,276744,7,A Painted House
...,...,...,...
1031128,276704,7,Dreamcatcher
1031129,276704,0,All I Really Need to Know
1031133,276706,0,Eight Weeks to Optimum Health: A Proven Progra...
1031134,276709,10,The Sherbrooke Bride (Bride Trilogy (Paperback))


In [6]:
# Keep only the users that have at least min_user_ratings rows in `ratings`.
min_user_ratings = 5
# Number of rating rows per user.
user_rating_counts = ratings['User-ID'].value_counts()
# User ids whose count reaches the threshold.
filtered_user = user_rating_counts.loc[user_rating_counts.ge(min_user_ratings)].index
ratings = ratings.loc[ratings['User-ID'].isin(filtered_user)]
ratings

Unnamed: 0,User-ID,Book-Rating,Book-Title
7,276746,0,Lightning
8,276746,0,Manhattan Hunt Club
9,276746,0,Dark Paradise
10,276746,0,Night Sins
12,276746,0,Make Them Cry
...,...,...,...
1031123,276704,0,Obsidian Butterfly
1031124,276704,0,Mirror Image
1031125,276704,0,Plum Island
1031128,276704,7,Dreamcatcher


In [7]:
# Build the user-item matrix: one row per user, one column per book title.
# Several ratings of the same title by one user are averaged (pivot_table's
# default aggregation, spelled out here); missing user/book pairs become 0.
user_item_matrix = (
    ratings
    .pivot_table(
        index='User-ID',
        columns='Book-Title',
        values='Book-Rating',
        aggfunc='mean',
    )
    .fillna(0)
)

In [8]:
# Split the row positions of user_item_matrix into train and test users
# (20% held out for testing, fixed seed so the split is repeatable).
user_indices = np.arange(len(user_item_matrix))
train_user_indices, test_user_indices = train_test_split(
    user_indices,
    test_size=0.2,
    random_state=42,
)

In [9]:
def random_recommendation(ratings, n=10, random_state=None):
    """Pick up to ``n`` distinct book titles uniformly at random.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Frame with a ``'Book-Title'`` column; its distinct values are the
        candidate pool.
    n : int, default 10
        Number of titles requested. If fewer distinct titles exist, all of
        them are returned (in random order) instead of raising ``ValueError``.
    random_state : int, numpy.random.Generator or None, default None
        Seed or generator for a reproducible draw. With ``None`` the global
        ``np.random`` state is used, exactly as before this parameter existed.

    Returns
    -------
    numpy.ndarray
        The sampled titles, without repetition.
    """
    unique_books = ratings['Book-Title'].unique()
    # Sampling without replacement cannot yield more items than the pool holds.
    n = min(n, len(unique_books))
    if random_state is None:
        sampler = np.random
    else:
        # default_rng accepts a seed or passes an existing Generator through.
        sampler = np.random.default_rng(random_state)
    return sampler.choice(unique_books, size=n, replace=False)

# Demo: draw ten random titles and print them as a numbered list.
random_books = random_recommendation(ratings, n=10)
print('Random Score-Based Recommendations:')
for rank, title in enumerate(random_books, start=1):
    print(f"{rank}, {title}")

Random Score-Based Recommendations:
1, Sex and the Single Vampire
2, The Hobbit : The Enchanting Prelude to The Lord of the Rings
3, The Bad Luck Wedding Cake
4, El Club Dumas
5, Robot Adept (Apprentice Adept (Paperback))
6, PIRANHAS
7, Strange Stories of the Supernatural (A Watermill Classic)
8, Mexican Hat (Kevin Kerney Novels (Paperback))
9, Myst: The Book of Ti'ana
10, Imponderables


In [10]:
def popularity_recommendation(ratings, n=10):
    """Return the ``n`` most frequently rated book titles, most-rated first.

    Popularity is the number of non-null ``'Book-Rating'`` entries recorded
    per ``'Book-Title'``; the rating values themselves are not considered.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Frame with ``'Book-Title'`` and ``'Book-Rating'`` columns.
    n : int, default 10
        Number of titles to return.

    Returns
    -------
    pandas.Index
        Titles ordered by descending rating count.
    """
    rating_counts = ratings.groupby('Book-Title')['Book-Rating'].count()
    ranked = rating_counts.sort_values(ascending=False)
    return ranked.index[:n]
    
# Demo: list the ten most-rated titles as a numbered list.
popular_books = popularity_recommendation(ratings, n=10)
print("\nPopularity-based Recommendations:")
for rank, title in enumerate(popular_books, start=1):
    print(f"{rank}, {title}")


Popularity-based Recommendations:
1, Wild Animus
2, The Lovely Bones: A Novel
3, The Da Vinci Code
4, The Nanny Diaries: A Novel
5, Bridget Jones's Diary
6, A Painted House
7, The Secret Life of Bees
8, Divine Secrets of the Ya-Ya Sisterhood: A Novel
9, Angels &amp; Demons
10, Life of Pi


In [11]:
def recommend_books(user_index, strategy, k=10):
    """Return ``k`` recommended titles using the named baseline strategy.

    Both baselines draw from the module-level ``ratings`` frame and do not
    personalise: ``user_index`` is accepted but not used by either strategy.

    Parameters
    ----------
    user_index : int
        Position of the user; currently unused.
    strategy : str
        ``"random"`` or ``"popularity"``.
    k : int, default 10
        Number of titles requested.

    Returns
    -------
    Whatever the selected strategy function returns.

    Raises
    ------
    ValueError
        If ``strategy`` is neither ``"random"`` nor ``"popularity"``.
    """
    if strategy == "random":
        return random_recommendation(ratings, n=k)
    if strategy == "popularity":
        return popularity_recommendation(ratings, n=k)
    raise ValueError("Invalid recommendation strategy")

In [14]:
def evaluate_model(strategy, k=10):
    """Compute micro-averaged precision and recall for a recommendation strategy.

    For every position in the module-level ``test_user_indices``, the books
    whose value in that user's ``user_item_matrix`` row is greater than 0 are
    treated as relevant and compared with the ``k`` titles returned by
    ``recommend_books``. Hits and misses are summed over all test users
    before the two ratios are taken.

    Parameters
    ----------
    strategy : str
        Strategy name forwarded to ``recommend_books``.
    k : int, default 10
        Number of recommendations requested per user.

    Returns
    -------
    tuple of (float, float)
        ``(precision, recall)``. A ratio whose denominator is zero (nothing
        recommended, or no relevant books among the test users) is reported
        as ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    true_positive = 0
    false_positive = 0
    false_negative = 0

    for user_index in test_user_indices:
        # Fetch the user's row once and reuse it for the mask and the lookup.
        user_ratings = user_item_matrix.iloc[user_index]
        true_books = set(user_ratings[user_ratings > 0].index)
        recommended_books = set(recommend_books(user_index, strategy, k))

        true_positive += len(true_books & recommended_books)
        false_positive += len(recommended_books - true_books)
        false_negative += len(true_books - recommended_books)

    recommended_total = true_positive + false_positive
    relevant_total = true_positive + false_negative

    # Guard the divisions: either total can legitimately be zero.
    precision = true_positive / recommended_total if recommended_total else 0.0
    recall = true_positive / relevant_total if relevant_total else 0.0

    return precision, recall

In [13]:
# Evaluate the random score-based recommendation model.
# The function defined in this notebook is `evaluate_model`; the former call to
# `evaluate` only worked with stale kernel state and fails on a fresh run.
random_precision, random_recall = evaluate_model(strategy='random')
print(f"Random Score-Based: Precision = {random_precision:.4f}, Recall = {random_recall:.4f}")

Random Score-Based: Precision = 0.0007, Recall = 0.0006


In [15]:
# Evaluate the popularity-based recommendation model.
# The function defined in this notebook is `evaluate_model`; the former call to
# `evaluate` only worked with stale kernel state and fails on a fresh run.
popularity_precision, popularity_recall = evaluate_model(strategy='popularity')
print(f"Popularity-Based: Precision = {popularity_precision:.4f}, Recall = {popularity_recall:.4f}")

Popularity-Based: Precision = 0.0200, Recall = 0.0167
