In [1]:
import pandas as pd
import numpy as np 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

# Load the data
ratings = pd.read_csv('./data/01_BX-Book-Ratings.csv', index_col=0)
users = pd.read_csv('./data/02_BX-Users.csv', index_col=0)
books = pd.read_csv('./data/03_BX-Books.csv', index_col=0)

  books = pd.read_csv('./data/03_BX-Books.csv', index_col=0)


In [2]:
# preprocess tge data
ratings = ratings.merge(books[['ISBN', 'Book-Title']], on='ISBN')
ratings = ratings.drop(['ISBN'], axis=1)
ratings

Unnamed: 0,User-ID,Book-Rating,Book-Title
0,276725,0,Flesh Tones: A Novel
1,276726,5,Rites of Passage
2,276727,0,The Notebook
3,276729,3,Help!: Level 1
4,276729,6,The Amsterdam Connection : Level 4 (Cambridge ...
...,...,...,...
1031131,276704,0,Edgar Cayce on the Akashic Records: The Book o...
1031132,276704,9,Get Clark Smart : The Ultimate Guide for the S...
1031133,276706,0,Eight Weeks to Optimum Health: A Proven Progra...
1031134,276709,10,The Sherbrooke Bride (Bride Trilogy (Paperback))


In [3]:
# filter books at least min_book_ratings ratings
min_book_ratings = 10
book_rating_counts = ratings['Book-Title'].value_counts()
filtered_books = book_rating_counts[book_rating_counts >= min_book_ratings].index
ratings = ratings[ratings['Book-Title'].isin(filtered_books)]

# filter users who rated at least min_user_ratings books
min_user_ratings = 5
user_rating_counts = ratings['User-ID'].value_counts()
filtered_users = user_rating_counts[user_rating_counts >= min_user_ratings].index
ratings = ratings[ratings['User-ID'].isin(filtered_users)]

# create the user-item matrix
user_item_matrix = ratings.pivot_table(index='User-ID', columns='Book-Title', values='Book-Rating').fillna(0)

In [5]:
# compute the cosine similarity matrix
cosine_sim_matrix = cosine_similarity(user_item_matrix)

In [6]:
# split user indices into train and test sets
user_indices = np.arange(user_item_matrix.shape[0])
train_user_indices, test_user_indices = train_test_split(user_indices, test_size=0.2, random_state=42)

In [7]:
def collaborative_filtering_recommendation(user_index, k=10):
    user_similarities = cosine_sim_matrix[user_index]
    top_k_similar_users = np.argsort(user_similarities)[-k-1:-1][::-1]
    similar_user_ratings = user_item_matrix.iloc[top_k_similar_users]
    mean_ratings = similar_user_ratings.mean(axis=0)
    top_k_books = mean_ratings.sort_values(ascending=True).head(k).index
    return top_k_books

In [8]:
def random_recommendation(ratings, n=10):
    unique_books = ratings['Book-Title'].unique()
    random_books = np.random.choice(unique_books, size=n, replace=False)
    return random_books

random_books = random_recommendation(ratings, n=10)
print("Random Score-Based Recommendatios:")
for i, book in enumerate(random_books, 1):
    print(f"{i}, {book}")

Random Score-Based Recommendatios:
1, The Silent Blade (Forgotten Realms:  Paths of Darkness, Book 1)
2, Becoming a Woman of Excellence
3, Revolution from Within : A Book of Self-Esteem
4, Enchanted
5, Julie of the Wolves (Julie of the Wolves)
6, Playing Away
7, Summers at Castle Auburn
8, Mr. Palomar
9, Conceived Without Sin
10, Devil-May-Care


In [9]:
def popularity_recommendation(ratings, n=10):
    popular_books = ratings.groupby('Book-Title')['Book-Rating'].count().sort_values(ascending=False).head(n).index
    return popular_books

popular_books = popularity_recommendation(ratings, n=10)
print('\nPopularity-Based Recommendations:')
for i, book in enumerate(popular_books, 1):
    print(f"{i}, {book}")


Popularity-Based Recommendations:
1, Wild Animus
2, The Lovely Bones: A Novel
3, The Da Vinci Code
4, The Nanny Diaries: A Novel
5, Bridget Jones's Diary
6, A Painted House
7, The Secret Life of Bees
8, Divine Secrets of the Ya-Ya Sisterhood: A Novel
9, Angels &amp; Demons
10, Life of Pi


In [10]:
def recommend_books(user_index, strategy, k=10):
    if strategy == "collaborative":
        top_k_books = collaborative_filtering_recommendation(user_index, k=k)
    elif strategy == "random":
        top_k_books = random_recommendation(ratings, n=k)
    elif strategy == "popularity":
        top_k_books = popularity_recommendation(ratings, n=k)
    else:
        raise ValueError('Invalid recommendation strategy')
    
    return top_k_books

In [11]:
def evaluate_model(strategy, k=10):
    true_positive = 0
    false_positive = 0
    false_negative = 0
    
    for user_index in test_user_indices:
        true_books = set(user_item_matrix.iloc[user_index][user_item_matrix.iloc[user_index] > 0].index)
        recommended_books = set(recommend_books(user_index, strategy, k))
        
        tp = len(true_books.intersection(recommended_books))
        fp = len(recommended_books - true_books)
        fn = len(true_books - recommended_books)
        
        true_positive += tp 
        false_positive += fp 
        false_negative += fn
        
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    
    return precision, recall

In [13]:
# evaluate the collaborative filtering model
cf_precision, cf_recall = evaluate_model(strategy='collaborative')
print(f"Collaborative Filtering: Precision = {cf_precision:.4f}, Recall = {cf_recall:.4f}")

Collaborative Filtering: Precision = 0.0003, Recall = 0.0003


In [14]:
# evaluate the random score-based recommended model
random_precision, random_recall = evaluate_model(strategy='random')
print(f"Random Score-Based: Precision = {random_precision:.4f}, Recall = {random_recall:.4f}")

Random Score-Based: Precision = 0.0007, Recall = 0.0006


In [15]:
# evaluate the popularity recommended model
populariy_precision, popularity_recall = evaluate_model(strategy='popularity')
print(f"Popularity-Based: Precision = {populariy_precision:.4f}, Recall = {popularity_recall:.4f}")

Popularity-Based: Precision = 0.0200, Recall = 0.0167
