In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [2]:
books = pd.read_csv('archive/Books.csv')
users = pd.read_csv('archive/Users.csv')
ratings = pd.read_csv('archive/Ratings.csv')

  books = pd.read_csv('archive/Books.csv')


In [3]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [4]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [5]:
books.duplicated().sum()

0

In [6]:
ratings.duplicated().sum()

0

In [7]:
ratings_books = pd.merge(ratings, books, on='ISBN')
ratings_books.isnull().sum()

User-ID                0
ISBN                   0
Book-Rating            0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            4
dtype: int64

In [8]:
user_prune = ratings_books.groupby('User-ID')['Book-Rating'].count() > 100
user_and_rating = user_prune[user_prune].index 

In [9]:
filtered_rating = ratings_books[ratings_books['User-ID'].isin(user_and_rating)]

rating_prune = ratings_books.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = rating_prune[rating_prune].index

final_rating = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]
final_rating.isnull().sum()

User-ID                0
ISBN                   0
Book-Rating            0
Book-Title             0
Book-Author            0
Year-Of-Publication    0
Publisher              0
Image-URL-S            0
Image-URL-M            0
Image-URL-L            0
dtype: int64

In [10]:
final_rating.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
1150,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
1157,277427,0060177721,0,Finding Moon,Tony Hillerman,1995,Harpercollins,http://images.amazon.com/images/P/0060177721.0...,http://images.amazon.com/images/P/0060177721.0...,http://images.amazon.com/images/P/0060177721.0...
1162,277427,006092988X,0,A Tree Grows in Brooklyn,Betty Smith,1998,Perennial,http://images.amazon.com/images/P/006092988X.0...,http://images.amazon.com/images/P/006092988X.0...,http://images.amazon.com/images/P/006092988X.0...
1163,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...
1164,277427,0060932139,0,The Unbearable Lightness of Being : A Novel (P...,Milan Kundera,1999,Perennial,http://images.amazon.com/images/P/0060932139.0...,http://images.amazon.com/images/P/0060932139.0...,http://images.amazon.com/images/P/0060932139.0...


In [11]:
user_item_matrix = final_rating.pivot_table(index='User-ID', columns='Book-Title', values='Book-Rating')
user_item_matrix.fillna(0, inplace=True)
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)


In [12]:
#user based
def recommend_books_collaborative(user_id, num_recommendations=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]
    similar_user_ratings = user_item_matrix.loc[similar_users]
    user_ratings = user_item_matrix.loc[user_id]
    recommended_books = similar_user_ratings.loc[:, user_ratings == 0].mean().sort_values(ascending=False).head(num_recommendations)
    return recommended_books.index.tolist()

In [13]:
#content based
books['combined_features'] = books['Book-Title'].fillna('') + " " + books['Book-Author'].fillna('') + " " + books['Publisher'].fillna('')
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books['combined_features'])
nn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6)
nn.fit(tfidf_matrix)

In [14]:
def recommend_books_content_based(book_title, num_recommendations=5):
    if book_title in books['Book-Title'].values:
        book_idx = books[books['Book-Title'] == book_title].index[0]
        distances, indices = nn.kneighbors(tfidf_matrix[book_idx], n_neighbors=num_recommendations+1)
        book_indices = indices.flatten()[1:]
        return books['Book-Title'].iloc[book_indices].tolist()
    else:
        return ["The book title '{}' does not exist in the dataset.".format(book_title)]


In [15]:
def hybrid_recommendation(user_id, book_title, num_recommendations=5):
    collaborative_recommendations = recommend_books_collaborative(user_id, num_recommendations)
    content_based_recommendations = recommend_books_content_based(book_title, num_recommendations)
    
    if isinstance(content_based_recommendations, list) and "does not exist in the dataset" in content_based_recommendations[0]:
        content_based_recommendations = recommend_books_content_based(collaborative_recommendations[0], num_recommendations)
    
    final_recommendations = list(set(collaborative_recommendations + content_based_recommendations))
    return final_recommendations[:num_recommendations]


In [16]:
user_id_example = 277427
book_title_example = "Classical Mythology"

print("Collaborative Filtering Recommendations:")
print(recommend_books_collaborative(user_id_example))

print("\nContent-Based Recommendations:")
print(recommend_books_content_based(book_title_example))

print("\nHybrid Recommendations:")
print(hybrid_recommendation(user_id_example, book_title_example))

Collaborative Filtering Recommendations:
['The Lovely Bones: A Novel', 'Harry Potter and the Chamber of Secrets (Book 2)', 'Harry Potter and the Prisoner of Azkaban (Book 3)', 'The Secret Life of Bees', "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))"]

Content-Based Recommendations:
['Classical mythology', 'Classical mythology', 'Classical Mythology', "Who's Who in Classical Mythology (Who's Who Series)", 'The Oxford Classical Dictionary']

Hybrid Recommendations:
['Harry Potter and the Chamber of Secrets (Book 2)', 'Classical mythology', 'The Oxford Classical Dictionary', 'The Secret Life of Bees', 'Harry Potter and the Prisoner of Azkaban (Book 3)']
