In [1]:
#This Entire Notebook will be used for all the code so make sure you have all the packages installed otherwise you wont be able to run this notebook

In [21]:
#Import all the libraries
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
from surprise import Dataset, Reader
from surprise import SVD, accuracy
from surprise.model_selection import train_test_split
from difflib import SequenceMatcher

In [6]:
def collabrative_evaluation(algorithm, test):
    predictions = algorithm.test(test)
    RMSE_prediction = accuracy.rmse(predictions, verbose=False)
    print("RMSE Predictions = %f" % RMSE_prediction)
    
def content_evaluation():
    user_preferences = {
    "favorite_authors": ["Agatha Christie", "Stephen King"],
    "favorite_books": ["Murder on the Orient Express"]
    }
    recommended_books = content_based_recommendations(user_preferences)
    print(recommended_books)
    

In [7]:
# Load the datasets
books_path = '../Dataset/Books.csv'
ratings_path = '../Dataset/Ratings.csv'
users_path = '../Dataset/Users.csv'

# Reading the files
books = pd.read_csv(books_path, dtype={'Year-Of-Publication': object})
ratings = pd.read_csv(ratings_path)
users = pd.read_csv(users_path)


In [8]:
# Preprocessing steps (as described earlier)

# Preprocessing Books DataFrame
books['Year-Of-Publication'] = pd.to_numeric(books['Year-Of-Publication'], errors='coerce')
books['Year-Of-Publication'].fillna(0, inplace=True)
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)
books.drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis=1, inplace=True)

# Preprocessing Ratings DataFrame
ratings['Book-Rating'] = pd.to_numeric(ratings['Book-Rating'], errors='coerce')

# Preprocessing Users DataFrame
users['Age'] = pd.to_numeric(users['Age'], errors='coerce')
users['Age'].fillna(users['Age'].median(), inplace=True)
users['Age'] = users['Age'].astype(int)
users['Age'] = users['Age'].clip(10, 100)

In [9]:
# Collaborative Filtering Model
reader = Reader(rating_scale=(1, 10))  # Adjust the scale according to your dataset
data = Dataset.load_from_df(ratings[['User-ID', 'ISBN', 'Book-Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.25)
model = SVD()
model.fit(trainset)
collabrative_evaluation(model, testset)

RMSE Predictions = 3.494095


In [23]:
def similarity_ratio(str1, str2):
    return SequenceMatcher(None, str1.lower(), str2.lower()).ratio()

In [56]:
# Content-Based Recommendation Function
def content_based_recommendations(user_preferences, n_books=5):
    filtered_books = books.copy()
    filtered_books.reindex()

    # Extract preferences from the user_preferences dictionary
    favorite_authors = user_preferences.get('favorite_authors', [])
    favorite_books = user_preferences.get('favorite_books', [])
    recommendations = pd.DataFrame()
    author_recommendation = pd.DataFrame()
    # Filter by favorite authors if provided
    if favorite_authors:
        author_recommendation = filtered_books[filtered_books['Book-Author'].isin(favorite_authors)]
    
    # Filter by similarity to favorite books if provided
    if favorite_books:
        # For simplicity, filtering by titles.
        filtered_books['Similarity']= filtered_books.apply(lambda row: similarity_ratio(row["Book-Title"], favorite_books[0]), axis=1)
        filtered_books = filtered_books.sort_values(by='Similarity', ascending=False)
        recommendations = pd.concat([recommendations, filtered_books.drop('Similarity', axis=1).head(2)],ignore_index=True)
        recommendations = pd.concat([recommendations,author_recommendations], ignore_index=True)
        #filtered_books = filtered_books[filtered_books['Book-Title'].isin(favorite_books)]

    # Recommend the top N books based on this filtering
    return recommendations.head(n_books)

In [11]:
# Hybrid Recommendation Function
def hybrid_recommendations(user_id, user_preferences, n_books=5):
    user_ratings = ratings[ratings['User-ID'] == user_id]
    
    # Check if the user has rated enough books
    if len(user_ratings) < 5:  # Threshold can be adjusted
        return content_based_recommendations(user_preferences, n_books)
    else:
        # Use collaborative filtering
        # Generate recommendations based on the model
        user_recommendations = []
        for isbn in books['ISBN']:
            user_recommendations.append((isbn, model.predict(user_id, isbn).est))

        # Sort and return the top recommendations
        user_recommendations.sort(key=lambda x: x[1], reverse=True)
        top_books_isbn = [isbn for isbn, _ in user_recommendations[:n_books]]
        return books[books['ISBN'].isin(top_books_isbn)]


In [57]:
#Testing of Content Based
content_evaluation()

KeyError: "['Similarity'] not found in axis"

In [20]:
# Example usage
user_id = 12345  # Replace with a user ID
user_preferences = {
    "favorite_authors": ["J.K. Rowling", "Isaac Asimov"],
    "favorite_books": ["Harry Potter and the Sorcerer's Stone"]
}
recommended_books = hybrid_recommendations(user_id, user_preferences)
print(recommended_books)

              ISBN                             Book-Title   Book-Author  \
259611  0939173344  Harry Potter and the Sorcerer's Stone  J.K. Rowling   

        Year-Of-Publication                     Publisher  
259611                 1999  National Braille Press, Inc.  
