In [10]:
#This Entire Notebook will be used for all the code so make sure you have all the packages installed otherwise you wont be able to run this notebook

In [11]:
#Import all the libraries
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD  
from sklearn.model_selection import train_test_split
from difflib import SequenceMatcher

In [12]:
# Load the datasets
books_path = '../Dataset/Books.csv'
ratings_path = '../Dataset/Ratings.csv'
users_path = '../Dataset/Users.csv'

# Reading the files
books = pd.read_csv(books_path, dtype={'Year-Of-Publication': object})
ratings = pd.read_csv(ratings_path)
users = pd.read_csv(users_path)


In [13]:
# Preprocessing steps (as described earlier)

# Preprocessing Books DataFrame
books['Year-Of-Publication'] = pd.to_numeric(books['Year-Of-Publication'], errors='coerce')
books['Year-Of-Publication'].fillna(0, inplace=True)
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)
books.drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis=1, inplace=True)

# Preprocessing Ratings DataFrame
ratings['Book-Rating'] = pd.to_numeric(ratings['Book-Rating'], errors='coerce')

# Preprocessing Users DataFrame
users['Age'] = pd.to_numeric(users['Age'], errors='coerce')
users['Age'].fillna(users['Age'].median(), inplace=True)
users['Age'] = users['Age'].astype(int)
users['Age'] = users['Age'].clip(10, 100)

In [14]:
def similarity_ratio(str1, str2):
    return difflib.SequenceMatcher(None, str1, str2).ratio()


In [15]:
def content_based_recommendations(user_preferences, books, n_recs=5):
    
    # Filter books by favorite authors 
    favorite_authors = user_preferences.get('favorite_authors', [])
    auth_books = books[books['Book-Author'].isin(favorite_authors)]
    
    # Filter books similar to user's favorite books
    favorite_books = user_preferences.get('favorite_books', [])
    similar_books = []
    for book in favorite_books:
        similarities = books.apply(lambda x: similarity_ratio(x['Book-Title'], book), axis=1)
        similar_book = books.loc[similarities.idxmax()] 
        similar_books.append(similar_book)
        
    # Combine the filtered data frames and return top books
    recs = pd.concat([auth_books, pd.DataFrame(similar_books)], ignore_index=True) 
    return recs[:n_recs]

In [16]:
def collab_recommendations(user_id, ratings, books, n_recs=5):

    # Compute cosine similarity between users 
    user_ratings = ratings[ratings['User-ID'] == user_id]
    other_ratings = ratings[ratings['User-ID'] != user_id]
    user_book_matrix = pivot_ratings(user_ratings)
    other_book_matrix = pivot_ratings(other_ratings)
    similarities = cosine_similarity(user_book_matrix, other_book_matrix)
    
    # Find most similar users
    similar_users = np.argsort(similarities)[-1:-6:-1]  
    
    # Get top book recommendations from similar users
    top_books = {}
    for user in similar_users:
        other_user_books = other_book_matrix.iloc[user] 
        for i, rating in other_user_books.iteritems():
            if i not in user_book_matrix.columns:
                if i not in top_books or top_books[i] < rating:
                    top_books[i] = rating
                    
    # Return top books
    return books.loc[list(top_books.keys())][:n_recs]

In [17]:
def hybrid_recommendations(user_id, user_prefs, ratings, books, n=5):

    user_ratings = ratings[ratings['User-ID'] == user_id]
    
    if len(user_ratings) >= 10:
       cf_recs = collab_recommendations(user_id, ratings, books, n//2)
       cb_recs = content_based_recommendations(user_prefs, books, n//2)  
       return pd.concat([cf_recs, cb_recs]).head(n)
       
    elif len(user_ratings) >= 5: 
       # Put more weight on collaborative filtering
       cf_recs = collab_recommendations(user_id, ratings, books, n*2//3)  
       cb_recs = content_based_recommendations(user_prefs, books, n//3)
       return pd.concat([cf_recs, cb_recs]).head(n)
       
    else:
       # Only content-based filtering
       return content_based_recommendations(user_prefs, books, n)

In [18]:
def evaluate_cf(model_predictions, actual_ratings):
    
    # Calculate RMSE
    mse = mean_squared_error(actual_ratings, model_predictions)  
    rmse = np.sqrt(mse)
    
    print(f'Collaborative Filtering RMSE: {rmse:.3f}')


In [19]:
# Generate 5 predictions for user 123
predictions = collab_recommendations(123, ratings, books, 5) 

# Get actual ratings for those books by this user 
user_123_ratings = ratings[ratings['User-ID'] == 123]
actual = list(user_123_ratings.iloc[:5]['Book-Rating'])
predicted = [pred_rating for _, pred_rating in predictions]

# Evaluate
evaluate_cf(predicted, actual)

NameError: name 'pivot_ratings' is not defined