In [5]:
from base import *  # Import predefined functions to maintain modularity
from sklearn.model_selection import train_test_split
import pandas as pd

# Read the preprocessed ratings file into a DataFrame
data_path = 'ProcessedData.csv'
data = pd.read_csv(data_path, encoding='utf-8')

# Down-sample through the `sampling_data` helper (star-imported from `base`);
# per the original author's note this is done to reduce memory usage
sampled_data = sampling_data(data)
print(
    "Sampled Data:",
    sampled_data,
    '-----------------------------------------------------------',
    sep="\n",
)

# Hold out 20% of the sampled rows for testing; the fixed seed keeps the
# split reproducible between runs
train_data, test_data = train_test_split(sampled_data, random_state=42, test_size=0.2)
print(
    "Train Data:",
    train_data,
    '-----------------------------------------------------------',
    sep="\n",
)
print(
    "Test Data:",
    test_data,
    '-----------------------------------------------------------',
    sep="\n",
)

# Collaborative Filtering - build the user x title rating matrix from the
# training rows only; (user, title) pairs without a rating become 0
ratings = train_data[['User-ID', 'Title', 'Rating']]
user_book_matrix = pd.pivot_table(
    ratings,
    index='User-ID',
    columns='Title',
    values='Rating',
).fillna(0)

# Use the third row of the held-out set as the demo user and author.
# NOTE(review): this user is taken from test_data, while user_book_matrix is
# built from train_data only; nothing here checks that the user has a row in
# the matrix - confirm recommend_books_euclidean copes with an unseen user.
test_user_id, test_author = (
    test_data[column].iloc[2] for column in ('User-ID', 'Author')
)

# --- Collaborative filtering (Euclidean distance) ---
# The header is printed before the call so that anything the helper itself
# prints still lands underneath it.
print('Collaborative filtering result using Euclidean Distance:')
euclidean_recommendations_test = recommend_books_euclidean(
    user_book_matrix,
    user_id=test_user_id,
)
print("Recommendations (Euclidean Distance):")
for title in euclidean_recommendations_test:
    print(title)

# --- Content-based filtering ---
# One call closes the previous section and opens this one.
print(
    '-----------------------------------------------------------',
    'Content-based filtering result:',
    sep="\n",
)
content_based_test = content_based_filtering(test_author, sampled_data)
print("Recommendations (Content-Based):")
for title in content_based_test:
    print(title)
print('-----------------------------------------------------------')

# Hybrid Filtering
def hybrid_filtering(author, user_id, processed_data, user_book_matrix, n_recommendations=5,
                     *, content_based_weight=0.5, collaborative_weight=0.5):
    """
    Blend content-based and collaborative recommendations into one ranked list.

    Both recommenders are asked for ``n_recommendations`` items. Every item
    earns rank points from each list it appears in (``n_recommendations``
    points for first place, counting down to 1), multiplied by that list's
    weight. Points are summed across the two lists, so an item suggested by
    both methods outranks one suggested by only one of them.

    Args:
        author: Forwarded to ``content_based_filtering`` as its first argument.
        user_id: Forwarded to ``recommend_books_euclidean`` as the target user.
        processed_data: Dataset forwarded to ``content_based_filtering``.
        user_book_matrix: Matrix forwarded to ``recommend_books_euclidean``.
        n_recommendations: Number of items requested from each recommender and
            the maximum number of items returned.
        content_based_weight: Multiplier applied to the content-based rank
            points (keyword-only, default 0.5).
        collaborative_weight: Multiplier applied to the collaborative rank
            points (keyword-only, default 0.5).

    Returns:
        A list of at most ``n_recommendations`` items, highest blended score
        first. Items with equal scores keep first-seen order, content-based
        results before collaborative ones.
    """
    # Get recommendations from content-based and collaborative methods
    content_based_recs = content_based_filtering(author, processed_data, n_recommendations)
    collaborative_recs = recommend_books_euclidean(user_book_matrix, user_id, n_recommendations)

    weighted_sources = (
        (content_based_recs, content_based_weight),
        (collaborative_recs, collaborative_weight),
    )

    scores = {}
    for recs, weight in weighted_sources:
        # Pairing with a descending range caps each list at n_recommendations
        # entries and keeps the rank points positive; without the cap, surplus
        # items would earn zero or negative points and drag down the score of
        # a title that the other list ranked highly.
        for points, book in zip(range(n_recommendations, 0, -1), recs):
            scores[book] = scores.get(book, 0) + points * weight

    # sorted() is stable, so ties keep the dict's insertion order
    ranked_books = sorted(scores, key=scores.get, reverse=True)
    return ranked_books[:max(n_recommendations, 0)]

# Run the hybrid recommender for the same demo author/user and list its picks
print('Hybrid filtering result:')
hybrid_filtering_test = hybrid_filtering(
    author=test_author,
    user_id=test_user_id,
    processed_data=sampled_data,
    user_book_matrix=user_book_matrix,
)
print("Recommendations (Hybrid):")
for title in hybrid_filtering_test:
    print(title)
print('-----------------------------------------------------------')

Sampled Data:
         User-ID        ISBN  Rating  \
1150      277427  002542730X      10   
1168      277427  0061009059       9   
1215      277427  0316776963       8   
1235      277427  0345413903      10   
1256      277427  0380702843       8   
...          ...         ...     ...   
1030970   276680  0452283205       7   
1030997   276680  0743203631       7   
1031009   276680  0743486226       6   
1031034   276680  1573229083       7   
1031042   276680  1931561648       9   

                                                     Title  \
1150     Politically Correct Bedtime Stories: Modern Ta...   
1168     One for the Money (Stephanie Plum Novels (Pape...   
1215                                Me Talk Pretty One Day   
1235                                       The Murder Book   
1256     The Return of the Indian (Indian in the Cupboard)   
...                                                    ...   
1030970                                     Falling Angels   
1030997  