In [4]:
!pip install scikit-surprise

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp312-cp312-win_amd64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [6]:
from surprise import Dataset, Reader, SVD, KNNBasic, accuracy
from surprise.model_selection import train_test_split
import pandas as pd
import numpy as np

In [10]:
# Read the ratings table (users, ISBNs, ratings and book metadata) from the
# working directory, then preview its first rows.
dataset_path = './recommendation_exclusive.csv'
recommendation_df = pd.read_csv(dataset_path)
recommendation_df.head(n=5)

Unnamed: 0,User_ID,ISBN,Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Location,Age
0,276747,60517794,5,little altars everywhere,rebecca wells,2003,harpertorch,"iowa city, iowa, usa",25.0
1,276747,671537458,5,waiting to exhale,terry mcmillan,1995,pocket,"iowa city, iowa, usa",25.0
2,276822,60096195,5,the boy next door,meggin cabot,2002,avon trade,"calgary, alberta, canada",11.0
3,276822,786817070,5,"artemis fowl (artemis fowl, book 1)",eoin colfer,2002,miramax kids,"calgary, alberta, canada",11.0
4,276847,3423071516,5,der kleine hobbit,j. r. r. tolkien,2002,distribooks,"köln, nordrhein-westfalen, germany",27.0


In [12]:
# Summarize the frame: per-column dtype and non-null count, plus memory usage.
recommendation_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68889 entries, 0 to 68888
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   User_ID              68889 non-null  int64  
 1   ISBN                 68889 non-null  object 
 2   Rating               68889 non-null  int64  
 3   Book-Title           68889 non-null  object 
 4   Book-Author          68889 non-null  object 
 5   Year-Of-Publication  68889 non-null  int64  
 6   Publisher            68889 non-null  object 
 7   Location             68889 non-null  object 
 8   Age                  68889 non-null  float64
dtypes: float64(1), int64(3), object(5)
memory usage: 4.7+ MB


In [14]:
# Count distinct users and distinct books (by ISBN) present in the ratings table
num_users, num_books = (
    recommendation_df[column].nunique() for column in ('User_ID', 'ISBN')
)

print(f"\nTotal Number of Users: {num_users}")
print(f"Total Number of Books: {num_books}\n")


Total Number of Users: 9875
Total Number of Books: 3027



In [16]:
# Wrap the (user, item, rating) triples in a Surprise dataset; the rating scale
# is taken from the smallest and largest rating observed in the data.
rating_column = recommendation_df['Rating']
reader = Reader(rating_scale=(rating_column.min(), rating_column.max()))

interaction_columns = ['User_ID', 'ISBN', 'Rating']
data = Dataset.load_from_df(recommendation_df[interaction_columns], reader)

# Hold out 20% of the ratings for evaluation (fixed seed for a repeatable split)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Configure the three recommenders first, then fit them on the same trainset.

# 1. Matrix factorization (SVD) with 20 latent factors
svd = SVD(n_factors=20, lr_all=0.005, random_state=42)

# 2. User-based k-NN: Pearson similarity between users, k=20
user_similarity_options = {'name': 'pearson', 'user_based': True}
user_based_cf = KNNBasic(sim_options=user_similarity_options, k=20)

# 3. Item-based k-NN: Pearson similarity between items, k=20
item_similarity_options = {'name': 'pearson', 'user_based': False}
item_based_cf = KNNBasic(sim_options=item_similarity_options, k=20)

# Fit in the same order as the models are listed above
for algorithm in (svd, user_based_cf, item_based_cf):
    algorithm.fit(trainset)

In [None]:
# RMSE evaluation of each fitted model on the held-out test set.
# accuracy.rmse() prints an unlabelled "RMSE: ..." line by default; verbose=False
# suppresses it so each score is reported exactly once, with its model label.

# SVD Model RMSE
svd_predictions = svd.test(testset)
svd_rmse = accuracy.rmse(svd_predictions, verbose=False)
print(f"SVD Model RMSE: {svd_rmse}\n")

# User-Based Collaborative Filtering Model RMSE
user_based_predictions = user_based_cf.test(testset)
user_based_rmse = accuracy.rmse(user_based_predictions, verbose=False)
print(f"User-Based CF Model RMSE: {user_based_rmse}\n")

# Item-Based Collaborative Filtering Model RMSE
item_based_predictions = item_based_cf.test(testset)
item_based_rmse = accuracy.rmse(item_based_predictions, verbose=False)
print(f"Item-Based CF Model RMSE: {item_based_rmse}\n")

In [None]:
# Function to find the top-N nearest neighbors for a specific user
def get_top_neighbors(user_inner_id, num_neighbors=10, similarity_matrix=None):
    """Return the inner ids of the users most similar to ``user_inner_id``.

    Parameters
    ----------
    user_inner_id : int
        Row index of the user in the similarity matrix (an inner id, not a raw id).
    num_neighbors : int, default 10
        Maximum number of neighbors to return. Non-positive values yield an
        empty result.
    similarity_matrix : 2-D array-like, optional
        Square user-user similarity matrix. Defaults to ``user_based_cf.sim``,
        the matrix of the notebook's fitted user-based model.

    Returns
    -------
    numpy.ndarray
        Inner ids ordered from most to least similar, never containing
        ``user_inner_id`` itself. Ties keep ascending inner-id order.
    """
    if similarity_matrix is None:
        similarity_matrix = user_based_cf.sim

    # Similarities of the requested user with every user (including itself)
    user_similarities = np.asarray(similarity_matrix[user_inner_id], dtype=float)

    # Stable descending sort. Negating keeps NaN entries at the end.
    ranked_ids = np.argsort(-user_similarities, kind="stable")

    # Drop the user explicitly: other users can tie with the self-similarity
    # (e.g. Pearson == 1.0), so the user is not guaranteed to sort first.
    ranked_ids = ranked_ids[ranked_ids != user_inner_id]

    return ranked_ids[:max(num_neighbors, 0)]

In [None]:
# Raw (dataset) id of the user whose neighborhood we want to inspect
sample_user_id = 99

# The model works with inner ids, so translate the raw id first
sample_user_inner_id = trainset.to_inner_uid(sample_user_id)

# Look up the 10 most similar users and translate them back to raw ids
top_neighbors = get_top_neighbors(sample_user_inner_id, num_neighbors=10)
neighbor_raw_ids = [trainset.to_raw_uid(inner_id) for inner_id in top_neighbors]

print(f"\nTop 10 neighbors for User {sample_user_id}:")
for neighbor_raw_id in neighbor_raw_ids:
    print(f"Neighbor User ID: {neighbor_raw_id}")

# Recommendation function
def recommend_books(user_id, model, num_recommendations=5, ratings_df=None):
    """Recommend the highest-predicted books that ``user_id`` has not rated yet.

    Parameters
    ----------
    user_id
        User identifier as it appears in the ``User_ID`` column; it is passed
        to ``model.predict`` unchanged.
    model
        Fitted model exposing ``predict(user_id, isbn)`` whose result has
        ``est`` (predicted rating) and ``iid`` (the ISBN) attributes.
    num_recommendations : int, default 5
        Maximum number of books to return. Non-positive values yield ``[]``.
    ratings_df : pandas.DataFrame, optional
        Table with ``User_ID``, ``ISBN`` and ``Book-Title`` columns. Defaults to
        the notebook-level ``recommendation_df``.

    Returns
    -------
    list of (title, predicted_rating) tuples, best prediction first. Books with
    equal predictions keep their order of first appearance in ``ratings_df``.
    """
    if ratings_df is None:
        ratings_df = recommendation_df
    if num_recommendations <= 0:
        return []

    # Set membership is O(1); a list would be rescanned for every candidate book
    rated_isbns = set(ratings_df.loc[ratings_df['User_ID'] == user_id, 'ISBN'])

    # One pass builds ISBN -> first-seen title, instead of filtering the whole
    # frame once per recommended book. Dict order is first-appearance order.
    title_by_isbn = {}
    for isbn, title in zip(ratings_df['ISBN'], ratings_df['Book-Title']):
        title_by_isbn.setdefault(isbn, title)

    # Predict ratings for books the user hasn't rated yet
    books_to_predict = [isbn for isbn in title_by_isbn if isbn not in rated_isbns]
    predictions = [model.predict(user_id, isbn) for isbn in books_to_predict]
    predictions.sort(key=lambda pred: pred.est, reverse=True)  # stable sort

    # Keep the top books, skipping any prediction whose ISBN has no known title
    return [
        (title_by_isbn[pred.iid], pred.est)
        for pred in predictions[:num_recommendations]
        if pred.iid in title_by_isbn
    ]

# Test the recommendation system for the specific user: print the top-5
# recommendations from each fitted model. One loop replaces three copy-pasted
# stanzas; the printed output is unchanged.
recommendations_by_model = {}
for model_label, fitted_model in (
    ("SVD", svd),
    ("User-Based CF", user_based_cf),
    ("Item-Based CF", item_based_cf),
):
    print(f"\n{model_label} Recommendations for User {sample_user_id}:")
    model_recommendations = recommend_books(sample_user_id, fitted_model, num_recommendations=5)
    recommendations_by_model[model_label] = model_recommendations
    for title, predicted_rating in model_recommendations:
        print(f"Title: {title}, Predicted Rating: {predicted_rating:.2f}")

# Keep the per-model result names available for any later cells
svd_recommendations = recommendations_by_model["SVD"]
user_based_recommendations = recommendations_by_model["User-Based CF"]
item_based_recommendations = recommendations_by_model["Item-Based CF"]
