In [1]:
# Install the required libraries
!pip install surprise




In [5]:


# Import necessary libraries
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import KFold, cross_validate, train_test_split


In [7]:


# Load the goodbooks-10k ratings dataset (one row per user/book rating)
# The CSV is read straight from the GitHub raw URL, so this cell needs network access
data_url = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv"
df = pd.read_csv(data_url)

# Preview the first five rows to confirm the expected columns:
# user_id, book_id, rating
print(df.head())

   user_id  book_id  rating
0        1      258       5
1        2     4081       4
2        2      260       5
3        2     9296       5
4        2     2318       3


In [9]:
# Keep only the three columns Surprise needs, in the order load_from_df
# expects them: user id, item id, rating.
df = df[['user_id', 'book_id', 'rating']]

# goodbooks-10k ratings are star ratings from 1 to 5 (not the 1-10 scale of
# Book-Crossing). Surprise clips every prediction to this range, so a wrong
# upper bound lets the model emit estimates the data can never contain.
RATING_SCALE = (1, 5)
assert df['rating'].between(*RATING_SCALE).all(), "ratings fall outside RATING_SCALE"
reader = Reader(rating_scale=RATING_SCALE)

# Load dataset into Surprise format
data = Dataset.load_from_df(df, reader)

In [16]:
# Fixed seed so the split, the SVD initialisation and the CV folds (and
# therefore every metric printed below) are reproducible under
# Restart Kernel -> Run All.
SEED = 42

# Split data into training and testing sets (80% / 20%)
trainset, testset = train_test_split(data, test_size=0.2, random_state=SEED)

# Use SVD (Singular Value Decomposition) for Matrix Factorization
model = SVD(random_state=SEED)

# Train the model on the training data
model.fit(trainset)

# Make predictions on the held-out test data
predictions = model.test(testset)

# Evaluate the model performance using RMSE (prints the score)
accuracy.rmse(predictions)

# Cross-validate to check overall performance.
# A separate SVD instance is used on purpose: cross_validate() fits the
# estimator it is given on every fold (in-process with the default n_jobs=1),
# which would otherwise leave `model` trained on the last fold instead of on
# `trainset`.
cross_validate(
    SVD(random_state=SEED),
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=SEED, shuffle=True),
    verbose=True,
)

# Function to get top n recommendations for a specific user
def get_top_n_recommendations(predictions, n=10):
    """Group predictions by user and keep each user's n highest estimates.

    Args:
        predictions: iterable of 5-element records unpacked as
            (user id, item id, true rating, estimated rating, details).
        n: maximum number of (item id, estimate) pairs kept per user.

    Returns:
        A defaultdict(list) mapping each user id to a list of
        (item id, estimated rating) tuples, sorted by estimate from highest
        to lowest and truncated to n entries. Because it is a defaultdict,
        indexing an unknown user yields an empty list.
    """
    from collections import defaultdict

    # Collect every (item, estimate) pair under the user it belongs to
    per_user = defaultdict(list)
    for user, item, _actual, estimate, _details in predictions:
        per_user[user].append((item, estimate))

    # Rank each user's candidates by estimate and keep the best n.
    # Only existing keys are reassigned, so iterating the mapping is safe.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]

    return per_user

# Get top 10 recommendations for all users
top_n_recommendations = get_top_n_recommendations(predictions, n=10)

# NOTE: `predictions` comes from model.test(testset), so each user's list only
# ranks the books in that user's held-out test ratings, not the whole catalogue.

# Display recommendations for a specific user (e.g., user 1)
user_id = 1

# Use .get() rather than [...]: the mapping is a defaultdict, so indexing a
# user with no test-set predictions would silently insert an empty entry.
user_recommendations = top_n_recommendations.get(user_id, [])

print(f"\nTop 10 book recommendations for User {user_id}:\n")
if not user_recommendations:
    # Make the empty case explicit instead of printing a bare header
    print(f"No test-set predictions available for User {user_id}.")
for book_id, predicted_rating in user_recommendations:
    print(f"Book ID: {book_id}, Predicted Rating: {predicted_rating}")

RMSE: 0.8309
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8321  0.8293  0.8310  0.8305  0.8314  0.8308  0.0009  
MAE (testset)     0.6445  0.6420  0.6437  0.6430  0.6436  0.6434  0.0008  
Fit time          156.24  164.33  142.15  151.89  115.90  146.10  16.71   
Test time         28.77   26.84   31.17   29.29   20.05   27.22   3.84    

Top 10 book recommendations for User 1:

Book ID: 66, Predicted Rating: 4.16641671240337
Book ID: 177, Predicted Rating: 4.006029129308437
Book ID: 4, Predicted Rating: 3.9516763423583896
Book ID: 36, Predicted Rating: 3.855149995137722
Book ID: 94, Predicted Rating: 3.8471740318616767
Book ID: 437, Predicted Rating: 3.820615430696438
Book ID: 32, Predicted Rating: 3.7439220103386086
Book ID: 273, Predicted Rating: 3.6989624841799986
Book ID: 329, Predicted Rating: 3.687523883791537
Book ID: 140, Predicted Rating: 3.5598441659665245
