In [None]:
#so this is where we will do the coding of this project in this notebook
# now we need to import the kaggle dataset


In [3]:
#Import all the libraries
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD, accuracy
from surprise.model_selection import train_test_split

In [4]:
# Load the datasets
books_path = 'Books.csv'
ratings_path = 'Ratings.csv'
users_path = 'Users.csv'

# Reading the files
books = pd.read_csv(books_path, dtype={'Year-Of-Publication': object})
ratings = pd.read_csv(ratings_path)
users = pd.read_csv(users_path)


# Preprocessing steps (as described earlier)

# Preprocessing Books DataFrame
books['Year-Of-Publication'] = pd.to_numeric(books['Year-Of-Publication'], errors='coerce')
books['Year-Of-Publication'].fillna(0, inplace=True)
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)
books.drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis=1, inplace=True)

# Preprocessing Ratings DataFrame
ratings['Book-Rating'] = pd.to_numeric(ratings['Book-Rating'], errors='coerce')

# Preprocessing Users DataFrame
users['Age'] = pd.to_numeric(users['Age'], errors='coerce')
users['Age'].fillna(users['Age'].median(), inplace=True)
users['Age'] = users['Age'].astype(int)
users['Age'] = users['Age'].clip(10, 100)

# Surprise requires that the data be in a specific format, with user IDs, item IDs, and ratings
reader = Reader(rating_scale=(0, 10))  # Assuming rating scale is from 0 to 10
data = Dataset.load_from_df(ratings[['User-ID', 'ISBN', 'Book-Rating']], reader)

# Splitting the dataset into training and testing set
trainset, testset = train_test_split(data, test_size=0.25)

# Using SVD (Singular Value Decomposition)
svd = SVD()

# Train the algorithm on the trainset
svd.fit(trainset)

# Predict ratings for the testset
predictions = svd.test(testset)

# Calculate and print the accuracy
accuracy.rmse(predictions)

# Function to make book recommendations for a user
def recommend_books(user_id, n_books=5):
    # Check if user exists in the dataset
    if user_id not in ratings['User-ID'].unique():
        return "User not found."

    # Predict ratings for all books for this user
    user_ratings = []
    for isbn in books['ISBN']:
        user_ratings.append((isbn, svd.predict(user_id, isbn).est))

    # Sort the predictions based on estimated ratings
    user_ratings.sort(key=lambda x: x[1], reverse=True)

    # Return the top N book recommendations
    top_books_isbn = [isbn for isbn, _ in user_ratings[:n_books]]
    return books[books['ISBN'].isin(top_books_isbn)]

# Example usage
user_id = 276725  # Replace with an actual user ID from your dataset
recommended_books = recommend_books(user_id)
print(recommended_books)

RMSE: 3.4990
             ISBN                                         Book-Title  \
3028   1844262553                                               Free   
11220  0615116426  Marching Through Culpeper : A Novel of Culpepe...   
19642  0440222028                                           Simisola   
27556  8445071416                                          El Hobbit   
82880  3423071516                                  Der Kleine Hobbit   

                 Book-Author  Year-Of-Publication           Publisher  
3028            Paul Vincent                 2003  Upfront Publishing  
11220  Virginia Beard Morton                 2000      Edgehill Books  
19642           Ruth Rendell                 1996                Dell  
27556       J. R. R. Tolkien                 1991           Minotauro  
82880       J. R. R. Tolkien                 2002         Distribooks  
