In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load datasets. Row counts are capped with ``nrows`` to bound memory use;
# only the book columns used for recommendations/display are read.
books_df = pd.read_csv(
    '/content/Books.csv',
    nrows=30000,
    usecols=['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher'],
)
users_df = pd.read_csv('/content/Users.csv', nrows=5000)
ratings_df = pd.read_csv('/content/Ratings.csv', nrows=50000)

# Preprocess data: replace missing text fields with empty strings so the
# vectorizer (and any later string handling) never sees NaN.
for _text_column in ('Book-Title', 'Book-Author', 'Publisher'):
    books_df[_text_column] = books_df[_text_column].fillna('')

# Compute the TF-IDF matrix over book titles (English stop words removed).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books_df['Book-Title'])

# Compute cosine similarity. TfidfVectorizer L2-normalises rows by default,
# so the plain dot product computed by linear_kernel equals cosine similarity.
# NOTE: the result is a dense n x n array; for the full 30,000 rows in float64
# that is roughly 7 GB, so lower ``nrows`` above if memory is tight.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create a mapping from book title to its row index in books_df.
# read_csv without index_col yields a 0..n-1 index, so these values are also
# valid row positions into cosine_sim and books_df.iloc.
title_to_idx = pd.Series(books_df.index, index=books_df['Book-Title'])
# De-duplicate on the *title* (the Series index), keeping the first occurrence.
# Series.drop_duplicates() would compare the values (the row indices), which
# are already unique, and therefore would not remove repeated titles.
title_to_idx = title_to_idx[~title_to_idx.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the five books whose titles are most similar to ``title``.

    Args:
        title: Exact ``Book-Title`` value to look up in ``title_to_idx``.
        cosine_sim: Square similarity matrix whose row order matches the rows
            of ``books_df``. Defaults to the module-level matrix that existed
            when this function was defined.

    Returns:
        A DataFrame with up to five rows of ``books_df``, ordered from most to
        least similar. The looked-up book itself is never included.

    Raises:
        KeyError: If ``title`` is not present in ``title_to_idx``.
    """
    match = title_to_idx[title]

    # A title shared by several books makes the lookup return a Series of
    # positions rather than a scalar; use the first occurrence in that case.
    if isinstance(match, pd.Series):
        idx = int(match.iloc[0])
    else:
        idx = int(match)

    # Similarity of the chosen book to every book, as a flat 1-D array.
    scores = np.asarray(cosine_sim[idx]).ravel()

    # Stable descending sort: equal scores keep ascending row order, which is
    # the same tie-breaking as sorted(..., reverse=True).
    ranked = np.argsort(-scores, kind='stable')

    # Remove the query book by position instead of assuming it sorts first;
    # ties (identical titles, all-zero TF-IDF rows) can put another row ahead.
    ranked = ranked[ranked != idx]

    # Return the top 5 most similar books
    return books_df.iloc[ranked[:5].tolist()]



# Test the recommendation system.
# ``title`` must exactly match a 'Book-Title' value in the loaded catalogue;
# the empty string only matches if some book had a missing title.
title = ""
try:
    recommendations = get_recommendations(title)
except KeyError:
    # get_recommendations raises KeyError for titles absent from the index;
    # report that instead of ending the cell with a traceback.
    recommendations = None
    print(f"No book titled {title!r} was found in the loaded catalogue.")
else:
    print(recommendations)

FileNotFoundError: ignored