In [None]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors


In [None]:
# Load the datasets.
# Only the first three columns of each file are used and they are renamed
# explicitly; skiprows=1 drops the files' own header row.
# ISBNs are identifiers, not numbers, so they are forced to `str`: with
# inferred dtypes a purely numeric ISBN can be parsed as an integer (losing
# leading zeros) and chunked parsing can leave the column with mixed types,
# which breaks ISBN matching between `books` and `ratings`.
books = pd.read_csv(
    'BX-Books.csv',
    sep=';',
    encoding='latin-1',
    usecols=[0, 1, 2],
    names=['ISBN', 'Title', 'Author'],
    skiprows=1,
    dtype={'ISBN': str},
)
ratings = pd.read_csv(
    'BX-Ratings.csv',
    sep=';',
    encoding='latin-1',
    usecols=[0, 1, 2],
    names=['UserID', 'ISBN', 'Rating'],
    skiprows=1,
    dtype={'ISBN': str},
)

# Display the first few rows of the books and ratings data
print(books.head())
print(ratings.head())


In [None]:
# Activity thresholds used to prune the ratings before building the matrix.
MIN_USER_RATINGS = 200
MIN_BOOK_RATINGS = 100

# Count ratings for each user and each book
user_counts = ratings['UserID'].value_counts()
book_counts = ratings['ISBN'].value_counts()

# Keep only users with at least MIN_USER_RATINGS ratings
filtered_users = user_counts[user_counts >= MIN_USER_RATINGS].index
# Keep only books with at least MIN_BOOK_RATINGS ratings
filtered_books = book_counts[book_counts >= MIN_BOOK_RATINGS].index

# Restrict the ratings to rows where both the user and the book survived.
# DataFrame.pivot raises ValueError when an (index, column) pair occurs more
# than once, so repeated (UserID, ISBN) rows are collapsed to the last one.
filtered_ratings = ratings[
    ratings['UserID'].isin(filtered_users) & ratings['ISBN'].isin(filtered_books)
].drop_duplicates(subset=['UserID', 'ISBN'], keep='last')

# Build the user x book matrix; a missing rating is stored as 0
ratings_matrix = filtered_ratings.pivot(index='UserID', columns='ISBN', values='Rating').fillna(0)

# Display the shape of the ratings matrix: (users, books)
print(ratings_matrix.shape)


In [None]:
# Build the nearest-neighbour index over books using cosine distance.
# The ratings matrix is transposed so that each row handed to the model is
# one book's vector of ratings; six neighbours are requested per query.
knn = NearestNeighbors(metric='cosine', n_neighbors=6).fit(ratings_matrix.T)


In [None]:
def get_recommends(book_title):
    """Recommend the books whose rating vectors are closest to ``book_title``.

    Relies on the notebook-level ``ratings_matrix`` (users x books), ``books``
    (ISBN/Title catalogue) and the fitted ``knn`` model.

    Args:
        book_title: Title of the query book. A value that is already a column
            label of ``ratings_matrix`` is accepted as-is.

    Returns:
        ``[book_title, recommendations]`` where ``recommendations`` is a list
        of ``[title, distance]`` pairs in the order produced by
        ``knn.kneighbors``. A neighbour that has no title in ``books`` is
        reported by its column label instead.

    Raises:
        KeyError: If the book cannot be matched to any column of
            ``ratings_matrix``.
    """
    columns = ratings_matrix.columns

    # Resolve the query to a column label of the ratings matrix. The matrix is
    # keyed by ISBN, so a title has to be translated through `books` first.
    if book_title in columns:
        query_label = book_title
    else:
        same_title = books.loc[books['Title'] == book_title, 'ISBN']
        known = [isbn for isbn in same_title if isbn in columns]
        if not known:
            raise KeyError(f"No book in the ratings matrix matches {book_title!r}")
        # Several editions can share a title; use the first one present.
        query_label = known[0]

    position = columns.get_loc(query_label)

    # Select the book's column by *position* and turn it into a single row, so
    # the query has the same layout (one book x all users) the model was fit on.
    query_vector = ratings_matrix.iloc[:, [position]].T
    distances, indices = knn.kneighbors(query_vector)

    # Drop the query book by position rather than assuming it sits in slot 0
    # (books with identical vectors tie at distance 0), then keep at most
    # n_neighbors - 1 results so the list length is stable either way.
    neighbours = [
        (int(pos), float(dist))
        for pos, dist in zip(indices[0], distances[0])
        if pos != position
    ][:indices.shape[1] - 1]

    # Translate neighbour labels (ISBNs) back into titles where possible.
    labels = [columns[pos] for pos, _ in neighbours]
    matches = books.loc[books['ISBN'].isin(labels), ['ISBN', 'Title']].dropna(subset=['Title'])
    title_of = dict(zip(matches['ISBN'], matches['Title']))

    recommendations = [
        [title_of.get(label, label), dist]
        for label, (_, dist) in zip(labels, neighbours)
    ]

    return [book_title, recommendations]


In [None]:
# Try the recommender on a single title and show what it returns
query_title = "The Queen of the Damned (Vampire Chronicles (Paperback))"
result = get_recommends(query_title)
print(result)
