In [None]:
# import libraries (you may add additional imports but you may not have to)
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Read the books and ratings tables; both files are parsed as
# semicolon-separated text using the latin-1 codec.
# NOTE(review): these file names differ from the BX-*.csv files that a later
# cell of this notebook downloads - confirm books.csv / ratings.csv are
# available before this cell runs on a fresh kernel.
books = pd.read_csv(
    'books.csv',
    sep=';',
    encoding='latin-1',
)
ratings = pd.read_csv(
    'ratings.csv',
    sep=';',
    encoding='latin-1',
)

# Preview the top rows of both tables (shown as a 2-tuple of frames)
books.head(), ratings.head()


In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

# Filter users with less than 200 ratings
user_counts = ratings['User-ID'].value_counts()
valid_users = user_counts[user_counts >= 200].index
ratings_filtered = ratings[ratings['User-ID'].isin(valid_users)]

# Filter books with less than 100 ratings
book_counts = ratings_filtered['ISBN'].value_counts()
valid_books = book_counts[book_counts >= 100].index
ratings_filtered = ratings_filtered[ratings_filtered['ISBN'].isin(valid_books)]

# Merge with book titles
data = ratings_filtered.merge(books, on='ISBN')

# Pivot table to create user-item matrix
user_item_matrix = data.pivot_table(index='User-ID', columns='Book-Title', values='Book-Rating')

# Fill NaNs with 0 (for items not rated by a user)
user_item_matrix = user_item_matrix.fillna(0)


In [None]:
# Load the two CSV files into dataframes.
# For each file the first line is treated as a header row (header=0) and is
# replaced by the short column names given here; only those columns are kept
# and each is read with the dtype listed in the mapping.
book_dtypes = {'isbn': 'str', 'title': 'str', 'author': 'str'}
rating_dtypes = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}

df_books = pd.read_csv(
    books_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=list(book_dtypes),
    usecols=list(book_dtypes),
    dtype=book_dtypes,
)

df_ratings = pd.read_csv(
    ratings_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=list(rating_dtypes),
    usecols=list(rating_dtypes),
    dtype=rating_dtypes,
)

In [None]:
# add your code here - consider creating a new cell for each section of code

In [None]:
# Build and fit the nearest-neighbour model that get_recommends queries.
#
# get_recommends looks up one book's column of user_item_matrix (one value per
# user) and maps the neighbour indices it gets back to book titles. Each
# sample (row) handed to the scaler and to the model must therefore be a book.
# user_item_matrix is laid out users x books, so it is transposed here;
# fitting on the untransposed matrix makes the query's feature count disagree
# with the fitted model and makes the returned indices refer to users.
X = user_item_matrix.T.values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train KNN model: 6 neighbours = the queried book itself + 5 recommendations
knn = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='cosine')
knn.fit(X_scaled)


In [None]:
def get_recommends(book_title):
    """Return the five nearest neighbours of ``book_title``.

    Relies on three module-level objects: ``user_item_matrix`` (its columns
    are book titles), ``scaler`` and ``knn``.

    NOTE(review): the query vector is a book's column of ``user_item_matrix``
    (one value per user), so ``scaler`` and ``knn`` are expected to have been
    fit on book vectors, in the same order as the matrix columns - confirm.

    Args:
        book_title: Title to look up among the columns of
            ``user_item_matrix``.

    Returns:
        A two-element list ``[book_title, neighbours]`` where ``neighbours``
        is a list of five ``[title, distance]`` pairs in the order returned
        by ``knn.kneighbors``.

    Raises:
        ValueError: If ``book_title`` is not a column of
            ``user_item_matrix``.
    """
    titles = user_item_matrix.columns
    if book_title not in titles:
        raise ValueError(f"Book '{book_title}' not found in the dataset.")

    # Extract the book's column as a single-row 2-D array and scale it
    column_position = titles.get_loc(book_title)
    ratings_vector = user_item_matrix.iloc[:, column_position].values.reshape(1, -1)
    neighbour_distances, neighbour_indices = knn.kneighbors(
        scaler.transform(ratings_vector)
    )

    # Rank 0 is skipped on the assumption that it is the queried book itself;
    # ranks 1..5 are reported.
    recommendations = [
        [titles[neighbour_indices[0][rank]], neighbour_distances[0][rank]]
        for rank in range(1, 6)
    ]
    return [book_title, recommendations]
