In [10]:
import pandas as pd

# Read the three Book-Crossing CSV exports. All of them are ';'-delimited and
# Latin-1 encoded; rows the parser cannot split are skipped instead of raising.
books = pd.read_csv(
    'BX_Books.csv',
    encoding='latin-1',
    sep=';',
    on_bad_lines='skip',
)
users = pd.read_csv(
    'BX-Users.csv',
    encoding='latin-1',
    sep=';',
    on_bad_lines='skip',
)
ratings = pd.read_csv(
    'BX-Book-Ratings.csv',
    encoding='latin-1',
    sep=';',
    on_bad_lines='skip',
)

# Report (rows, columns) for every raw table
print("Books:", books.shape)
print("Users:", users.shape)
print("Ratings:", ratings.shape)

# Step 1: keep only the ratings written by users with 200 or more ratings.
# `active_users` ends up as the Index of qualifying User-IDs.
active_users = (
    ratings['User-ID']
    .value_counts()
    .loc[lambda counts: counts >= 200]
    .index
)
ratings_filtered = ratings.loc[ratings['User-ID'].isin(active_users)]

# Step 2: among those ratings, keep only books rated 100 or more times.
# The counts are taken AFTER the user filter, so only active users count.
popular_books = (
    ratings_filtered['ISBN']
    .value_counts()
    .loc[lambda counts: counts >= 100]
    .index
)
ratings_filtered = ratings_filtered.loc[ratings_filtered['ISBN'].isin(popular_books)]

# Size of the ratings table once both filters have been applied
print("Filtered Ratings:", ratings_filtered.shape)

# Reshape to one row per ISBN and one column per User-ID (pivot_table averages
# duplicate pairs by default), then use 0 for every book/user pair that has
# no rating.
book_user_matrix = (
    ratings_filtered
    .pivot_table(values='Book-Rating', index='ISBN', columns='User-ID')
    .fillna(0)
)

# (number of books, number of users)
print("Book-User Matrix shape:", book_user_matrix.shape)







Books: (271379, 8)
Users: (278858, 3)
Ratings: (1149780, 3)
Filtered Ratings: (13793, 3)
Book-User Matrix shape: (100, 857)


In [14]:
pip install scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [16]:
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Store the dense book-by-user rating table (rows = books) in CSR form.
book_user_sparse_matrix = csr_matrix(book_user_matrix.to_numpy())

# Brute-force nearest-neighbour search using cosine distance between book rows.
# fit() returns the estimator itself, so construction and fitting are chained.
model = NearestNeighbors(algorithm='brute', metric='cosine').fit(book_user_sparse_matrix)

print("✅ KNN model trained successfully!")


✅ KNN model trained successfully!


In [21]:
def get_recommends(book_title, model=model, data=book_user_matrix, n_recommendations=5):
    """Return the nearest neighbours of one book in the rating matrix.

    Parameters
    ----------
    book_title
        Label of the book to look up in ``data.index``. Despite the parameter
        name, the default matrix is indexed by ISBN, so an ISBN string is what
        matches there.
    model
        Fitted estimator exposing ``kneighbors(X, n_neighbors=...)``.
        Defaults to the module-level ``model``.
    data
        DataFrame with one row per book, whose index holds the book labels.
        Assumed to be the same matrix ``model`` was fitted on, so that the
        row positions returned by ``kneighbors`` map back onto ``data.index``.
    n_recommendations
        Maximum number of neighbours to return (default 5).

    Returns
    -------
    list
        ``[book_title, [[label, distance], ...]]`` ordered as returned by
        ``kneighbors``. The inner list is empty when ``book_title`` is not in
        ``data.index``, and is shorter than ``n_recommendations`` when the
        matrix does not contain enough other books.
    """
    # Unknown label: same shape as a normal answer, with no recommendations.
    if book_title not in data.index:
        return [book_title, []]

    # Row position of the book; assumes the index labels are unique
    # (get_loc returns a slice/mask otherwise).
    book_index = data.index.get_loc(book_title)

    # Ask for one extra neighbour because the query book is normally among its
    # own neighbours, but never for more rows than the matrix has (kneighbors
    # rejects n_neighbors larger than the number of fitted samples).
    n_neighbors = min(n_recommendations + 1, data.shape[0])
    query = data.iloc[book_index, :].values.reshape(1, -1)
    distances, indices = model.kneighbors(query, n_neighbors=n_neighbors)

    # Drop the query book by row position instead of assuming it is result #0:
    # when distances tie (e.g. a duplicate or all-zero rating row) another book
    # can occupy the first slot, and blindly skipping it would lose a real
    # neighbour while returning the query book itself.
    recommended_books = [
        [data.index[neighbor_pos], dist]
        for neighbor_pos, dist in zip(indices[0], distances[0])
        if neighbor_pos != book_index
    ][:n_recommendations]

    return [book_title, recommended_books]


In [23]:
# Preview the labels of the first 20 rows of the book-user matrix (ISBNs).
book_user_matrix.head(20).index.tolist()


['0060502258',
 '0060928336',
 '0060930535',
 '0060934417',
 '0060976845',
 '0060987103',
 '0061009059',
 '006101351X',
 '014028009X',
 '0140293248',
 '0142001740',
 '0156027321',
 '0312195516',
 '0312278586',
 '0312924585',
 '0312966091',
 '0312983271',
 '0316096199',
 '0316284955',
 '0316601950']

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/BX-Users.csv'