In [1]:


# Step 1: Import libraries
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Step 2: Load dataset
# Both CSVs are fetched over HTTP from the goodbooks-10k GitHub repository on
# every run, so this step needs network access.
# `ratings` is used below through its 'user_id', 'book_id' and 'rating' columns.
ratings = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv")
# `books` is used below only for its 'book_id' and 'title' columns.
books = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv")

# Step 3: Filter dataset (avoid memory issues in Colab)
# Keep only books with at least `min_book_ratings` rating rows and users with
# at least `min_user_ratings` rating rows.
min_book_ratings, min_user_ratings = 100, 50

# Rating rows per book and per user, both counted on the unfiltered table.
book_counts = ratings['book_id'].value_counts()
user_counts = ratings['user_id'].value_counts()

# Ids that meet their respective threshold.
popular_books = book_counts.loc[book_counts >= min_book_ratings].index
active_users = user_counts.loc[user_counts >= min_user_ratings].index

# A rating row survives only if both its user and its book passed.
ratings = ratings.loc[
    ratings['user_id'].isin(active_users) & ratings['book_id'].isin(popular_books)
]

# Step 4: Create the user × book rating table without densifying it
# A dense `pivot_table(...).fillna(0)` stores one float for every (user, book)
# pair, rated or not, and `pivot.T.values` then makes a second dense copy.
# Building the sparse matrix straight from the long-format ratings keeps memory
# proportional to the number of ratings instead.

# Average repeated (user, book) rows, matching pivot_table's default aggfunc='mean'.
_pair_means = ratings.groupby(['user_id', 'book_id'], as_index=False)['rating'].mean()

# sort=True yields ascending ids, i.e. the same row/column order pivot_table gives.
_user_codes, _user_ids = pd.factorize(_pair_means['user_id'], sort=True)
_book_codes, _book_ids = pd.factorize(_pair_means['book_id'], sort=True)

# users × books; pairs with no rating are implicit zeros (the old fillna(0)).
_user_book_mat = csr_matrix(
    (_pair_means['rating'].to_numpy(dtype=float), (_user_codes, _book_codes)),
    shape=(len(_user_ids), len(_book_ids)),
)

# `pivot` stays a users × books DataFrame with the same index/column labels so
# existing lookups such as `pivot.columns` keep working; its columns are now
# sparse (fill value 0) instead of dense float64.
pivot = pd.DataFrame.sparse.from_spmatrix(
    _user_book_mat,
    index=_user_ids.rename('user_id'),
    columns=_book_ids.rename('book_id'),
)

# Step 5: Train KNN on books × users
# Row i of item_user_mat is the rating vector of the book at pivot.columns[i].
item_user_mat = _user_book_mat.T.tocsr()   # transpose → books x users
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(item_user_mat)

# Step 6: Book ID → Title mapping
# One entry per row of `books`; if a book_id repeats, the last title wins.
id_to_title = {
    bid: title
    for bid, title in zip(books['book_id'], books['title'])
}

# Step 7: Recommendation function
def recommend_book(book_id, n_neighbors=6):
    """Print the titles of the books whose rating vectors are closest to ``book_id``.

    Looks up the book's row in ``item_user_mat``, asks the fitted ``model`` for
    its nearest neighbours and prints one line per neighbour. Titles come from
    ``id_to_title``; the raw id is printed when no title is known.

    Args:
        book_id: Book identifier; must be one of ``pivot.columns``.
        n_neighbors: Number of neighbours to request from the model. The
            queried book itself is never printed, so at most
            ``n_neighbors - 1`` titles appear. Values larger than the number
            of fitted books are clamped to that number.

    Returns:
        None. Prints "Book not found in dataset." and returns early when
        ``book_id`` is not a column of ``pivot``.
    """
    if book_id not in pivot.columns:
        print("Book not found in dataset.")
        return

    # Direct positional lookup; avoids rebuilding a Python list of every
    # column label on each call.
    book_idx = pivot.columns.get_loc(book_id)

    # kneighbors rejects requests for more neighbours than fitted samples.
    k = min(n_neighbors, item_user_mat.shape[0])
    _, indices = model.kneighbors(item_user_mat[book_idx], n_neighbors=k)

    # Drop the query book by position rather than assuming it is the first
    # hit: with tied distances (e.g. identical rating vectors) it may appear
    # later in the result, or not at all.
    neighbor_idxs = [i for i in indices.flatten() if i != book_idx]
    neighbor_idxs = neighbor_idxs[:n_neighbors - 1]

    print(f"\nBooks similar to '{id_to_title.get(book_id, book_id)}':")
    for i in neighbor_idxs:
        neighbor_id = pivot.columns[i]
        print("-", id_to_title.get(neighbor_id, neighbor_id))

# Step 8: Example test
# Prints the books closest to book_id 1 using the default n_neighbors=6.
# NOTE(review): the title quoted below is not checked anywhere in this file —
# confirm it against the "Books similar to ..." header that the call prints.
recommend_book(1)   # Example: "Harry Potter and the Half-Blood Prince"

KeyboardInterrupt: 