In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from difflib import get_close_matches

In [2]:
# Read the two input tables from CSV files in the working directory:
# book metadata and the individual user ratings.
books = pd.read_csv(filepath_or_buffer='books.csv')
ratings = pd.read_csv(filepath_or_buffer='booksratings.csv')

In [3]:
# Attach the book metadata to every rating row. This is an inner join, so
# ratings whose book_id has no counterpart in `books` are dropped.
# NOTE(review): the preview of `data` shows book_id 1 next to id 27, i.e. `books`
# carries two different identifiers. Confirm that `ratings.book_id` really refers
# to `books.book_id` and not to `books.id`; if it is the latter, this join pairs
# ratings with the wrong titles and discards most of the catalogue.
data = pd.merge(ratings, books, how='inner', on='book_id')

In [4]:
# Preview the first five merged rows.
data.head(n=5)

Unnamed: 0,book_id,user_id,rating,id,best_book_id,work_id,books_count,isbn,isbn13,authors,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,314,5,27,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...
1,1,439,3,27,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...
2,1,588,5,27,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...
3,1,1169,4,27,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...
4,1,1185,4,27,1,41335427,275,439785960,9780440000000.0,"J.K. Rowling, Mary GrandPré",...,1678823,1785676,27520,7308,21516,136333,459028,1161491,https://images.gr-assets.com/books/1361039191m...,https://images.gr-assets.com/books/1361039191s...


In [5]:
# Summarise the merged frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79701 entries, 0 to 79700
Data columns (total 25 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   book_id                    79701 non-null  int64  
 1   user_id                    79701 non-null  int64  
 2   rating                     79701 non-null  int64  
 3   id                         79701 non-null  int64  
 4   best_book_id               79701 non-null  int64  
 5   work_id                    79701 non-null  int64  
 6   books_count                79701 non-null  int64  
 7   isbn                       78822 non-null  object 
 8   isbn13                     79303 non-null  float64
 9   authors                    79701 non-null  object 
 10  original_publication_year  79601 non-null  float64
 11  original_title             77920 non-null  object 
 12  title                      79701 non-null  object 
 13  language_code              68562 non-null  obj

In [6]:
# Percentage of missing values per column (the mean of the null mask is the
# null fraction, scaled to percent).
data.isna().mean() * 100

book_id                       0.000000
user_id                       0.000000
rating                        0.000000
id                            0.000000
best_book_id                  0.000000
work_id                       0.000000
books_count                   0.000000
isbn                          1.102872
isbn13                        0.499366
authors                       0.000000
original_publication_year     0.125469
original_title                2.234602
title                         0.000000
language_code                13.975985
average_rating                0.000000
ratings_count                 0.000000
work_ratings_count            0.000000
work_text_reviews_count       0.000000
ratings_1                     0.000000
ratings_2                     0.000000
ratings_3                     0.000000
ratings_4                     0.000000
ratings_5                     0.000000
image_url                     0.000000
small_image_url               0.000000
dtype: float64

In [7]:
# Number of rows that are exact repeats of an earlier row.
data.duplicated(keep='first').sum()

118

In [8]:
# Keep only the first occurrence of each fully identical row; the original
# row labels are preserved (no index reset).
data = data[~data.duplicated(keep='first')]

In [9]:
# Sanity check: re-count repeated rows after de-duplication.
data.duplicated(keep='first').sum()

0

In [10]:
# Build the title x user rating matrix used by the recommender.
# Several ratings for the same (title, user) pair are averaged, and
# pairs with no rating are filled with 0.
pivot_table = pd.pivot_table(
    data,
    values='rating',
    index='title',
    columns='user_id',
    aggfunc='mean',
).fillna(0)

In [11]:
# Report the matrix size as (number of titles, number of users).
n_titles, n_users = pivot_table.shape
print("Pivot Table Shape:", (n_titles, n_users))

Pivot Table Shape: (812, 28906)


In [12]:
# Brute-force nearest-neighbour index over the title rows, compared by
# cosine distance between their user-rating vectors.
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(pivot_table.to_numpy())

In [13]:
def _resolve_title(query, titles):
    """Return the entry of ``titles`` that best matches ``query``, or None.

    Matching is case-insensitive and tried in this order: exact title,
    first title containing the query as a substring, closest fuzzy match
    (difflib ratio >= 0.5). The returned value always has the original
    casing so it can be used as an index label.
    """
    needle = query.strip().lower()
    if not needle:
        # An empty string is a substring of every title; treat it as "no match"
        # instead of silently picking the first title.
        return None

    # lower-cased title -> original title (first occurrence wins, order preserved)
    by_lower = {}
    for title in titles:
        by_lower.setdefault(title.lower(), title)

    if needle in by_lower:
        return by_lower[needle]

    for lowered, title in by_lower.items():
        if needle in lowered:
            return title

    close = get_close_matches(needle, list(by_lower), n=1, cutoff=0.5)
    return by_lower[close[0]] if close else None


def recommend_books(book_name, n_recommendations=5):
    """Print the books whose rating vectors are closest to ``book_name``.

    Relies on the module-level ``pivot_table`` (titles as index) and the
    fitted nearest-neighbour ``model``.

    Parameters
    ----------
    book_name : str
        Full or partial title; matched case-insensitively, with a fuzzy
        fallback for misspellings.
    n_recommendations : int, default 5
        Maximum number of titles to print. Fewer are printed when the
        catalogue is smaller than that.

    Returns
    -------
    None
        Results (or a "No book found" message) are printed.
    """
    book_name = book_name.lower()
    titles = list(pivot_table.index)

    best_match_title = _resolve_title(book_name, titles)
    if best_match_title is None:
        print(f"No book found for '{book_name}'")
        return

    book_idx = pivot_table.index.get_loc(best_match_title)
    wanted = max(n_recommendations, 0)
    # Ask for one extra neighbour because the query book is normally its own
    # nearest neighbour, but never for more rows than the model was fitted on.
    n_neighbors = min(wanted + 1, len(titles))
    query_vector = pivot_table.iloc[book_idx].values.reshape(1, -1)
    _, indices = model.kneighbors(query_vector, n_neighbors=n_neighbors)

    # Drop the query book explicitly rather than assuming it is at position 0:
    # with tied distances another title can be returned first.
    neighbour_ids = [idx for idx in indices.flatten() if idx != book_idx][:wanted]

    print(f"Because you like '{best_match_title}', you might also like:\n")
    for rank, idx in enumerate(neighbour_ids, start=1):
        print(f"{rank}. {pivot_table.index[idx]}")
# Example query: five recommendations for a title matched by name.
recommend_books(book_name='The First World War', n_recommendations=5)

Because you like 'The First World War', you might also like:

1. The Pilot's Wife
2. Eats, Shoots & Leaves: The Zero Tolerance Approach to Punctuation
3. Built to Last: Successful Habits of Visionary Companies
4. The Aleph and Other Stories 
5. Vampire Mountain (Cirque Du Freak, #4)
