# Collaborative Based - Book Recommendation System (Clustering)

In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [66]:
books = pd.read_csv('data/Books.csv')

books.columns

books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-L' ]]


books.rename(columns={
    "Book-Title": "title",
    "Book-Author": "author",
    "Year-Of-Publication": "year",
    "Publisher": "publisher",
    "Image-URL-L": "img_url",
}, inplace=True )

  books = pd.read_csv('data/Books.csv')


In [67]:
users = pd.read_csv('data/Users.csv')

ratings = pd.read_csv('data/Ratings.csv')

ratings.rename(columns={
    "User-ID": "user_id",
    "Book-Rating": "rating"
}, inplace=True )

### Here we will start the analysing part

In [70]:
x = ratings['user_id'].value_counts() > 200
y = x[x].index
ratings = ratings[ratings['user_id'].isin(y)]

In [None]:
ratings_with_books = ratings.merge(books, on='ISBN')

In [71]:
num_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()
num_rating.rename(columns={'rating': 'num_of_rating'}, inplace=True)

### Merge the ratings dataset with the books dataset

In [94]:
final_rating = ratings_with_books.merge(num_rating, on='title')
final_rating = final_rating[final_rating['num_of_rating']>=50]
final_rating.drop_duplicates(['user_id', 'title'], inplace=True)

### Pivot Table (Matrix) and Clustering

In [101]:
book_pivot = final_rating.pivot_table(columns='user_id', index='title', values='rating')
book_pivot.fillna(0, inplace=True)

In [104]:
from scipy.sparse import csr_matrix
book_sparse = csr_matrix(book_pivot)

In [115]:
# use clustering algorithm from sklearn - kmeans 

from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm='brute')
model.fit(book_sparse)

distance,suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6)

In [119]:
for i in range(len(suggestion)):
    print(book_pivot.index[suggestion[i]])

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',
       'The Cradle Will Fall'],
      dtype='object', name='title')


In [121]:
book_name = book_pivot.index

In [125]:
import pickle 

pickle.dump(model, open('artifacts/model.pkl', 'wb'))
pickle.dump(book_name, open('artifacts/book_name.pkl', 'wb'))
pickle.dump(final_rating, open('artifacts/final_rating.pkl', 'wb'))
pickle.dump(book_pivot, open('artifacts/book_pivot.pkl', 'wb'))


## Testing the Recommendation process

In [130]:
def recommend_book(book_name):
    book_id = np.where(book_pivot.index == book_name)[0][0]
    distance,suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6)

    for i in range(len(suggestion)):
        books = book_pivot.index[suggestion[i]]

        for j in books: 
            print(j)

In [131]:
book_name = 'A Bend in the Road'
recommend_book(book_name)

A Bend in the Road
Exclusive
The Cradle Will Fall
No Safe Place
Family Album
Lake Wobegon days
