In [None]:
import pandas as pd
import numpy as np

In [None]:
# error_bad_lines was deprecated in pandas 1.3 and removed in pandas 2.0.
# on_bad_lines="warn" reproduces the previous behaviour: malformed rows are
# dropped and a warning is emitted for each of them.
bx_read_options = {"sep": ";", "encoding": "latin-1", "on_bad_lines": "warn"}

ratings_data = pd.read_csv("/content/BX-Book-Ratings.csv", **bx_read_options)
ratings_data_copy = ratings_data.copy()  # untouched snapshot of the raw ratings

users_data = pd.read_csv("/content/BX-Users.csv", **bx_read_options)
users_data_copy = users_data.copy()  # untouched snapshot of the raw users

# The books file keeps the python parsing engine that was requested originally.
books_data = pd.read_csv("/content/BX-Books.csv", engine="python", **bx_read_options)
books_data_copy = books_data.copy()  # raw books, still holding the Image-URL-* columns

In [None]:
#Keeping the required columns.
#.copy() turns the column slice into an independent frame, so later in-place
#operations on books_data cannot trigger SettingWithCopyWarning.
books_data = books_data[["ISBN","Book-Title","Book-Author","Year-Of-Publication","Publisher"]].copy()

In [None]:
#Changing the column names to short snake_case ones.
#Re-assigning the result (instead of inplace=True) avoids the
#SettingWithCopyWarning pandas can raise when a frame derived from a slice is
#mutated in place, and keeps the cell safe to re-run.
books_data = books_data.rename(
    columns={"Book-Title": "title", "Book-Author": "author", "Year-Of-Publication": "year", "Publisher": "publisher"}
)
users_data = users_data.rename(columns={"User-ID": "user_id", "Location": "location", "Age": "age"})
ratings_data = ratings_data.rename(columns={"User-ID": "user_id", "Book-Rating": "rating"})

In [None]:
#Count the distinct users that appear in the ratings table
#(value_counts() has one entry per user_id, so its shape is the number of users)
user_rating_counts = ratings_data["user_id"].value_counts()
user_rating_counts.shape
#105283 users have actually rated the books

In [None]:
#Keep only the users who have given at least 180 ratings
#(restricting to active users is meant to improve the recommendations)
ratings_per_user = ratings_data["user_id"].value_counts()
a = ratings_per_user >= 180   #boolean mask indexed by user_id
b = a[a].index                #user ids that reach the threshold

In [None]:
#Keep only the rating rows written by the users selected in b
is_active_user = ratings_data["user_id"].isin(b)
ratings_data = ratings_data[is_active_user]

In [None]:
#Merging ratings_data and books_data on the ISBN column
#(merge defaults to an inner join, so only ISBNs present in both frames survive)
rated_books_data = pd.merge(ratings_data, books_data, on="ISBN")

In [None]:
#Count how many ratings each title has received
no_of_ratings_data = (
    rated_books_data
    .groupby("title")["rating"]
    .count()
    .reset_index(name="number_of_rating")  #columns: title, number_of_rating
)

In [None]:
#Attach the per-title rating count to every rated-book row
final_rating_data = pd.merge(rated_books_data, no_of_ratings_data, on="title")

In [None]:
#Keep only the titles that received 50 or more ratings
has_enough_ratings = final_rating_data["number_of_rating"] >= 50
final_rating_data = final_rating_data[has_enough_ratings]

In [None]:
#Keep a single rating per (user, title) pair; the first occurrence wins.
#The frame was produced by boolean filtering, so the result is re-assigned
#instead of using inplace=True, which can raise SettingWithCopyWarning.
final_rating_data = final_rating_data.drop_duplicates(["user_id","title"])

In [None]:
#Sanity check: (rows, columns) of final_rating_data at this point of the pipeline
final_rating_data.shape

(66083, 8)

In [None]:
#Same shape check as the previous cell.
#NOTE(review): the two recorded outputs differ (66083 vs 59899 rows) although no
#visible cell runs in between - probably out-of-order execution; confirm with
#Restart & Run All.
final_rating_data.shape

(59899, 8)

In [None]:
#Book x user rating matrix: one row per title, one column per user_id.
#Book/user pairs without a rating become 0.
rating_pivot_table = (
    final_rating_data
    .pivot_table(index="title", columns="user_id", values="rating")
    .fillna(0)
)

In [None]:
#The zeros in the pivot table only mark unrated book/user pairs, so the table
#is converted to a compressed sparse row (CSR) matrix.
#This sparse matrix is what the model is fitted on.
from scipy.sparse import csr_matrix

sparse_matrix = csr_matrix(rating_pivot_table.values)


**Model Building**

In [None]:
from sklearn.neighbors import NearestNeighbors

#Unsupervised nearest-neighbour searcher; "brute" selects brute-force search
model = NearestNeighbors(algorithm="brute")

In [None]:
#Fit on the sparse book x user matrix: every row (book) becomes a searchable point
model.fit(sparse_matrix)

NearestNeighbors(algorithm='brute')

In [None]:
#Find the 6 nearest neighbours of the first book in the pivot table.
#kneighbors takes a 2-D query, hence the reshape of the single row to (1, -1).
first_book_ratings = rating_pivot_table.iloc[0, :].values.reshape(1, -1)
distances, suggestions = model.kneighbors(first_book_ratings, n_neighbors=6)

In [None]:
#Neighbour positions returned by kneighbors; recommend_books maps such positions
#to titles through rating_pivot_table.index
suggestions

In [None]:
def recommend_books(book_name):
  """Print the titles of the 6 nearest neighbours of ``book_name``.

  The book's row of ``rating_pivot_table`` is used as the query for ``model``;
  each returned row position is printed as a title, one per line. In the
  recorded runs the queried title itself is the first line printed.

  Parameters
  ----------
  book_name : str
      Title to look up; it must equal an entry of ``rating_pivot_table.index``.

  Raises
  ------
  IndexError
      If ``book_name`` is not present in ``rating_pivot_table.index``.
  """
  matches = np.where(rating_pivot_table.index == book_name)[0]
  if matches.size == 0:
    # Same exception type the bare [0][0] indexing used to raise, but with a
    # message that names the missing title.
    raise IndexError("{!r} is not a title in rating_pivot_table".format(book_name))
  book_index = matches[0]
  query = rating_pivot_table.iloc[book_index, :].values.reshape(1, -1)  # 2-D query row
  distances, suggestions = model.kneighbors(query, n_neighbors=6)
  for i in np.ravel(suggestions, order='C'):  # flatten the 2-D result to 1-D
    print(rating_pivot_table.index[i])


In [None]:
#Example query; the title must match an entry of rating_pivot_table.index exactly
recommend_books("1984")

1984
No Safe Place
A Civil Action
Malice
Abduction
Master of the Game


In [None]:
#Second example query, again matched exactly against rating_pivot_table.index
recommend_books("Animal Farm")

Animal Farm
Exclusive
Jacob Have I Loved
Second Nature
The Playboy
Personal Injuries


**Here our recommendation model is complete**

**Making book image dataframe**

In [None]:
# Title -> medium-size cover URL, limited to the titles the model knows about.
# A single non-mutating chain replaces the inplace rename/drop_duplicates calls,
# which acted on slices and could raise SettingWithCopyWarning.
books_image_data = (
    books_data_copy[["Book-Title", "Image-URL-M"]]
    .rename(columns={"Book-Title": "title", "Image-URL-M": "image"})
    .loc[lambda frame: frame["title"].isin(rating_pivot_table.index)]
    .drop_duplicates(subset=["title"], keep='first')  # one image row per title
)

In [None]:
#Inspect the title/image frame built in the previous cell
books_image_data

**Pickling Our Model**

In [None]:
import pickle

In [None]:
# Persist the book x user rating table. The with-block closes the file handle
# (and flushes the data) even if pickling fails part-way.
with open("rating_table.pkl", "wb") as rating_table_file:
  pickle.dump(rating_pivot_table, rating_table_file)

In [None]:
# Persist the title/image lookup frame. The with-block closes the file handle
# (and flushes the data) even if pickling fails part-way.
with open("books_image_data.pkl", "wb") as books_image_file:
  pickle.dump(books_image_data, books_image_file)

**Webpage Related**

In [None]:
import pandas as pd
import numpy as np
import pickle


In [None]:
# Reload the artefacts written by the pickling cells, closing each file handle
# deterministically with a with-block.
# NOTE: the dump cells write to the working directory, so these absolute paths
# assume the notebook runs with /content as its working directory.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
with open("/content/rating_table.pkl", "rb") as rating_table_file:
  rating_table = pickle.load(rating_table_file)
with open("/content/books_image_data.pkl", "rb") as books_image_file:
  books_image_data = pickle.load(books_image_file)

In [None]:
from scipy.sparse import csr_matrix

#Sparse (CSR) version of the reloaded rating table, used to fit model2
new_sparse_matrix = csr_matrix(rating_table.values)

In [None]:
from sklearn.neighbors import NearestNeighbors

#Nearest-neighbour searcher for the reloaded table; "brute" selects brute-force search
model2 = NearestNeighbors(algorithm="brute")

In [None]:
#Fit on the sparse matrix built from the reloaded rating_table
model2.fit(new_sparse_matrix)

NearestNeighbors(algorithm='brute')

In [None]:
#Function for recommending books
def rec(book_name):
  """Return the 6 nearest-neighbour titles of ``book_name`` and their image URLs.

  Parameters
  ----------
  book_name : str
      Title to look up; it must equal an entry of ``rating_table.index``.

  Returns
  -------
  tuple of (list, list)
      ``recommended_books`` holds the neighbour titles in the order returned by
      ``model2.kneighbors``; ``image_url`` holds, position by position, the
      value of the ``image`` column of ``books_image_data`` for each title, or
      ``""`` when the title has no row there.

  Raises
  ------
  IndexError
      If ``book_name`` is not present in ``rating_table.index``.
  """
  matches = np.where(rating_table.index == book_name)[0]
  if matches.size == 0:
    # Same exception type the bare [0][0] indexing used to raise, but with a
    # message that names the missing title.
    raise IndexError("{!r} is not a title in rating_table".format(book_name))
  book_index = matches[0]
  query = rating_table.iloc[book_index, :].values.reshape(1, -1)  # 2-D query row
  distances, suggestions = model2.kneighbors(query, n_neighbors=6)
  recommended_books = [rating_table.index[i] for i in np.ravel(suggestions, order='C')]

  # Look the URLs up as real values. Series.to_string() is a display formatter:
  # it yields "Series([], )" for a missing title and joins several rows with
  # newlines, neither of which is a usable URL.
  unique_images = books_image_data.drop_duplicates(subset="title")  # first row per title
  url_by_title = dict(zip(unique_images["title"], unique_images["image"]))
  image_url = [url_by_title.get(title, "") for title in recommended_books]

  return recommended_books,image_url



In [None]:

#Function to get the images
def image(book_list):
  """Return the image URL stored in ``books_image_data`` for each title.

  Parameters
  ----------
  book_list : iterable of str
      Titles to look up in the ``title`` column of ``books_image_data``.

  Returns
  -------
  list
      One entry per title, in input order: the value of the ``image`` column
      for that title (first matching row), or ``""`` when the title has no row.
  """
  # Look the URLs up as real values. Series.to_string() is a display formatter:
  # it yields "Series([], )" for a missing title and joins several rows with
  # newlines, neither of which is a usable URL.
  unique_images = books_image_data.drop_duplicates(subset="title")  # first row per title
  url_by_title = dict(zip(unique_images["title"], unique_images["image"]))
  return [url_by_title.get(title, "") for title in book_list]




In [None]:
#Show only the image URLs (second element of the tuple returned by rec)
rec("Animal Farm")[1]

In [None]:
#Inspect the image row stored for a single title
books_image_data[books_image_data.title=="Exclusive"]

Unnamed: 0,title,image
19105,Exclusive,http://images.amazon.com/images/P/0446604232.0...
