In [None]:
import pandas as pd
import numpy as np

# Data Reading

In [None]:
# Read the three source tables from the shared CSV folder.
# The ratings table is taken from 'updated_ratings.csv'; the original
# 'ratings.csv' is intentionally not loaded here.
users = pd.read_csv(filepath_or_buffer='../csv files/user_det.csv')
books = pd.read_csv(filepath_or_buffer='../csv files/books.csv')
ratings = pd.read_csv(filepath_or_buffer='../csv files/updated_ratings.csv')

## User Data

In [None]:
# Print the (rows, columns) shape, then preview the first rows of the users table.
print(users.shape)
users.head()

## Books Data

In [None]:
# Print the (rows, columns) shape, then preview the first rows of the books table.
print(books.shape)
books.head()

## Ratings Data

In [None]:
# Print the (rows, columns) shape, then display the ratings table itself
# (a bare frame is shown in full, subject to pandas' own display truncation).
print(ratings.shape)
ratings

# Data Preprocessing

## Replace all the '*User-ID*'s in '*ratings.csv*' with user ids from the database.
This step is optional.

In [None]:
# Optional one-off step, kept commented out: swap the 'User-ID' column for ids drawn at
# random from users["user_id"] and save the result as 'updated_ratings.csv'.
# NOTE(review): the random draw is unseeded, so re-enabling this would write a different
# file on every run.
# updated_ratings = ratings.drop('User-ID', axis=1).copy()
# updated_ratings.insert(loc=0, column='User-ID',
#                        value=np.random.choice(users["user_id"], size=len(updated_ratings)))
# updated_ratings.to_csv('../csv files/updated_ratings.csv', index=False)
# With the step disabled, `updated_ratings` is just another name for `ratings` (no copy is made).
updated_ratings = ratings

## Check whether the datasets contain any NULL values.

In [None]:
# Missing-value count for every column of the books table.
books.isna().sum()

In [None]:
# Missing-value count for every column of the ratings table.
ratings.isna().sum()

In [None]:
# Missing-value count for every column of the users table.
users.isna().sum()

## Calculate the number of duplicate entries in the dataset.

In [None]:
# Number of rows in books that exactly repeat an earlier row (all columns compared).
books.duplicated().sum()

In [None]:
# Number of rows in ratings that exactly repeat an earlier row (all columns compared).
ratings.duplicated().sum()

In [None]:
# Number of rows in users that exactly repeat an earlier row (all columns compared).
users.duplicated().sum()

# Processing the Ratings

In [None]:
# Join every rating to its book record via the shared 'ISBN' column.
# An inner join is used, so ratings whose ISBN is absent from `books` are dropped.
ratings_with_name = pd.merge(updated_ratings, books, on='ISBN', how='inner')

In [None]:
# Count the non-null 'Book-Rating' entries per title and expose the result as a
# two-column frame: 'Book-Title' and 'Number of Ratings'.
ratings_book_num = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='Number of Ratings')
)

In [None]:
# Display the per-title rating counts.
ratings_book_num

In [None]:
# Average the 'Book-Rating' values per title and expose the result as a
# two-column frame: 'Book-Title' and 'Average Ratings'.
ratings_book_avg = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='Average Ratings')
)

In [None]:
# Display the per-title average ratings.
ratings_book_avg

In [None]:
# Combine the count and the average into one row per 'Book-Title'.
ratings_book = pd.merge(ratings_book_num, ratings_book_avg, on='Book-Title', how='inner')

In [None]:
# Display the combined per-title statistics (count and average).
ratings_book

# Popularity Based Recommender System

In [None]:
# Keep only titles with more than 200 ratings, then rank them from the highest
# to the lowest average rating and renumber the rows from zero.
popular_ratings = (
    ratings_book
    .loc[lambda frame: frame['Number of Ratings'] > 200]
    .sort_values(by='Average Ratings', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Print the shape of the filtered, ranked table and preview its top rows.
print(popular_ratings.shape)
popular_ratings.head()

In [None]:
# Attach the book metadata to each popular title and keep one row per title
# (a title can match several rows of `books`; the first match wins).
# Only the three aggregate columns are taken from the left-hand side, which makes
# this cell safe to re-run: without that selection a second run would merge the
# already-merged frame, pandas would suffix the overlapping columns
# (e.g. 'ISBN_x' / 'ISBN_y'), and later column selections would fail.
popular_ratings = (
    popular_ratings[['Book-Title', 'Number of Ratings', 'Average Ratings']]
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')
)

In [None]:
# Print the shape after the book metadata was merged in, and preview the top rows.
print(popular_ratings.shape)
popular_ratings.head()

In [None]:
# Trim the popularity table down to the six columns that are exported later,
# and renumber the rows from zero.
popular_ratings_min = (
    popular_ratings
    .loc[:, ['ISBN', 'Book-Title', 'Book-Author',
             'Image-URL-L', 'Number of Ratings',
             'Average Ratings']]
    .reset_index(drop=True)
)

In [None]:
# Display the trimmed popularity table.
popular_ratings_min

# Collaborative Filtering Based Recommender System

In [None]:
# Boolean mask per 'User-ID': True when that user has more than 200 ratings.
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count().gt(200)
# The ids for which the mask is True.
educated_users = x.loc[x].index

In [None]:
# Restrict the ratings to the users selected above.
filtered_rating = ratings_with_name.loc[ratings_with_name['User-ID'].isin(educated_users)]
# Boolean mask per 'Book-Title': True when those users gave the title at least 50 ratings.
y = filtered_rating.groupby('Book-Title')['Book-Rating'].count().ge(50)
# The titles for which the mask is True.
famous_books = y.loc[y].index

In [None]:
# Keep only the ratings that concern the selected titles.
final_ratings = filtered_rating.loc[filtered_rating['Book-Title'].isin(famous_books)]
# Title-by-user matrix of ratings; repeated (title, user) pairs are combined with
# pivot_table's default aggregation (mean), and unrated pairs are left as NaN.
pt = pd.pivot_table(
    final_ratings,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)

In [None]:
# Treat every (title, user) pair without a rating as 0, then display the matrix.
pt = pt.fillna(0)
pt

In [None]:
# NOTE(review): this import sits mid-notebook rather than in the first cell with pandas/numpy.
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of `pt` (one row per 'Book-Title').
similarity_scores = cosine_similarity(pt)
# Show the dimensions of the resulting matrix.
similarity_scores.shape

In [None]:
def recommend(book_name, top_n=3):
    """Return the titles most similar to ``book_name``.

    Similarity is read from the module-level ``similarity_scores`` matrix, whose
    rows and columns follow the order of ``pt.index``; the metadata of each hit
    is looked up in ``books``.

    Parameters
    ----------
    book_name : str
        Title to look up; it must match an entry of ``pt.index`` exactly.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    list of list
        One ``[title, author, image_url]`` entry per recommendation, ordered from
        most to least similar. The list is empty when ``book_name`` is not in
        ``pt.index`` or when ``top_n`` is not positive.
    """
    matches = np.where(pt.index == book_name)[0]
    if len(matches) == 0 or top_n <= 0:
        # Unknown title (or nothing requested): report "no recommendations"
        # instead of failing with an opaque IndexError.
        return []
    index = matches[0]

    # Rank every *other* book by similarity. The queried book is excluded by
    # position rather than by assuming it sorts first: another title with the
    # same score would otherwise push it into the results.
    ranked = sorted(
        (
            (position, score)
            for position, score in enumerate(similarity_scores[index])
            if position != index
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )[:top_n]

    data = []
    for position, _score in ranked:
        # `books` can hold several rows per title; keep the first one only.
        match = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')
        data.append(
            match['Book-Title'].tolist()
            + match['Book-Author'].tolist()
            + match['Image-URL-L'].tolist()
        )

    return data

In [None]:
# Example call; the returned list is shown as the cell output.
recommend('The Da Vinci Code')

# Saving the Objects using *pickle*

In [None]:
# Destination folder for the pickled artefacts. The trailing slash matters:
# file names are appended to this value by plain string concatenation.
folderNameForPickleFiles = '../pickle files/'

In [None]:
# Save the trimmed popularity table as 'popular_ratings.pkl' in the pickle folder.
popular_ratings_min.to_pickle(folderNameForPickleFiles + 'popular_ratings.pkl')

In [None]:
import pickle

In [None]:
# Persist the title-by-user matrix and the similarity matrix.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes, instead of being left open.
with open(folderNameForPickleFiles + 'pt.pkl', 'wb') as pt_file:
    pickle.dump(pt, pt_file)
with open(folderNameForPickleFiles + 'similarity_scores.pkl', 'wb') as similarity_file:
    pickle.dump(similarity_scores, similarity_file)

In [None]:
# Preview of `books` with one row per 'Book-Title'. The result is only displayed,
# not assigned, so `books` itself is left unchanged.
books.drop_duplicates(['Book-Title'])

In [None]:
# Attach book metadata to the per-title statistics and keep one row per title
# (the first matching row of `books` wins).
books_with_ratings = (
    pd.merge(ratings_book, books, on='Book-Title', how='inner')
    .drop_duplicates(subset='Book-Title')
)

In [None]:
# Keep, in this order, only the columns that are exported below.
books_with_ratings = books_with_ratings.loc[
    :,
    ['ISBN', 'Book-Title', 'Book-Author',
     'Year-Of-Publication', 'Publisher', 'Image-URL-L',
     'Number of Ratings', 'Average Ratings'],
]

In [None]:
# Save the per-title table to the pickle folder.
# Note the file name is 'book_with_ratings.pkl' (singular "book"), unlike the variable name.
books_with_ratings.to_pickle(folderNameForPickleFiles + 'book_with_ratings.pkl')