## Data Loading

In [1]:
#importing python libraries
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [2]:
#loading datasets
df_books = pd.read_csv('Dataset/Books.csv', low_memory=False)
df_ratings = pd.read_csv('Dataset/Ratings.csv')
df_users = pd.read_csv('Dataset/Users.csv')

In [3]:
#set seed for reproducibility
np.random.seed(0)

## Preprocessing on Books dataset

In [4]:
#first five rows of books dataset
df_books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [5]:
#number of missing values in books dataset
missing_books_count = df_books.isnull().sum()
missing_books_count

ISBN                   0
Book-Title             0
Book-Author            1
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [6]:
#dropping unrequired columns in books dataset
df_books.drop(['Image-URL-S', 'Image-URL-L'], axis = 1, inplace = True)

In [7]:
#uppercasing ISBN
df_books['ISBN'].str.upper()

0         0195153448
1         0002005018
2         0060973129
3         0374157065
4         0393045218
             ...    
271355    0440400988
271356    0525447644
271357    006008667X
271358    0192126040
271359    0767409752
Name: ISBN, Length: 271360, dtype: object

In [8]:
#replacing null author and publisher with other
null_Author = np.where(df_books['Book-Author'].isnull())
null_publisher = np.where(df_books['Publisher'].isnull())

df_books.at[null_Author[0][0],'Book-Author'] = 'Other'
df_books.at[null_publisher[0][0],'Publisher'] = 'Other'
df_books.at[null_publisher[0][1],'Publisher'] = 'Other'

In [9]:
#get all the unique values of year of publication
years = df_books['Year-Of-Publication'].unique().sort()


In [10]:
#checking data for 'DK Publishing Inc'
df_books.loc[df_books['Year-Of-Publication'] == 'DK Publishing Inc',:]

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-M
209538,078946697X,"DK Readers: Creating the X-Men, How It All Beg...",2000,DK Publishing Inc,http://images.amazon.com/images/P/078946697X.0...,http://images.amazon.com/images/P/078946697X.0...
221678,0789466953,"DK Readers: Creating the X-Men, How Comic Book...",2000,DK Publishing Inc,http://images.amazon.com/images/P/0789466953.0...,http://images.amazon.com/images/P/0789466953.0...


In [11]:
#editing data for DK Publishing Inc
df_books.at[209538,'Book-Author'] = 'Other'
df_books.at[209538,'Year-Of-Publication'] = 2000
df_books.at[209538,'Publisher'] = 'DK Publishing Inc'

df_books.at[221678,'Book-Author'] = 'Other'
df_books.at[221678,'Publisher'] = 'DK Publishing Inc'
df_books.at[221678,'Year-Of-Publication'] = 2000

In [12]:
#checking data for 'Gallimard'
df_books.loc[df_books['Year-Of-Publication'] == 'Gallimard',:]

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-M
220731,2070426769,"Peuple du ciel, suivi de 'Les Bergers\"";Jean-M...",2003,Gallimard,http://images.amazon.com/images/P/2070426769.0...,http://images.amazon.com/images/P/2070426769.0...


In [13]:
#editing data for Gallimard
df_books.at[220731 ,'Book-Author'] = 'Other'
df_books.at[220731 ,'Publisher'] = 'Gallimard'
df_books.at[220731 ,'Year-Of-Publication'] = '2003'

In [14]:
#converting year of publication in int data type
df_books['Year-Of-Publication'] = df_books['Year-Of-Publication'].astype(int)

In [15]:
#selecting range which less than 2022
df_books.loc[df_books['Year-Of-Publication'] > 2022, 'Year-Of-Publication'] = 2002

#replacing Invalid years with max year
df_books.loc[df_books['Year-Of-Publication'] == 0, 'Year-Of-Publication'] = 2002

In [16]:
#duplicate rows in books dataset
duplicated_books = df_books.duplicated().sum()

## Preprocessing on Users dataset

In [17]:
#first five rows of users dataset
df_users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [18]:
#number of missing values in users dataset
missing_users_count = df_users.isnull().sum()

In [19]:
#splitting location into city, state and country
locations_list = df_users.Location.str.split(', ')
location_count = len(locations_list)
cities_list = []
states_list = []
countries_list = []
for location in range(0, location_count):
    if locations_list[location][0] == '' or locations_list[location][0] == 'n/a' or locations_list[location][0] == ' ':
        cities_list.append('Other')
    else: 
        cities_list.append(locations_list[location][0])

    if (len(locations_list[location]) < 2):
        states_list.append('Other')
        countries_list.append('Other')
    
    else: 
        if locations_list[location][1] == '' or locations_list[location][1] == 'n/a' or locations_list[location][1] == ' ':
            states_list.append('Other')
        else: 
            states_list.append(locations_list[location][1])
        
        if (len(locations_list[location]) < 3):
            countries_list.append('Other')
        
        else: 
            if locations_list[location][2] == '' or locations_list[location][2] == 'n/a' or locations_list[location][2] == ' ':
                countries_list.append('Other')
            else: 
                countries_list.append(locations_list[location][2])



In [20]:
#creating location dataframes
df_city = pd.DataFrame(cities_list, columns=['City'])
df_state = pd.DataFrame(states_list, columns = ['State'])
df_country = pd.DataFrame(countries_list, columns =['Country'])

df_location = pd.concat([df_city, df_state, df_country], axis=1)
df_location

Unnamed: 0,City,State,Country
0,nyc,new york,usa
1,stockton,california,usa
2,moscow,yukon territory,russia
3,porto,v.n.gaia,portugal
4,farnborough,hants,united kingdom
...,...,...,...
278853,portland,oregon,usa
278854,tacoma,washington,united kingdom
278855,brampton,ontario,canada
278856,knoxville,tennessee,usa


In [21]:
#converting location to lowercase
df_location['City'] = df_location['City'].str.lower()
df_location['State'] = df_location['State'].str.lower()
df_location['Country'] = df_location['Country'].str.lower()

In [22]:
#adding locations to df_users
df_users = pd.concat([df_users, df_location], axis = 1)
df_users

Unnamed: 0,User-ID,Location,Age,City,State,Country
0,1,"nyc, new york, usa",,nyc,new york,usa
1,2,"stockton, california, usa",18.0,stockton,california,usa
2,3,"moscow, yukon territory, russia",,moscow,yukon territory,russia
3,4,"porto, v.n.gaia, portugal",17.0,porto,v.n.gaia,portugal
4,5,"farnborough, hants, united kingdom",,farnborough,hants,united kingdom
...,...,...,...,...,...,...
278853,278854,"portland, oregon, usa",,portland,oregon,usa
278854,278855,"tacoma, washington, united kingdom",50.0,tacoma,washington,united kingdom
278855,278856,"brampton, ontario, canada",,brampton,ontario,canada
278856,278857,"knoxville, tennessee, usa",,knoxville,tennessee,usa


In [23]:
#dropping location from users dataset
df_users.drop(['Location'], axis = 1, inplace = True)

In [24]:
#age preprocessing
ages = df_users['Age'].unique().sort()
considerable_age = df_users[df_users['Age'] <= 98] 
considerable_age = considerable_age[considerable_age['Age'] >= 8]
average_age = round(considerable_age['Age'].mean())


In [25]:
#replacing ages that don't fall in range with average
df_users.loc[df_users['Age'] > 98, 'Age'] = average_age
df_users.loc[df_users['Age'] < 8, 'Age'] = average_age

In [26]:
#filling missing age with average age 
#changing age data type to int
df_users['Age'] = df_users['Age'].fillna(average_age)

df_users['Age'] = df_users['Age'].astype(int)

In [27]:
#duplicate users in books dataset
duplicated_users = df_users.duplicated().sum()
duplicated_users

0

## Preprocessing on Ratings dataset

In [28]:
#first five rows of ratings dataset
df_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [29]:
#number of missing values in ratings dataset
missing_ratings_count = df_ratings.isnull().sum()
missing_ratings_count

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [30]:
#checking data type of 'Book-Rating'
df_ratings.dtypes

User-ID         int64
ISBN           object
Book-Rating     int64
dtype: object

In [31]:
#uppercasing ISBN
df_books['ISBN'].str.upper()

0         0195153448
1         0002005018
2         0060973129
3         0374157065
4         0393045218
             ...    
271355    0440400988
271356    0525447644
271357    006008667X
271358    0192126040
271359    0767409752
Name: ISBN, Length: 271360, dtype: object

In [32]:
#duplicate ratings in books dataset
duplicated_ratings = df_ratings.duplicated().sum()
duplicated_ratings

0

## Dataset Merging

In [33]:
df_recommendation_dataset = pd.merge(df_books, df_ratings, on="ISBN")
df_recommendation_dataset = pd.merge(df_recommendation_dataset, df_users, on="User-ID")

In [34]:
df_recommendation_dataset

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-M,User-ID,Book-Rating,Age,City,State,Country
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,2,0,18,stockton,california,usa
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,8,5,35,timmins,ontario,canada
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,8,0,35,timmins,ontario,canada
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,8,0,35,timmins,ontario,canada
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,8,0,35,timmins,ontario,canada
...,...,...,...,...,...,...,...,...,...,...,...,...
1031131,087004124X,Anthem,Ayn Rand,1966,Caxton Press,http://images.amazon.com/images/P/087004124X.0...,259589,8,19,venice,florida,usa
1031132,158243123X,A Student of Weather,Elizabeth Hay,2001,Counterpoint Press,http://images.amazon.com/images/P/158243123X.0...,259591,8,39,tioga,pennsylvania,usa
1031133,8485900057,Rimas y leyendas (ClÃ¡sicos Fraile ; 3),Gustavo Adolfo BÃ©cquer,1981,Ediciones Fraile,http://images.amazon.com/images/P/8485900057.0...,259614,10,22,madrid,madrid,spain
1031134,0838934854,Anglo-American Cataloguing Rules,Michael Gorman,1998,American Library Association,http://images.amazon.com/images/P/0838934854.0...,259679,6,27,grand prairie,texas,usa


In [35]:
#books with ratings
df_books_with_ratings = df_recommendation_dataset[df_recommendation_dataset['Book-Rating'] != 0]
df_books_with_ratings = df_books_with_ratings.reset_index(drop = True)

In [72]:
bookList = df_books_with_ratings['Book-Title'].drop_duplicates().tolist()
#bookList = bookList.tolist()
bookList

['Clara Callan',
 "Where You'll Find Me: And Other Stories",
 'The Middle Stories',
 'Jane Doe',
 'The Witchfinder (Amos Walker Mystery Series)',
 'More Cunning Than Man: A Social History of Rats and Man',
 'Goodbye to the Buttermilk Sky',
 'The Five People You Meet in Heaven',
 'Life of Pi',
 'The Red Tent (Bestselling Backlist)',
 "The Pilot's Wife : A Novel Tag: Author of the Weight of Water (Oprah's Book Club (Hardcover))",
 'Good in Bed',
 'Five Quarters of the Orange',
 'The Winner',
 "A Patchwork Planet (Ballantine Reader's Circle)",
 'The Horse Whisperer',
 'Open House',
 'Gracie: A Love Story',
 "Lady Chatterley's Lover",
 'Dating Big Bird',
 'An Italian Affair',
 'Prodigy',
 'Joy School',
 'The Sister Circle (Sister Circle)',
 "The Kitchen God's Wife",
 'PLEADING GUILTY',
 'The Testament',
 'Beloved (Plume Contemporary Fiction)',
 "Our Dumb Century: The Onion Presents 100 Years of Headlines from America's Finest News Source",
 'New Vegetarian: Bold and Beautiful Recipes for E

In [37]:
#books without ratings
df_books_without_ratings = df_recommendation_dataset[df_recommendation_dataset['Book-Rating'] == 0]
df_books_without_ratings = df_books_without_ratings.reset_index(drop = True)

## TOP 50 Books

In [38]:
#calculating total number of ratings for each book
df_ratings_count = df_books_with_ratings.groupby('Book-Title').count()['Book-Rating'].reset_index()
df_ratings_count = df_ratings_count.sort_values('Book-Rating', ascending=False)

In [39]:
#calculating average ratings 
df_average_rating = df_books_with_ratings.groupby('Book-Title').mean(numeric_only = True)['Book-Rating'].reset_index()
df_average_rating.rename(columns={'Book-Rating':'Average-Rating'},inplace=True)
df_average_rating = df_average_rating.sort_values('Average-Rating', ascending=False)


In [40]:
#merging total-ratings and average-ratings dataset
df_popular_books = pd.merge(df_ratings_count, df_average_rating, on="Book-Title")

In [41]:
#filter to consider total-ratings atleast more than 200
df_top_books = df_popular_books[df_popular_books['Book-Rating']>=200].sort_values('Average-Rating',ascending=False)

In [42]:
#merge with books for display
df_top_books = df_top_books.merge(df_books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Image-URL-M', 'Book-Rating', 'Average-Rating']]
df_top_books.reset_index(inplace=True)

In [43]:
def get_top_books():
    top_books = pickle.dump(df_top_books, open('top_books.pkl', 'wb'))
    return top_books
get_top_books()

## Books by same author and publisher

In [44]:
#calculating ratings count on all books
df_total_ratings_count = df_recommendation_dataset.groupby('Book-Title').count()['Book-Rating'].reset_index()
df_total_ratings_count = df_total_ratings_count.sort_values('Book-Rating', ascending=False)

In [45]:
#calculating average ratings on all books
df_average_books_rating = df_recommendation_dataset.groupby('Book-Title').mean(numeric_only = True)['Book-Rating'].reset_index()
df_average_books_rating.rename(columns={'Book-Rating':'Average-Rating'},inplace=True)

In [46]:
# merging all the books
df_all_books = df_total_ratings_count.merge(df_average_books_rating,on='Book-Title')

In [47]:
#calculating aggregared rating
df_author_recommendations = df_all_books.sort_values('Average-Rating', ascending=False)
df_author_recommendations["Aggregated-Rating"] = df_author_recommendations['Book-Rating']*df_author_recommendations['Average-Rating']

In [48]:
#merging with books
df_author_recommendations = df_author_recommendations.merge(df_books,on='Book-Title').drop_duplicates('Book-Title')
df_author_recommendations=df_author_recommendations.sort_values('Aggregated-Rating',ascending=False)

In [49]:
pickle.dump(df_author_recommendations, open('author_recommendations.pkl', 'wb'))

#### Create objects for the recommended books

In [50]:
class Recommendations:
    def __init__(self, title, books):
        self.title = title
        self.books = books
        
class Book:
    def __init__(self, name, cover, author):
        self.name = name
        self.cover = cover
        self.author = author

#### Helper method to create books in custom list from dataframe

In [51]:
import json
def create_book_lists_helper(title, books):
    recommendation_books = Recommendations(title, books)
    return recommendation_books

## Recommendation for same author

In [52]:
#Recommend books by same author of the book with bookname as an input  
def recommendation_by_same_author(bookname):
    booksList = []
    bookname = bookname.lower()
    book_entry = df_author_recommendations[df_author_recommendations['Book-Title'].str.lower().str.contains(bookname)]
    if book_entry.empty:
        return create_book_lists_helper("oops! No author recommendations for the input", booksList)
    book_author = book_entry['Book-Author'].iloc[0]
    author_recommendations = df_author_recommendations.loc[df_author_recommendations['Book-Author'] == book_author,:][:5]
    author_recommendations.drop(author_recommendations.index[author_recommendations['Book-Title'] == bookname], inplace = True)
    for book in author_recommendations.values.tolist():
        rBook = Book(book[0], book[8], book[5])
        booksList.append(rBook)
    
    return create_book_lists_helper("Top Books with same author", booksList)
    

## Recommendation by the given author name

In [53]:
def recommendation_by_given_author(authorName):
    booksList = []
    authorName = authorName.lower()
    author_recommendations = df_author_recommendations.loc[df_author_recommendations['Book-Author'].str.lower().str.contains(authorName),:][:5]
    if author_recommendations.empty:
        return create_book_lists_helper("oops! No author recommendations for the input", booksList)
    for book in author_recommendations.values.tolist():
        rBook = Book(book[0], book[8], book[5])
        booksList.append(rBook)
    return create_book_lists_helper("Similar top Books by given author", booksList)

## Recommendation for same publisher

In [54]:
#Recommend books by same publisher of the book with bookname as an input  
def recommendation_by_same_publisher(bookname):
    booksList = []
    bookname = bookname.lower()
    book_entry = df_author_recommendations[df_author_recommendations['Book-Title'].str.lower().str.contains(bookname)]
    if book_entry.empty:
        return create_book_lists_helper("oops! No publisher recommendations for the input", booksList)
    book_publisher = book_entry['Publisher'].iloc[0]
    publisher_recommendations = df_author_recommendations.loc[df_author_recommendations['Publisher'] == book_publisher,:][:5]
    publisher_recommendations.drop(publisher_recommendations.index[publisher_recommendations['Book-Title'] == bookname], inplace = True)

    for book in publisher_recommendations.values.tolist():
        rBook = Book(book[0], book[8], book[5])
        booksList.append(rBook)
    return create_book_lists_helper("Top Books published by same publisher", booksList) 

## Recommendation by the given publisher name

In [55]:
#recommendation by the given publisher name
def recommendation_by_given_publisher(publisherName):
    booksList = []
    publisherName = publisherName.lower()
    author_recommendations = df_author_recommendations.loc[df_author_recommendations['Publisher'].str.lower().str.contains(publisherName),:][:5]
    if author_recommendations.empty:
        return create_book_lists_helper("oops! No publisher recommendations for the input", booksList)
    for book in author_recommendations.values.tolist():
        rBook = Book(book[0], book[8], book[5])
        booksList.append(rBook)
    return create_book_lists_helper("Similar top Books by given publisher", booksList)

## Collaborative Filtering

In [56]:
#fetching experienced users who have rated at least 200 books
collaborative_user_data = df_recommendation_dataset.groupby('User-ID').count()['Book-Rating'] > 200
experienced_users = collaborative_user_data[collaborative_user_data].index

df_filtered_collaborative_data = df_recommendation_dataset[df_recommendation_dataset['User-ID'].isin(experienced_users)]

In [57]:
#fetching books with minimum 50 ratings by users
collaborative_rating_data = df_filtered_collaborative_data.groupby('Book-Title').count()['Book-Rating'] > 50
books_with_experienced_ratings = collaborative_rating_data[collaborative_rating_data].index 

df_final_collaborative_data = df_filtered_collaborative_data[df_filtered_collaborative_data['Book-Title'].isin(books_with_experienced_ratings)]

In [58]:
#creating pivot table
df_pivot_table = df_final_collaborative_data.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')
df_pivot_table.fillna(0, inplace=True)

In [59]:
df_pivot_table

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
#similarity-scores
from sklearn.metrics.pairwise import cosine_similarity
df_similarity_scores = cosine_similarity(df_pivot_table)

In [61]:
def collaborative_recommendation(book_name):
    booksList = []
    array_size = np.where(df_pivot_table.index== book_name)[0]
    if array_size.size == 0:
        return create_book_lists_helper("oops! No trending recommendations for the input", booksList)
    book_index = np.where(df_pivot_table.index==book_name)[0][0]
    similar_books = sorted(list(enumerate(df_similarity_scores[book_index])),key=lambda x:x[1],reverse=True)[1:6]
    if len(similar_books) == 0:
        return create_book_lists_helper("Top trending similar books", booksList)
   
    for book in similar_books:
        temp_df = df_books[df_books['Book-Title'] == df_pivot_table.index[book[0]]]
        book_title = temp_df.drop_duplicates('Book-Title')['Book-Title'].values
        book_author = temp_df.drop_duplicates('Book-Title')['Book-Author'].values
        cover_image = temp_df.drop_duplicates('Book-Title')['Image-URL-M'].values
        rBook = Book(book_title[0], cover_image[0], book_author[0])
        booksList.append(rBook)
    
    return create_book_lists_helper("Top trending similar books", booksList)

In [62]:
pickle.dump(df_pivot_table, open('pivot_table.pkl', 'wb'))
pickle.dump(df_similarity_scores, open('similarity_scores.pkl', 'wb'))
pickle.dump(df_books,open('books.pkl', 'wb'))

## Books Published Yearly

In [63]:
# get books published in the same year 
def getBooksYearly(year_or_book: int or str):
    booksList = []
    try:
        year_of_publication = int(year_or_book)
        #valid year checking
        if (year_of_publication < 1900):
            return create_book_lists_helper("oops! Please input the valid year between 1900 - 2022", booksList)
        elif (year_of_publication > 2022):
            return create_book_lists_helper("oops! Please input the valid year between 1900 - 2022", booksList)
        
        #filter books in the same year
        same_year_books = df_recommendation_dataset[df_recommendation_dataset['Year-Of-Publication'] == year_of_publication]
    except:
        #check for book name
        same_year_books = df_recommendation_dataset[df_recommendation_dataset['Book-Title'].str.lower().str.contains(year_or_book.lower())]
        
        #no books from the same year
        if (len(same_year_books)== 0):
            return create_book_lists_helper("oops! No yearly recommendations for the input", booksList)        
        
        #year of publication of the same book
        year_of_publication = same_year_books.iloc[0]['Year-Of-Publication']
        same_year_books = df_recommendation_dataset[df_recommendation_dataset['Year-Of-Publication'] == year_of_publication]

    if (len(same_year_books)== 0):
        return create_book_lists_helper("oops! No recommendations for year input", booksList)

    #top 5 rated books
    same_year_books = same_year_books.sort_values(by="Book-Rating", ascending=False)[:5]
    
    #dropping the duplicates
    same_year_books = same_year_books.drop_duplicates(subset=["Book-Title"])
    
    for book in same_year_books.values.tolist():
        rBook = Book(book[1], book[5], book[2])
        booksList.append(rBook)
    return create_book_lists_helper("Trending books in the same year", booksList)

## Books published at the given place

In [64]:
#location as input
def getsamePlaceBooks(place):
    booksList = []
    if place is not None:
        place = place.lower()
    
    places = ((df_recommendation_dataset['City'].str.lower() == place) |
            (df_recommendation_dataset['State'].str.lower() == place) |
            (df_recommendation_dataset['Country'].str.lower() == place))
    
    if places.any():
        same_place_books = df_recommendation_dataset[places]
        #top 5 rated books
        same_place_books = same_place_books.sort_values(by = "Book-Rating", ascending=False)[:5]
        same_place_books = same_place_books.drop_duplicates(subset=["Book-Title"])
        if(len(same_place_books) == 0):
            return create_book_lists_helper("oops! No recommendations for place input", booksList)
        for book in same_place_books.values.tolist():
            rBook = Book(book[1], book[5], book[2])
            booksList.append(rBook)
        return create_book_lists_helper("Trending books at the same location", booksList)
    else:
        return create_book_lists_helper("oops! No recommendations for place input", booksList)

In [65]:
#book name as input
def getsamePlaceBooksByTitle(book_name):
    booksList = []
    if book_name is not None:
        book_name = book_name.lower()

        #check for book name
        same_place_books = df_recommendation_dataset[df_recommendation_dataset['Book-Title'].str.lower().str.contains(book_name.lower())]
        
        #no books from the same year
        if (len(same_place_books) == 0):
            return create_book_lists_helper("oops! No recommendations for place input", booksList)
            
    places = ((df_recommendation_dataset['City'].str.lower() == same_place_books.iloc[0]['City'].lower()) |
                  (df_recommendation_dataset['State'].str.lower() == same_place_books.iloc[0]['State'].lower()) |
                  (df_recommendation_dataset['Country'].str.lower() == same_place_books.iloc[0]['Country'].lower()))
    
    if places.any():
        same_place_books = df_recommendation_dataset[places]
        #top 5 rated books
        same_place_books = same_place_books.sort_values(by = "Book-Rating", ascending=False)[:5]
        same_place_books = same_place_books.drop_duplicates(subset=["Book-Title"])
        if(len(same_place_books) == 0):
            return create_book_lists_helper("oops! No recommendations for place input", booksList)    
        for book in same_place_books.values.tolist():
            rBook = Book(book[1], book[5], book[2])
            booksList.append(rBook)
        return create_book_lists_helper("Trending books at the same location", booksList)
    else:
        return create_book_lists_helper("oops! No recommendations for place input", booksList)    

### Converting result to JSON format for frontend

In [66]:
def results_in_json(finalRecommendations):   
    result = json.dumps(finalRecommendations, default=lambda o: o.__dict__, indent=4)
    return result

In [67]:
# get Final results for all recommendations according to title
def getAllRecommendationsByBookName(name):
    finalRecommendations = []
    results = collaborative_recommendation(name)
    if len(results.books) > 0:
        finalRecommendations.append(collaborative_recommendation(name))
    results = recommendation_by_same_author(name)
    if len(results.books) > 0:
        finalRecommendations.append(recommendation_by_same_author(name))
    results = recommendation_by_same_publisher(name)
    if len(results.books) > 0:
        finalRecommendations.append(recommendation_by_same_publisher(name))
    results = getBooksYearly(name)
    if len(results.books) > 0:
        finalRecommendations.append(getBooksYearly(name))
    results = getsamePlaceBooksByTitle(name)
    if len(results.books) > 0:
        finalRecommendations.append(getsamePlaceBooksByTitle(name))

    if len(finalRecommendations) == 0:
        finalRecommendations.append(create_book_lists_helper("No books found!",[]))
    
    return results_in_json(finalRecommendations)

def getAllRecommendationsByAuthorName(name):
    finalRecommendations = []
    finalRecommendations.append(recommendation_by_given_author(name))
    return results_in_json(finalRecommendations)

def getAllRecommendationsByPublisherName(name):
    finalRecommendations = []
    finalRecommendations.append(recommendation_by_given_publisher(name))
    return results_in_json(finalRecommendations)

def getAllRecommendationsByYear(name):
    finalRecommendations = []
    finalRecommendations.append(getBooksYearly(name))
    return results_in_json(finalRecommendations)

def getAllRecommendationsByLocation(name):
    finalRecommendations = []
    finalRecommendations.append(getsamePlaceBooks(name))
    return results_in_json(finalRecommendations)

In [68]:
getAllRecommendationsByYear("1984")

'[\n    {\n        "title": "Trending books in the same year",\n        "books": [\n            {\n                "name": "Pers\\u00c3\\u00b6nlichkeit und \\u00c3?bertragung (Grundwerk / C.G. Jung)",\n                "cover": "http://images.amazon.com/images/P/3530407836.01.MZZZZZZZ.jpg",\n                "author": "C. G Jung"\n            },\n            {\n                "name": "The Clan of the Cave Bear (Earth\'s Children (Paperback))",\n                "cover": "http://images.amazon.com/images/P/0553250426.01.MZZZZZZZ.jpg",\n                "author": "Jean M. Auel"\n            },\n            {\n                "name": "HEARTBURN",\n                "cover": "http://images.amazon.com/images/P/0671496786.01.MZZZZZZZ.jpg",\n                "author": "Nora Ephron"\n            },\n            {\n                "name": "Where the Red Fern Grows",\n                "cover": "http://images.amazon.com/images/P/0553274295.01.MZZZZZZZ.jpg",\n                "author": "Wilson Rawls"\n    

In [69]:
getAllRecommendationsByBookName("animal farm")

'[\n    {\n        "title": "Top Books with same author",\n        "books": [\n            {\n                "name": "1984",\n                "cover": "http://images.amazon.com/images/P/0451524934.01.MZZZZZZZ.jpg",\n                "author": "George Orwell"\n            },\n            {\n                "name": "Animal Farm",\n                "cover": "http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg",\n                "author": "George Orwell"\n            },\n            {\n                "name": "Down and Out in Paris and London",\n                "cover": "http://images.amazon.com/images/P/015626224X.01.MZZZZZZZ.jpg",\n                "author": "George Orwell"\n            },\n            {\n                "name": "Rebelion en la Granja",\n                "cover": "http://images.amazon.com/images/P/8423309223.01.MZZZZZZZ.jpg",\n                "author": "George Orwell"\n            },\n            {\n                "name": "Animal Farm: A Fairy Story",\n           