In [11]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

book_data = pd.read_csv('Book_Details.csv')
book_data = book_data.reset_index()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
#prints data
# print(book_data.head())


# number of rows and columns in the data frame
# print(book_data.shape)

# selecting the relevant features for recommendation
selected_features = ['book_title','book_details','publication_info','author','num_pages']
# print(selected_features)

# replacing the null valuess with null string
for feature in selected_features:
  book_data[feature] = book_data[feature].fillna('')

# combining all the 5 selected features
combined_features = book_data['book_title']+' '+book_data['book_details']+' '+book_data['publication_info']+' '+book_data['author']+' '+book_data['num_pages']
# print(combined_features)
#
# converting the text data to feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

# getting the book name from the user
book_name = "Harry Potter" #input(' Enter your favorite book: ')

# creating a list with all the book names given in the dataset
list_of_all_titles = book_data['book_title'].tolist()

# finding the close match for the book name given by the user
find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)

close_match = find_close_match[0]

# finding the index of the book with title
index_of_the_book = book_data[book_data.book_title == close_match]['index'].values[0]


In [36]:
book_data.head()

Unnamed: 0.1,index,Unnamed: 0,book_id,cover_image_uri,book_title,book_details,format,publication_info,authorlink,author,num_pages,genres,num_ratings,num_reviews,average_rating,rating_distribution
0,0,0,1,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Half-Blood Prince,"It is the middle of the summer, but there is a...","['652 pages, Paperback']","['First published July 16, 2005']",https://www.goodreads.com/author/show/1077326....,J.K. Rowling,['652'],"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",3292516,58398,4.58,"{'5': '2,244,154', '4': '775,028', '3': '219,8..."
1,1,1,2,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Order of the Phoenix,Harry Potter is about to start his fifth year ...,"['912 pages, Paperback']","['First published June 21, 2003']",https://www.goodreads.com/author/show/1077326....,J.K. Rowling,['912'],"['Young Adult', 'Fiction', 'Magic', 'Childrens...",3401709,64300,4.5,"{'5': '2,178,760', '4': '856,178', '3': '293,2..."
2,2,2,3,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Sorcerer's Stone,Harry Potter has no idea how famous he is. Tha...,"['309 pages, Hardcover']","['First published June 26, 1997']",https://www.goodreads.com/author/show/1077326....,J.K. Rowling,['309'],"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",10116247,163493,4.47,"{'5': '6,544,542', '4': '2,348,390', '3': '856..."
3,3,3,5,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Prisoner of Azkaban,"Harry Potter, along with his best friends, Ron...","['435 pages, Mass Market Paperback']","['First published July 8, 1999']",https://www.goodreads.com/author/show/1077326....,J.K. Rowling,['435'],"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",4215031,84959,4.58,"{'5': '2,892,322', '4': '970,190', '3': '287,7..."
4,4,4,6,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Goblet of Fire,It is the summer holidays and soon Harry Potte...,"['734 pages, Paperback']","['First published July 8, 2000']",https://www.goodreads.com/author/show/1077326....,J.K. Rowling,['734'],"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",3718209,69961,4.57,"{'5': '2,500,070', '4': '899,496', '3': '259,7..."


In [46]:
similarity_list = cosine_similarity(feature_vectors, feature_vectors[index_of_the_book])
similarity = pd.DataFrame(similarity_list)
similarity = similarity.rename(columns={similarity.columns[0]: "score"})
similarity = similarity.sort_values(by="score", ascending=False)
similar_indices = similarity.index[:10]
for i in similar_indices:
    print(book_data[book_data["index"] == i]["book_title"].values[0])

Harry Potter Collection
Harry Potter Boxed Set, Books 1-5
Harry Potter and the Chamber of Secrets
The Harry Potter Collection 1-4
Harry Potter and the Prisoner of Azkaban
Harry Potter and the Order of the Phoenix
Harry Potter and the Goblet of Fire
Harry, a History: The True Story of a Boy Wizard, His Fans, and Life Inside the Harry Potter Phenomenon
Harry, a History: The True Story of a Boy Wizard, His Fans, and Life Inside the Harry Potter Phenomenon
Harry Potter and the Sorcerer’s Stone


In [30]:
# getting the similarity scores using cosine similarity
similarity = cosine_similarity(feature_vectors)

# getting a list of similar books
similarity_score = list(enumerate(similarity[index_of_the_book]))
print(similarity_score)

[(0, 0.3201436418218123), (1, 0.39710675712391474), (2, 0.32659889608497167), (3, 0.40183662729963093), (4, 0.3852793027788509), (5, 0.4551019090514854), (6, 1.0000000000000002), (7, 0.08100570014901377), (8, 0.10390612202192569), (9, 0.06423823798720148), (10, 0.060555329612234046), (11, 0.05873295361962013), (12, 0.07170806751057172), (13, 0.07474624440446377), (14, 0.07216479645837823), (15, 0.07119413882796015), (16, 0.14942638696774502), (17, 0.09006132207851725), (18, 0.0803511546708852), (19, 0.10948589266956933), (20, 0.03494850738703805), (21, 0.03664196710563861), (22, 0.05757209645198546), (23, 0.04984054089814209), (24, 0.03999795154017355), (25, 0.026373125632028552), (26, 0.03479914132816629), (27, 0.0922501570434808), (28, 0.0922501570434808), (29, 0.07030754998791829), (30, 0.09456813328379078), (31, 0.06843153941343862), (32, 0.059563841929383375), (33, 0.036760219341670945), (34, 0.059178316481209386), (35, 0.08333400198450004), (36, 0.03351069693578945), (37, 0.04906

In [28]:

# sorting the books based on their similarity score
sorted_similar_books = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_books)

# print the name of similar books based on the index
display('Books suggested for you : \n')

i = 1

for book in sorted_similar_books:
  index = book[0]
  title_from_index = book_data[book_data.index==index]['book_title'].values[0]
  publication_info_from_index = book_data[book_data.index==index]['publication_info'].values[0]  # Get the publication info
  if i < 31:
    display(f"{i}. {title_from_index} ({publication_info_from_index})")  # Print title and publication info
    i += 1

[(6, 1.0000000000000002), (5, 0.4551019090514854), (1402, 0.4374753574555081), (4152, 0.4258735891033853), (3, 0.40183662729963093), (1, 0.39710675712391474), (4, 0.3852793027788509), (9807, 0.3534120482107904), (9808, 0.3534120482107904), (15097, 0.3409226504024279), (15928, 0.3357816803833204), (2, 0.32659889608497167), (8517, 0.32453549334962967), (11791, 0.3214628704093177), (0, 0.3201436418218123), (7963, 0.3154442841845812), (10021, 0.31394882360051024), (14238, 0.29577127176558515), (4737, 0.29428575623041536), (11970, 0.270674721738903), (9427, 0.2405475906074781), (6217, 0.2332922875654793), (15371, 0.23209456643063112), (13570, 0.22016124823487718), (14619, 0.22006856880347817), (6857, 0.21046840564619676), (12111, 0.20930344572317963), (2850, 0.20547883106294285), (14357, 0.2031685164676008), (13753, 0.20294067243598254), (3950, 0.20261466579387832), (5302, 0.19937766785039085), (3954, 0.19900356222974905), (2253, 0.186877224432718), (1535, 0.18679168719597544), (3549, 0.185

'Books suggested for you : \n'

"1. Harry Potter Collection (['First published January 1, 2005'])"

"2. Harry Potter Boxed Set, Books 1-5 (['First published October 1, 2003'])"

"3. Harry Potter and the Chamber of Secrets (['First published July 2, 1998'])"

"4. The Harry Potter Collection 1-4 (['First published January 1, 1999'])"

"5. Harry Potter and the Prisoner of Azkaban (['First published July 8, 1999'])"

"6. Harry Potter and the Order of the Phoenix (['First published June 21, 2003'])"

"7. Harry Potter and the Goblet of Fire (['First published July 8, 2000'])"

"8. Harry, a History: The True Story of a Boy Wizard, His Fans, and Life Inside the Harry Potter Phenomenon (['First published November 4, 2008'])"

"9. Harry, a History: The True Story of a Boy Wizard, His Fans, and Life Inside the Harry Potter Phenomenon (['First published November 4, 2008'])"

"10. Harry Potter and the Sorcerer’s Stone (['First published June 26, 1997'])"

"11. Harry Potter and the Cursed Child. Parts One and Two (['First published July 31, 2016'])"

"12. Harry Potter and the Sorcerer's Stone (['First published June 26, 1997'])"

"13. Harry Potter and the Order of the Phoenix (['First published July 1, 2003'])"

"14. Harry Potter: The Prequel (['First published June 11, 2008'])"

"15. Harry Potter and the Half-Blood Prince (['First published July 16, 2005'])"

"16. Harry Potter Series Box Set (['First published October 1, 2007'])"

"17. How Harry Cast His Spell: The Meaning behind the Mania for J. K. Rowling's Bestselling Books (['First published September 1, 2008'])"

"18. Harry Potter and the Cursed Child: Parts One and Two (['First published July 31, 2016'])"

"19. Harry Potter and the Deathly Hallows (['First published July 21, 2007'])"

"20. Harry Potter and the Methods of Rationality (['First published March 14, 2015'])"

"21. James Potter and the Hall of Elders' Crossing (['First published January 1, 2007'])"

"22. The Heaven Tree Trilogy (['First published January 1, 1960'])"

"23. Manacled (['First published April 27, 2018'])"

"24. The First Fifteen Lives of Harry August (['First published April 8, 2014'])"

"25. The First Fifteen Lives of Harry August (['First published April 8, 2014'])"

"26. Into the Blue (['First published January 1, 1990'])"

"27. Only Time Will Tell (['First published May 12, 2011'])"

"28. The Concrete Blonde (['First published January 1, 1994'])"

"29. The Six of Crows Duology Boxed Set (['First published September 27, 2016'])"

"30. The Mortal Instruments, the Complete Collection (Boxed Set): City of Bones; City of Ashes; City of Glass; City of Fallen Angels; City of Lost Souls; City of Heavenly Fire (['First published November 11, 2014'])"