In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
with open('book.csv') as f:
    print(f)

<_io.TextIOWrapper name='book.csv' mode='r' encoding='cp1252'>


In [3]:
book= pd.read_csv('book.csv',encoding='cp1252')
book

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6
...,...,...,...,...
9995,9996,162121,American Fried: Adventures of a Happy Eater.,7
9996,9997,162121,Cannibal In Manhattan,9
9997,9998,162121,How to Flirt: A Practical Guide,7
9998,9999,162121,Twilight,8


In [4]:
book.rename(columns = {'Unnamed: 0':'Index','User.ID':'User_ID','Book.Title':'Book_Title','Book.Rating':'Book_Rating'}, inplace = True)
book

Unnamed: 0,Index,User_ID,Book_Title,Book_Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6
...,...,...,...,...
9995,9996,162121,American Fried: Adventures of a Happy Eater.,7
9996,9997,162121,Cannibal In Manhattan,9
9997,9998,162121,How to Flirt: A Practical Guide,7
9998,9999,162121,Twilight,8


In [5]:
book.isnull().sum()

Index          0
User_ID        0
Book_Title     0
Book_Rating    0
dtype: int64

In [6]:
combine_features = book['User_ID'].astype(str) + ' ' + book['Book_Title'].astype(str) + ' ' + book['Book_Rating'].astype(str)

In [7]:
combine_features

0                            276726 Classical Mythology 5
1                                   276729 Clara Callan 3
2                           276729 Decision in Normandy 6
3       276736 Flu: The Story of the Great Influenza P...
4                         276737 The Mummies of Urumchi 6
                              ...                        
9995    162121 American Fried: Adventures of a Happy E...
9996                       162121 Cannibal In Manhattan 9
9997             162121 How to Flirt: A Practical Guide 7
9998                                    162121 Twilight 8
9999                162129 Kids Say the Darndest Things 6
Length: 10000, dtype: object

In [8]:
vectorizer = TfidfVectorizer()

In [9]:
feature_vectors = vectorizer.fit_transform(combine_features)

In [10]:
feature_vectors

<10000x13742 sparse matrix of type '<class 'numpy.float64'>'
	with 63985 stored elements in Compressed Sparse Row format>

In [11]:
print(feature_vectors)

  (0, 9163)	0.548281100918183
  (0, 4375)	0.5913492345374032
  (0, 1046)	0.5913492345374032
  (1, 3927)	0.5855458280732825
  (1, 4365)	0.5855458280732825
  (1, 1047)	0.5605998273741687
  (2, 9369)	0.5239517750632293
  (2, 7445)	0.23028749567355614
  (2, 5051)	0.592325890408055
  (2, 1047)	0.5670910388084065
  (3, 7650)	0.1895095585188876
  (3, 4109)	0.31334066490799
  (3, 12418)	0.20028248619446423
  (3, 13169)	0.31334066490799
  (3, 6324)	0.14173737628734467
  (3, 11250)	0.23592564026848833
  (3, 2864)	0.10728609924169452
  (3, 695)	0.290519974804029
  (3, 9695)	0.31334066490799
  (3, 7496)	0.31334066490799
  (3, 6770)	0.19314896141907897
  (3, 9485)	0.1686165141327128
  (3, 12002)	0.17163665393597424
  (3, 12420)	0.2608046658141504
  (3, 6284)	0.31334066490799
  :	:
  (9995, 584)	0.4232371080898347
  (9995, 5567)	0.4876750562144335
  (9995, 6429)	0.4722779954216829
  (9995, 6951)	0.39483730072497153
  (9995, 2631)	0.33499908592765626
  (9995, 2822)	0.27035568227478113
  (9995, 9485)	

In [12]:
similarity = cosine_similarity(feature_vectors)

In [13]:
print(similarity)

[[1.         0.         0.         ... 0.         0.         0.        ]
 [0.         1.         0.31791114 ... 0.         0.         0.        ]
 [0.         0.31791114 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.33378877 0.        ]
 [0.         0.         0.         ... 0.33378877 1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


In [14]:
similarity.shape

(10000, 10000)

In [15]:
book_name = input('Enter your favourite book name : ')

Enter your favourite book name : Twilight


In [16]:
list_of_all_titles = book['Book_Title'].tolist()
print(list_of_all_titles)



In [17]:
find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)
print(find_close_match)

['Twilight', 'Flight', 'Edge of Twilight']


In [18]:
close_match = find_close_match[0]
print(close_match)

Twilight


In [19]:
index_of_the_book = book[book.Book_Title == close_match]['Index']
print(index_of_the_book)

9998    9999
Name: Index, dtype: int64


In [20]:
similarity_score = list(enumerate(similarity[index_of_the_book]))
print(similarity_score)

[(0, array([0., 0., 0., ..., 0., 0., 1.]))]


In [21]:
len(similarity_score)

1

In [22]:
sorted_similar_books = sorted(similarity_score,key = lambda x:x[1], reverse = True)
sorted_similar_books

[(0, array([0., 0., 0., ..., 0., 0., 1.]))]

In [24]:
print('Books you might like are: \n')

i = 1
for books in sorted_similar_books:
    index = books[0]
    title_from_index = book[book.index == index]['Book_Title'].values[0]
    if (i<30):
        print(i, '.',title_from_index)
        i+=1

Books you might like are: 

1 . Classical Mythology


In [25]:
book_name = input('Enter your favourite book name : ')

list_of_all_titles = book['Book_Title'].tolist()

find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_book = book[book.Book_Title == close_match]['Index'].values[0]

similarity_score = list (enumerate(similarity[index_of_the_book]))

sorted_similar_book = sorted(similarity_score, key = lambda x:x[1], reverse=True)

print('Books you might like are : \n')

i = 1

for books in sorted_similar_book:
    index=books[0]
    title_from_index = book[book.index == index]['Book_Title'].values[0]
    if (i<30):
        print(i, '.',title_from_index)
        i+=1

Enter your favourite book name : harry potter
Books you might like are : 

1 . Apocalipstick
2 . Herzsprung
3 . Embers
4 . Christmas Miracles
5 . Harem: A Novel
6 . Summer Harvest
7 . Always a Thief
8 . The Marriage Plan
9 . Once a Thief
10 . Charlotte Gray
11 . The Donovan Legacy
12 . The Night Crew
13 . Watership Down
14 . Comfortable Wife
15 . The Blue Nowhere : A Novel
16 . The Wailing Wind
17 . Les Thanatonautes
18 . Sudden Prey
19 . Come Home Forever
20 . Bad Heir Day
21 . Breakfast of Champions
22 . Fires in the Mist
23 . The  Garden of Ediacara
24 . Just Plain Cat
25 . How to Fight a Girl
26 . The Eagle and the Dove
27 . Basin and Range
28 . Return to Love (Arabesque)
29 . Valley of the Dolls: A Novel
