In [258]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import pickle

In [259]:
books = pd.read_csv('books.csv')

In [260]:
books.head()

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
0,9780002005883,2005883,Gilead,,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0
1,9780002261982,2261987,Spider's Web,A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0
2,9780006163831,6163831,The One Tree,,Stephen R. Donaldson,American fiction,http://books.google.com/books/content?id=OmQaw...,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97,479.0,172.0
3,9780006178736,6178731,Rage of angels,,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0
4,9780006280897,6280897,The Four Loves,,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0


In [261]:
books['tags'] = books['description']+books['categories']

In [262]:
books['num_pages'] = books['num_pages'].fillna(0).astype(int).astype(str).replace(0, "Unknown")
books['published_year']=books['published_year'].fillna(0).astype(int).astype(str).replace(0, "Unknown")
books['authors'] = books['authors'].fillna('Unknown')

In [263]:
cv=CountVectorizer(max_features=10000, stop_words='english')

In [264]:
cv

In [265]:
vector=cv.fit_transform(books['tags'].values.astype('U')).toarray()

In [266]:
similarity=cosine_similarity(vector)

In [267]:
similarity

array([[1.        , 0.00709952, 0.02383656, ..., 0.        , 0.00734025,
        0.        ],
       [0.00709952, 1.        , 0.02707652, ..., 0.        , 0.00833797,
        0.02707652],
       [0.02383656, 0.02707652, 1.        , ..., 0.        , 0.02799463,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.00734025, 0.00833797, 0.02799463, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.02707652, 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [268]:
def recommand(book):
    index=books[books['title']==book].index[0]
    distance = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda vector:vector[1])
    for i in distance[0:5]:
        print(books.iloc[i[0]].title)

In [269]:
recommand("Survivor")

Survivor
Geek Love
Farmer Giles of Ham
Crime Novels
Assassin's Apprentice


In [270]:
pickle.dump(similarity, open('similarity.pkl', 'wb'))
pickle.dump(books, open('books.pkl', 'wb'))

In [271]:
books[books["title"]=="World Studies: Eastern Hemisphere"]

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,tags
633,9780131816596,131816594,World Studies: Eastern Hemisphere,,Heidi Hayes Jacobs;Pearson Prentice Hall;Kate ...,Juvenile Nonfiction,http://books.google.com/books/content?id=3EgFG...,"Introduces the geography, history, and culture...",2004,2.0,892,2.0,"Introduces the geography, history, and culture..."


In [272]:
books[books['title']=='Zondervan Handbook to the Bible']['authors'].values

array(['David Alexander;Pat Alexander'], dtype=object)

In [273]:
c=books[books['average_rating'] <= 2]['average_rating'].sort_values(ascending=False).index
c

Index([ 633, 5262, 5587, 5835, 1635, 1750, 4088, 4479, 5213, 6071, 6668, 6739,
       6788, 6808],
      dtype='int64')

In [274]:
def recommend_by_rating(rate, n):
    # Filter the books by rating
    recommend_books=[]
    Index = books[books['average_rating'] <= float(rate)]['average_rating'].sort_values(ascending=False).index
    cover=[]
    for i in range(n):
        recommend_books.append(books.iloc[Index[i]]['title'])
        cover.append(books.iloc[Index[i]]['thumbnail'])
    return recommend_books,cover

In [275]:

c=books.iloc[633].title
c

'World Studies: Eastern Hemisphere'

In [276]:
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6810 entries, 0 to 6809
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   isbn13          6810 non-null   int64  
 1   isbn10          6810 non-null   object 
 2   title           6810 non-null   object 
 3   subtitle        2381 non-null   object 
 4   authors         6810 non-null   object 
 5   categories      6711 non-null   object 
 6   thumbnail       6481 non-null   object 
 7   description     6548 non-null   object 
 8   published_year  6810 non-null   object 
 9   average_rating  6767 non-null   float64
 10  num_pages       6810 non-null   object 
 11  ratings_count   6767 non-null   float64
 12  tags            6511 non-null   object 
dtypes: float64(2), int64(1), object(10)
memory usage: 691.8+ KB
