## Book_Recommendation_Project

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv('Book3.csv')
data.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [3]:
data.drop(columns=['Image-URL-S', 'Image-URL-M','Image-URL-L'], axis = 1, inplace = True)

In [4]:
data.sample(5)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
115,446612618,A Kiss Remembered,Sandra Brown,2003,Warner Books
3053,671881981,ANYTHING FOR LOVE (NANCY DREW FILES 107) : ANY...,Carolyn Keene,1995,Simon Pulse
1767,394718968,Plays by and about Women,VICTORIA SULLIVAN,1974,Vintage
4913,696204800,Better Homes and Gardens 501 Quilt Blocks: A T...,Better Homes and Gardens,1995,Meredith Books
5004,312264372,"Robert Ludlum's the Hades Factor (Ludlum, Robe...",Robert Ludlum,2000,St. Martin's Press


In [5]:
data.shape

(7980, 5)

In [6]:
# data cleaning

In [7]:
data.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            0
Year-Of-Publication    0
Publisher              0
dtype: int64

In [8]:
data.duplicated().sum()

0

In [9]:
data.shape

(7980, 5)

In [10]:
data.duplicated().sum()

0

In [11]:
data['Book-Title'].duplicated().sum()

308

In [12]:
data.drop_duplicates(subset='Book-Title', keep='first', inplace=True)

In [13]:
book_data = ['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']

### TF-IDF vectorization technique

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer

vector = TfidfVectorizer(stop_words='english')

matrix = vector.fit_transform(data['Book-Title'])

In [15]:
matrix

<7672x9714 sparse matrix of type '<class 'numpy.float64'>'
	with 28853 stored elements in Compressed Sparse Row format>

### Cosine Similarity

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

similarity = cosine_similarity(matrix)

In [17]:
similarity

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [18]:
def book_recommend(book_data, top_n=10):
    
    index = data[data['Book-Title'] == book_data].index[0]
    
    distance = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
    
    book_indices = [i[0] for i in distance[1:top_n+1]]
    
    return data['Book-Title'].iloc[book_indices]

In [19]:
book_data = book_recommend('The Day After Tomorrow')
book_data

2606                                      If I Should Die
5968                                     And Then You Die
1759                                          Sofies Welt
2105                    Mama Day (Vintage Contemporaries)
3822                                  Generation Warriors
1170    Sofies Welt. Roman Ã?Â¼ber die Geschichte der ...
1772    Theos Reise. Roman Ã?Â¼ber die Religionen der ...
2276                       Die Spinne und das MÃ?Â¤dchen.
7716                                     A Man to Die for
4587    Die externe Rechnungslegung der Kreditinstitut...
Name: Book-Title, dtype: object