# CONTENT-BASED BOOK RECOMMENDATION SYSTEM

## Importing the libraries and data

In [1]:
import numpy as np
import pandas as pd
import random
import sklearn
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the book table (titles, ids, review text and description text).
df = pd.read_csv('reviewsdescriptions1000_for_recommender.csv')

In [3]:
df.columns

Index(['title', 'book_id', 'descriptiondetect', 'reviews_clean',
       'description_clean'],
      dtype='object')

In [4]:
df.head(5)

Unnamed: 0,title,book_id,descriptiondetect,reviews_clean,description_clean
0,"11 Birthdays (Willow Falls, #1)",4835838,groundhog day meets flipped in this tale of a ...,kids like old memes themes adults jeremy fink ...,groundhog day meets flipped tale girl stuck bi...
1,14 Cows for America,6536509,"in june of 2002, a ceremony begins in a villag...",original circumstances individuals book based ...,june ceremony begins village western kenya hun...
2,365 Days of Wonder: Mr. Browne's Book of Precepts,20758093,in the #1 new york timesbestselling novel wond...,cute book full inspirational quotesquick read ...,new york timesbestselling novel wonderreaders ...
3,A Bad Case of Stripes,474858,"""what we have here is a bad case of stripes. o...",book tc class picked book enjoyed childbook vi...,bad case stripes one worst ive ever seencamill...
4,A Ball for Daisy,9703979,winner of the 2012 randolph caldecott medal\nt...,wordless picture book depicts story dog reciev...,winner randolph caldecott medalthis new york t...


## Creating the Content-Based Book Recommender System

### Create a list of columns with the important features

In [5]:
# Names of the text columns that carry the content signal.
# NOTE(review): no later cell visible here reads this list — confirm whether
# it is meant to be passed to combine_features.
columns = [
    'reviews_clean',
    'description_clean',
]

### Create a function to combine these important features

In [6]:
def combine_features(df, columns=None):
    """Join the text of several columns into one string per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the text columns.
    columns : sequence of str, optional
        Columns to join, in order. Defaults to
        ``['reviews_clean', 'description_clean']``.

    Returns
    -------
    list of str
        One string per row of ``df`` (in row order), made of the column
        values separated by a single space.

    Raises
    ------
    KeyError
        If one of ``columns`` is not a column of ``df``.
    """
    if columns is None:
        columns = ['reviews_clean', 'description_clean']

    # Iterate over the column values positionally so the result does not
    # depend on ``df`` having a default 0..n-1 index (e.g. after filtering
    # rows). Missing values become '' so that a single absent review or
    # description cannot break the string concatenation.
    per_column = [
        ['' if pd.isna(value) else str(value) for value in df[name]]
        for name in columns
    ]
    return [' '.join(parts) for parts in zip(*per_column)]

### Create a new column with the combined features

In [7]:
# Store the per-row combined text as a new column; this modifies df in place.
df['combined_features'] = combine_features(df)

In [8]:
df.head(2)

Unnamed: 0,title,book_id,descriptiondetect,reviews_clean,description_clean,combined_features
0,"11 Birthdays (Willow Falls, #1)",4835838,groundhog day meets flipped in this tale of a ...,kids like old memes themes adults jeremy fink ...,groundhog day meets flipped tale girl stuck bi...,kids like old memes themes adults jeremy fink ...
1,14 Cows for America,6536509,"in june of 2002, a ceremony begins in a villag...",original circumstances individuals book based ...,june ceremony begins village western kenya hun...,original circumstances individuals book based ...


### Convert the text from the new column to a matrix of word counts

In [9]:
# Word-count vectorizer that drops scikit-learn's built-in English stop words.
count = CountVectorizer(stop_words='english')

In [10]:
# Learn the vocabulary from the combined text and build the sparse
# word-count matrix (one row per row of df).
count_matrix = count.fit_transform(df['combined_features'])

In [11]:
count_matrix

<1000x226090 sparse matrix of type '<class 'numpy.int64'>'
	with 1686230 stored elements in Compressed Sparse Row format>

### Get the cosine similarity matrix from the word count matrix

In [12]:
%%time
cs=cosine_similarity(count_matrix,count_matrix)
cs

Wall time: 1.59 s


array([[1.        , 0.36147289, 0.52728334, ..., 0.40151614, 0.42248363,
        0.45849059],
       [0.36147289, 1.        , 0.3836145 , ..., 0.34561999, 0.28700194,
        0.38106167],
       [0.52728334, 0.3836145 , 1.        , ..., 0.41083463, 0.45359523,
        0.57301634],
       ...,
       [0.40151614, 0.34561999, 0.41083463, ..., 1.        , 0.34704922,
        0.42681687],
       [0.42248363, 0.28700194, 0.45359523, ..., 0.34704922, 1.        ,
        0.46290224],
       [0.45849059, 0.38106167, 0.57301634, ..., 0.42681687, 0.46290224,
        1.        ]])

### Define the function to get recommendations from the similarity scores

In [13]:
# Lookup table: book_id -> row label of that book in df.
indices = pd.Series(data=df.index, index=df['book_id'])

In [14]:
def get_recommendations(book_id, cosine_sim=None, top_n=5):
    """Print the titles of the books most similar to ``book_id``.

    Parameters
    ----------
    book_id : int
        Identifier of the query book; must appear in ``df['book_id']``.
    cosine_sim : array-like, optional
        Square similarity matrix whose row/column order matches the rows of
        ``df``. Defaults to the notebook-level ``cs``, looked up at call time
        so that a recomputed ``cs`` is picked up without redefining this
        function.
    top_n : int, optional
        Number of titles to print (default 5).

    Returns
    -------
    None
        The recommendations are printed, not returned.

    Raises
    ------
    KeyError
        If ``book_id`` is not present in the dataset.
    ValueError
        If ``top_n`` is negative.
    """
    if cosine_sim is None:
        cosine_sim = cs
    if top_n < 0:
        raise ValueError("top_n must be non-negative")
    if book_id not in indices.index:
        raise KeyError("book_id {!r} is not in the dataset".format(book_id))

    idx = indices[book_id]
    # A book_id that occurs more than once yields a Series; use its first row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    # NOTE(review): idx is a row label taken from df.index (see how `indices`
    # is built) but is used below as a row position; this assumes df has the
    # default 0..n-1 index — confirm.
    idx = int(idx)

    # Similarity of every book to the query book.
    sim_row = cosine_sim[idx]

    # Rank all *other* books by similarity, highest first. The query book is
    # removed explicitly instead of assuming it sorts into first place: when
    # another book ties with it at the top, dropping "position 0" would discard
    # the wrong entry and recommend the query book to itself.
    candidates = [pos for pos in range(len(sim_row)) if pos != idx]
    candidates.sort(key=lambda pos: sim_row[pos], reverse=True)
    book_indices = candidates[:top_n]

    # Read the title straight from the row rather than rebuilding a
    # book_id -> title dictionary for the whole frame on every call.
    book_title = df['title'].iloc[idx]

    # Report the most similar books.
    print("Because you read", book_title, "you would love:")
    print('********************************************************')
    print(list(df['title'].iloc[book_indices]))

### Getting some book recommendations

In [15]:
df.head()

Unnamed: 0,title,book_id,descriptiondetect,reviews_clean,description_clean,combined_features
0,"11 Birthdays (Willow Falls, #1)",4835838,groundhog day meets flipped in this tale of a ...,kids like old memes themes adults jeremy fink ...,groundhog day meets flipped tale girl stuck bi...,kids like old memes themes adults jeremy fink ...
1,14 Cows for America,6536509,"in june of 2002, a ceremony begins in a villag...",original circumstances individuals book based ...,june ceremony begins village western kenya hun...,original circumstances individuals book based ...
2,365 Days of Wonder: Mr. Browne's Book of Precepts,20758093,in the #1 new york timesbestselling novel wond...,cute book full inspirational quotesquick read ...,new york timesbestselling novel wonderreaders ...,cute book full inspirational quotesquick read ...
3,A Bad Case of Stripes,474858,"""what we have here is a bad case of stripes. o...",book tc class picked book enjoyed childbook vi...,bad case stripes one worst ive ever seencamill...,book tc class picked book enjoyed childbook vi...
4,A Ball for Daisy,9703979,winner of the 2012 randolph caldecott medal\nt...,wordless picture book depicts story dog reciev...,winner randolph caldecott medalthis new york t...,wordless picture book depicts story dog reciev...


In [16]:
get_recommendations(6310, cs)

Because you read Charlie and the Chocolate Factory (Charlie Bucket, #1) you would love:
********************************************************
['Charlie and the Great Glass Elevator (Charlie Bucket, #2)', 'The Witches', 'The BFG', 'The Phantom Tollbooth', 'James and the Giant Peach']


In [17]:
get_recommendations(275325, cs)

Because you read The Butter Battle Book you would love:
********************************************************
['The Lorax', "Oh, The Places You'll Go!", 'What Pet Should I Get?', 'Hop On Pop', 'Green Eggs and Ham']


In [18]:
get_recommendations(19543, cs)

Because you read Where the Wild Things Are you would love:
********************************************************
['Goodnight Moon', 'The Phantom Tollbooth', 'The Book with No Pictures', 'Where the Red Fern Grows', 'The Last of the Really Great Whangdoodles']


In [19]:
get_recommendations(3636, cs)

Because you read The Giver (The Giver, #1) you would love:
********************************************************
['Bridge to Terabithia', 'Wonder (Wonder #1)', 'The Phantom Tollbooth', 'Where the Red Fern Grows', 'The Mysterious Benedict Society (The Mysterious Benedict Society, #1)']


In [20]:
# Show the row for this title (its book_id is used in the next call).
df.loc[df['title'] == 'Mix It Up!']

Unnamed: 0,title,book_id,descriptiondetect,reviews_clean,description_clean,combined_features
429,Mix It Up!,20549446,accept herve tullet's irresistible invitation ...,better recent onesread prek children loved pre...,accept herve tullets irresistible invitation m...,better recent onesread prek children loved pre...


In [21]:
get_recommendations(20549446, cs)

Because you read Mix It Up! you would love:
********************************************************
['Press Here', 'Mouse Paint', 'Open This Little Book', 'This Book Just Ate My Dog!', 'One']


In [22]:
# Look up the book_id for this title.
df.loc[df['title'] == 'I Want My Hat Back', 'book_id']

315    11233988
Name: book_id, dtype: int64

In [23]:
get_recommendations(11233988, cs)

Because you read I Want My Hat Back you would love:
********************************************************
['This is Not My Hat', "We're Going on a Bear Hunt", 'Go, Dog. Go!', 'Bear Snores On', 'The Cat in the Hat']


In [24]:
get_recommendations(6689, cs)

Because you read James and the Giant Peach you would love:
********************************************************
['The BFG', 'The Witches', 'Charlie and the Chocolate Factory (Charlie Bucket, #1)', 'The Phantom Tollbooth', 'The Giraffe and the Pelly and Me']


In [26]:
get_recommendations(19321, cs)

Because you read The Tale of Peter Rabbit you would love:
********************************************************
['Peter Nimble and His Fantastic Eyes (Peter Nimble, #1)', 'Peter and the Starcatchers (Peter and the Starcatchers, #1)', 'The Railway Children', 'The Velveteen Rabbit', 'The Secret Garden']


In [27]:
get_recommendations(28507895, cs)

Because you read Ada Twist, Scientist you would love:
********************************************************
['The Field Guide (The Spiderwick Chronicles, #1)', 'All the World', 'The Mysterious Benedict Society (The Mysterious Benedict Society, #1)', 'Iggy Peck, Architect', 'I Like Myself!']
