### Import Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the three raw tables in the order books, users, ratings.
# low_memory=False makes pandas parse each file in one pass instead of in chunks.
books, users, ratings = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [3]:
books.shape

(271360, 8)

In [4]:
users.shape

(278858, 3)

In [5]:
ratings.shape

(1149780, 3)

In [6]:
import pandas as pd

def rename_columns(dataframe, column_mapping):
    """Return a new DataFrame whose columns are renamed per ``column_mapping``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns should be renamed; it is not modified.
    column_mapping : dict
        Maps existing column names to their new names.

    Returns
    -------
    pandas.DataFrame
        The result of ``dataframe.rename(columns=column_mapping)``.

    Raises
    ------
    ValueError
        If ``dataframe`` is not a DataFrame or ``column_mapping`` is not a dict.
    """
    # Validate in a fixed order so the DataFrame check always reports first.
    expectations = (
        (dataframe, pd.DataFrame, "Input must be a pandas DataFrame."),
        (column_mapping, dict, "Column mapping must be a dictionary."),
    )
    for value, required_type, message in expectations:
        if not isinstance(value, required_type):
            raise ValueError(message)

    return dataframe.rename(columns=column_mapping)

In [7]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [8]:
# Books table: map each raw hyphenated header to its UPPER_SNAKE_CASE name.
column_mapping = {
    raw_name: raw_name.upper().replace('-', '_')
    for raw_name in (
        'ISBN',
        'Book-Title',
        'Book-Author',
        'Year-Of-Publication',
        'Publisher',
        'Image-URL-S',
        'Image-URL-M',
        'Image-URL-L',
    )
}

In [9]:
books = rename_columns(books, column_mapping)

In [10]:
users.columns

Index(['User-ID', 'Location', 'Age'], dtype='object')

In [11]:
# Users table: map each raw hyphenated header to its UPPER_SNAKE_CASE name.
column_mapping = {
    raw_name: raw_name.upper().replace('-', '_')
    for raw_name in ('User-ID', 'Location', 'Age')
}

In [12]:
users = rename_columns(users, column_mapping)

In [13]:
ratings.columns

Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

In [14]:
# Ratings table: map each raw hyphenated header to its UPPER_SNAKE_CASE name.
column_mapping = {
    raw_name: raw_name.upper().replace('-', '_')
    for raw_name in ('User-ID', 'ISBN', 'Book-Rating')
}

In [15]:
ratings = rename_columns(ratings, column_mapping)

In [16]:
books.isnull().sum().to_frame().rename(columns={0:"Total No. of Missing Values"})

Unnamed: 0,Total No. of Missing Values
ISBN,0
BOOK_TITLE,0
BOOK_AUTHOR,2
YEAR_OF_PUBLICATION,0
PUBLISHER,2
IMAGE_URL_S,0
IMAGE_URL_M,0
IMAGE_URL_L,3


In [17]:
users.isnull().sum().to_frame().rename(columns={0:"Total No. of Missing Values"})

Unnamed: 0,Total No. of Missing Values
USER_ID,0
LOCATION,0
AGE,110762


In [18]:
ratings.isnull().sum().to_frame().rename(columns={0:"Total No. of Missing Values"})

Unnamed: 0,Total No. of Missing Values
USER_ID,0
ISBN,0
BOOK_RATING,0


In [19]:
print("Duplicate Values =",books.duplicated().sum())

Duplicate Values = 0


In [20]:
print("Duplicate Values =",users.duplicated().sum())

Duplicate Values = 0


In [21]:
print("Duplicate Values =",ratings.duplicated().sum())

Duplicate Values = 0


In [22]:
books.sample(1)

Unnamed: 0,ISBN,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
69525,60168307,Sacred Clowns,Tony Hillerman,1994,Harpercollins,https://images.amazon.com/images/P/0060168307....,https://images.amazon.com/images/P/0060168307....,https://images.amazon.com/images/P/0060168307....


In [23]:
users.sample()

Unnamed: 0,USER_ID,LOCATION,AGE
277472,277473,"dunbar, nebraska, usa",43.0


In [24]:
ratings.sample()

Unnamed: 0,USER_ID,ISBN,BOOK_RATING
644509,155819,553200550,0


In [25]:
# Attach book metadata to every rating. The merge uses pandas' default inner
# join, so ratings whose ISBN has no row in `books` are dropped.
rating_with_name = pd.merge(ratings, books, on='ISBN')
rating_with_name.sample()

Unnamed: 0,USER_ID,ISBN,BOOK_RATING,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
190988,267418,3442445590,0,Spice Boys. Die VerschwÃ?Â¶rung der MÃ?Â¤nner.,Sparkle Hayter,1999,Goldmann,https://images.amazon.com/images/P/3442445590....,https://images.amazon.com/images/P/3442445590....,https://images.amazon.com/images/P/3442445590....


### Popularity-Based Recommendation System

In [26]:
# One row per BOOK_TITLE with the number of rating rows it received.
# NOTE(review): rows with BOOK_RATING == 0 are counted too; confirm that is intended.
num_rating_dataframe = (
    rating_with_name
    .groupby('BOOK_TITLE')
    .size()
    .reset_index(name='NUM_OF_RATING')
    .sort_values(by='BOOK_TITLE', ascending=True)
)
num_rating_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING
0,Earth Prayers From around the World: 365 Pray...,10
1,Final Fantasy Anthology: Official Strategy Gu...,4
2,Flight of Fancy: American Heiresses (Zebra Ba...,2
3,Little Comic Shop of Horrors (Give Yourself G...,4
4,Mystery Mile,2
...,...,...
68750,Ã?Â?lpiraten.,2
68751,Ã?Â?rger mit Produkt X. Roman.,4
68752,Ã?Â?sterlich leben.,1
68753,Ã?Â?stlich der Berge.,3


In [27]:
# One row per BOOK_TITLE with the mean of its BOOK_RATING values.
# NOTE(review): ratings of 0 are part of the mean; if 0 stands for "no explicit
# rating" in this dataset the averages are biased downward. Confirm.
avg_rating_dataframe = (
    rating_with_name
    .groupby('BOOK_TITLE')
    .agg(AVG_RATING=('BOOK_RATING', 'mean'))
    .reset_index()
)
avg_rating_dataframe

Unnamed: 0,BOOK_TITLE,AVG_RATING
0,Earth Prayers From around the World: 365 Pray...,5.000000
1,Final Fantasy Anthology: Official Strategy Gu...,5.000000
2,Flight of Fancy: American Heiresses (Zebra Ba...,4.000000
3,Little Comic Shop of Horrors (Give Yourself G...,1.250000
4,Mystery Mile,0.000000
...,...,...
68750,Ã?Â?lpiraten.,0.000000
68751,Ã?Â?rger mit Produkt X. Roman.,5.250000
68752,Ã?Â?sterlich leben.,7.000000
68753,Ã?Â?stlich der Berge.,2.666667


In [28]:
# Put the rating count and the mean rating of each title side by side.
popular_dataframe = pd.merge(
    num_rating_dataframe,
    avg_rating_dataframe,
    on='BOOK_TITLE',
)
popular_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING,AVG_RATING
0,Earth Prayers From around the World: 365 Pray...,10,5.000000
1,Final Fantasy Anthology: Official Strategy Gu...,4,5.000000
2,Flight of Fancy: American Heiresses (Zebra Ba...,2,4.000000
3,Little Comic Shop of Horrors (Give Yourself G...,4,1.250000
4,Mystery Mile,2,0.000000
...,...,...,...
68750,Ã?Â?lpiraten.,2,0.000000
68751,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
68752,Ã?Â?sterlich leben.,1,7.000000
68753,Ã?Â?stlich der Berge.,3,2.666667


In [29]:
# Keep titles with more than 250 ratings, then take the 50 with the highest
# mean rating.
# The frame is rebuilt from the two aggregate tables instead of from
# `popular_dataframe` itself. That keeps this cell re-runnable: a later cell
# rebinds `popular_dataframe` to a frame without NUM_OF_RATING, and filtering
# that frame would raise a KeyError.
popular_dataframe = (
    num_rating_dataframe
    .merge(avg_rating_dataframe, on='BOOK_TITLE')
    .loc[lambda frame: frame['NUM_OF_RATING'] > 250]
    .sort_values('AVG_RATING', ascending=False)
    .head(50)
)
popular_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING,AVG_RATING
24876,Harry Potter and the Order of the Phoenix (Boo...,335,5.585075
24884,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
612,"A Child Called \It\"": One Child's Courage to S...",265,4.086792
7252,Bridget Jones's Diary,270,3.751852
58321,The No. 1 Ladies' Detective Agency (Today Show...,431,3.417633
26518,How to Be Good,304,3.305921
49920,Snow Falling on Cedars,627,3.236045
1231,A Painted House,833,3.222089
53948,The Bridges of Madison County,371,3.097035
55697,The Firm,535,3.063551


In [30]:
# Add author, publisher and cover image to each popular title. A title can match
# several rows of `books`, so only the first match per title is kept.
popular_dataframe = (
    popular_dataframe
    .merge(books, on='BOOK_TITLE')
    .drop_duplicates(subset='BOOK_TITLE')
    .loc[:, ['BOOK_TITLE', 'AVG_RATING', 'BOOK_AUTHOR', 'PUBLISHER', 'IMAGE_URL_M']]
)
popular_dataframe

Unnamed: 0,BOOK_TITLE,AVG_RATING,BOOK_AUTHOR,PUBLISHER,IMAGE_URL_M
0,Harry Potter and the Order of the Phoenix (Boo...,5.585075,J. K. Rowling,Scholastic,https://images.amazon.com/images/P/043935806X....
4,Harry Potter and the Sorcerer's Stone (Harry P...,4.895652,J. K. Rowling,Arthur A. Levine Books,https://images.amazon.com/images/P/059035342X....
6,"A Child Called \It\"": One Child's Courage to S...",4.086792,Dave Pelzer,Health Communications,https://images.amazon.com/images/P/1558743669....
8,Bridget Jones's Diary,3.751852,Helen Fielding,Picador (UK),https://images.amazon.com/images/P/0330332775....
14,The No. 1 Ladies' Detective Agency (Today Show...,3.417633,Alexander McCall Smith,Anchor,https://images.amazon.com/images/P/1400034779....
15,How to Be Good,3.305921,Nick Hornby,Riverhead Books,https://images.amazon.com/images/P/1573229326....
18,Snow Falling on Cedars,3.236045,David Guterson,Harcourt,https://images.amazon.com/images/P/0151001006....
25,A Painted House,3.222089,John Grisham,Dell Publishing Company,https://images.amazon.com/images/P/044023722X....
30,The Bridges of Madison County,3.097035,Robert James Waller,Warner Books,https://images.amazon.com/images/P/044651652X....
35,The Firm,3.063551,John Grisham,Doubleday Books,https://images.amazon.com/images/P/0385416342....


**Find all users who rated more than 200 books**

In [31]:
# Boolean Series indexed by USER_ID: True where the user has strictly more than
# 200 non-null BOOK_RATING rows in the merged table.
rated_atleast_200_books = (
    rating_with_name.groupby('USER_ID')['BOOK_RATING'].count() > 200
)

**Get the index (user IDs) of all users who rated more than 200 books**

In [32]:
# USER_ID labels of the users whose flag in the boolean Series is True.
book_worm = rated_atleast_200_books.index[rated_atleast_200_books.to_numpy()]

**Create the dataframe of ratings from the users who rated more than 200 books**

In [33]:
# Keep only the rating rows written by the selected heavy raters.
filtered_rating = rating_with_name.loc[
    lambda frame: frame['USER_ID'].isin(book_worm)
]
filtered_rating

Unnamed: 0,USER_ID,ISBN,BOOK_RATING,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
922,278418,002043300X,0,Big Snow,Berta Hader,1972,MacMillan Publishing Company.,https://images.amazon.com/images/P/002043300X....,https://images.amazon.com/images/P/002043300X....,https://images.amazon.com/images/P/002043300X....
923,278418,003021436X,0,Yakety Yak Yak Yak (Sweet Pickles Series),Richard Hefter,1977,Henry Holt &amp; Co (J),https://images.amazon.com/images/P/003021436X....,https://images.amazon.com/images/P/003021436X....,https://images.amazon.com/images/P/003021436X....
924,278418,006015957X,0,Roseanne: My Life As a Woman,Roseanne,1989,Harpercollins,https://images.amazon.com/images/P/006015957X....,https://images.amazon.com/images/P/006015957X....,https://images.amazon.com/images/P/006015957X....
925,278418,006016848X,0,"Men Are from Mars, Women Are from Venus: A Pra...",John Gray,1992,HarperCollins Publishers,https://images.amazon.com/images/P/006016848X....,https://images.amazon.com/images/P/006016848X....,https://images.amazon.com/images/P/006016848X....
926,278418,006020883X,0,The Golly Sisters Go West (An I Can Read Book),Betsy Cromer Byars,1985,Harper &amp; Row,https://images.amazon.com/images/P/006020883X....,https://images.amazon.com/images/P/006020883X....,https://images.amazon.com/images/P/006020883X....
...,...,...,...,...,...,...,...,...,...,...
196373,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),https://images.amazon.com/images/P/1931868123....,https://images.amazon.com/images/P/1931868123....,https://images.amazon.com/images/P/1931868123....
196374,275970,3411086211,10,Die Biene.,Sybil GrÃ?Â¤fin SchÃ?Â¶nfeldt,1993,"Bibliographisches Institut, Mannheim",https://images.amazon.com/images/P/3411086211....,https://images.amazon.com/images/P/3411086211....,https://images.amazon.com/images/P/3411086211....
196375,275970,3829021860,0,The Penis Book,Joseph Cohen,1999,Konemann,https://images.amazon.com/images/P/3829021860....,https://images.amazon.com/images/P/3829021860....,https://images.amazon.com/images/P/3829021860....
196376,275970,4770019572,0,Musashi,Eiji Yoshikawa,1995,Kodansha International (JPN),https://images.amazon.com/images/P/4770019572....,https://images.amazon.com/images/P/4770019572....,https://images.amazon.com/images/P/4770019572....


**Find all books that have at least 50 ratings from those users**

In [34]:
# Flag titles with 50 or more ratings among the filtered users, then collect
# the titles whose flag is True.
book_rated_50 = filtered_rating.groupby('BOOK_TITLE')['BOOK_RATING'].count() >= 50
famous_books = book_rated_50.index[book_rated_50.to_numpy()]

In [35]:
# Restrict the heavy raters' ratings to the frequently rated titles.
final_rating = filtered_rating.loc[
    lambda frame: frame['BOOK_TITLE'].isin(famous_books)
]

**Pivot the table**

In [36]:
# Book x user matrix: one row per BOOK_TITLE, one column per USER_ID. Cells hold
# the mean BOOK_RATING (pivot_table's default aggregation) and NaN where the
# user has no rating for that title.
pt = (
    final_rating
    .drop_duplicates()
    .pivot_table(index='BOOK_TITLE', columns='USER_ID', values='BOOK_RATING')
)

In [37]:
# Replace missing user/book cells with 0 so the matrix is fully numeric.
# Reassigning instead of using inplace=True keeps the step an explicit rebinding
# of `pt` and avoids mutating a frame an earlier cell produced.
pt = pt.fillna(0)

In [None]:
pt

In [39]:
from sklearn.metrics.pairwise import cosine_similarity

In [40]:
# Pairwise cosine similarity between the rows of `pt` (one row per BOOK_TITLE),
# so entry [i, j] compares the user-rating vectors of books i and j.
similarity_score = cosine_similarity(pt)

ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 1 is required by check_pairwise_arrays.

In [350]:
similarity_score.shape

(706, 706)

### Book Recommendation System

In [351]:
def book_recommend(book_name, top_n=10):
    """Print the titles of the books most similar to ``book_name``.

    The title is looked up in the index of the notebook-level pivot table
    ``pt``; the matching row of ``similarity_score`` ranks every other book.

    Parameters
    ----------
    book_name : str
        Title to look up; must equal an entry of ``pt.index`` exactly.
    top_n : int, default 10
        Maximum number of titles to print. Values below 0 are treated as 0.

    Returns
    -------
    None
        The titles are printed, one per line, most similar first.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"Book {book_name!r} not found in the pivot table index.")
    index = matches[0]

    # Exclude the queried book by position instead of assuming it sorts first:
    # another book with a tied score and a smaller position would otherwise
    # push the queried title into its own recommendations.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_score[index])
        if position != index
    ]
    similar_item = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:max(top_n, 0)]

    for position, _ in similar_item:
        print(pt.index[position])

In [352]:
book_recommend('The Lovely Bones: A Novel')

Where the Heart Is (Oprah's Book Club (Paperback))
Good in Bed
The Book of Ruth (Oprah's Book Club (Paperback))
Life of Pi
Lucky : A Memoir
The Pilot's Wife : A Novel
Drowning Ruth (Oprah's Book Club)
The Nanny Diaries: A Novel
Nights in Rodanthe
Empire Falls


In [353]:
book_recommend('Harry Potter and the Chamber of Secrets (Book 2)')

Harry Potter and the Prisoner of Azkaban (Book 3)
Harry Potter and the Goblet of Fire (Book 4)
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))
Harry Potter and the Sorcerer's Stone (Book 1)
Harry Potter and the Order of the Phoenix (Book 5)
Charlotte's Web (Trophy Newbery)
The Fellowship of the Ring (The Lord of the Rings, Part 1)
The Witness
The Firm
The Secret Garden


In [354]:
book_recommend('The Witness')

Secrets
Exclusive
Message from Nam
Kaleidoscope
Remember Me
The Long Road Home
Anne of Avonlea (Anne of Green Gables Novels (Paperback))
Montana Sky
Unspeakable
Fine Things


### Dump the required files

In [355]:
import pickle

In [356]:
# Persist the popular-books table. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it to garbage collection.
with open('popular_50.pkl', 'wb') as pickle_file:
    pickle.dump(popular_dataframe, pickle_file)

In [357]:
# Persist the book x user pivot table. The context manager closes the file
# handle as soon as the dump finishes instead of leaving it to garbage collection.
with open('pt.pkl', 'wb') as pickle_file:
    pickle.dump(pt, pickle_file)

In [358]:
# Persist the similarity matrix. The context manager closes the file handle as
# soon as the dump finishes instead of leaving it to garbage collection.
with open('similarity_score.pkl', 'wb') as pickle_file:
    pickle.dump(similarity_score, pickle_file)

In [359]:
# Persist the books table. The context manager closes the file handle as soon
# as the dump finishes instead of leaving it to garbage collection.
with open('books.pkl', 'wb') as pickle_file:
    pickle.dump(books, pickle_file)

### Recommendation function for Streamlit

In [360]:
def book_recommend_web(book_name, top_n=4):
    """Return the books most similar to ``book_name`` as a DataFrame.

    The title is looked up in the index of the notebook-level pivot table
    ``pt``; the matching row of ``similarity_score`` ranks every other book,
    and the display fields are taken from the notebook-level ``books`` table.

    Parameters
    ----------
    book_name : str
        Title to look up; must equal an entry of ``pt.index`` exactly.
    top_n : int, default 4
        Maximum number of recommendations. Values below 0 are treated as 0.

    Returns
    -------
    pandas.DataFrame
        Columns ``BOOK_TITLE``, ``BOOK_AUTHOR`` and ``IMAGE_URL_M``, one row
        per recommendation, most similar first.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    """
    output_columns = ["BOOK_TITLE", "BOOK_AUTHOR", "IMAGE_URL_M"]

    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"Book {book_name!r} not found in the pivot table index.")
    index = matches[0]

    # Exclude the queried book by position instead of assuming it sorts first:
    # another book with a tied score and a smaller position would otherwise
    # push the queried title into its own recommendations.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_score[index])
        if position != index
    ]
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:max(top_n, 0)]

    data = []
    for position, _ in similar_items:
        # Several rows of `books` can share a title; deduplicate once and reuse
        # the result for all three output fields.
        first_match = books[books['BOOK_TITLE'] == pt.index[position]].drop_duplicates('BOOK_TITLE')
        item = []
        for column in output_columns:
            item.extend(list(first_match[column].values))
        data.append(item)

    return pd.DataFrame(data, columns=output_columns)

In [361]:
book_recommend_web('The Notebook')

Unnamed: 0,BOOK_TITLE,BOOK_AUTHOR,IMAGE_URL_M
0,A Walk to Remember,Nicholas Sparks,http://images.amazon.com/images/P/0446608955.0...
1,The Rescue,Nicholas Sparks,http://images.amazon.com/images/P/0446610399.0...
2,One Door Away from Heaven,Dean R. Koontz,http://images.amazon.com/images/P/0553582755.0...
3,Toxin,Robin Cook,http://images.amazon.com/images/P/0425166619.0...
