In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import difflib 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df=pd.read_csv('books.csv')
df.head()

Unnamed: 0,Title,Author,Genre,Height,Publisher
0,Fundamentals of Wavelets,"Goswami, Jaideva",signal_processing,228,Wiley
1,Data Smart,"Foreman, John",data_science,235,Wiley
2,God Created the Integers,"Hawking, Stephen",mathematics,197,Penguin
3,Superfreakonomics,"Dubner, Stephen",economics,179,HarperCollins
4,Orientalism,"Said, Edward",history,197,Penguin


In [3]:
df.shape

(211, 5)

In [4]:
df.isnull().sum()

Title         0
Author       24
Genre         0
Height        0
Publisher    96
dtype: int64

In [5]:
index_values = range(0, len(df))
df['Index'] = index_values

In [6]:
df.head()

Unnamed: 0,Title,Author,Genre,Height,Publisher,Index
0,Fundamentals of Wavelets,"Goswami, Jaideva",signal_processing,228,Wiley,0
1,Data Smart,"Foreman, John",data_science,235,Wiley,1
2,God Created the Integers,"Hawking, Stephen",mathematics,197,Penguin,2
3,Superfreakonomics,"Dubner, Stephen",economics,179,HarperCollins,3
4,Orientalism,"Said, Edward",history,197,Penguin,4


In [7]:
df['Genre'].unique()

array(['signal_processing', 'data_science', 'mathematics', 'economics',
       'history', 'science', 'psychology', 'fiction', 'computer_science',
       'nonfiction', 'philosophy', 'comic'], dtype=object)

In [8]:
df['Publisher'].unique()

array(['Wiley', 'Penguin', 'HarperCollins', 'Springer',
       'Orient Blackswan', 'CRC', 'Apress', 'Random House', 'Bodley Head',
       'MIT Press', "O'Reilly", 'HBA', 'Rupa', 'Transworld', 'Pan',
       'Hyperion', 'Pocket', 'Mauj', 'BBC', 'Elsevier', 'Pearson',
       'Prentice Hall', 'TMH', 'Picador', nan, 'vikas', 'Routledge',
       'FreePress', 'Jaico', 'Vintage', 'HighStakes', 'Simon&Schuster',
       'Fontana', 'Dell'], dtype=object)

In [9]:
selected_features = ['Index','Genre','Title', 'Author', 'Publisher']

In [10]:
for features in selected_features:
    df[features]=df[features].fillna('')

In [11]:
df.head()

Unnamed: 0,Title,Author,Genre,Height,Publisher,Index
0,Fundamentals of Wavelets,"Goswami, Jaideva",signal_processing,228,Wiley,0
1,Data Smart,"Foreman, John",data_science,235,Wiley,1
2,God Created the Integers,"Hawking, Stephen",mathematics,197,Penguin,2
3,Superfreakonomics,"Dubner, Stephen",economics,179,HarperCollins,3
4,Orientalism,"Said, Edward",history,197,Penguin,4


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Title      211 non-null    object
 1   Author     211 non-null    object
 2   Genre      211 non-null    object
 3   Height     211 non-null    int64 
 4   Publisher  211 non-null    object
 5   Index      211 non-null    int64 
dtypes: int64(2), object(4)
memory usage: 10.0+ KB


In [13]:
combined_features = df['Title']+' '+df['Genre']+' '+df['Author']+' '+df['Publisher']+' '+['Index']
print(combined_features)

0      Fundamentals of Wavelets signal_processing Gos...
1      Data Smart data_science Foreman, John Wiley Index
2      God Created the Integers mathematics Hawking, ...
3      Superfreakonomics economics Dubner, Stephen Ha...
4         Orientalism history Said, Edward Penguin Index
                             ...                        
206    Structure and Randomness mathematics Tao, Tere...
207    Image Processing with MATLAB signal_processing...
208            Animal Farm fiction Orwell, George  Index
209         Idiot, The fiction Dostoevsky, Fyodor  Index
210    Christmas Carol, A fiction Dickens, Charles  I...
Length: 211, dtype: object


In [14]:
vectorizer=TfidfVectorizer()

In [15]:
feature_vectors=vectorizer.fit_transform(combined_features)

In [16]:
similarity = cosine_similarity(feature_vectors)

In [17]:
similarity.shape

(211, 211)

In [18]:
df

Unnamed: 0,Title,Author,Genre,Height,Publisher,Index
0,Fundamentals of Wavelets,"Goswami, Jaideva",signal_processing,228,Wiley,0
1,Data Smart,"Foreman, John",data_science,235,Wiley,1
2,God Created the Integers,"Hawking, Stephen",mathematics,197,Penguin,2
3,Superfreakonomics,"Dubner, Stephen",economics,179,HarperCollins,3
4,Orientalism,"Said, Edward",history,197,Penguin,4
...,...,...,...,...,...,...
206,Structure and Randomness,"Tao, Terence",mathematics,252,,206
207,Image Processing with MATLAB,"Eddins, Steve",signal_processing,241,,207
208,Animal Farm,"Orwell, George",fiction,180,,208
209,"Idiot, The","Dostoevsky, Fyodor",fiction,197,,209


In [19]:
import pickle

In [20]:
pickle.dump(df, open('Book_list.pkl','wb'))

In [21]:
pickle.dump(similarity, open('similarity.pkl','wb'))

In [22]:
def get_recommended_books(book_name, df, similarity):
    list_of_titles = df['Title'].tolist()
    find_close_match = difflib.get_close_matches(book_name, list_of_titles)

    if find_close_match:
        close_match = find_close_match[0]
        index_of_the_book = df[df['Title'] == close_match]['Index'].values[0]
        similarity_score = list(enumerate(similarity[index_of_the_book]))
        sorted_list = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        recommended_books = []
        i = 1
        for book in sorted_list:
            index = book[0]
            title_from_index = df[df.index == index]['Title'].values[0]
            recommended_books.append(title_from_index)
            i += 1
            if i > 6:
                break

        return recommended_books
    else:
        return None


In [23]:
book_name = input('Enter your Book name: ')
recommended_books = get_recommended_books(book_name, df, similarity)

if recommended_books:
    print('Recommended Books for you: ')
    for i, book in enumerate(recommended_books, start=1):
        print(i, '.', book)
else:
    print('No close matches found for the given book name.')


Enter your Book name: Fundamentals
Recommended Books for you: 
1 . Fundamentals of Wavelets
2 . Data Smart
3 . Image Processing with MATLAB
4 . Image Processing & Mathematical Morphology
5 . Empire of the Mughal - Ruler of the World
6 . Death of Superman, The


In [24]:
pickle.load(open('Book_list.pkl', 'rb'))

Unnamed: 0,Title,Author,Genre,Height,Publisher,Index
0,Fundamentals of Wavelets,"Goswami, Jaideva",signal_processing,228,Wiley,0
1,Data Smart,"Foreman, John",data_science,235,Wiley,1
2,God Created the Integers,"Hawking, Stephen",mathematics,197,Penguin,2
3,Superfreakonomics,"Dubner, Stephen",economics,179,HarperCollins,3
4,Orientalism,"Said, Edward",history,197,Penguin,4
...,...,...,...,...,...,...
206,Structure and Randomness,"Tao, Terence",mathematics,252,,206
207,Image Processing with MATLAB,"Eddins, Steve",signal_processing,241,,207
208,Animal Farm,"Orwell, George",fiction,180,,208
209,"Idiot, The","Dostoevsky, Fyodor",fiction,197,,209


In [25]:
pickle.load(open('similarity.pkl', 'rb'))

array([[1.        , 0.17731295, 0.00544868, ..., 0.00652781, 0.00742601,
        0.0063085 ],
       [0.17731295, 1.        , 0.00632138, ..., 0.00757334, 0.00861541,
        0.00731891],
       [0.00544868, 0.00632138, 1.        , ..., 0.00666664, 0.03502658,
        0.00644267],
       ...,
       [0.00652781, 0.00757334, 0.00666664, ..., 1.        , 0.05235329,
        0.04447483],
       [0.00742601, 0.00861541, 0.03502658, ..., 0.05235329, 1.        ,
        0.05059442],
       [0.0063085 , 0.00731891, 0.00644267, ..., 0.04447483, 0.05059442,
        1.        ]])

In [26]:
def get_recommended_genre(genre_name, df, similarity):
    list_of_genre = df['Genre'].unique().tolist()
    find_close_match = difflib.get_close_matches(genre_name, list_of_genre)

    if find_close_match:
        close_match = find_close_match[0]
        index_of_the_book = df[df['Genre'] == close_match]['Index'].values[0]
        similarity_score = list(enumerate(similarity[index_of_the_book]))
        sorted_list = sorted(similarity_score, key=lambda x: x[1], reverse=True)

        recommended_books = []
        i = 1
        for book in sorted_list:
            index = book[0]
            title_from_index = df[df.index == index]['Title'].values[0]
            recommended_books.append(title_from_index)
            i += 1
            if i > 10:
                break

        return recommended_books
    else:
        return None


In [1]:
genre_name = input('Enter your Genre: ')
recommended_genre = get_recommended_genre(genre_name, df, similarity)

if recommended_genre:
    print('Recommended Books for you: ')
    for i, genre in enumerate(recommended_genre, start=1):
        print(i, '.', genre)
else:
    print('No close matches found for the given book name.')


Enter your Genre: signal_processing


NameError: name 'get_recommended_genre' is not defined

In [28]:
df['Genre'].unique()

array(['signal_processing', 'data_science', 'mathematics', 'economics',
       'history', 'science', 'psychology', 'fiction', 'computer_science',
       'nonfiction', 'philosophy', 'comic'], dtype=object)