In [None]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
books = pd.read_csv('ld.csv')

In [3]:
books.head()

Unnamed: 0,Title,Author(s),Publisher(s),Year Published,Pages,Description,Genre
0,Agricultural Insect Pests and Their Control,V.B. Awasthi,Scientific Publishers,2008,304.0,Discusses various insect pests affecting agric...,"Agriculture, Entomology"
1,Biotechnology for Biological Control of Pests ...,Karl Maramorosch,Karl Maramorosch,1967,,Investigates the use of biotechnology to devel...,"Biotechnology, Pest Management"
2,Ecological Engineering for Pest Management,"Geoff M Gurr, Steve D Wratten, Miguel A Altieri",CSIRO,2004,238.0,Explores the role of ecological engineering in...,"Ecology, Pest Management"
3,Integrated Pest Management & Bio Control,"S. C. Dwivedi, Nalini Dwivedi","Pointer Publishers,",2006,472.0,Focuses on integrated pest management strategi...,"Pest Management, Agriculture"
4,Genetic Improvement of Field Crops,Gustavo A. Slafer,CRC Press,1993,486.0,Examines methods and advances in genetic impro...,"Agriculture, Genetics"


In [4]:
print(books.shape)

(1260, 7)


In [5]:
books.isnull().sum()

Title               0
Author(s)         153
Publisher(s)      316
Year Published    654
Pages             293
Description         8
Genre               0
dtype: int64

In [7]:
books_grouped = books.groupby('Title')


In [8]:
books['Author(s)'].fillna('',inplace=True)
books['Description'].fillna('',inplace=True)
books['Publisher(s)'].fillna('',inplace=True)
books['Genre'].fillna('',inplace=True)
books['Year Published'].fillna('',inplace=True)
books['Pages'].fillna('',inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  books['Author(s)'].fillna('',inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  books['Description'].fillna('',inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alway

In [35]:
#now combining relevant features into a single string



In [9]:
books.head()

Unnamed: 0,Title,Author(s),Publisher(s),Year Published,Pages,Description,Genre
0,Agricultural Insect Pests and Their Control,V.B. Awasthi,Scientific Publishers,2008,304.0,Discusses various insect pests affecting agric...,"Agriculture, Entomology"
1,Biotechnology for Biological Control of Pests ...,Karl Maramorosch,Karl Maramorosch,1967,,Investigates the use of biotechnology to devel...,"Biotechnology, Pest Management"
2,Ecological Engineering for Pest Management,"Geoff M Gurr, Steve D Wratten, Miguel A Altieri",CSIRO,2004,238.0,Explores the role of ecological engineering in...,"Ecology, Pest Management"
3,Integrated Pest Management & Bio Control,"S. C. Dwivedi, Nalini Dwivedi","Pointer Publishers,",2006,472.0,Focuses on integrated pest management strategi...,"Pest Management, Agriculture"
4,Genetic Improvement of Field Crops,Gustavo A. Slafer,CRC Press,1993,486.0,Examines methods and advances in genetic impro...,"Agriculture, Genetics"


In [11]:
books['combined_features'] = books['Title']+' '+ books['Author(s)'] + books['Description'] + books['Publisher(s)'] + books['Genre']

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(stop_words = 'english')
tfidf_matrix = tfidf_vectorizer.fit_transform(books['combined_features'])

In [13]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the cosine similarity matrix
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [16]:
def get_recommendations(Title, cosine_sim=cosine_sim_matrix):
    # Get the index of the book that matches the title
    idx = books[books['Title'] == Title].index[0]

    # Get the pairwise similarity scores of all books with that book
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar books
    sim_scores = sim_scores[1:20]

    # Get the book indices
    book_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar books
    return books['Title'].iloc[book_indices]

In [18]:
print(get_recommendations('परिस्थितियों से जीतो'))

567                       Cell Division Control in Plants
1156                       कैंसर पर विजय कैसे प्राप्त करे
1100                              अच्छे बनो अच्छे मिलेंगे
141                  मृत्यु की पराजय (Victory Over Death)
1139                                    पर्यावरण और ऊर्जा
1137                                     पर्यावरण संरक्षण
1076                                          आंख का पानी
1059    राजभाषा सेनानी: हिन्दी कवि हिन्दी कवियों का सं...
1058    राजभाषा सेनानी: हिन्दी लेखक हिन्दी लेखकों का स...
1033                                      कार्यालय दीपिका
1141                     पर्यावरण संकट एवं प्रदूषण का कहर
930                                   हिन्दी और हिन्दीकार
1051    व्यावहारिक राजभाषा हिन्दी: हिन्दी प्रयोग विधि ...
1057                                  हिन्दी और हिन्दीकार
1039                                 हिन्दी वर्तनी दीपिका
1078                                   कम्प्यूटर एक परिचय
1093                      पाठकों के अकाल परः कलम का दरबार
1133          