In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import numpy as np

# Step 1: Read the CSV file
file_path = 'BooksDatasetClean.csv'
df = pd.read_csv(file_path)
df

Unnamed: 0,Title,Authors,Description,Category,Publisher,Price Starting With ($),Publish Date (Month),Publish Date (Year)
0,Goat Brothers,"By Colton, Larry",,"History , General",Doubleday,8.79,January,1993
1,The Missing Person,"By Grumbach, Doris",,"Fiction , General",Putnam Pub Group,4.99,March,1981
2,Don't Eat Your Heart Out Cookbook,"By Piscatella, Joseph C.",,"Cooking , Reference",Workman Pub Co,4.99,September,1983
3,When Your Corporate Umbrella Begins to Leak: A...,"By Davis, Paul D.",,,Natl Pr Books,4.99,April,1991
4,Amy Spangler's Breastfeeding : A Parent's Guide,"By Spangler, Amy",,,Amy Spangler,5.32,February,1997
...,...,...,...,...,...,...,...,...
103058,Build 3 Super Serving Carts,By Chuck Hampton,,,ENDesigns Inc.,9.97,January,1992
103059,My Land of Israel,"By Nover, Elizabeth Z.",,"Juvenile Nonfiction , People & Places , Middl...",Behrman House,4.99,May,1987
103060,Tongues: To Speak or Not to Speak,By Donald W. Burdick,,,Moody Press,5.29,January,1969
103061,If I'm in charge here why is everybody laughing?,"By Campbell, David P.",,,Argus Communications,4.99,January,1980


In [14]:
df['Title'].fillna('', inplace=True)
df['Authors'].fillna('', inplace=True)
df['Description'].fillna('', inplace=True)
df['Category'].fillna('', inplace=True)
df['Publisher'].fillna('', inplace=True)
# Step 2: Preprocess the Data
# Combine relevant text fields for better matching (title, authors, description, category)
df['text'] = df['Title'] + ' ' + df['Authors'] + ' ' + df['Category'] + ' ' + df['Publisher']

# Fill missing descriptions with empty strings
df['Description'].fillna('', inplace=True)

# Combine the description with other fields to form the final text for matching
df['text'] = df['text'] + ' ' + df['Description']

# Step 3: Build the Keyword Matching System
# Vectorize the text using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['text'])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Title'].fillna('', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Authors'].fillna('', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as

In [15]:


# Function to preprocess and vectorize user query
def preprocess_query(query):
    return vectorizer.transform([query])

# Step 4: Display Results
def search_books(query, top_n=5):
    query_vector = preprocess_query(query)
    cosine_similarities = linear_kernel(query_vector, tfidf_matrix).flatten()
    print(cosine_similarities)
    top_n_indices = np.argsort(cosine_similarities)[-top_n:]
    print(top_n_indices)
    
    results = df.iloc[top_n_indices][['Title', 'Authors', 'Description', 'Category', 'Publisher', 'Price Starting With ($)', 'Publish Date (Month)', 'Publish Date (Year)']]
    return results

# Example user query
user_query = "i need a book on biology"
search_results = search_books(user_query)

# Print the results
search_results


[0. 0. 0. ... 0. 0. 0.]
[ 2420 50180 83661 19351 60104]


Unnamed: 0,Title,Authors,Description,Category,Publisher,Price Starting With ($),Publish Date (Month),Publish Date (Year)
2420,Introduction to Biology (Introductions Series),"By Chisholm, Jane",,"Juvenile Nonfiction , Science & Nature , Biology",Edc Pub,5.29,November,1983
50180,Student Study Guide for Biology,"By Taylor, Martha R.",,"Science , Life Sciences , Biology",Benjamin-Cummings Pub Co,6.82,January,1999
83661,Discover Biology,"By Cain, Michael L., Yoon, Carol Kaesuk, and S...","Written from the ground up for nonmajors, Disc...","Science , Life Sciences , Biology",W. W. Norton & Company,8.98,February,2009
19351,Human Biology,"By Mader, Sylvia S.",,"Science , Life Sciences , Biology",William C Brown Pub,10.99,January,1995
60104,"Biology, Fourth Edition","By Arms, Karen",,"Science , Life Sciences , Biology",Saunders College Pub,10.53,January,1995
