In [1]:
import requests
import pandas as pd 
from bs4 import BeautifulSoup
from joblib import Parallel, delayed
from rake_nltk import Rake


In [2]:
def get_product_links(url):
    global base_url
    request = requests.get(url)
    soup = BeautifulSoup(request.text, 'html.parser')

    product_articles = soup.find_all('article', class_='product_pod')
    product_links = [base_url + link['href'] for item in product_articles for link in item.find_all('a', href=True) ]

    return product_links


In [3]:
def flatten_list(list_of_lists):
    flat_list = []
    for list in list_of_lists:
        flat_list += list
    return flat_list

In [4]:
def get_product_details(url):
    request = requests.get(url)
    soup = BeautifulSoup(request.text, 'html.parser')

    name=soup.find('h1').text.strip()
    availability=soup.find('p',class_='instock availability').text.strip()
    price=soup.find('p').text.strip('Â')
    rating = soup.find('p', class_='star-rating')['class'][1] 
    
    return {'name': name, 'availability': availability, 'price': price, 'rating': rating, 'link': url}


In [5]:

if __name__ == '__main__':

    global base_url
    base_url = 'https://books.toscrape.com/catalogue/'

    product_links = Parallel(n_jobs=18)(delayed(get_product_links)(base_url+f'page-{page}.html') for page in range(1, 51))
    product_links =  set(flatten_list(product_links))

    books = Parallel(n_jobs=32, verbose=100)(delayed(get_product_details)(url) for url in product_links)
    books = pd.DataFrame(books)


[Parallel(n_jobs=32)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=32)]: Done   1 tasks      | elapsed:    7.6s
[Parallel(n_jobs=32)]: Done   2 tasks      | elapsed:    7.8s
[Parallel(n_jobs=32)]: Done   3 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   4 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   5 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   6 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   7 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   8 tasks      | elapsed:    8.0s
[Parallel(n_jobs=32)]: Done   9 tasks      | elapsed:    8.1s
[Parallel(n_jobs=32)]: Done  10 tasks      | elapsed:    9.0s
[Parallel(n_jobs=32)]: Done  11 tasks      | elapsed:    9.2s
[Parallel(n_jobs=32)]: Done  12 tasks      | elapsed:    9.3s
[Parallel(n_jobs=32)]: Done  13 tasks      | elapsed:    9.4s
[Parallel(n_jobs=32)]: Done  14 tasks      | elapsed:    9.5s
[Parallel(n_jobs=32)]: Done  15 tasks      | elapsed:  

In [6]:
books

Unnamed: 0,name,availability,price,rating,link
0,Abstract City,In stock (14 available),£56.37,Five,https://books.toscrape.com/catalogue/abstract-...
1,The Help Yourself Cookbook for Kids: 60 Easy P...,In stock (14 available),£28.77,Three,https://books.toscrape.com/catalogue/the-help-...
2,The Four Agreements: A Practical Guide to Pers...,In stock (18 available),£17.66,Five,https://books.toscrape.com/catalogue/the-four-...
3,My Kitchen Year: 136 Recipes That Saved My Life,In stock (13 available),£11.53,Two,https://books.toscrape.com/catalogue/my-kitche...
4,Les Fleurs du Mal,In stock (8 available),£29.04,Five,https://books.toscrape.com/catalogue/les-fleur...
...,...,...,...,...,...
995,The Power of Now: A Guide to Spiritual Enlight...,In stock (15 available),£43.54,Two,https://books.toscrape.com/catalogue/the-power...
996,The Girl You Left Behind (The Girl You Left Be...,In stock (6 available),£15.79,One,https://books.toscrape.com/catalogue/the-girl-...
997,The Art Forger,In stock (16 available),£40.76,Three,https://books.toscrape.com/catalogue/the-art-f...
998,The Origin of Species,In stock (7 available),£10.01,Four,https://books.toscrape.com/catalogue/the-origi...


In [8]:
def search_books(book_name, books_df):
    matched_books = []
    r = Rake()
    r.extract_keywords_from_text(book_name)
    book_keywords = set(r.get_ranked_phrases())

    book_name_lower = book_name.lower()

    for index, row in books_df.iterrows():
        name_lower = row['name'].lower()

        if book_name_lower in name_lower:
            matched_books.append(row.to_dict())
        else:
            r.extract_keywords_from_text(row['name'])
            details_keywords = set(r.get_ranked_phrases())
            if book_keywords.intersection(details_keywords):
                matched_books.append(row.to_dict())

    return matched_books


In [9]:
search_query = input("Enter the book name to search: ")
search_results = search_books(search_query, books)
if search_results:
    print("Search Results:")
    for result in search_results:
        print(result)
else:
    print("No matching books found.")

Search Results:
{'name': 'The Widow', 'availability': 'In stock (11 available)', 'price': '£27.26', 'rating': 'Two', 'link': 'https://books.toscrape.com/catalogue/the-widow_609/index.html'}
