In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# Extract Name & Price of Children's Books
url = 'https://books.toscrape.com/catalogue/category/books/childrens_11/index.html'
# A timeout keeps the cell from hanging indefinitely if the site stops responding.
response = requests.get(url, timeout=10)
# Displayed as the cell result: 200 means the page was fetched successfully.
response.status_code

200

In [3]:
# Parse the raw response bytes with the lxml parser; showing the page <title>
# is a quick check that the expected document came back.
soup = BeautifulSoup(response.content, 'lxml')
soup.title

<title>
    Childrens | 
     Books to Scrape - Sandbox

</title>

In [4]:
# First <h3> on the page: it wraps an <a> whose link text is the (possibly
# truncated) book name and whose `title` attribute is the full name.
soup.find('h3')

<h3><a href="../../../birdsong-a-story-in-pictures_975/index.html" title="Birdsong: A Story in Pictures">Birdsong: A Story in ...</a></h3>

In [5]:
# .text returns only the visible link text, which the site shortens with '...'.
soup.find('h3').text

'Birdsong: A Story in ...'

In [6]:
# Gotcha: dotted access on a tag looks for a child *tag* named <title>, not the
# `title` attribute. The <h3> has no such child, so this evaluates to None and
# the cell shows no output.
soup.find('h3').title

In [7]:
# HTML attributes are read with subscript syntax on the tag that carries them
# (here the <a> inside the <h3>); this yields the full, untruncated book name.
soup.find('h3').a['title']

'Birdsong: A Story in Pictures'

In [8]:
# Same lookup via .get(), which returns None instead of raising KeyError when
# the attribute is missing.
soup.find('h3').a.get('title')

'Birdsong: A Story in Pictures'

In [9]:
# Every <h3> on the page, i.e. one heading per listed book.
soup.find_all('h3')

[<h3><a href="../../../birdsong-a-story-in-pictures_975/index.html" title="Birdsong: A Story in Pictures">Birdsong: A Story in ...</a></h3>,
 <h3><a href="../../../the-bear-and-the-piano_967/index.html" title="The Bear and the Piano">The Bear and the ...</a></h3>,
 <h3><a href="../../../the-secret-of-dreadwillow-carse_944/index.html" title="The Secret of Dreadwillow Carse">The Secret of Dreadwillow ...</a></h3>,
 <h3><a href="../../../the-white-cat-and-the-monk-a-retelling-of-the-poem-pangur-ban_865/index.html" title="The White Cat and the Monk: A Retelling of the Poem “Pangur Bán”">The White Cat and ...</a></h3>,
 <h3><a href="../../../little-red_817/index.html" title="Little Red">Little Red</a></h3>,
 <h3><a href="../../../walt-disneys-alice-in-wonderland_777/index.html" title="Walt Disney's Alice in Wonderland">Walt Disney's Alice in ...</a></h3>,
 <h3><a href="../../../twenty-yawns_773/index.html" title="Twenty Yawns">Twenty Yawns</a></h3>,
 <h3><a href="../../../rain-fish_728/inde

In [10]:
# Collect the full book names from the `title` attribute of each heading's
# link (the link text itself may be truncated).
names = soup.find_all('h3')
names_cleaned = [name.a.get('title') for name in names]
names_cleaned

['Birdsong: A Story in Pictures',
 'The Bear and the Piano',
 'The Secret of Dreadwillow Carse',
 'The White Cat and the Monk: A Retelling of the Poem “Pangur Bán”',
 'Little Red',
 "Walt Disney's Alice in Wonderland",
 'Twenty Yawns',
 'Rain Fish',
 'Once Was a Time',
 'Luis Paints the World',
 'Nap-a-Roo',
 'The Whale',
 'Shrunken Treasures: Literary Classics, Short, Sweet, and Silly',
 'Raymie Nightingale',
 'Playing from the Heart',
 'Maybe Something Beautiful: How Art Transformed a Neighborhood',
 'The Wild Robot',
 'The Thing About Jellyfish',
 'The Lonely Ones',
 'The Day the Crayons Came Home (Crayons)']

In [11]:
# Prices live in <p class="price_color"> elements; the text is kept as shown
# on the page, including the currency symbol.
prices = soup.find_all('p', class_='price_color')
prices_cleaned = [price.text for price in prices]
prices_cleaned

['£54.64',
 '£36.89',
 '£56.13',
 '£58.08',
 '£13.47',
 '£12.96',
 '£22.08',
 '£23.57',
 '£18.28',
 '£53.95',
 '£25.08',
 '£35.96',
 '£52.87',
 '£34.41',
 '£32.38',
 '£22.54',
 '£56.07',
 '£48.77',
 '£43.59',
 '£26.33']

In [12]:
import pandas as pd

# Combine the two parallel lists into a table; both lists must have the same
# length because they were scraped from the same page in document order.
books_dict = {
    'Book Name': names_cleaned,
    'Price': prices_cleaned
}
books_df = pd.DataFrame(books_dict)
books_df

Unnamed: 0,Book Name,Price
0,Birdsong: A Story in Pictures,£54.64
1,The Bear and the Piano,£36.89
2,The Secret of Dreadwillow Carse,£56.13
3,The White Cat and the Monk: A Retelling of the...,£58.08
4,Little Red,£13.47
5,Walt Disney's Alice in Wonderland,£12.96
6,Twenty Yawns,£22.08
7,Rain Fish,£23.57
8,Once Was a Time,£18.28
9,Luis Paints the World,£53.95


In [13]:
# List every <strong> on the page to locate the one holding the result count.
soup.find_all('strong')

[<strong>Childrens</strong>,
 <strong>29</strong>,
 <strong>1</strong>,
 <strong>20</strong>,

In [14]:
# Narrow the search to the results form: its first <strong> is the total
# number of books in the category.
soup.find('form', class_='form-horizontal').find('strong').text

'29'

In [15]:
import math
# Total number of books as reported by the results form on the page.
total_no_of_books = int(soup.find('form', class_='form-horizontal').find('strong').text)
# Page size used for the pagination arithmetic; matches the 20 items scraped
# from the first page above.
no_of_books_per_page = 20
# Round up so that a partially filled last page is still counted.
no_of_pages = math.ceil(total_no_of_books / no_of_books_per_page)
print(f'Total number of books: {total_no_of_books}')
print(f'Number of books per page: {no_of_books_per_page}')
print(f'Total number of pages: {no_of_pages}')

Total number of books: 29
Number of books per page: 20
Total number of pages: 2


In [16]:
# Each rating is a <p class="star-rating X">; rating['class'] is the list of
# class tokens, and the second token is the rating word ('One' .. 'Five').
ratings = [rating['class'][1] for rating in soup.find_all('p', class_='star-rating')]
ratings

['Three',
 'One',
 'One',
 'Four',
 'Three',
 'Five',
 'Two',
 'Three',
 'Two',
 'Three',
 'One',
 'Four',
 'Three',
 'Two',
 'One',
 'One',
 'Three',
 'One',
 'Five',
 'Five']

In [17]:
category = 'childrens_11'

# The star rating is encoded as a CSS class word; build the lookup once
# instead of on every loop iteration.
rating_map = {
    'One': 1,
    'Two': 2,
    'Three': 3,
    'Four': 4,
    'Five': 5
}

# Collect one DataFrame per page and concatenate a single time after the loop;
# this removes the page == 1 special case and the repeated copying of rows.
page_frames = []
for page in range(1, no_of_pages + 1):
    page_url = f'https://books.toscrape.com/catalogue/category/books/{category}/page-{page}.html'
    # A timeout keeps the loop from hanging if the site stops responding.
    response = requests.get(page_url, timeout=10)
    soup = BeautifulSoup(response.content, 'lxml')

    # Full book names come from the `title` attribute of each heading link.
    names = soup.find_all('h3')
    names_cleaned = [name.a.get('title') for name in names]

    prices = soup.find_all('p', class_='price_color')
    prices_cleaned = [price.text for price in prices]

    # Second class token is the rating word; unknown words map to 0.
    ratings = soup.find_all('p', class_='star-rating')
    ratings_cleaned = [rating_map.get(rating['class'][1], 0) for rating in ratings]

    books_dict = {
        'Book Name': names_cleaned,
        'Price': prices_cleaned,
        'Rating': ratings_cleaned
    }

    books_df = pd.DataFrame(books_dict)
    page_frames.append(books_df)

all_books_df = pd.concat(page_frames, ignore_index=True)

In [18]:
all_books_df

Unnamed: 0,Book Name,Price,Rating
0,Birdsong: A Story in Pictures,£54.64,3
1,The Bear and the Piano,£36.89,1
2,The Secret of Dreadwillow Carse,£56.13,1
3,The White Cat and the Monk: A Retelling of the...,£58.08,4
4,Little Red,£13.47,3
5,Walt Disney's Alice in Wonderland,£12.96,5
6,Twenty Yawns,£22.08,2
7,Rain Fish,£23.57,3
8,Once Was a Time,£18.28,2
9,Luis Paints the World,£53.95,3


In [19]:
def scrape_books(category):
    """Scrape name, price and star rating for every book in one category.

    Args:
        category: Category slug exactly as it appears in the site's URLs,
            for example 'romance_8'.

    Returns:
        pandas.DataFrame with the columns 'Book Name', 'Price' (text as shown
        on the page, including the currency symbol) and 'Rating' (1-5, or 0
        when the star-rating class word is not recognised). The frame is
        empty when the category reports no books.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    base_url = f'https://books.toscrape.com/catalogue/category/books/{category}/'
    no_of_books_per_page = 20
    # Star ratings are encoded as a CSS class word; built once, not per page.
    rating_map = {
        'One': 1,
        'Two': 2,
        'Three': 3,
        'Four': 4,
        'Five': 5
    }

    # The index page carries the total count and is also the first page of
    # results, so it is fetched once and reused below.
    response = requests.get(f'{base_url}index.html', timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    total_no_of_books = int(soup.find('form', class_='form-horizontal').find('strong').text)
    no_of_pages = math.ceil(total_no_of_books / no_of_books_per_page)

    page_frames = []
    for page in range(1, no_of_pages + 1):
        if page > 1:
            # Page 1 is the index page parsed above; requesting page-1.html is
            # avoided because it is not guaranteed to exist for categories
            # that fit on a single page.
            response = requests.get(f'{base_url}page-{page}.html', timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'lxml')

        # Full names come from the `title` attribute; link text may be truncated.
        names_cleaned = [name.a.get('title') for name in soup.find_all('h3')]
        prices_cleaned = [price.text for price in soup.find_all('p', class_='price_color')]
        # Second class token is the rating word; unknown words map to 0.
        ratings_cleaned = [
            rating_map.get(rating['class'][1], 0)
            for rating in soup.find_all('p', class_='star-rating')
        ]

        page_frames.append(pd.DataFrame({
            'Book Name': names_cleaned,
            'Price': prices_cleaned,
            'Rating': ratings_cleaned
        }))

    if not page_frames:
        return pd.DataFrame(columns=['Book Name', 'Price', 'Rating'])
    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(page_frames, ignore_index=True)

In [20]:
scrape_books('romance_8')

Unnamed: 0,Book Name,Price,Rating
0,Chase Me (Paris Nights #2),£25.27,5
1,Black Dust,£34.53,5
2,Her Backup Boyfriend (The Sorensen Family #1),£33.97,1
3,First and First (Five Boroughs #3),£15.97,4
4,Fifty Shades Darker (Fifty Shades #2),£21.96,1
5,The Wedding Dress,£24.12,1
6,Suddenly in Love (Lake Haven #1),£55.99,2
7,Something More Than This,£16.24,4
8,Doing It Over (Most Likely To #1),£35.61,3
9,The Wedding Pact (The O'Malleys #2),£32.61,3


In [21]:
# Load the site's home page, whose sidebar lists every book category.
url = 'https://books.toscrape.com/'
# A timeout keeps the cell from hanging indefinitely if the site stops responding.
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.content, 'lxml')


In [22]:
# The sidebar is a <ul class="nav nav-list"> containing a nested <ul> with one
# <li> per category; strip() trims the whitespace around each link's text.
categories = soup.find('ul', class_='nav nav-list').find('ul').find_all('li')
categories_cleaned = [category.a.text.strip() for category in categories]
print(categories_cleaned)

['Travel', 'Mystery', 'Historical Fiction', 'Sequential Art', 'Classics', 'Philosophy', 'Romance', 'Womens Fiction', 'Fiction', 'Childrens', 'Religion', 'Nonfiction', 'Music', 'Default', 'Science Fiction', 'Sports and Games', 'Add a comment', 'Fantasy', 'New Adult', 'Young Adult', 'Science', 'Poetry', 'Paranormal', 'Art', 'Psychology', 'Autobiography', 'Parenting', 'Adult Fiction', 'Humor', 'Horror', 'History', 'Food and Drink', 'Christian Fiction', 'Business', 'Biography', 'Thriller', 'Contemporary', 'Spirituality', 'Academic', 'Self Help', 'Historical', 'Christian', 'Suspense', 'Short Stories', 'Novels', 'Health', 'Politics', 'Cultural', 'Erotica', 'Crime']


In [23]:
# Build the URL slug for each category as '<lower-case-name-with-hyphens>_<n>',
# where n counts up from 2 in sidebar order (e.g. 'Childrens' -> 'childrens_11').
# NOTE(review): this assumes the site's ids are sequential in sidebar order;
# reading the slug from each link's href would avoid relying on that.
categories_simplified = [category.lower().replace(' ', '-') + '_' + str(i+2) for i, category in enumerate(categories_cleaned)]

# Map display name -> URL slug.
categories_dict = {k: v for k, v in zip(categories_cleaned, categories_simplified)}
print(categories_dict)

{'Travel': 'travel_2', 'Mystery': 'mystery_3', 'Historical Fiction': 'historical-fiction_4', 'Sequential Art': 'sequential-art_5', 'Classics': 'classics_6', 'Philosophy': 'philosophy_7', 'Romance': 'romance_8', 'Womens Fiction': 'womens-fiction_9', 'Fiction': 'fiction_10', 'Childrens': 'childrens_11', 'Religion': 'religion_12', 'Nonfiction': 'nonfiction_13', 'Music': 'music_14', 'Default': 'default_15', 'Science Fiction': 'science-fiction_16', 'Sports and Games': 'sports-and-games_17', 'Add a comment': 'add-a-comment_18', 'Fantasy': 'fantasy_19', 'New Adult': 'new-adult_20', 'Young Adult': 'young-adult_21', 'Science': 'science_22', 'Poetry': 'poetry_23', 'Paranormal': 'paranormal_24', 'Art': 'art_25', 'Psychology': 'psychology_26', 'Autobiography': 'autobiography_27', 'Parenting': 'parenting_28', 'Adult Fiction': 'adult-fiction_29', 'Humor': 'humor_30', 'Horror': 'horror_31', 'History': 'history_32', 'Food and Drink': 'food-and-drink_33', 'Christian Fiction': 'christian-fiction_34', 'B

In [42]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import math


def get_categories():
    """Return a mapping of category display name to URL slug.

    The home page is downloaded and its sidebar category list is read. The
    slug (e.g. 'childrens_11') is taken from each category link's ``href``
    rather than being reconstructed from the name and list position, so the
    result stays correct even if ids are not sequential in sidebar order.

    Returns:
        dict mapping the stripped link text (e.g. 'Childrens') to the
        directory name that precedes 'index.html' in the link target.

    Raises:
        requests.HTTPError: If the home page request returns an error status.
    """
    base_url = 'https://books.toscrape.com/'
    # A timeout keeps the call from hanging if the site stops responding.
    response = requests.get(base_url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    # The nested <ul> inside the sidebar holds one <li> per category.
    category_items = soup.find('ul', class_='nav nav-list').find('ul').find_all('li')

    categories = {}
    for item in category_items:
        link = item.a
        # href looks like '.../<slug>/index.html'; the slug is the second-to-last
        # path segment.
        slug = link['href'].rstrip('/').split('/')[-2]
        categories[link.text.strip()] = slug
    return categories


def get_category_url(category):
    """Return the index-page URL for a category given its display name.

    Args:
        category: Category name as used for the keys of ``get_categories()``.

    Returns:
        URL of the category's 'index.html' page.

    Raises:
        ValueError: If the name has no (non-empty) slug in the category mapping.
    """
    slug = get_categories().get(category)
    if not slug:
        raise ValueError(f'Category "{category}" not found in the dictionary.')
    return 'https://books.toscrape.com/catalogue/category/books/' + slug + '/index.html'


# Star ratings are encoded as a CSS class word on each rating element.
_RATING_MAP = {
    'One': 1,
    'Two': 2,
    'Three': 3,
    'Four': 4,
    'Five': 5
}
# Page size used for the pagination arithmetic.
_BOOKS_PER_PAGE = 20
# Seconds to wait for the site before giving up on a request.
_REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Download ``url`` and return it parsed with the lxml parser."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')


def _books_on_page(soup):
    """Return a DataFrame of the books listed on one parsed listing page."""
    # Full names come from the `title` attribute; link text may be truncated.
    names_cleaned = [name.a.get('title') for name in soup.find_all('h3')]
    # Drop the leading currency symbol but keep the price as text.
    prices_cleaned = [price.text.lstrip('£') for price in soup.find_all('p', class_='price_color')]
    # Second class token is the rating word; unknown words map to 0.
    ratings_cleaned = [
        _RATING_MAP.get(rating['class'][1], 0)
        for rating in soup.find_all('p', class_='star-rating')
    ]
    return pd.DataFrame({
        'Book Name': names_cleaned,
        'Price': prices_cleaned,
        'Rating': ratings_cleaned
    })


def scrape_books(category):
    """Scrape every book listed under a category into one DataFrame.

    Args:
        category: Category display name, i.e. a key of ``get_categories()``
            such as 'Childrens'.

    Returns:
        pandas.DataFrame with the columns 'Book Name', 'Price' (text without
        the leading '£') and 'Rating' (1-5, or 0 when the rating word is not
        recognised); the frame is empty when the category reports no books.
        Returns None, after printing the error message, when the category
        name is unknown.

    Raises:
        requests.HTTPError: If a listing page request returns an error status.
    """
    try:
        url = get_category_url(category)
    except ValueError as e:
        # Unknown category: report it and signal failure with None.
        print(e)
        return None

    # The index page carries the total count and is also the first page of
    # results, so it is downloaded once and reused for page 1.
    first_page = _fetch_soup(url)
    total_no_of_books = int(first_page.find('form', class_='form-horizontal').find('strong').text)
    no_of_pages = math.ceil(total_no_of_books / _BOOKS_PER_PAGE)

    # Directory that contains index.html; later pages are siblings of it.
    listing_root = url.rsplit('/', 1)[0]

    page_frames = []
    for page in range(1, no_of_pages + 1):
        if page == 1:
            soup = first_page
        else:
            soup = _fetch_soup(f'{listing_root}/page-{page}.html')
        page_frames.append(_books_on_page(soup))

    if not page_frames:
        return pd.DataFrame(columns=['Book Name', 'Price', 'Rating'])
    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(page_frames, ignore_index=True)

In [43]:
scrape_books('Childrens')

Unnamed: 0,Book Name,Price,Rating
0,Birdsong: A Story in Pictures,54.64,3
1,The Bear and the Piano,36.89,1
2,The Secret of Dreadwillow Carse,56.13,1
3,The White Cat and the Monk: A Retelling of the...,58.08,4
4,Little Red,13.47,3
5,Walt Disney's Alice in Wonderland,12.96,5
6,Twenty Yawns,22.08,2
7,Rain Fish,23.57,3
8,Once Was a Time,18.28,2
9,Luis Paints the World,53.95,3


In [52]:
import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output


# Offer exactly the category names that scrape_books() accepts.
category_dropdown = widgets.Dropdown(
    options=list(get_categories()),
    description='Category:',
    layout=widgets.Layout(width='50%')
)

# Results are rendered into a dedicated Output widget so each new selection
# replaces the previous table without clearing and re-displaying the dropdown.
results_output = widgets.Output()

display(category_dropdown, results_output)


def on_category_change(change):
    """Fetch and show the books for the newly selected category.

    Args:
        change: Change dictionary passed by the widget's ``observe``
            machinery; only changes of the 'value' trait are acted on, and
            ``change['new']`` is the selected category name.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    selected = change['new']
    with results_output:
        # wait=True defers clearing until new output arrives, avoiding flicker.
        clear_output(wait=True)
        print(f"\n📚 Fetching books for: {selected}")
        books_df = scrape_books(selected)
        # scrape_books returns None (after printing why) for unknown names.
        if books_df is not None:
            display(books_df)


# Register the callback for changes of the selected value only
category_dropdown.observe(on_category_change, names='value')

Dropdown(description='Category:', index=3, layout=Layout(width='50%'), options=('Travel', 'Mystery', 'Historic…

Unnamed: 0,Book Name,Price,Rating
0,Scott Pilgrim's Precious Little Life (Scott Pi...,52.29,5
1,Tsubasa: WoRLD CHRoNiCLE 2 (Tsubasa WoRLD CHRo...,16.28,1
2,This One Summer,19.49,4
3,The Nameless City (The Nameless City #1),38.16,4
4,"Saga, Volume 5 (Saga (Collected Editions) #5)",51.04,2
...,...,...,...
70,"Hawkeye, Vol. 1: My Life as a Weapon (Hawkeye #1)",45.24,3
71,"Giant Days, Vol. 1 (Giant Days #1-4)",56.76,4
72,"Fruits Basket, Vol. 1 (Fruits Basket #1)",40.28,5
73,"Bleach, Vol. 1: Strawberry and the Soul Reaper...",34.65,5
