In [1]:
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:

# Download every .jpg image referenced on the selected catalogue pages of
# books.toscrape.com and store them as image_<n>.jpg in a local directory.

# Base URL template with a placeholder for the page number
base_url = "https://books.toscrape.com/catalogue/category/books_1/page-{}.html"

# Base URL for the website (image URLs below are resolved against each page
# URL instead, so relative "../" references are handled correctly)
base_website_url = "https://books.toscrape.com/"

# Directory to save images
image_directory = "downloaded_images"

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the cell forever
request_timeout = 10

# Catalogue pages to scrape (range end is exclusive)
page_numbers = range(1, 2)

# Create the directory if it doesn't exist (no error when it already does)
os.makedirs(image_directory, exist_ok=True)

# The counter lives outside the page loop so that file names stay unique when
# more than one page is scraped; otherwise each page would overwrite the last.
img_count = 1

# Number of page or image requests that did not succeed
failed_requests = 0

# Looping through the pages
for page_no in page_numbers:

    # Formatting URL for each page
    url = base_url.format(page_no)

    # Sending a request to get page contents
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f'Failed to fetch {page_no}: {exc}')
        failed_requests += 1
        continue

    if response.status_code != 200:
        print(f'Failed to fetch {page_no}')
        failed_requests += 1
        continue

    # Parsing the response
    data = BeautifulSoup(response.text, 'html.parser')

    # Collecting the src attribute of every <img> tag that points to a jpg
    image_src = [
        tag['src']
        for tag in data.find_all('img', src=True)
        if tag['src'].endswith('.jpg')
    ]

    for image in image_src:
        # Resolving the (possibly relative) src against the page URL.
        # urljoin follows URL resolution rules; os.path.join is meant for
        # filesystem paths and must not be used to build URLs.
        image_url = urljoin(url, image)

        # Downloading the image; kept in its own variable so the page
        # response is not overwritten
        try:
            image_response = requests.get(image_url, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f'Failed to download {image_url}: {exc}')
            failed_requests += 1
            continue

        # Never write an error page to disk under a .jpg name
        if image_response.status_code != 200:
            print(f'Failed to download {image_url}')
            failed_requests += 1
            continue

        # Saving each image into the created directory
        image_path = os.path.join(image_directory, 'image_' + str(img_count) + ".jpg")
        with open(image_path, 'wb') as f:
            f.write(image_response.content)

        img_count += 1

# Only claim success when every request actually succeeded
if failed_requests == 0:
    print(f"Images downloaded successfully and saved in '{image_directory}' directory.")
else:
    print(
        f"Finished with {failed_requests} failed request(s); "
        f"{img_count - 1} image(s) saved in '{image_directory}' directory."
    )


Images downloaded successfully and saved in 'downloaded_images' directory.
