In [1]:
!pip install requests beautifulsoup4
import requests
from bs4 import BeautifulSoup
import csv

# Example listing URL (page 1 of the books.toscrape.com catalogue).
# NOTE(review): no code visible in this file reads this module-level name;
# the functions take their own `url` argument and the entry point defines
# `base_url` separately. Confirm whether it is still needed.
url = "http://books.toscrape.com/catalogue/page-1.html"

def get_page_content(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as bytes when the server replies with HTTP 200;
        ``None`` for any other status code or when the request itself fails
        (connection error, timeout, invalid URL, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a bad status code so that
        # callers only need to handle a single "no content" case.
        return None

    if response.status_code != 200:
        return None
    return response.content

def parse_page(content):
    """Extract one ``[title, price, rating]`` row per product on a page.

    Args:
        content: HTML of a listing page, as accepted by ``BeautifulSoup``.

    Returns:
        A list of three-item lists, in document order. An empty list is
        returned when the page has no ``<article class="product_pod">``.
    """
    document = BeautifulSoup(content, 'html.parser')

    def build_row(pod):
        # The title is stored in the "title" attribute of the heading link.
        name = pod.h3.a["title"]
        # Price text with surrounding whitespace removed.
        cost = pod.find("p", class_="price_color").text.strip()
        # The first <p> carries classes such as "star-rating Four"; the
        # second class token is the rating word ("One", "Two", ...).
        stars = pod.p["class"][1]
        return [name, cost, stars]

    return [
        build_row(pod)
        for pod in document.find_all("article", class_="product_pod")
    ]

def save_to_csv(products, filename):
    """Write *products* to *filename* as UTF-8 CSV with a header row.

    Args:
        products: Iterable of rows; each row is a sequence of cell values
            (title, price, rating).
        filename: Path of the file to create or overwrite.
    """
    header = ["Title", "Price", "Rating"]
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        for record in products:
            sheet.writerow(record)

def scrape_website(base_url, pages=1, output_file="books_data.csv"):
    """Scrape consecutive listing pages and store every product in a CSV file.

    Args:
        base_url: URL of the first page. Later page URLs are derived by
            replacing the substring ``"page-1"`` with ``"page-<n>"``; if that
            substring is absent, every iteration requests ``base_url``
            unchanged.
        pages: Number of pages to fetch, starting at page 1.
        output_file: Path of the CSV file to write. Defaults to
            ``"books_data.csv"``, the name that was previously hard-coded.

    Pages that cannot be retrieved are reported on stdout and skipped; the
    CSV is written even if no products were collected.
    """
    all_products = []

    for page_num in range(1, pages + 1):
        page_url = base_url.replace("page-1", f"page-{page_num}")
        print(f"Scraping {page_url}...")
        content = get_page_content(page_url)

        if not content:
            print(f"Failed to retrieve content from {page_url}")
            continue

        all_products.extend(parse_page(content))

    # Persist everything gathered across the requested pages.
    save_to_csv(all_products, output_file)
    print(f"Data saved to {output_file} with {len(all_products)} products.")

# Script entry point: runs only when this file is executed directly.
# The base URL must contain "page-1", because scrape_website() derives each
# page's URL by replacing that substring with the current page number.
if __name__ == "__main__":
    base_url = "http://books.toscrape.com/catalogue/page-1.html"
    scrape_website(base_url, pages=5)  # Scrape the first 5 pages





[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Scraping http://books.toscrape.com/catalogue/page-1.html...
Scraping http://books.toscrape.com/catalogue/page-2.html...
Scraping http://books.toscrape.com/catalogue/page-3.html...
Scraping http://books.toscrape.com/catalogue/page-4.html...
Scraping http://books.toscrape.com/catalogue/page-5.html...
Data saved to books_data.csv with 100 products.
