In [2]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Define the base URL
base_url = "https://books.toscrape.com/catalogue/"

# Function to get soup object from a URL
def get_soup(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        A ``BeautifulSoup`` object on HTTP 200, otherwise ``None``. Network
        errors (DNS failure, refused connection, timeout) are reported on
        stdout and also yield ``None`` so callers can stop gracefully.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch page: {url} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch page: {url}")
        return None

    # Hand the raw bytes to BeautifulSoup so it can detect the encoding from
    # the document itself. ``response.text`` falls back to ISO-8859-1 when the
    # server sends no charset, which garbles characters such as the pound
    # sign in prices.
    return BeautifulSoup(response.content, 'html.parser')

# Function to extract book details (name, price, link) from a single page
def _parse_books(soup, page_url):
    """Return the book records contained in an already-parsed listing page.

    Args:
        soup: Parsed listing page.
        page_url: URL the page was fetched from; relative book links are
            resolved against it.

    Returns:
        A list of dicts with the keys ``name``, ``price`` and ``link``.
    """
    books = []
    for book in soup.find_all("article", class_="product_pod"):
        anchor = book.h3.a
        books.append({
            "name": anchor["title"],
            "price": book.find("p", class_="price_color").text,
            # Resolve relative to the page itself so "../" segments and
            # pages outside the catalogue directory are handled correctly.
            "link": urljoin(page_url, anchor["href"]),
        })
    return books


def extract_books_from_page(url):
    """Fetch the listing page at *url* and return its book records.

    Args:
        url: Absolute URL of a listing page.

    Returns:
        A list of dicts with the keys ``name``, ``price`` and ``link``;
        an empty list when the page could not be fetched.
    """
    soup = get_soup(url)
    if soup is None:
        return []
    return _parse_books(soup, url)


# Function to scrape all pages
def scrape_all_books():
    """Walk the paginated catalogue and collect every book record.

    Starts at ``page-1.html`` under ``base_url`` and follows each page's
    "next" link until there is none or a page fails to load.

    Returns:
        A list of dicts with the keys ``name``, ``price`` and ``link``, in
        the order the books appear in the catalogue.
    """
    all_books = []
    full_url = urljoin(base_url, "page-1.html")
    while full_url:
        print(f"Scraping {full_url}...")
        soup = get_soup(full_url)
        if soup is None:
            break

        # Reuse the page that was just downloaded for both the book details
        # and the pagination link, so each page is requested only once.
        all_books.extend(_parse_books(soup, full_url))

        # Find the "next" button
        next_item = soup.find("li", class_="next")
        next_anchor = next_item.a if next_item is not None else None
        full_url = (
            urljoin(full_url, next_anchor["href"])
            if next_anchor is not None
            else None
        )
    return all_books

# Save data to CSV
def save_to_csv(data, filename):
    """Write book records to *filename* as a UTF-8 CSV file.

    Args:
        data: Iterable of dicts carrying the keys ``name``, ``price`` and
            ``link``.
        filename: Path of the file to create or overwrite.
    """
    columns = ["name", "price", "link"]
    # newline="" leaves line-ending handling to the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for record in data:
            sheet.writerow(record)

# Main script
if __name__ == "__main__":
    # Crawl the full catalogue first, then persist everything in one pass.
    output_name = "books_data.csv"
    books = scrape_all_books()
    save_to_csv(books, output_name)
    print("Data has been successfully saved to 'books_data.csv'")


Scraping https://books.toscrape.com/catalogue/page-1.html...
Scraping https://books.toscrape.com/catalogue/page-2.html...
Scraping https://books.toscrape.com/catalogue/page-3.html...
Scraping https://books.toscrape.com/catalogue/page-4.html...
Scraping https://books.toscrape.com/catalogue/page-5.html...
Scraping https://books.toscrape.com/catalogue/page-6.html...
Scraping https://books.toscrape.com/catalogue/page-7.html...
Scraping https://books.toscrape.com/catalogue/page-8.html...
Scraping https://books.toscrape.com/catalogue/page-9.html...
Scraping https://books.toscrape.com/catalogue/page-10.html...
Scraping https://books.toscrape.com/catalogue/page-11.html...
Scraping https://books.toscrape.com/catalogue/page-12.html...
Scraping https://books.toscrape.com/catalogue/page-13.html...
Scraping https://books.toscrape.com/catalogue/page-14.html...
Scraping https://books.toscrape.com/catalogue/page-15.html...
Scraping https://books.toscrape.com/catalogue/page-16.html...
Scraping https://