In [34]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Base URL of the category (First Page)
base_url = "http://books.toscrape.com/catalogue/category/books/mystery_3/"

# Start with the first page
page_url = base_url + "index.html"

# Headers to mimic a real browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Create an empty list to store book details
book_data = []

while page_url:
    # Send a GET request to fetch the page content
    response = requests.get(page_url, headers=headers)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")

        # Find all book containers
        books = soup.find_all("article", class_="product_pod")

        # Extract book details from the current page
        for book in books:
            title = book.h3.a["title"]
            price = book.find("p", class_="price_color").text.strip()
            rating_class = book.find("p", class_="star-rating")["class"]
            rating = rating_class[1]  # Second class name represents rating

            # Append data to list
            book_data.append({"Title": title, "Rating": rating, "Price": price})

        # Find the "Next" button to navigate to the next page
        next_page = soup.find("li", class_="next")
        if next_page:
            next_page_url = next_page.a["href"]  # Extract next page link
            page_url = base_url + next_page_url  # Update URL for next iteration
        else:
            break  # Exit loop if no "Next" button found
    else:
        print(f"Failed to retrieve page. Status Code: {response.status_code}")
        break  # Stop scraping if request fails

# Convert list to DataFrame
df = pd.DataFrame(book_data)

# Save to CSV
df.to_csv("books_scraped.csv", index=False)

# Display first few rows
print(df.head(20))


                                                Title Rating    Price
0                                       Sharp Objects   Four  Â£47.82
1                                In a Dark, Dark Wood    One  Â£19.63
2                                 The Past Never Ends   Four  Â£56.50
3                                    A Murder in Time    One  Â£16.64
4     The Murder of Roger Ackroyd (Hercule Poirot #4)   Four  Â£44.10
5                      The Last Mile (Amos Decker #2)    Two  Â£54.21
6              That Darkness (Gardiner and Renner #1)    One  Â£13.92
7                Tastes Like Fear (DI Marnie Rome #3)    One  Â£10.69
8              A Time of Torment (Charlie Parker #14)   Five  Â£48.35
9             A Study in Scarlet (Sherlock Holmes #1)    Two  Â£16.73
10                   Poisonous (Max Revere Novels #3)  Three  Â£26.80
11  Murder at the 42nd Street Library (Raymond Amb...   Four  Â£54.36
12                                        Most Wanted  Three  Â£35.28
13                  