<a href="https://colab.research.google.com/github/devaki20703/cantilever/blob/main/2webscraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import csv

import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.colab import files

In [11]:
# Address of the single static page to scrape; the fetch cell passes this
# value to requests.get().
URL = "http://books.toscrape.com/"

In [12]:
# Fetch the page at URL, parse it, and print the title and price of every
# book listed on it. The parsed book containers are left in `books` for the
# cells that follow.

# Send a browser-like User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# requests waits indefinitely unless a timeout is given; cap the wait (in
# seconds) so a stalled server cannot hang the notebook. A timeout raises
# requests.exceptions.Timeout, a RequestException subclass, so the first
# handler below reports it.
REQUEST_TIMEOUT_SECONDS = 10

# Reset the result before fetching so that a failed run can neither leave
# `books` undefined nor expose a stale list from an earlier execution.
books = []

try:
    # 1. Fetch the page content
    print(f"Fetching content from {URL}...")
    response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

    # Raise an exception if the request was not successful (e.g., 404 Not Found)
    response.raise_for_status()
    print("Page fetched successfully!")

    # 2. Parse the HTML with BeautifulSoup
    # 'html.parser' is the built-in Python parser. Passing the raw bytes
    # (response.content) lets BeautifulSoup work out the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')

    # 3. Find and extract the data
    # Each book on the page is an <article> element with the class 'product_pod'.
    books = soup.find_all('article', class_='product_pod')

    print(f"\nFound {len(books)} books on the page. Extracting details...\n")

    # Loop through each book container to get the details
    for book in books:
        # The title is the 'title' attribute of the <a> tag inside the <h3>.
        title = book.h3.a['title']

        # The price is the text of the <p> tag with the class 'price_color'.
        price = book.find('p', class_='price_color').get_text()

        # Print the extracted data
        print(f"Title: {title}")
        print(f"Price: {price}\n" + "-"*20)

except requests.exceptions.RequestException as e:
    # Network-level failures: connection errors, timeouts, HTTP error statuses.
    print(f"An error occurred during the request: {e}")
except Exception as e:
    # Anything else, e.g. the page markup no longer matching the selectors above.
    print(f"An unexpected error occurred: {e}")

Fetching content from http://books.toscrape.com/...
Page fetched successfully!

Found 20 books on the page. Extracting details...

Title: A Light in the Attic
Price: £51.77
--------------------
Title: Tipping the Velvet
Price: £53.74
--------------------
Title: Soumission
Price: £50.10
--------------------
Title: Sharp Objects
Price: £47.82
--------------------
Title: Sapiens: A Brief History of Humankind
Price: £54.23
--------------------
Title: The Requiem Red
Price: £22.65
--------------------
Title: The Dirty Little Secrets of Getting Your Dream Job
Price: £33.34
--------------------
Title: The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
Price: £17.93
--------------------
Title: The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
Price: £22.60
--------------------
Title: The Black Maria
Price: £52.15
--------------------
Title: Starving Hearts (Triangular Trade Trilogy, #1)
Price: £13.99
------------

In [14]:
# Turn every parsed book container into a {'Title', 'Price'} record.
scraped_data = [
    {
        # Title comes from the 'title' attribute of the <a> inside the <h3>.
        'Title': entry.h3.a['title'],
        # Price is the text of the <p class="price_color"> element.
        'Price': entry.find('p', class_='price_color').get_text(),
    }
    for entry in books
]

In [18]:
# Build a table from the scraped records. The columns are named explicitly so
# the frame (and any CSV written from it) keeps the Title/Price header even
# when scraped_data is empty.
df = pd.DataFrame(scraped_data, columns=['Title', 'Price'])
print(df)

                                                Title   Price
0                                A Light in the Attic  £51.77
1                                  Tipping the Velvet  £53.74
2                                          Soumission  £50.10
3                                       Sharp Objects  £47.82
4               Sapiens: A Brief History of Humankind  £54.23
5                                     The Requiem Red  £22.65
6   The Dirty Little Secrets of Getting Your Dream...  £33.34
7   The Coming Woman: A Novel Based on the Life of...  £17.93
8   The Boys in the Boat: Nine Americans and Their...  £22.60
9                                     The Black Maria  £52.15
10     Starving Hearts (Triangular Trade Trilogy, #1)  £13.99
11                              Shakespeare's Sonnets  £20.66
12                                        Set Me Free  £17.46
13  Scott Pilgrim's Precious Little Life (Scott Pi...  £52.29
14                          Rip it Up and Start Again  £35.02
15  Our 

In [19]:
# Write the table to books.csv in the working directory; index=False leaves
# the DataFrame's row index out of the file.
df.to_csv(path_or_buf='books.csv', index=False)

# Confirm to the reader that the export step ran.
print("\n✅ Data saved to books.csv")


✅ Data saved to books.csv


In [22]:
# Hand books.csv to google.colab's files.download helper. This only works
# inside Colab; presumably it triggers a browser download — confirm against
# the Colab docs.
files.download('books.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>