In [12]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [13]:

# Step 1: Set target URL
url = "https://books.toscrape.com/"

# Step 2: Send request
response = requests.get(url)

# Step 3: Parse HTML
soup = BeautifulSoup(response.text, "html.parser")

# Step 4: Get all book containers
books = soup.find_all("article", class_="product_pod")
print("Total books found:", len(books))

# Step 5: Extract info from first book only
first_book = books[0]

# Title
title = first_book.h3.a['title']

# Link (make it absolute)
relative_link = first_book.h3.a['href']
full_link = "https://books.toscrape.com/" + relative_link

# Price
price = first_book.find("p", class_="price_color").get_text()

# Rating (e.g., "Three")
rating_class = first_book.find("p", class_="star-rating")['class']
rating = rating_class[1] if len(rating_class) > 1 else "No Rating"

# Print results
print("Title:", title)
print("Link:", full_link)
print("Price:", price)
print("Rating:", rating)


Total books found: 20
Title: A Light in the Attic
Link: https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html
Price: Â£51.77
Rating: Three


In [14]:
# Empty list to store all book data
all_books = []

# Loop through all book containers
for book in books:
    title = book.h3.a['title']
    
    relative_link = book.h3.a['href']
    full_link = "https://books.toscrape.com/" + relative_link
    
    price = book.find("p", class_="price_color").get_text()
    
    rating_class = book.find("p", class_="star-rating")['class']
    rating = rating_class[1] if len(rating_class) > 1 else "No Rating"
    
    # Save data as dictionary
    all_books.append({
        "title": title,
        "price": price,
        "rating": rating,
        "link": full_link
    })

# Show total and preview
print("Total books scraped:", len(all_books))
print("Sample book:", all_books[0])


Total books scraped: 20
Sample book: {'title': 'A Light in the Attic', 'price': 'Â£51.77', 'rating': 'Three', 'link': 'https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html'}


In [15]:
import json
import pandas as pd

# Step 1: Save as JSON
with open("data.json", "w", encoding="utf-8") as json_file:
    json.dump(all_books, json_file, ensure_ascii=False, indent=4)

# Step 2: Save as Excel
df = pd.DataFrame(all_books)
df.to_excel("data.xlsx", index=False)

print("✅ Data saved to data.json and data.xlsx")


✅ Data saved to data.json and data.xlsx
