In [3]:
import requests
from bs4 import BeautifulSoup
import csv

# Base URL for the "Books to Scrape" website
BASE_URL = "http://books.toscrape.com/catalogue/page-"

# Function to scrape product details (title, price, availability, etc.)
def scrape_page(page_number):
    url = BASE_URL + str(page_number) + ".html"
    response = requests.get(url)
    
    # Check if the request is successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page_number}")
        return []
    
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, "html.parser")
    
    # Find all product containers on the page
    products = soup.find_all("article", class_="product_pod")
    
    scraped_data = []
    
    for product in products:
        try:
            title = product.find("h3").find("a")["title"]
            price = product.find("p", class_="price_color").text.strip()
            availability = product.find("p", class_="instock availability").text.strip()
            
            # Append the data to the list
            scraped_data.append({
                "Title": title,
                "Price": price,
                "Availability": availability
            })
        except AttributeError:
            print("Missing some data in this product.")
    
    return scraped_data

# Main function to scrape the entire website
def scrape_website():
    all_data = []
    
    for page in range(1, 6):  
        print(f"Scraping page {page}...")
        data = scrape_page(page)
        all_data.extend(data)
    
    return all_data

# Function to save the scraped data to a CSV file
def save_to_csv(data, filename="C:/Users/ashra/Desktop/scraped_books.csv"):
    keys = data[0].keys()
    
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data)
    
    print(f"Data saved to {filename}")

# Main execution
if __name__ == "__main__":
    scraped_data = scrape_website()
    
    if scraped_data:
        save_to_csv(scraped_data)
    else:
        print("No data scraped. Please check the website or scraping logic.")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Data saved to C:/Users/ashra/Desktop/scraped_books.csv


In [5]:
import pandas as pd

df= pd.read_csv(r'C:/Users/ashra/Desktop/scraped_books.csv')

df.head()

Unnamed: 0,Title,Price,Availability
0,A Light in the Attic,Â£51.77,In stock
1,Tipping the Velvet,Â£53.74,In stock
2,Soumission,Â£50.10,In stock
3,Sharp Objects,Â£47.82,In stock
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock


In [7]:
df

Unnamed: 0,Title,Price,Availability
0,A Light in the Attic,Â£51.77,In stock
1,Tipping the Velvet,Â£53.74,In stock
2,Soumission,Â£50.10,In stock
3,Sharp Objects,Â£47.82,In stock
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock
...,...,...,...
95,Lumberjanes Vol. 3: A Terrible Plan (Lumberjan...,Â£19.92,In stock
96,"Layered: Baking, Building, and Styling Spectac...",Â£40.11,In stock
97,Judo: Seven Steps to Black Belt (an Introducto...,Â£53.90,In stock
98,Join,Â£35.67,In stock
