In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3

In [2]:
def scrape_website(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}  # Prevent blocking
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the website: {e}")
        return []
    
    try:
        soup = BeautifulSoup(response.text, 'html.parser')
        products = []
        
        for item in soup.select('article.product_pod'):  # Updated selector for books
            title = item.h3.a['title'] if item.h3.a else 'N/A'
            price = item.select_one('.price_color').get_text(strip=True) if item.select_one('.price_color') else 'N/A'
            link = "https://books.toscrape.com/" + item.h3.a['href'] if item.h3.a else 'N/A'
            
            products.append((title, price, link))  # Store as tuple for database insertion
        
        return products
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []


In [3]:

def store_in_database(data, db_name="books.db"):
    if not data:
        print("No data to store.")
        return
    
    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()
        
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS books (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT,
                price TEXT,
                link TEXT
            )
        ''')
        
        cursor.executemany("INSERT INTO books (title, price, link) VALUES (?, ?, ?)", data)
        
        conn.commit()
    except sqlite3.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
    finally:
        conn.close()
        print(f"Data stored in {db_name}")

In [4]:

def main():
    url = "https://books.toscrape.com/"
    data = scrape_website(url)
    store_in_database(data)

if __name__ == "__main__":
    main()

Data stored in books.db


In [6]:
def view_data(db_name="books.db"):
    try:
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()
        
        cursor.execute("SELECT * FROM books")
        rows = cursor.fetchall()
        
        if rows:
            for row in rows:
                print(row)
        else:
            print("No data found in the database.")
        
    except sqlite3.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
    finally:
        conn.close()
view_data()

(1, 'A Light in the Attic', 'Â£51.77', 'https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html')
(2, 'Tipping the Velvet', 'Â£53.74', 'https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html')
(3, 'Soumission', 'Â£50.10', 'https://books.toscrape.com/catalogue/soumission_998/index.html')
(4, 'Sharp Objects', 'Â£47.82', 'https://books.toscrape.com/catalogue/sharp-objects_997/index.html')
(5, 'Sapiens: A Brief History of Humankind', 'Â£54.23', 'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html')
(6, 'The Requiem Red', 'Â£22.65', 'https://books.toscrape.com/catalogue/the-requiem-red_995/index.html')
(7, 'The Dirty Little Secrets of Getting Your Dream Job', 'Â£33.34', 'https://books.toscrape.com/catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html')
(8, 'The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull', 'Â£17.93', 'https://books.toscrape.com/catalogue/the-c

In [7]:
def validate_data():
    conn = sqlite3.connect("books.db")
    cursor = conn.cursor()
    cursor.execute("SELECT COUNT(*) FROM books")
    db_count = cursor.fetchone()[0]
    
    scraped_data = scrape_website("https://books.toscrape.com/")
    
    if db_count == len(scraped_data):
        print("Data integrity verified ✅")
    else:
        print(f"Mismatch! DB: {db_count}, Scraped: {len(scraped_data)} ❌")

    conn.close()

validate_data()

Data integrity verified ✅
