# Check the MongoDB connection

In [1]:
from pymongo import MongoClient

from pymongo.errors import PyMongoError

# Create a client for the local MongoDB server. MongoClient connects lazily, so
# constructing it does not prove the server is reachable. The short
# server-selection timeout makes the check below give up after about 5 s
# instead of PyMongo's 30 s default when the server is down.
client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)

# Check the connection with the cheap `ping` admin command: it forces a round
# trip to the server and raises if the server cannot be reached.
try:
    client.admin.command("ping")
    print("Connected to MongoDB.")
except PyMongoError as e:
    # Only driver/connection errors are reported here; anything else is a bug
    # in the notebook and should surface as a traceback.
    print("Failed to connect to MongoDB:", e)


Connected to MongoDB.


# Scrape and save to database and CSV

In [2]:
import csv
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

# Connect to MongoDB (no arguments: PyMongo's default host and port)
client = MongoClient()
db = client.books_db
collection = db.books_collection

# Seconds to wait on each HTTP request so a stalled connection cannot hang the cell
REQUEST_TIMEOUT = 30

# A single Session reuses the underlying connection across the page and book requests
session = requests.Session()


def _cell_text(rows, index):
    """Return the stripped text of the first <td> in rows[index], or '' if it is missing."""
    if index >= len(rows):
        return ''
    cell = rows[index].find('td')
    return cell.get_text(strip=True) if cell else ''


try:
    # open(..., mode="w") creates the file but not its parent directory
    os.makedirs("data", exist_ok=True)

    # Create a CSV file to store the book information
    with open("data/books.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Description", "UPC", "Price", "Availability", "Image URL"])

        # Scrape data from each catalogue page
        for page_num in range(1, 51):
            base_url = f"https://books.toscrape.com/catalogue/page-{page_num}.html"
            page_res = session.get(base_url, timeout=REQUEST_TIMEOUT)
            page_res.raise_for_status()  # fail loudly instead of parsing an error page
            # Parse the raw bytes so BeautifulSoup uses the encoding the page itself
            # declares. `.text` relies on the HTTP headers and falls back to
            # ISO-8859-1 when they carry no charset, which garbles characters like "£".
            page_soup = BeautifulSoup(page_res.content, 'html.parser')
            products = page_soup.find_all('article', class_='product_pod')

            # Scrape data for each book on the page
            for product in products:
                # Book links are relative to the catalogue page they appear on
                book_url = urljoin(base_url, product.find('a')['href'])
                res = session.get(book_url, timeout=REQUEST_TIMEOUT)
                res.raise_for_status()
                soup = BeautifulSoup(res.content, 'html.parser')

                title_tag = soup.find('h1')
                title = title_tag.get_text(strip=True) if title_tag else ''

                # The description text is the first <p> after the "product_description" header
                description_header = soup.find('div', {'id': 'product_description'})
                description_tag = description_header.find_next('p') if description_header else None
                description = description_tag.get_text(strip=True) if description_tag else ''

                # Product-information table: row 0 is read as the UPC, row 2 as the
                # price and row 5 as the availability
                table = soup.find('table', {'class': 'table table-striped'})
                rows = table.find_all('tr') if table else []
                upc = _cell_text(rows, 0)
                price = _cell_text(rows, 2)
                availability = _cell_text(rows, 5)

                # The image src is relative to the book page (e.g. "../../media/...");
                # urljoin resolves it instead of slicing off a fixed-length prefix
                image_box = soup.find('div', class_='item active')
                image_tag = image_box.find('img') if image_box else None
                image_url = urljoin(book_url, image_tag['src']) if image_tag else ''

                # Write the data to CSV file
                writer.writerow([title, description, upc, price, availability, image_url])

                # Save the data to MongoDB
                data = {
                    "title": title,
                    "description": description,
                    "upc": upc,
                    "price": price,
                    "availability": availability,
                    "image_url": image_url
                }
                if upc:
                    # Upsert keyed on the UPC so re-running the cell refreshes existing
                    # documents instead of inserting duplicates
                    collection.replace_one({"upc": upc}, data, upsert=True)
                else:
                    # Without a UPC there is no key to match on; keep the plain insert
                    collection.insert_one(data)
finally:
    # Release the HTTP and MongoDB connections even if scraping fails part-way
    session.close()
    client.close()




# Display results from the database

In [3]:
import pymongo

# Open a client on the local MongoDB server and select the scraped-books collection
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["books_db"]
books_collection = db["books_collection"]

# Print at most 5 stored book documents, one per line
sample_books = books_collection.find(limit=5)
for book in sample_books:
    print(book)


{'_id': ObjectId('644731563399e5b8e6aece45'), 'title': 'A Light in the Attic', 'description': "It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,A