In [1]:
# Import necessary libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib
import csv
import os

In [2]:
# Define the URL and headers
URL = 'https://books.toscrape.com/catalogue/soumission_998/index.html'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
}

# Send a GET request to the URL.
# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; fail after 10 seconds instead.
response = requests.get(URL, headers=headers, timeout=10)

# Check and print the status code
print("Status Code:", response.status_code)

# Reset the parsed document first so that a failed request cannot leave a
# stale `soup` from an earlier run of this cell for later cells to pick up.
soup = None

# If the request is successful, parse the HTML content
if response.status_code == 200:
    soup = BeautifulSoup(response.content, "html.parser")
    print(soup.prettify())  # Debug print to inspect HTML structure
else:
    print("Failed to retrieve the webpage.")


Status Code: 200
<!DOCTYPE html>
<!--[if lt IE 7]>      <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js" lang="en-us">
 <!--<![endif]-->
 <head>
  <title>
   Soumission | Books to Scrape - Sandbox
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta content="24th Jun 2016 09:29" name="created"/>
  <meta content="
    Dans une France assez proche de la nôtre, un homme s’engage dans la carrière universitaire. Peu motivé par l’enseignement, il s’attend à une vie ennuyeuse mais calme, protégée des grands drames historiques. Cependant les forces en jeu dans le pays ont fissuré le système politique jusqu’à provoquer son effondrement. Cette implosion sans soubresauts, sans vraie révolution, s Dans une France assez proche de la nôtre, un 

In [3]:
# Extract the book title and price.
# Each lookup is followed by get_text(); when the lookup does not produce a
# tag, the attribute access raises AttributeError and a placeholder string
# is stored instead.
try:
    heading_tag = soup.find("h1")
    title = heading_tag.get_text(strip=True)
except AttributeError:
    title = "Title not found"

try:
    price_tag = soup.find(class_="price_color")
    price = price_tag.get_text(strip=True)
except AttributeError:
    price = "Price not found"

# Show what was scraped
print("Title:", title)
print("Price:", price)

Title: Soumission
Price: £50.10


In [7]:
# Clean up the price data: turn the scraped text (e.g. "£50.10") into a float.
# The isinstance guard keeps this cell safe to re-run: once `price` is already
# a float, slicing it would raise TypeError.
if isinstance(price, str) and price != "Price not found":
    # Keep only digits and the decimal point, so a currency prefix of any
    # length (or a thousands separator) is dropped before conversion.
    price = float("".join(ch for ch in price if ch in "0123456789."))

title = title.strip()
print("Clean Title:", title)
print("Clean Price:", price)

Clean Title: Soumission
Clean Price: 50.1


In [9]:
# Record the current local date; it is stored alongside the scraped values
today = datetime.datetime.now().date()
print("Date:", today)

Date: 2024-11-11


In [11]:
# Define header and data for CSV
csv_path = 'bookstoscrapeDataset.csv'
header = ['Title', 'Price', 'Date']
data = [title, price, today]

# The header row is needed when the file is missing OR exists but is empty
# (for example after an interrupted run). Checking existence alone would
# leave such a file without a header.
file_exists = os.path.exists(csv_path)
needs_header = not file_exists or os.path.getsize(csv_path) == 0

# Write data to CSV file; plain append mode suffices because nothing is read
with open(csv_path, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    if needs_header:
        writer.writerow(header)
    writer.writerow(data)

print("Data written to CSV.")

Data written to CSV.


In [13]:
# Define a function to check the price and append data to CSV
def check_price(csv_path='bookstoscrapeDataset.csv', timeout=10):
    """Fetch the book page once and append title, price and date to a CSV file.

    The page address and request headers are read from the module-level
    ``URL`` and ``headers``. On an HTTP 200 response the ``<h1>`` text and the
    text of the element with class ``price_color`` are extracted, the price is
    converted to a float, and one row ``[title, price, date]`` is appended to
    ``csv_path``. A header row is written first when the file is missing or
    empty, so the function also works on a fresh file. On any other status
    code a failure message is printed and nothing is written.

    Args:
        csv_path: Path of the CSV file to append to.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        None. Results are reported via ``print`` and the CSV file.

    Raises:
        requests.exceptions.RequestException: Network-level failures
            (including timeouts) raised by ``requests.get`` are not caught.
    """
    # Without a timeout an unresponsive server would block the call forever.
    response = requests.get(URL, headers=headers, timeout=timeout)
    print("Status Code:", response.status_code)

    if response.status_code != 200:
        print("Failed to retrieve the webpage.")
        return

    soup = BeautifulSoup(response.content, "html.parser")

    # A lookup that finds nothing makes get_text() fail with AttributeError;
    # fall back to a placeholder so the row is still recorded.
    try:
        title = soup.find("h1").get_text(strip=True)
    except AttributeError:
        title = "Title not found"

    try:
        price = soup.find(class_="price_color").get_text(strip=True)
    except AttributeError:
        price = "Price not found"

    if price != "Price not found":
        # Keep only digits and the decimal point so a currency prefix of any
        # length is dropped before conversion.
        price = float("".join(ch for ch in price if ch in "0123456789."))

    today = datetime.date.today()
    data = [title, price, today]

    # Write the header when starting a new (or empty) file so the CSV layout
    # does not depend on another cell having created the file first.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    with open(csv_path, 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['Title', 'Price', 'Date'])
        writer.writerow(data)

    print("Title:", title)
    print("Price:", price)
    print("Data appended to CSV.")

In [15]:
# Run a single check to exercise the function end to end: it prints the HTTP
# status code and, on a 200 response, the title and price, then appends one
# row to the CSV file
check_price()

Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.


In [17]:
# Collect several samples, pausing briefly between requests so the server is
# not hit with a burst of back-to-back calls.
NUM_CHECKS = 20      # number of times the page is fetched
PAUSE_SECONDS = 1    # delay between consecutive requests

for i in range(NUM_CHECKS):
    check_price()  # Call the check_price function
    if i < NUM_CHECKS - 1:
        time.sleep(PAUSE_SECONDS)  # no need to wait after the final request
# The count in the message comes from the same constant as the loop bound,
# so the two cannot drift apart.
print(f"Completed {NUM_CHECKS} iterations.")

Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission
Price: 50.1
Data appended to CSV.
Status Code: 200
Title: Soumission

In [21]:
# Check if the CSV file exists.
# Resolve the same relative file name the writer cells use against the
# current working directory. The previous hard-coded path was specific to one
# user's machine, and its raw string with doubled backslashes produced
# literal double separators.
file_path = os.path.abspath('bookstoscrapeDataset.csv')
print("File exists:", os.path.exists(file_path))

File exists: True
