In [3]:
import requests
from bs4 import BeautifulSoup

# Step 1: Sending an HTTP request
url = 'http://quotes.toscrape.com/'
# Always pass a timeout: without one, requests may wait forever on a stalled server.
response = requests.get(url, timeout=10)
print("HTTP Response Status Code:", response.status_code)  # Check the status code
# Fail loudly on 4xx/5xx instead of parsing an error page as if it held quotes.
response.raise_for_status()
html_content = response.content  # Raw response body as bytes (hence the b'...' repr when printed)
print("\nRaw HTML Content (Truncated):\n", html_content[:500])  # Print the first 500 bytes of the raw HTML

# Step 2: Parsing HTML Content
soup = BeautifulSoup(html_content, 'html.parser')
print("\nParsed HTML (Prettified):\n", soup.prettify()[:500])  # Print a prettified version of the HTML (truncated)

# Step 3: Extracting quotes and their authors
quotes = soup.find_all('div', class_='quote')
print("\nTotal Quotes Found:", len(quotes))  # Print the number of quotes found
for i, quote in enumerate(quotes, start=1):
    text_tag = quote.find('span', class_='text')
    # Look for the author inside this quote block only. find_next() keeps walking
    # forward through the rest of the document, so it could return the author of a
    # later quote when the current block has none.
    author_tag = quote.find('small', class_='author')
    if text_tag is None or author_tag is None:
        # Malformed block: report it and keep going rather than crash on None.get_text().
        print(f"\nQuote {i}: skipped (missing text or author element)")
        continue
    text = text_tag.get_text()
    author = author_tag.get_text()
    print(f"\nQuote {i}: {text}")
    print(f"Author {i}: {author}")

# Step 4: Extracting links
# href=True keeps only anchors that actually carry an href attribute,
# so link['href'] below cannot raise KeyError.
links = soup.find_all('a', href=True)
print("\nLinks Found on the Page:")
for i, link in enumerate(links, start=1):
    print(f"Link {i}: {link['href']}")


HTTP Response Status Code: 200

Raw HTML Content (Truncated):
 b'<!DOCTYPE html>\n<html lang="en">\n<head>\n\t<meta charset="UTF-8">\n\t<title>Quotes to Scrape</title>\n    <link rel="stylesheet" href="/static/bootstrap.min.css">\n    <link rel="stylesheet" href="/static/main.css">\n    \n    \n</head>\n<body>\n    <div class="container">\n        <div class="row header-box">\n            <div class="col-md-8">\n                <h1>\n                    <a href="/" style="text-decoration: none">Quotes to Scrape</a>\n                </h1>\n            </div>\n            <div cla'

Parsed HTML (Prettified):
 <!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Quotes to Scrape
  </title>
  <link href="/static/bootstrap.min.css" rel="stylesheet"/>
  <link href="/static/main.css" rel="stylesheet"/>
 </head>
 <body>
  <div class="container">
   <div class="row header-box">
    <div class="col-md-8">
     <h1>
      <a href="/" style="text-decoration: none">
     

In [5]:
import requests
from bs4 import BeautifulSoup
import csv

# Define the base URL and the number of pages to scrape
base_url = 'http://quotes.toscrape.com/page/'
pages_to_scrape = 10
request_timeout = 10  # Seconds to wait per request before giving up

# Create a list to store the extracted data (one dict per quote)
quotes_data = []

# Reuse a single session so the underlying connection is kept alive across pages
# instead of being re-established for every request.
with requests.Session() as session:
    # Loop through pages 1..pages_to_scrape
    for page in range(1, pages_to_scrape + 1):
        url = f"{base_url}{page}/"
        response = session.get(url, timeout=request_timeout)

        # Check if the page exists
        if response.status_code != 200:
            print(f"Page {page} does not exist. Skipping...")
            continue

        # Parse the page content
        soup = BeautifulSoup(response.content, 'html.parser')
        quotes = soup.find_all('div', class_='quote')

        # A 200 response can still carry no quote blocks (presumably what the site
        # serves past its last page -- TODO confirm). Do not report such a page as
        # scraped, and stop requesting further pages.
        if not quotes:
            print(f"Page {page} contains no quotes. Stopping.")
            break

        # Extract quotes, authors, and tags
        for quote in quotes:
            text_tag = quote.find('span', class_='text')
            author_tag = quote.find('small', class_='author')
            if text_tag is None or author_tag is None:
                # Skip malformed blocks rather than abort the whole run on None.get_text().
                continue
            text = text_tag.get_text()
            author = author_tag.get_text()
            tags = [tag.get_text() for tag in quote.find_all('a', class_='tag')]

            quotes_data.append({
                'Quote': text,
                'Author': author,
                'Tags': ', '.join(tags)  # Flatten the tag list into a single CSV cell
            })

        print(f"Page {page} scraped successfully.")

# Save the data to a CSV file
output_file = 'quotes.csv'
# newline='' lets the csv module control line endings itself (avoids blank rows on Windows).
with open(output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['Quote', 'Author', 'Tags'])
    writer.writeheader()
    writer.writerows(quotes_data)

print(f"Data successfully saved to {output_file}.")


Page 1 scraped successfully.
Page 2 scraped successfully.
Page 3 scraped successfully.
Page 4 scraped successfully.
Page 5 scraped successfully.
Page 6 scraped successfully.
Page 7 scraped successfully.
Page 8 scraped successfully.
Page 9 scraped successfully.
Page 10 scraped successfully.
Data successfully saved to quotes.csv.
