# In [None]:
"""
BeautifulSoup is a Python library used to extract data from HTML and XML documents.
It converts webpage content into a structured format, making data extraction simple and efficient.

Steps Involved in Web Scraping:
------------------------------
1. Send HTTP Request
2. Parse HTML Content
3. Extract Required Data
4. Store Data for Future Use
"""

"""-------------------------------
### Install Required Libraries
-------------------------------"""

# pip install requests
# pip install beautifulsoup4

# ^ Run these commands in a terminal to install the required packages


"""-------------------------------
### Fetch HTML Content
-------------------------------"""
import requests
url = "https://www.geeksforgeeks.org/dsa/dsa-tutorial-learn-data-structures-and-algorithms/"
# requests applies no timeout by default, so a stalled server would block
# this script indefinitely; bound the wait to 10 seconds.
response = requests.get(url, timeout=10)
# Dump the raw HTML exactly as the server returned it.
print(response.text)

# Explanation:
# Sends a GET request to the given URL
# response.text returns the raw HTML content of the webpage



"""-------------------------------
### Handling 403 Forbidden Error (It's Optional)
-------------------------------"""
# Repeat the request with a browser-like User-Agent; this is the workaround
# this section offers for servers that answer 403 Forbidden.
headers = {'User-Agent': 'Mozilla/5.0'}
# Bound the wait to 10 seconds so a stalled server cannot hang the script.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx status instead of carrying an error page forward.
response.raise_for_status()



"""-------------------------------
### Parse HTML Using BeautifulSoup
-------------------------------"""
from bs4 import BeautifulSoup

# Build the parse tree from the downloaded markup with the built-in parser.
soup = BeautifulSoup(markup=response.text, features='html.parser')
# Print an indented rendering of the tree to make the structure readable.
print(soup.prettify())

# Explanation:
# Converts raw HTML into a structured parse tree
# html.parser is Python's built-in HTML parser



"""-------------------------------
### Extract Specific Data (Example: Inspirational Quotes)
-------------------------------"""
import requests
from bs4 import BeautifulSoup

url = "https://www.passiton.com/inspirational-quotes"
# Bound the wait (requests has no default timeout) and stop on an HTTP error
# rather than scraping an error page and silently producing no quotes.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# One dict per quote with the keys: theme, image_url, lines, author.
quotes = []

# Each quote is looked up as a <div> whose class attribute is exactly this string.
quote_boxes = soup.find_all(
    'div',
    class_='col-6 col-lg-3 text-center margin-30px-bottom sm-margin-30px-top'
)

for box in quote_boxes:
    image = box.img
    heading = box.h5
    # Skip containers without an <img> or <h5>; indexing into None would
    # abort the whole scrape because of a single malformed box.
    if image is None or heading is None:
        continue

    # The alt text is split on " #": the first part is stored as the quote
    # lines and the second part, when present, as the author.
    quote_text = image.get('alt', '').split(" #")
    quote = {
        'theme': heading.text.strip(),
        'image_url': image.get('src', ''),
        'lines': quote_text[0],
        'author': quote_text[1] if len(quote_text) > 1 else 'Unknown'
    }
    quotes.append(quote)

# To inspect the output, uncomment the two lines below:
# for q in quotes[:5]:
#     print(q)
    

# Explanation:
# find_all() locates all quote containers using class names
# Extracts quote text, author, theme, and image URL
# Stores extracted data as a list of dictionaries



"""-------------------------------
### Understanding HTML Structure
-------------------------------"""
# Look up the first <div> whose id attribute is "all_quotes" (None if absent).
container = soup.find('div', id='all_quotes')

# soup.prettify() helps inspect HTML structure
# find() retrieves a single element
# find_all() retrieves multiple matching elements



"""-------------------------------
### Save Extracted Data to CSV
-------------------------------"""
import csv

filename = "quotes.csv"

# newline='' leaves line-ending handling to the csv module.
with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(
        csv_file,
        fieldnames=['theme', 'image_url', 'lines', 'author'],
    )
    # Header row first, then one row per scraped quote dict.
    writer.writeheader()
    writer.writerows(quotes)

# Explanation:
# Creates a CSV file named quotes.csv
# Stores extracted data in a structured tabular format
# Data can be reused for analysis or reporting


""" **CONCLUSION**
This project demonstrates how web scraping can be implemented using Python
and BeautifulSoup. It automates data collection, extracts useful information,
and stores it efficiently, making it a powerful tool for data analysis
and research.
"""