In [12]:
import requests
from bs4 import BeautifulSoup
import csv
import json

# Task 1: Extract Text Data
# Fetch the HTML page. The timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() stops it from parsing an HTTP
# error page (4xx/5xx) as if it were the real document.
url = "https://baraasalout.github.io/test.html"
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Map each tag name to the stripped text of every matching element, in the
# order soup.find_all() returns them.
tags_to_extract = ["p", "li", "h1", "h2"]
text_data = {
    tag: [element.text.strip() for element in soup.find_all(tag)]
    for tag in tags_to_extract
}

# Write the extracted text to CSV: one header row, then one
# (tag name, text) row per extracted element.
with open('Extract_Text_Data.csv', 'w', newline='', encoding='utf-8') as text_csv:
    text_writer = csv.writer(text_csv)
    text_writer.writerow(['Tag Type', 'Content'])
    text_writer.writerows(
        [tag_name, text]
        for tag_name, texts in text_data.items()
        for text in texts
    )

print("Task #1 Finished: Tags data has been extracted and saved to Extract_Text_Data.csv")

# Task 2: Extract Table Data
# Column names come from every <th> in the first <table> on the page.
table = soup.find('table')
headers = [header_cell.text.strip() for header_cell in table.find_all('th')]

# Every <tr> after the first is treated as a data row and turned into a
# {header: cell text} mapping; zip() pairs headers and cells by position.
data_rows = table.find_all('tr')[1:]
rows = [
    dict(zip(headers, (cell.text.strip() for cell in data_row.find_all('td'))))
    for data_row in data_rows
]

# Persist the table as CSV, using the scraped headers as the column order.
with open('Extract_Table_Data.csv', 'w', newline='', encoding='utf-8') as table_csv:
    table_writer = csv.DictWriter(table_csv, fieldnames=headers)
    table_writer.writeheader()
    for row in rows:
        table_writer.writerow(row)

print("Task #2 Finished: Table data has been extracted and saved to Extract_Table_Data.csv")

# Task 3: Extract Product Information (Cards Section)
# Cards are matched by their exact inline style attribute, so any change to
# that markup on the page will make this return no cards.
cards = soup.find_all('div', style="text-align: center; width: 200px; border: 1px solid #ddd; padding: 10px; border-radius: 5px;")
products = []
for card in cards:
    # The price is the first green paragraph in the card. Looking up the same
    # selector a second time would just return the price again, so the
    # availability text is read from the paragraph that follows it instead.
    # NOTE(review): assumes the availability <p> is the next <p> sibling after
    # the price -- confirm against the page markup.
    price_tag = card.find('p', style="color: green;")
    stock_tag = price_tag.find_next_sibling('p')
    product = {
        'Book Title': card.find('strong').text.strip(),
        'Price': price_tag.text.strip(),
        # Fall back to an empty string when the card has no paragraph after the price.
        'Stock Availability': stock_tag.text.strip() if stock_tag is not None else '',
        'Button text': card.find('button').text.strip()
    }
    products.append(product)

# Save to JSON
with open('Product_Information.json', 'w', encoding='utf-8') as jsonfile:
    json.dump(products, jsonfile, indent=4)

print("Task #3 Finished: Product Information (Cards Section) has been extracted and saved to Product_Information.json")

# Task 4: Extract Form Details
# Describe every <input> inside the first <form> on the page; attributes the
# tag does not carry are recorded as empty strings.
form = soup.find('form')
inputs = [
    {
        'field_name': field.get('name', ''),
        'input_type': field.get('type', ''),
        'default_value': field.get('value', ''),
    }
    for field in form.find_all('input')
]

# Persist the field descriptions as a JSON array.
with open('Form_Details.json', 'w', encoding='utf-8') as form_json:
    json.dump(inputs, form_json, indent=4)
print("Task #4 Finished: Form_Details have been extracted and saved to Form_Details.json")

# Task 5: Extract Links and Multimedia
# Record the src of the first <iframe> on the page; the link stays an empty
# string when there is no iframe or it has no src attribute.
iframe = soup.find('iframe')
multimedia = {'video_link': ''}
if iframe:
    multimedia['video_link'] = iframe.get('src', '')

# Persist the result as a JSON object.
with open('Multimedia.json', 'w', encoding='utf-8') as media_json:
    json.dump(multimedia, media_json, indent=4)
print("Task #5 Finished: Multimedia links have been extracted and saved to Multimedia.json")

# Task 6: Scraping Challenge (Featured Products)
# Build one record per <div class="product-card">: the id comes from the
# data-id attribute, the rest from the name/price/colors paragraphs.
featured_products = [
    {
        'id': product_card.get('data-id'),
        'name': product_card.find('p', class_='name').text.strip(),
        'price': product_card.find('p', class_='price').text.strip(),
        'colors': product_card.find('p', class_='colors').text.strip()
    }
    for product_card in soup.find_all('div', class_='product-card')
]

print("Task #6 Finished: featured_products have been Printed as folows and saved to featured_products.json")
# Print challenge output
print(json.dumps(featured_products, indent=4))
# Save to JSON. Dump the whole list: writing only the last record built in the
# loop would lose every other product and fail when no cards were found.
with open('featured_products.json', 'w', encoding='utf-8') as jsonfile:
    json.dump(featured_products, jsonfile, indent=4)



Task #1 Finished: Tags data has been extracted and saved to Extract_Text_Data.csv
Task #2 Finished: Table data has been extracted and saved to Extract_Table_Data.csv
Task #3 Finished: Product Information (Cards Section) has been extracted and saved to Product_Information.json
Task #4 Finished: Form_Details have been extracted and saved to Form_Details.json
Task #5 Finished: Multimedia links have been extracted and saved to Multimedia.json
Task #6 Finished: featured_products have been Printed as folows and saved to featured_products.json
[
    {
        "id": "101",
        "name": "Wireless Headphones",
        "price": "$49.99",
        "colors": "Available colors: Black, White, Blue"
    },
    {
        "id": "102",
        "name": "Smart Speaker",
        "price": "$89.99",
        "colors": "Available colors: Grey, Black"
    },
    {
        "id": "103",
        "name": "Smart Watch",
        "price": "$149.99",
        "colors": "Available colors: Black, Silver, Gold"
    }
]
