In [47]:
import requests
from bs4 import BeautifulSoup
import csv
import json

In [45]:
"""
1. Extract Text Data:
"""
# Download the page once; the parsed `soup` object is reused by the later cells.
url = "https://www.baraasallout.com/test.html"
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently scraping an error page.
response.raise_for_status()

# Parse the HTML
soup = BeautifulSoup(response.content, 'html.parser')

# (CSV label, HTML tag) pairs, processed in this order so the rows come out
# grouped by tag: every <h1>, then every <h2>, then <p>, then <li>.
text_sources = (
    ("Heading", 'h1'),
    ("Heading", 'h2'),
    ("Paragraph", 'p'),
    ("List Item", 'li'),
)

data = []
for label, tag_name in text_sources:
    for element in soup.find_all(tag_name):
        data.append([label, element.get_text(strip=True)])

# Write data to a CSV file.
# newline='' lets the csv module control line endings itself; the `with`
# block closes the file on exit, so no explicit close() is needed.
with open('Extract_Text_Data.CSV', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Type", "Content"])
    writer.writerows(data)

print("Data extraction and CSV creation completed successfully!")

Data extraction and CSV creation completed successfully!


In [42]:
"""
2. Extract Table Data:
"""
# Work on the first <table> in the document (`soup` comes from the text-extraction cell).
table = soup.find('table')
if table is None:
    # soup.find returns None when nothing matches; stop with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("No <table> element found in the page.")

# Take header cells from this table only. Searching the whole document would
# also pick up <th> cells belonging to any other table on the page.
table_head = [th.get_text(strip=True) for th in table.find_all('th')]

table_data = []
for row in table.find_all('tr')[1:]:  # Skip the header row
    cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
    table_data.append(cells)

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('Extract_Table_Data.CSV', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(table_head)
    writer.writerows(table_data)

print("Data extraction and CSV creation completed successfully!")

Data extraction and CSV creation completed successfully!


In [58]:
'''
3. Extract Product Information (Cards Section):
'''
def _card_text(card, css_class):
    """Return the stripped text of the card's <p class=css_class>, or 'N/A' if it has none."""
    node = card.find('p', class_=css_class)
    if not node:
        return 'N/A'
    return node.get_text(strip=True)


# One dict per <div class="product-card">; any missing piece falls back to 'N/A'.
products = [
    {
        'id': card.get('data-id', 'N/A'),
        'name': _card_text(card, 'name'),
        'price': _card_text(card, 'price'),
        'colors': _card_text(card, 'colors'),
    }
    for card in soup.find_all('div', class_='product-card')
]

# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
with open('Featured_Products.JSON', 'w', encoding='utf-8') as json_file:
    json.dump(products, json_file, ensure_ascii=False, indent=4)

print("Featured product information has been extracted and saved to Featured_Products.JSON.")



Featured product information has been extracted and saved to Featured_Products.JSON.


In [55]:
'''
4. Extract Form Details:
'''

# Use the first <form> in the document (`soup` comes from the text-extraction cell).
form = soup.find('form')

# Describe every <input> in the form. Missing attributes fall back to:
# name -> 'N/A', type -> 'text', value -> ''.
form_details = [
    {
        "Field Name": field.get('name', 'N/A'),
        "Input Type": field.get('type', 'text'),
        "Default Value": field.get('value', ''),
    }
    for field in form.find_all('input')
]

with open('Form_Details.JSON', 'w', encoding='utf-8') as json_file:
    json.dump(form_details, json_file, ensure_ascii=False, indent=4)

print("Form details have been extracted and saved to Form_Details.JSON.")


Form details have been extracted and saved to Form_Details.JSON.


In [57]:
'''
5. Extract Links and Multimedia:
'''

# Every <a> that carries an href attribute, in document order.
hyperlinks = [
    {
        "Type": "Hyperlink",
        "Text": anchor.get_text(strip=True),
        "URL": anchor['href'],
    }
    for anchor in soup.find_all('a', href=True)
]

# Every <iframe> that carries a src attribute, recorded as a video entry.
videos = [
    {
        "Type": "Video",
        "Source URL": frame['src'],
    }
    for frame in soup.find_all('iframe', src=True)
]

# Hyperlinks first, then videos.
links_and_media = hyperlinks + videos

with open('Links_and_Multimedia.JSON', 'w', encoding='utf-8') as json_file:
    json.dump(links_and_media, json_file, ensure_ascii=False, indent=4)

print("Links and multimedia data have been extracted and saved to Links_and_Multimedia.JSON.")

Links and multimedia data have been extracted and saved to Links_and_Multimedia.JSON.
