In [9]:
!pip install beautifulsoup4 requests streamlit




In [13]:
import requests
from bs4 import BeautifulSoup
import streamlit as st
import pandas as pd

# Function to scrape product details based on user input for tag and class
def scrape_product_info(url, details, timeout=10):
    """Download a product page and extract the requested fields from it.

    Args:
        url: Address of the product page to fetch.
        details: Mapping of field name -> ``{'tag': ..., 'class': ...}``
            describing which element holds that field. Entries whose tag or
            class is missing/empty yield ``"Not Available"``. If the mapping
            contains the key ``'images'``, that field is filled with the
            ``src`` of every ``<img>`` on the page instead.
        timeout: Seconds to wait for the server before giving up, so a single
            unresponsive host cannot hang the whole scraping run.

    Returns:
        A dict with one entry per key of ``details`` (text of the first
        matching element, or ``"Not Available"``; for ``'images'`` a list of
        absolute image URLs, or ``["Not Available"]``). If anything goes
        wrong (network failure, HTTP error status, ...) the result is
        ``{'error': <message>}`` instead; this function never raises.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import urljoin

    placeholder = "Not Available"
    try:
        response = requests.get(url, timeout=timeout)
        # Without this, 404/500 error pages would be parsed as if they were
        # product pages and silently produce rows full of placeholders.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        product_info = {}
        for detail, tag_class in details.items():
            # Tolerate incomplete selectors instead of failing the whole page.
            selector = tag_class or {}
            tag, class_name = selector.get('tag'), selector.get('class')
            element = soup.find(tag, {'class': class_name}) if tag and class_name else None
            product_info[detail] = element.get_text() if element is not None else placeholder

        # Images are a list rather than a single element, so they replace the
        # placeholder written by the loop above (keeping the key's position).
        if 'images' in details:
            images = [
                urljoin(url, img['src'])  # resolve relative paths against the page URL
                for img in soup.find_all('img', {'src': True})
                if img['src']
            ]
            product_info['images'] = images if images else [placeholder]

        return product_info
    except Exception as e:
        # Deliberate best-effort: the caller loops over many links and shows
        # the failure as a row rather than aborting the run.
        return {'error': str(e)}

# Streamlit interface for the app
st.title("Product Web Scraping App")

# File uploader for the Notepad file containing product links (one URL per line)
uploaded_file = st.file_uploader("Upload a Notepad file with product links", type="txt")

# Input for the HTML tags and class names to scrape (title, SKU, price, images, description)
st.write("Please provide the HTML tag and class for each product detail you want to scrape.")
details = {
    'title': {'tag': st.text_input("Title Tag (e.g., 'h1')"), 'class': st.text_input("Title Class (e.g., 'product-title')")},
    'sku': {'tag': st.text_input("SKU Tag (e.g., 'span')"), 'class': st.text_input("SKU Class (e.g., 'sku')")},
    'price': {'tag': st.text_input("Price Tag (e.g., 'span')"), 'class': st.text_input("Price Class (e.g., 'price')")},
    'description': {'tag': st.text_input("Description Tag (e.g., 'div')"), 'class': st.text_input("Description Class (e.g., 'product-description')")},
    'images': {'tag': None, 'class': None}  # Image handling is separate
}

# Only proceed if the file is uploaded
if uploaded_file:
    # "utf-8-sig" drops the byte-order mark that Windows Notepad may prepend;
    # with plain "utf-8" the BOM would stay glued to the first URL and break it.
    raw_text = uploaded_file.getvalue().decode("utf-8-sig")
    # One URL per line; ignore blank lines and stray surrounding whitespace.
    links = [line.strip() for line in raw_text.splitlines() if line.strip()]

    # Scraping button
    if st.button("Scrape Products"):
        scraped_data = []
        for link in links:
            st.write(f"Scraping: {link}")  # Display the link being scraped
            product_info = scrape_product_info(link, details)
            # Keep the source URL with each row so results (and especially
            # error rows) can be traced back to the link that produced them.
            scraped_data.append({'url': link, **product_info})

        # Display the scraped data as a table
        if scraped_data:
            df = pd.DataFrame(scraped_data)
            st.write(df)
        else:
            st.warning("No data found for the provided links.")




In [14]:
# Print this machine's public IPv4 address, as reported by ipv4.icanhazip.com
# (-q: no progress output, -O -: write the response to stdout).
# NOTE(review): presumably this is the value localtunnel asks for as the tunnel
# password when the public URL is first opened; confirm.
!wget -q -O - ipv4.icanhazip.com


34.125.33.161


In [None]:
 # Start the Streamlit app from app.py in the background (`&`), then expose
 # port 8501 through a localtunnel URL.
 # NOTE(review): no visible cell writes app.py; presumably the Streamlit code
 # cell must first be saved to that file (e.g. with %%writefile app.py); confirm.
 !streamlit run app.py & npx localtunnel --port 8501



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.33.161:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://nasty-facts-train.loca.lt
