# In [1]:
import pandas as pd
import streamlit as st
from a_PyCaller import process_urls
from tqdm import tqdm
from datetime import datetime

def process_and_print_results(url, all_pole_studio_data, all_workshops_data, all_workshop_details_data, all_urls_data):
    """Scrape one URL and fold its results into the running DataFrames.

    ``process_urls`` is called with a single-element list. Its result is
    iterated as a mapping of result name -> DataFrame; every non-empty frame
    is reported via ``tqdm.write`` and, if its name is one of
    ``'pole_studio_data'``, ``'workshops_data'`` or ``'workshop_details'``,
    appended to the matching accumulator. The URL is always appended to
    ``all_urls_data``, whether or not anything was scraped.

    Returns:
        The four accumulators, in the same order as the parameters:
        ``(all_pole_studio_data, all_workshops_data,
        all_workshop_details_data, all_urls_data)``.
    """
    # Running totals, keyed by the result name they collect.
    accumulated = {
        'pole_studio_data': all_pole_studio_data,
        'workshops_data': all_workshops_data,
        'workshop_details': all_workshop_details_data,
    }

    scraped = process_urls([url])

    for label, frame in (scraped.items() if scraped else ()):
        # Nothing to report or append for missing/empty results.
        if frame is None or frame.empty:
            continue

        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        tqdm.write(f"{stamp} - INFO - Scraping Data from: {url}")
        tqdm.write(f"{label.replace('_', ' ').title()}: {len(frame)} entries")

        # Result names without an accumulator are reported but not stored.
        if label in accumulated:
            accumulated[label] = pd.concat([accumulated[label], frame], ignore_index=True)

    # Record the URL as processed.
    url_row = pd.DataFrame({'URL': [url]})
    all_urls_data = pd.concat([all_urls_data, url_row], ignore_index=True)

    return (
        accumulated['pole_studio_data'],
        accumulated['workshops_data'],
        accumulated['workshop_details'],
        all_urls_data,
    )

def merge_workshop_tables(all_workshop_details_data, all_workshops_data):
    """Inner-join workshop details with the workshop listing.

    The join is on the ``'Workshopname'`` and ``'URL_E'`` columns.

    Args:
        all_workshop_details_data: DataFrame of workshop detail rows.
        all_workshops_data: DataFrame of workshop listing rows.

    Returns:
        The merged DataFrame, or an empty DataFrame when either input lacks
        the join columns (e.g. nothing was scraped and the accumulator is
        still a column-less ``pd.DataFrame()``). Without this guard
        ``pd.merge`` raises ``KeyError`` in that situation.
    """
    join_keys = ['Workshopname', 'URL_E']
    both_have_keys = all(
        set(join_keys).issubset(frame.columns)
        for frame in (all_workshop_details_data, all_workshops_data)
    )
    if not both_have_keys:
        return pd.DataFrame()
    return pd.merge(all_workshop_details_data, all_workshops_data, on=join_keys, how='inner')


def process_all_urls(initial_urls):
    """Process every URL and return the accumulated result tables.

    This function was originally also named ``main``; it was shadowed by the
    Streamlit ``main()`` defined after it, so the call ``main(initial_urls)``
    raised ``TypeError``. It now has its own name.

    Args:
        initial_urls: Iterable of URLs handed one by one to
            ``process_and_print_results``.

    Returns:
        A 5-tuple ``(all_pole_studio_data, workshops, all_workshops_data,
        all_workshop_details_data, all_urls_data)`` where ``workshops`` is
        the inner join produced by ``merge_workshop_tables``.
    """
    # Initialize DataFrames
    all_pole_studio_data = pd.DataFrame()
    all_workshops_data = pd.DataFrame()
    all_workshop_details_data = pd.DataFrame()
    all_urls_data = pd.DataFrame(columns=['URL'])

    # Process each URL with a tqdm progress bar
    with tqdm(initial_urls, desc="Processing URLs", dynamic_ncols=True) as pbar:
        for url in pbar:
            all_pole_studio_data, all_workshops_data, all_workshop_details_data, all_urls_data = process_and_print_results(
                url, all_pole_studio_data, all_workshops_data, all_workshop_details_data, all_urls_data
            )

    workshops = merge_workshop_tables(all_workshop_details_data, all_workshops_data)

    # CSV export is currently disabled:
    # all_pole_studio_data.to_csv("Pole_Studio_Übersicht_S.csv", index=False)
    # all_workshops_data.to_csv("Workshop_Liste_SW.csv", index=False)
    # all_workshop_details_data.to_csv("Workshop_Übersicht_E.csv", index=False)
    # all_urls_data.to_csv("All_URLs.csv", index=False)

    # Return the final DataFrames
    return all_pole_studio_data, workshops, all_workshops_data, all_workshop_details_data, all_urls_data


def main():
    """Streamlit entry point: upload a CSV, pick a column, process its URLs.

    The selected column's values are passed to ``process_all_urls`` and each
    resulting DataFrame is rendered with ``st.write``.
    """
    # Title of the app
    st.title("URL Processing App")

    # Upload CSV file
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

    if uploaded_file is not None:
        # Read CSV file
        df = pd.read_csv(uploaded_file)

        # Display the uploaded data
        st.write("Uploaded CSV file:")
        st.write(df)

        # Choose column
        column_name = st.selectbox("Select the column to process", df.columns)

        # Blank CSV cells are read as NaN; drop them so they are not
        # handed to the scraper as if they were URLs.
        initial_urls = df[column_name].dropna().to_list()
        processed_data = process_all_urls(initial_urls)

        # Show processed data
        st.write("Processed data:")
        for data in processed_data:
            st.write(data)


if __name__ == "__main__":
    main()


# Notebook output captured with the cell (appears to be a truncated Streamlit message):
# 2024-03-17 16:47:28.188
#   command:
#
#     streamlit run C:\Users\hamud\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
