In [28]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Origin of the Eversports site. scrape_workshops combines it with the href of
# each workshop tile to build the value stored in the 'URL_E' column.
BASE_URL = "https://www.eversports.de"

def scrape_workshops(url, timeout=30):
    """
    Scrapes workshop information from a given URL.

    Args:
        url (str): The URL of the page that lists the workshops.
        timeout (float): Seconds to wait for the server before the request
            is abandoned. Defaults to 30.

    Returns:
        DataFrame: One row per workshop tile with the columns
        'Workshop Name', 'Preis', 'Einheiten', 'Studio Name' and 'URL_E'.
        A field that is absent from a tile is stored as a missing value
        instead of aborting the whole scrape. None is returned when the
        page could not be retrieved.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error retrieving the webpage: {e}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or_none(element):
        # Stripped text of an element, or None when the lookup found nothing.
        # Stripping matches scrape_workshop_details, whose output is merged
        # with this one on 'Workshop Name' and 'Studio Name'.
        return element.text.strip() if element is not None else None

    columns = ['Workshop Name', 'Preis', 'Einheiten', 'Studio Name', 'URL_E']
    records = []

    # One <a> tile per workshop.
    for workshop in soup.find_all('a', class_='marketplace-tile js_marketplace-tile'):
        # The studio name is the first <small> inside the tile's bottom section.
        bottom = workshop.find('div', class_='marketplace-tile__content__bottom')
        studio = bottom.find('small') if bottom is not None else None

        href = workshop.get('href')

        records.append({
            'Workshop Name': text_or_none(workshop.find('h4')),
            'Preis': text_or_none(workshop.find('div', class_='marketplace-tile__price')),
            'Einheiten': text_or_none(workshop.find('small', class_='u-text-bold')),
            'Studio Name': text_or_none(studio),
            # urljoin copes with root-relative, relative and absolute hrefs;
            # plain concatenation is only right for the root-relative case.
            'URL_E': urljoin(BASE_URL, href) if href else None,
        })

    # Passing the column list keeps the column order and yields an empty
    # frame with the expected columns when no tiles were found.
    return pd.DataFrame(records, columns=columns)



In [29]:
# Example call: scrape the workshop listing of one studio page and display the
# resulting table. workshoplist_df is None when the request failed, because
# scrape_workshops returns None in that case.
url = "https://www.eversports.de/sw/poda-studio"
workshoplist_df = scrape_workshops(url)
workshoplist_df


Unnamed: 0,Workshop Name,Preis,Einheiten,Studio Name,URL_E
0,*Shake that* Twerk Workshop,"ab 35,00 €",1 Einheit,poda Studio,https://www.eversports.de/e/workshop/JM68hgK
1,Sexy Floorwork Workshop,"ab 35,00 €",1 Einheit,poda Studio,https://www.eversports.de/e/workshop/9PbaXsNu8
2,Hip Hop Pole Choreo Workshop,"ab 35,00 €",1 Einheit,poda Studio,https://www.eversports.de/e/workshop/2f93D4_


In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_workshop_details(url, entry_id="caeb8b6e-1459-428f-ba21-d6fa39cd2052", timeout=30):
    """
    Scrapes detailed information of a workshop from a given URL.

    Args:
        url (str): The URL of the workshop detail page.
        entry_id (str): Value written to the 'ID' column. Defaults to the
            identifier that used to be hard-coded here; what that identifier
            refers to is not visible from this notebook.
        timeout (float): Seconds to wait for the server before the request
            is abandoned. Defaults to 30.

    Returns:
        DataFrame: A one-row DataFrame with the columns 'Workshop Name',
        'Kategorie', 'Datum', 'Location', 'Studio Name', 'ID', 'URL_E',
        'Beschreibung', 'Uhrzeit' and 'Trainer'. A field whose element is
        not found on the page is None. An empty DataFrame is returned when
        the page could not be retrieved.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error retrieving the webpage: {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.content, 'html.parser')

    def get_text_or_none(element):
        # Stripped text of an element, or None when the lookup found nothing.
        return element.text.strip() if element is not None else None

    def nth_text_or_none(elements, index):
        # Same as get_text_or_none, for a positional pick out of a result
        # list that may be shorter than expected.
        return get_text_or_none(elements[index]) if len(elements) > index else None

    # NOTE(review): the css-* suffixes look like generated class names and may
    # change when the site is rebuilt - confirm the selectors still match.
    workshop_name = get_text_or_none(soup.find('h1', class_='MuiTypography-root MuiTypography-h1 css-gdjtsh'))

    description_div = soup.find('div', class_='css-3awvdx')
    if description_div is not None:
        description = ' '.join(p.text for p in description_div.find_all('p'))
    else:
        description = None

    studio_name = get_text_or_none(soup.find('p', class_='MuiTypography-root MuiTypography-body1 css-z923os'))
    level = get_text_or_none(soup.find('p', class_='MuiTypography-root MuiTypography-body1 css-ilcg2f'))
    date = get_text_or_none(soup.find('p', class_='MuiTypography-root MuiTypography-body1 css-16ai5j1'))

    # Time and location share one class: the first match is read as the time
    # range and the third as the location. Index safely so a page with fewer
    # matches yields None instead of raising IndexError.
    shared_paragraphs = soup.find_all('p', class_='MuiTypography-root MuiTypography-body1 css-bjhn26')
    time_range = nth_text_or_none(shared_paragraphs, 0)
    location = nth_text_or_none(shared_paragraphs, 2)

    # NOTE(review): the recorded run for /e/workshop/JM68hgK put 'Datum' into
    # the 'Trainer' column, so this selector appears to hit a label rather
    # than the trainer name - verify against the page markup.
    trainer_name = get_text_or_none(soup.find('p', class_="MuiTypography-root MuiTypography-body1 css-j61xuw"))

    # Creating the DataFrame
    workshop_df = pd.DataFrame({
        'Workshop Name': [workshop_name],
        'Kategorie': [level],
        'Datum': [date],
        'Location': [location],
        'Studio Name': [studio_name],
        'ID': [entry_id],
        'URL_E': [url],
        'Beschreibung': [description],
        'Uhrzeit': [time_range],
        'Trainer': [trainer_name]
    })

    return workshop_df



In [31]:
# Example call: scrape the detail page of a single workshop and display the
# resulting one-row DataFrame.
# NOTE(review): in the recorded output the 'Trainer' column contains 'Datum',
# which looks like a label rather than a trainer name - check the selector
# used for trainer_name in scrape_workshop_details.
url = "https://www.eversports.de/e/workshop/JM68hgK"
workshop_df = scrape_workshop_details(url)
workshop_df

Unnamed: 0,Workshop Name,Kategorie,Datum,Location,Studio Name,ID,URL_E,Beschreibung,Uhrzeit,Trainer
0,*Shake that* Twerk Workshop,Twerken,"Sonntag, 25. Februar 2024","10-12 Martinstraße Innenhof, 52062 Aachen",poda Studio,caeb8b6e-1459-428f-ba21-d6fa39cd2052,https://www.eversports.de/e/workshop/JM68hgK,Shaken will gelernt sein! Emmy lässt es mal wi...,20:00 - 21:30,Datum


---

In [32]:
# Join the detail row with its listing row to add 'Preis' and 'Einheiten'.
# The key columns (URL_E in particular) pick out the matching listing entry
# wherever it sits in the list; slicing the listing to its first row made the
# result silently empty whenever the workshop was not the first tile.
merged_df = pd.merge(workshop_df, workshoplist_df, on=['Workshop Name', 'Studio Name', 'URL_E'], how='inner')


In [33]:
# Display the merged frame to check that the detail and listing columns ended
# up in the same row.
merged_df

Unnamed: 0,Workshop Name,Kategorie,Datum,Location,Studio Name,ID,URL_E,Beschreibung,Uhrzeit,Trainer,Preis,Einheiten
0,*Shake that* Twerk Workshop,Twerken,"Sonntag, 25. Februar 2024","10-12 Martinstraße Innenhof, 52062 Aachen",poda Studio,caeb8b6e-1459-428f-ba21-d6fa39cd2052,https://www.eversports.de/e/workshop/JM68hgK,Shaken will gelernt sein! Emmy lässt es mal wi...,20:00 - 21:30,Datum,"ab 35,00 €",1 Einheit


In [34]:
# Write the merged record to an Excel workbook in the working directory,
# leaving the DataFrame index out of the sheet.
merged_df.to_excel(excel_writer='Workshop_URL_E_New.xlsx', index=False)