In [7]:
import re
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [8]:
# Read city names from cities.csv
cities_df = pd.read_csv('cities.csv')

# Clean city names
cities_df['Cities'] = cities_df['Cities'].apply(lambda x: re.sub(r'\s*\([^)]*\)\s*', '', x))

# Select the first 10 cities
cities = cities_df['Cities'][:10]

#cities = cities_df['Cities']

# Initialize an empty list to store all data
all_data = []

# Loop through each city in the first_10_cities Series
for city in cities:
    # Construct the URL for the specific city
    url = f"https://www.eversports.de/l/poledance/{city}"
    print("Scraping URL:", url)  # Add this line to check the constructed URL
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check for HTTP request errors
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find and extract information for each pole dance studio in the city
        for div in soup.find_all('div', class_='flex flex-col justify-between h-full'):
            img_src = div.find('img')['src']
            business_name = div.find('h2', class_='text-lg font-bold').text
            rating_element = div.find('div', class_='flex gap-2 items-center text-sm')
            decimal_rating = float(rating_element.contents[0].strip())  # Extract the decimal rating
            num_reviews = rating_element.find('div', class_='flex').next_sibling.strip()
            categories = div.find('div', class_='flex flex-row items-center flex-wrap gap-2').find_all('span', class_='flex px-2 py-1 border rounded-md text-xs text-slate-500 capitalize')
            categories_list = [category.text for category in categories]
            address = div.find('dl').find('dd', class_='text-sm').text

            data = {
                'City': city.capitalize(),
                'Business Name': business_name,
                'Image Source': img_src,
                'Rating': decimal_rating,  # Decimal rating
                'Number of Reviews': num_reviews,
                'Categories': categories_list,
                'Address': address,
                'Eversports URL': url
            }

            all_data.append(data)
    except Exception as e:
        print(f"Error scraping {city}: {str(e)}")

# Create a DataFrame from the collected data
polestudio_df = pd.DataFrame(all_data)

# Print the first few rows of the DataFrame
print(polestudio_df.head())


Scraping URL: https://www.eversports.de/l/poledance/Aach
Scraping URL: https://www.eversports.de/l/poledance/Aachen
Scraping URL: https://www.eversports.de/l/poledance/Aalen
Scraping URL: https://www.eversports.de/l/poledance/Abenberg
Scraping URL: https://www.eversports.de/l/poledance/Abensberg
Scraping URL: https://www.eversports.de/l/poledance/Achern
Scraping URL: https://www.eversports.de/l/poledance/Achim
Scraping URL: https://www.eversports.de/l/poledance/Adelsheim
Scraping URL: https://www.eversports.de/l/poledance/Adenau
Scraping URL: https://www.eversports.de/l/poledance/Adorf/Vogtl.
Error scraping Adorf/Vogtl.: 404 Client Error: Not Found for url: https://www.eversports.de/l/poledance/Adorf/Vogtl.
Scraping URL: https://www.eversports.de/l/poledance/Ahaus
Scraping URL: https://www.eversports.de/l/poledance/Ahlen
Scraping URL: https://www.eversports.de/l/poledance/Ahrensburg
Scraping URL: https://www.eversports.de/l/poledance/Aichach
Scraping URL: https://www.eversports.de/l/po

In [11]:
# Show the scraped studios table as the cell's rich output
# (long frames are rendered truncated, with "..." rows in the middle).
polestudio_df

Unnamed: 0,City,Business Name,Image Source,Rating,Number of Reviews,Categories,Address,Eversports URL
0,Aachen,poda Studio,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.9,51,[1 Angebot für Neukund:innen],"Martinstraße 10-12, 52062 Aachen",https://www.eversports.de/l/poledance/Aachen
1,Aachen,Pura Studio,/_next/image?url=https%3A%2F%2Ffiles.eversport...,5.0,67,[1 Angebot für Neukund:innen],"Schaufenberger Straße 61 , 52477 Alsdorf",https://www.eversports.de/l/poledance/Aachen
2,Abensberg,Polesition Regensburg,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.9,27,[2 Angebote für Neukund:innen],"Rebhuhnstraße 4, 93326 Abensberg",https://www.eversports.de/l/poledance/Abensberg
3,Abensberg,Sportheim Abensberg,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.9,36,[],"Von-Hazzi-Straße 7, 93326 Abensberg",https://www.eversports.de/l/poledance/Abensberg
4,Achern,Körperformfabrik GbR,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.9,3,"[Kostenlos Schnuppern, 1 Angebot für Neukund:i...","Hauptstraße 108 , 77855 Achern",https://www.eversports.de/l/poledance/Achern
...,...,...,...,...,...,...,...,...
706,Zirndorf,-Pole-Aerial-Dance- Souldance,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.9,65,[1 Angebot für Neukund:innen],"Turnerheimstraße 45 , 90441 Nürnberg",https://www.eversports.de/l/poledance/Zirndorf
707,Zirndorf,Flow Fusion Pole-Aerial-Dance Studio,/_next/image?url=https%3A%2F%2Ffiles.eversport...,5.0,7,[],"Platenstraße 64 , 90441 Nürnberg",https://www.eversports.de/l/poledance/Zirndorf
708,Zirndorf,Lustforlife.Studio,/_next/image?url=https%3A%2F%2Ffiles.eversport...,5.0,30,[],"Ostendstraße 82b , 90482 Nürnberg",https://www.eversports.de/l/poledance/Zirndorf
709,Zwingenberg,Pole Faction - Pole Dance & Fitness,/_next/image?url=https%3A%2F%2Ffiles.eversport...,4.8,19,[1 Angebot für Neukund:innen],"Heidelberger Landstrasse , 64297 Darmstadt",https://www.eversports.de/l/poledance/Zwingenberg


In [10]:
# Target file for the export (the date in the name is presumably the scrape date)
csv_file_name = "Übersicht_15.09.23.csv"

# Write the studios table to disk; index=False keeps the row index out of the file
polestudio_df.to_csv(path_or_buf=csv_file_name, index=False)

# Confirm where the data was written
print(f"DataFrame saved to {csv_file_name}")

DataFrame saved to Übersicht_15.09.23.csv
