In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
import time
from bs4 import BeautifulSoup

In [2]:
def _scroll_until_height_settles(browser, pause_seconds):
    """Scroll to the bottom repeatedly until document.body.scrollHeight stops changing."""
    read_height = "return document.body.scrollHeight"
    jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    previous_height = browser.execute_script(read_height)
    while True:
        browser.execute_script(jump_to_bottom)
        time.sleep(pause_seconds)  # Give newly requested content time to appear
        current_height = browser.execute_script(read_height)
        if current_height == previous_height:  # Nothing new was added: stop scrolling
            return
        previous_height = current_height


# Open the event calendar in a fresh Chrome session
driver = webdriver.Chrome()
url = "https://www.kiel.de/de/_info/veranstaltungskalender.php#/de/kiel/default/search/Event/mode:next_months,3/sort:chronological/view:gallery"
driver.get(url)
driver.implicitly_wait(10)

# Switch into the calendar iframe, then wait for the first gallery item inside it
page_wait = WebDriverWait(driver, 10)
page_wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#et4pages_iframe")))
page_wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".item-gallery")))

# Keep scrolling so lazily loaded events are added to the page
_scroll_until_height_settles(driver, pause_seconds=3)

# One last scroll followed by a fixed pause before the page source is captured
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(10)

# Parse a snapshot of the current page source with BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'html.parser')


In [5]:

# Collect one dict per event card (name, address, time) from the loaded page,
# then close the browser and print the collected records.
try:
    # Locate all event elements on the page (adjust the selector if the site layout changes)
    events = driver.find_elements(By.CSS_SELECTOR, ".media-list > .media")
except Exception as e:
    print(f"Error locating event elements: {e}")
    driver.quit()
    # Re-raise instead of calling exit(): exit() would stop the notebook kernel,
    # while a raised exception only aborts this cell and keeps the traceback.
    raise

# One dict per event, in page order
events_list = []

try:
    for event in events:
        event_data = {}

        # Extract event name
        try:
            event_data["name"] = event.find_element(By.CSS_SELECTOR, "h4.media-heading span").text
        except Exception as e:
            print(f"Error extracting event name: {e}")
            event_data["name"] = None

        # Bound on every iteration so the messages below never hit an undefined
        # name or report the name of the previous event.
        event_name = event_data["name"]

        # Extract address
        try:
            event_data["address"] = event.find_element(By.CSS_SELECTOR, "i.fa-map-marker + span").text
        except Exception as e:
            print(f"Error extracting address for event: {event_name}, Error: {e}")
            event_data["address"] = None

        # Extract time. The text is stored directly in the dict: binding it to a
        # variable called `time` would shadow the imported `time` module and break
        # every later time.sleep() call in this kernel.
        try:
            event_data["time"] = event.find_element(By.CSS_SELECTOR, "i.fa-circle + span").text
        except Exception as e:
            print(f"Error extracting time for event: {event_name}, Error: {e}")
            event_data["time"] = None

        # Append the event data to the events list
        events_list.append(event_data)
finally:
    # Close the browser even if the loop is interrupted
    driver.quit()

# Print the stored events
for event in events_list:
    print(event)


In [6]:
from datetime import datetime, timedelta

# Replace the relative label "Heute" in every scraped time string with today's
# date in ISO format (YYYY-MM-DD).
today_date = datetime.now().strftime("%Y-%m-%d")

for event in events_list:
    # The extraction step stores None when no time text was found; skip those
    # records instead of failing on None.replace(...).
    if event["time"] is not None:
        event["time"] = event["time"].replace("Heute", today_date)


In [7]:
import locale
from datetime import datetime, timedelta

# Switch time formatting/parsing to German so day and month names are handled
GERMAN_TIME_LOCALE = 'de_DE.UTF-8'
locale.setlocale(locale.LC_TIME, GERMAN_TIME_LOCALE)


'de_DE.UTF-8'

In [8]:
# German month names keyed by their first three lowercase letters
# ("mrz" is an alternative abbreviation of "März").
_GERMAN_MONTHS = {
    "jan": 1, "feb": 2, "mär": 3, "mrz": 3, "apr": 4, "mai": 5, "jun": 6,
    "jul": 7, "aug": 8, "sep": 9, "okt": 10, "nov": 11, "dez": 12,
}

# A day/month that lies further back than this is assumed to mean next year.
_MAX_DAYS_IN_PAST = 183


def _resolve_event_date(day, month, now):
    """Return midnight of day/month in the current year, or in the next year when that fits better."""
    fallback = None
    for year in (now.year, now.year + 1):
        try:
            candidate = datetime(year, month, day)
        except ValueError:  # e.g. 29 February in a non-leap year
            continue
        if year > now.year or now - candidate <= timedelta(days=_MAX_DAYS_IN_PAST):
            return candidate
        fallback = candidate
    if fallback is None:
        raise ValueError(f"Invalid calendar date: day={day}, month={month}")
    return fallback


def parse_event_time(raw_text):
    """Convert a scraped event time label into ``(start_datetime, end_datetime)``.

    Only the first line of ``raw_text`` is parsed; following lines (hints such as
    "Endet in einer Stunde") are ignored. Supported first-line formats:

    * ``"Heute 13:00 - 20:00"``            -> today's date
    * ``"Morgen 13:00 - 20:00"``           -> tomorrow's date
    * ``"2024-11-18 09:00 - 20:00"``       -> explicit ISO date
    * ``"Mi., 20. Nov. 13:00 - 20:00"``    -> German day/month; the year is the
      current one unless that date lies more than ``_MAX_DAYS_IN_PAST`` days back,
      in which case the next year is used.

    Month names are resolved through a fixed German table, so the result does not
    depend on the process locale.

    Args:
        raw_text: The time text of one event.

    Returns:
        Tuple of two naive ``datetime`` objects: start and end of the event.

    Raises:
        ValueError: If no ``HH:MM - HH:MM`` range or no recognisable date is found.
    """
    import re  # Local import: keeps this cell runnable without another import cell

    now = datetime.now()

    # Only the first line carries the schedule
    stripped = (raw_text or "").strip()
    schedule = stripped.splitlines()[0] if stripped else ""

    time_match = re.search(r"(\d{1,2}:\d{2})\s*[-–]\s*(\d{1,2}:\d{2})", schedule)
    if time_match is None:
        raise ValueError(f"No 'HH:MM - HH:MM' time range found in {raw_text!r}")
    start_time, end_time = time_match.groups()

    # Everything in front of the time range describes the date
    date_part = schedule[:time_match.start()]
    iso_match = re.search(r"\d{4}-\d{2}-\d{2}", date_part)

    if "Morgen" in date_part:
        event_date = now + timedelta(days=1)
    elif "Heute" in date_part:
        event_date = now
    elif iso_match:
        event_date = datetime.strptime(iso_match.group(), "%Y-%m-%d")
    else:
        # Day number followed by the first three letters of the month name;
        # the weekday prefix ("Mi.,") has no leading digits and is skipped.
        named_match = re.search(r"(\d{1,2})\.\s*([^\W\d_]{3})", date_part)
        month = _GERMAN_MONTHS.get(named_match.group(2).lower()) if named_match else None
        if month is None:
            raise ValueError(f"No recognisable date found in {raw_text!r}")
        event_date = _resolve_event_date(int(named_match.group(1)), month, now)

    # Combine the resolved date with the start and end times
    date_str = event_date.strftime("%Y-%m-%d")
    start_datetime = datetime.strptime(f"{date_str} {start_time}", "%Y-%m-%d %H:%M")
    end_datetime = datetime.strptime(f"{date_str} {end_time}", "%Y-%m-%d %H:%M")

    return start_datetime, end_datetime


In [9]:
# Show every collected event record, one per line
for record in events_list:
    print(record)

In [10]:
import requests

def get_coordinates(address, user_agent="kiel-events-notebook/1.0", timeout=10):
    """Look up latitude and longitude for an address via the Nominatim search API.

    Args:
        address: Free-text address to geocode. ``None`` or a blank string is
            answered with ``(None, None)`` without sending a request.
        user_agent: Value of the ``User-Agent`` header. Nominatim's usage policy
            asks clients to identify their application; replace the default with
            something that identifies your own deployment.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``(latitude, longitude)`` as floats for the first search hit, or
        ``(None, None)`` when the address is empty, the request fails, the
        response is not HTTP 200, or there are no hits.
    """
    if not address or not address.strip():
        return None, None

    url = "https://nominatim.openstreetmap.org/search"
    params = {"q": address, "format": "json"}
    headers = {"User-Agent": user_agent}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None, None
        results = response.json()  # Parsed once and reused below
    except (requests.RequestException, ValueError):
        # Network problems, timeouts and undecodable bodies all count as "not found"
        return None, None

    if not results:
        return None, None

    location = results[0]
    return float(location["lat"]), float(location["lon"])

# Imported by name: the extraction cell rebinds the global `time` to a string,
# so `time.sleep` cannot be relied on at this point.
from time import sleep

# Geocode every event. Each distinct address is looked up once, and lookups are
# spaced one second apart to stay within Nominatim's limit of 1 request per second.
coordinates_by_address = {}
for event in events_list:
    address = event["address"]
    if address not in coordinates_by_address:
        if coordinates_by_address:  # No pause needed before the very first lookup
            sleep(1)
        coordinates_by_address[address] = get_coordinates(address)
    lat, lon = coordinates_by_address[address]
    event["latitude"] = lat
    event["longitude"] = lon


In [89]:
# Show every event record again, now including latitude and longitude
for record in events_list:
    print(record)

{'name': 'Weihnachtsmarkt im CITTI-Park', 'address': 'CITTI-PARK, Mühlendamm 1, Kiel', 'time': '2024-11-18 09:00 - 20:00\nEndet in einer Stunde', 'latitude': None, 'longitude': None}
{'name': 'Riesenrad am Bahnhof "La Noria"', 'address': 'Kiel Hauptbahnhof , Sophienblatt 25-27, Kiel', 'time': '2024-11-18 11:00 - 21:00\nEndet in 2 Stunden', 'latitude': None, 'longitude': None}
{'name': 'Stadtwerke Eisfestival', 'address': 'Germaniahafen, Am Germaniahafen, Kiel', 'time': '2024-11-18 13:00 - 20:00\nEndet in einer Stunde', 'latitude': None, 'longitude': None}
{'name': 'Schnippelparty', 'address': 'Nachhaltigkeitszentrum Kiel, Europaplatz 2, Kiel', 'time': '2024-11-18 17:00 - 19:00\nEndet in 21 Minuten', 'latitude': None, 'longitude': None}
{'name': 'Das skandinavische Mittelalter und seine Medien', 'address': 'Christian-Albrechts-Universität zu Kiel | OS75, Olshausenstraße 75, Kiel', 'time': '2024-11-18 18:00 - 20:00\nEndet in einer Stunde', 'latitude': None, 'longitude': None}
{'name': 'A

In [108]:
# NOTE(review): parse_event_time is also defined in an earlier cell of this
# notebook; this later definition is the one in effect after a full run.
# Keep a single definition once the notebook is cleaned up.

# German month names keyed by their first three lowercase letters
# ("mrz" is an alternative abbreviation of "März").
_GERMAN_MONTHS = {
    "jan": 1, "feb": 2, "mär": 3, "mrz": 3, "apr": 4, "mai": 5, "jun": 6,
    "jul": 7, "aug": 8, "sep": 9, "okt": 10, "nov": 11, "dez": 12,
}

# A day/month that lies further back than this is assumed to mean next year.
_MAX_DAYS_IN_PAST = 183


def _resolve_event_date(day, month, now):
    """Return midnight of day/month in the current year, or in the next year when that fits better."""
    fallback = None
    for year in (now.year, now.year + 1):
        try:
            candidate = datetime(year, month, day)
        except ValueError:  # e.g. 29 February in a non-leap year
            continue
        if year > now.year or now - candidate <= timedelta(days=_MAX_DAYS_IN_PAST):
            return candidate
        fallback = candidate
    if fallback is None:
        raise ValueError(f"Invalid calendar date: day={day}, month={month}")
    return fallback


def parse_event_time(raw_text):
    """Convert a scraped event time label into ``(start_datetime, end_datetime)``.

    Only the first line of ``raw_text`` is parsed; following lines (hints such as
    "Endet in einer Stunde") are ignored. Supported first-line formats:

    * ``"Heute 13:00 - 20:00"``            -> today's date
    * ``"Morgen 13:00 - 20:00"``           -> tomorrow's date
    * ``"2024-11-18 09:00 - 20:00"``       -> explicit ISO date
    * ``"Mi., 20. Nov. 13:00 - 20:00"``    -> German day/month; the year is the
      current one unless that date lies more than ``_MAX_DAYS_IN_PAST`` days back,
      in which case the next year is used.

    Month names are resolved through a fixed German table, so the result does not
    depend on the process locale.

    Args:
        raw_text: The time text of one event.

    Returns:
        Tuple of two naive ``datetime`` objects: start and end of the event.

    Raises:
        ValueError: If no ``HH:MM - HH:MM`` range or no recognisable date is found.
    """
    import re  # Local import: keeps this cell runnable without another import cell

    now = datetime.now()

    # Only the first line carries the schedule
    stripped = (raw_text or "").strip()
    schedule = stripped.splitlines()[0] if stripped else ""

    time_match = re.search(r"(\d{1,2}:\d{2})\s*[-–]\s*(\d{1,2}:\d{2})", schedule)
    if time_match is None:
        raise ValueError(f"No 'HH:MM - HH:MM' time range found in {raw_text!r}")
    start_time, end_time = time_match.groups()

    # Everything in front of the time range describes the date
    date_part = schedule[:time_match.start()]
    iso_match = re.search(r"\d{4}-\d{2}-\d{2}", date_part)

    if "Morgen" in date_part:
        event_date = now + timedelta(days=1)
    elif "Heute" in date_part:
        event_date = now
    elif iso_match:
        event_date = datetime.strptime(iso_match.group(), "%Y-%m-%d")
    else:
        # Day number followed by the first three letters of the month name;
        # the weekday prefix ("Mi.,") has no leading digits and is skipped.
        named_match = re.search(r"(\d{1,2})\.\s*([^\W\d_]{3})", date_part)
        month = _GERMAN_MONTHS.get(named_match.group(2).lower()) if named_match else None
        if month is None:
            raise ValueError(f"No recognisable date found in {raw_text!r}")
        event_date = _resolve_event_date(int(named_match.group(1)), month, now)

    # Combine the resolved date with the start and end times
    date_str = event_date.strftime("%Y-%m-%d")
    start_datetime = datetime.strptime(f"{date_str} {start_time}", "%Y-%m-%d %H:%M")
    end_datetime = datetime.strptime(f"{date_str} {end_time}", "%Y-%m-%d %H:%M")

    return start_datetime, end_datetime
