In [1]:
# Imports

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import time
from datetime import datetime, timedelta

In [6]:
# Search window: it starts today and reaches `days_in_advance` days ahead
days_in_advance = 30
today = datetime.now().date()
today_plus_x = today + timedelta(days_in_advance)

# Index of the result page requested in the search URL
i = 1

In [7]:
# Assemble the search URL; both ends of the date window are rendered as
# YYYY-MM-DD with zero-padded month and day, followed by the page index `i`.
window_start = f"{today.year}-{today.month:02d}-{today.day:02d}"
window_end = f"{today_plus_x.year}-{today_plus_x.month:02d}-{today_plus_x.day:02d}"
url = "https://www.kiel-magazin.de/veranstaltungssuche/konzerte/0/" + f"{window_start}/{window_end}/0/{i}"

print(url)

https://www.kiel-magazin.de/veranstaltungssuche/konzerte/0/2024-12-01/2024-12-31/0/1


In [8]:
# Prepare scraping: start a Firefox session, open the search URL and create
# the 10-second explicit wait that the following cells reuse.
driver = webdriver.Firefox()
driver.get(url)
wait = WebDriverWait(driver, timeout=10)

In [9]:
# Click the consent-manager button that carries data-full-consent="true"
# (presumably the "accept all" button of the cookie dialog -- confirm).
# Any failure is only reported so that the following cells can still run.
consent_button_locator = (
    By.CSS_SELECTOR,
    'button.button.ccm--save-settings.ccm--button-primary.ccm--ctrl-init[data-full-consent="true"]',
)
try:
    button = wait.until(EC.element_to_be_clickable(consent_button_locator))
    button.click()
except Exception as e:
    print(f"An error occurred: {e}")

In [14]:
# Read the search headline and parse the text after its last "/" as an integer
# (presumably the total number of result pages -- confirm against the site).
headline_locator = (By.CSS_SELECTOR, 'h1.color-blue.section__hl.event__search--hl')
try:
    h1_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(headline_locator)
    )
    h1_text = int(h1_element.text.rsplit("/", 1)[-1])
    print("Extracted text:", h1_text)
except Exception:
    print("Element not found")

Extracted text: 12


In [23]:
# Collect the event cards of the current page; the wait returns once at least
# one matching <article> element is present.
event_card_locator = (By.CSS_SELECTOR, 'article.card.card__event')
articles = wait.until(EC.presence_of_all_elements_located(event_card_locator))
articles

[<selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="fe21af18-cd98-48cc-8401-a0077af8b63e")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="77433c2b-7c14-43c8-b4de-49988e2dbf54")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="42068a8f-4eca-475b-baf8-20e18376a6a4")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="9b50f4d9-46ce-484d-90dd-6daa1dbc7941")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="1c18f9fe-a8b8-42e6-b827-beb9ccba0918")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element="e49d6a82-c355-4456-95fc-7b02e7228d1a")>,
 <selenium.webdriver.remote.webelement.WebElement (session="eec442d3-5b80-46d8-a910-06d8339d6783", element

In [27]:
# Scratch check: length of the fixed 'Zur Veranstaltung ' prefix of a card title.
# NOTE(review): the article parsing slices the title with [19:-1], i.e. one
# character past this prefix and one before the end -- presumably to drop
# surrounding quotes; confirm against the page markup.
len('Zur Veranstaltung ')

18

In [45]:
# Turn every event card found on the page into a flat dict.
events = []

for article in articles:
    try:
        # The card link carries both the title text and the detail-page URL
        link = article.find_element(By.CSS_SELECTOR, 'a.card-link')
        title = link.get_attribute('title')[19:-1]
        source_url = link.get_attribute('href')

        # <p class="card-date"> holds the date line and the location line,
        # separated by a <br>
        card_date_html = article.find_element(By.CSS_SELECTOR, 'p.card-date').get_attribute('innerHTML')
        date_and_location = card_date_html.split('<br>')

        # Keep what follows the first comma of the date line
        date = date_and_location[0].strip().split(',')[1]
        if "ab" in date:
            ab_parts = date.split(" ab ")
            datedate = ab_parts[0]
            times = ab_parts[1][:-4]
        else:
            datedate, times = date, "N/A"

        if len(date_and_location) > 1:
            location = date_and_location[1].strip()
        else:
            location = ""
        location_parts = location.split(',')

        category = article.find_element(By.CSS_SELECTOR, 'p.card-category').text.strip()

        events.append({
            "Subject": title,
            "Description": source_url,
            "Start_date": datedate,
            "End_date": datedate,
            "Start_time": times,
            "End_time": "N/A",
            "Location": location_parts[0],
            "City": location_parts[-1],
            "Category": category,
            "Music_label": "music"
        })
    except Exception as e:
        print(f"An error occurred while processing an article: {e}")

# Show what was extracted, one event per line
for record in events:
    print(record)

{'Subject': 'A Cappella Party - A cappella', 'Description': 'https://www.kiel-magazin.de/veranstaltungen/a-cappella-party-a-cappella-2318635', 'Start_date': ' 1. Dezember 2024', 'End_date': ' 1. Dezember 2024', 'Start_time': 'N/A', 'End_time': 'N/A', 'Location': 'Audimax', 'City': ' Kiel', 'Category': 'KONZERTE, WEITERE KONZERTE', 'Music_label': 'music'}
{'Subject': 'Festliches Konzert mit dem Lübschen Blech am 1. Advent · St. Thomas-Kirche Lübeck', 'Description': 'https://www.kiel-magazin.de/veranstaltungen/festliches-konzert-mit-dem-l%C3%BCbschen-blech-am-1-advent-st-thomas-kirche-l%C3%BCbeck-1214871', 'Start_date': ' 1. Dezember 2024', 'End_date': ' 1. Dezember 2024', 'Start_time': '17:00', 'End_time': 'N/A', 'Location': 'St.-Thomas-Kirche Lübeck', 'City': ' Lübeck', 'Category': 'KONZERTE, KLASSIK', 'Music_label': 'music'}
{'Subject': 'Jugend-Sinfonieorchester Ahrensburg', 'Description': 'https://www.kiel-magazin.de/veranstaltungen/jugend-sinfonieorchester-ahrensburg-1205948', 'Star

In [42]:
# Tabulate the scraped event dicts (one row per dict) for a quick visual check
pd.DataFrame(events)

Unnamed: 0,Subject,Description,date_time,Location,City,Category,Music_label
0,A Cappella Party - A cappella,https://www.kiel-magazin.de/veranstaltungen/a-...,1. Dezember 2024,Audimax,Kiel,"KONZERTE, WEITERE KONZERTE",music
1,Festliches Konzert mit dem Lübschen Blech am 1...,https://www.kiel-magazin.de/veranstaltungen/fe...,1. Dezember 2024 ab 17:00 Uhr,St.-Thomas-Kirche Lübeck,Lübeck,"KONZERTE, KLASSIK",music
2,Jugend-Sinfonieorchester Ahrensburg,https://www.kiel-magazin.de/veranstaltungen/ju...,1. Dezember 2024 ab 18:00 Uhr,ATLANTIC Grand Hotel Travemünde,Travemünde,"KONZERTE, KLASSIK",music
3,KIWANIS BENEFIZ WEIHNACHTSKONZERT,https://www.kiel-magazin.de/veranstaltungen/ki...,1. Dezember 2024 ab 17:00 Uhr,Kolosseum zu Lübeck,Lübeck,"KONZERTE, JAZZ, BLUES, SWING & CHANSON",music
4,Malo Moray & His Inflatable Knee (Ambiet/Kraut...,https://www.kiel-magazin.de/veranstaltungen/ma...,1. Dezember 2024 ab 17:00 bis 19:30 Uhr,Atelier Paravicini,Lübeck,"KONZERTE, WEITERE KONZERTE",music
5,"Mary Roos & Wolfgang Trepper: Mehr Nutten, meh...",https://www.kiel-magazin.de/veranstaltungen/ma...,1. Dezember 2024 ab 19:30 Uhr,Wunderino Arena,KIEL,"KONZERTE, WEITERE KONZERTE",music
6,Musizierstunde Schlagzeug,https://www.kiel-magazin.de/veranstaltungen/mu...,1. Dezember 2024 ab 17:00 Uhr,MHL / Großer Saal,Lübeck,"KONZERTE, WEITERE KONZERTE",music
7,Weihnachtskonzert des Weihnachtshilfswerks der...,https://www.kiel-magazin.de/veranstaltungen/we...,1. Dezember 2024 ab 16:00 Uhr,BBZ Bad Segeberg (EduArt),Bad Segeberg,"KONZERTE, WEITERE KONZERTE, RELIGION & SPIRITU...",music
8,Krabbelkonzert,https://www.kiel-magazin.de/veranstaltungen/kr...,1. Dezember 2024 ab 14:30 Uhr,Theater Lübeck (Junges Studio),Lübeck,"KONZERTE, KLASSIK",music
9,Krabbelkonzert,https://www.kiel-magazin.de/veranstaltungen/kr...,1. Dezember 2024 ab 16:00 Uhr,Theater Lübeck (Junges Studio),Lübeck,"KONZERTE, KLASSIK",music


# Function

In [15]:
def _kiel_magazin_search_url(start, end, page):
    """Return the concert-search URL for the date window ``start``..``end`` and a result page."""
    return (
        "https://www.kiel-magazin.de/veranstaltungssuche/konzerte/0/"
        f"{start.year}-{start.month:02d}-{start.day:02d}/"
        f"{end.year}-{end.month:02d}-{end.day:02d}/0/{page}"
    )


def _parse_kiel_magazin_article(article):
    """Convert one ``article.card.card__event`` element into an event dict."""
    title_element = article.find_element(By.CSS_SELECTOR, 'a.card-link')
    # NOTE(review): drops the first 19 and the last character of the title
    # attribute -- presumably a 'Zur Veranstaltung ' prefix plus surrounding
    # quotes; confirm against the page markup.
    title = title_element.get_attribute('title')[19:-1]
    source_url = title_element.get_attribute('href')

    # <p class="card-date"> holds the date line and the location line,
    # separated by a <br>.
    date_and_location = (
        article.find_element(By.CSS_SELECTOR, 'p.card-date')
        .get_attribute('innerHTML')
        .split('<br>')
    )

    # Keep what follows the first comma of the date line (the part before it
    # is presumably the weekday). The leading space is kept on purpose so the
    # raw values stay unchanged.
    date = date_and_location[0].strip().split(',')[1]
    if " ab " in date:
        # "<date> ab <time> Uhr" -> date and time part; test for the very
        # separator that is used for splitting.
        datedate, times = date.split(" ab ", 1)
        if times.endswith(" Uhr"):
            times = times[:-4]
    else:
        datedate, times = date, "N/A"

    location = date_and_location[1].strip() if len(date_and_location) > 1 else ""
    location_parts = location.split(',')

    category = article.find_element(By.CSS_SELECTOR, 'p.card-category').text.strip()

    return {
        "Subject": title,
        "Description": source_url,
        "Start_date": datedate,
        "End_date": datedate,
        "Start_time": times,
        "End_time": "N/A",
        "Location": location_parts[0],
        "City": location_parts[-1],
        "Category": category,
        "Music_label": "music"
    }


def scrape_kiel_magazin(days_in_advance=30):
    """Scrape the concert listings of kiel-magazin.de for the coming days.

    Opens the concert search for the window from today to
    ``today + days_in_advance`` in a Firefox session, reads the number of
    result pages from the search headline and parses every event card on
    every page.

    Parameters
    ----------
    days_in_advance : int, default 30
        Length of the search window in days, counted from today.

    Returns
    -------
    pandas.DataFrame
        One row per event with the columns Subject, Description (the event
        URL), Start_date, End_date, Start_time, End_time, Location, City,
        Category and Music_label. Values are raw strings as found on the
        page; missing times are the literal "N/A". The columns are present
        even when no event was found.

    Notes
    -----
    Failures of single steps (consent button, page count, a single card, a
    page without cards) are printed and skipped. If the page count cannot be
    read, 5 pages are attempted. The browser session is always shut down,
    also when an unexpected error is raised.
    """
    # Local import: keeps this cell runnable without touching the import cell.
    from selenium.common.exceptions import TimeoutException

    event_columns = [
        "Subject", "Description", "Start_date", "End_date", "Start_time",
        "End_time", "Location", "City", "Category", "Music_label",
    ]

    today = datetime.today().date()
    today_plus_x = today + timedelta(days=days_in_advance)
    events = []

    driver = webdriver.Firefox()
    try:
        driver.get(_kiel_magazin_search_url(today, today_plus_x, 1))
        time.sleep(5)
        wait = WebDriverWait(driver, 10)

        # Consent dialog: click the button carrying data-full-consent="true"
        # (presumably "accept all" -- confirm). Best effort only.
        try:
            button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.button.ccm--save-settings.ccm--button-primary.ccm--ctrl-init[data-full-consent="true"]')))
            button.click()
        except Exception as e:
            print(f"An error occurred: {e}")

        # The text after the last "/" of the search headline is used as the
        # number of result pages.
        try:
            h1_element = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'h1.color-blue.section__hl.event__search--hl'))
            )
            page_count = int(h1_element.text.split("/")[-1])
        except Exception:
            print("Page element not found")
            page_count = 5

        for page in range(1, page_count + 1):
            driver.get(_kiel_magazin_search_url(today, today_plus_x, page))
            time.sleep(5)

            try:
                articles = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'article.card.card__event')))
            except TimeoutException:
                # E.g. a page beyond the real page count when the fallback of
                # 5 pages is in use; keep what was collected so far.
                print(f"No event cards found on result page {page}")
                continue

            for article in articles:
                try:
                    events.append(_parse_kiel_magazin_article(article))
                except Exception as e:
                    print(f"An error occurred while processing an article: {e}")
    finally:
        # quit() ends the whole session (browser and driver process);
        # close() would only close the current window.
        driver.quit()

    df_raw = pd.DataFrame(events, columns=event_columns)
    return df_raw



In [25]:
# Run the scraper for a 30-day window and show the raw result
df_raw = scrape_kiel_magazin(days_in_advance=30)
df_raw

Unnamed: 0,Subject,Description,Start_date,End_date,Start_time,End_time,Location,City,Category,Music_label
0,A Cappella Party - A cappella,https://www.kiel-magazin.de/veranstaltungen/a-...,1. Dezember 2024,1. Dezember 2024,,,Audimax,Kiel,"KONZERTE, WEITERE KONZERTE",music
1,Festliches Konzert mit dem Lübschen Blech am 1...,https://www.kiel-magazin.de/veranstaltungen/fe...,1. Dezember 2024,1. Dezember 2024,17:00,,St.-Thomas-Kirche Lübeck,Lübeck,"KONZERTE, KLASSIK",music
2,Jugend-Sinfonieorchester Ahrensburg,https://www.kiel-magazin.de/veranstaltungen/ju...,1. Dezember 2024,1. Dezember 2024,18:00,,ATLANTIC Grand Hotel Travemünde,Travemünde,"KONZERTE, KLASSIK",music
3,KIWANIS BENEFIZ WEIHNACHTSKONZERT,https://www.kiel-magazin.de/veranstaltungen/ki...,1. Dezember 2024,1. Dezember 2024,17:00,,Kolosseum zu Lübeck,Lübeck,"KONZERTE, JAZZ, BLUES, SWING & CHANSON",music
4,Malo Moray & His Inflatable Knee (Ambiet/Kraut...,https://www.kiel-magazin.de/veranstaltungen/ma...,1. Dezember 2024,1. Dezember 2024,17:00 bis 19:30,,Atelier Paravicini,Lübeck,"KONZERTE, WEITERE KONZERTE",music
...,...,...,...,...,...,...,...,...,...,...
231,New York Gospel Stars,https://www.kiel-magazin.de/veranstaltungen/ne...,29. Dezember 2024,29. Dezember 2024,17:00,,Kolosseum zu Lübeck,Lübeck,"KONZERTE, WEITERE KONZERTE",music
232,New York Gospel Stars,https://www.kiel-magazin.de/veranstaltungen/ne...,29. Dezember 2024,29. Dezember 2024,20:00,,Kolosseum zu Lübeck,Lübeck,"KONZERTE, WEITERE KONZERTE",music
233,Paul Potts & Piano - The Greatest Hits,https://www.kiel-magazin.de/veranstaltungen/pa...,30. Dezember 2025,30. Dezember 2025,20:00,,Stadthalle Eckernförde,ECKERNFÖRDE,"KONZERTE, KLASSIK",music
234,Gemischter Chor der Travemünder Liedertafel vo...,https://www.kiel-magazin.de/veranstaltungen/ge...,30. Dezember 2025,30. Dezember 2025,19:00,,Gesellschaftshaus Travemünde,Travemünde,"KONZERTE, WEITERE KONZERTE",music


In [30]:
# Preview the first five rows of df_raw
df_raw.head()

Unnamed: 0,Subject,Description,Start_date,End_date,Start_time,End_time,Location,City,Category,Music_label
0,A Cappella Party - A cappella,https://www.kiel-magazin.de/veranstaltungen/a-...,2024-12-01,2024-12-01,,,Audimax,Kiel,"KONZERTE, WEITERE KONZERTE",music
1,Festliches Konzert mit dem Lübschen Blech am 1...,https://www.kiel-magazin.de/veranstaltungen/fe...,2024-12-01,2024-12-01,17:00,,St.-Thomas-Kirche Lübeck,Lübeck,"KONZERTE, KLASSIK",music
2,Jugend-Sinfonieorchester Ahrensburg,https://www.kiel-magazin.de/veranstaltungen/ju...,2024-12-01,2024-12-01,18:00,,ATLANTIC Grand Hotel Travemünde,Travemünde,"KONZERTE, KLASSIK",music
3,KIWANIS BENEFIZ WEIHNACHTSKONZERT,https://www.kiel-magazin.de/veranstaltungen/ki...,2024-12-01,2024-12-01,17:00,,Kolosseum zu Lübeck,Lübeck,"KONZERTE, JAZZ, BLUES, SWING & CHANSON",music
4,Malo Moray & His Inflatable Knee (Ambiet/Kraut...,https://www.kiel-magazin.de/veranstaltungen/ma...,2024-12-01,2024-12-01,17:00,19:30,Atelier Paravicini,Lübeck,"KONZERTE, WEITERE KONZERTE",music


In [20]:
# Strip any leading/trailing spaces from Start_date and End_date
df_raw['Start_date'] = df_raw['Start_date'].str.strip()
df_raw['End_date'] = df_raw['End_date'].str.strip()

# Convert Start_date and End_date to datetime format and then to YYYY-MM-DD
df_raw['Start_date'] = pd.to_datetime(df_raw['Start_date'], format='%d. %B %Y', dayfirst=True).dt.strftime('%Y-%m-%d')
df_raw['End_date'] = pd.to_datetime(df_raw['End_date'], format='%d. %B %Y', dayfirst=True).dt.strftime('%Y-%m-%d')

In [22]:
# Split "<start> bis <end>" in Start_time into Start_time and End_time.
# n=1 caps the split at two parts and reindex() guarantees exactly two result
# columns, so the assignment also works when no row contains ' bis ' (a bare
# expand=True would then yield a single column and raise ValueError).
df_raw[['Start_time', 'End_time']] = (
    df_raw['Start_time'].str.split(' bis ', n=1, expand=True).reindex(columns=[0, 1])
)

In [28]:
# Preprocessing function

def preprocess_kiel_magazin(df_raw):
    """Normalise the raw kiel-magazin scrape into the final column layout.

    * ``Start_date`` / ``End_date``: text such as ``' 1. Dezember 2024'`` is
      converted to ``'2024-12-01'``. German month names are resolved with a
      fixed table, so the result does not depend on the process locale.
      Values that already are ``YYYY-MM-DD`` are accepted unchanged.
    * ``Start_time`` / ``End_time``: ``'17:00 bis 19:30'`` is split into
      start and end. Rows without ``' bis '`` keep their start time and get
      a missing end time; the ``'N/A'`` end-time placeholder of the scraper
      counts as missing, an end time that is already set is kept.

    The input frame is not modified, and applying the function to its own
    output returns the same values.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Frame with at least the columns Subject, Start_date, End_date,
        Start_time, End_time, Location, City, Description, Category and
        Music_label.

    Returns
    -------
    pandas.DataFrame
        New frame with exactly these columns, in that order.

    Raises
    ------
    ValueError
        If a date string is neither ``'<day>. <German month> <year>'`` nor
        ``YYYY-MM-DD``.
    KeyError
        If a required column is missing.
    """
    german_months = {
        'januar': 1, 'februar': 2, 'märz': 3, 'april': 4, 'mai': 5, 'juni': 6,
        'juli': 7, 'august': 8, 'september': 9, 'oktober': 10, 'november': 11,
        'dezember': 12,
    }

    def _to_iso_date(value):
        # Non-strings (NaN/None) stay missing, as they did with the .str accessor.
        if not isinstance(value, str):
            return None
        text = value.strip()
        # Already normalised (e.g. the frame was preprocessed before).
        try:
            return datetime.strptime(text, '%Y-%m-%d').date().isoformat()
        except ValueError:
            pass
        parts = text.split()
        if len(parts) == 3 and parts[0].endswith('.'):
            day_text, month_name, year_text = parts[0][:-1], parts[1], parts[2]
            month = german_months.get(month_name.lower())
            if month is not None and day_text.isdigit() and year_text.isdigit():
                # datetime() validates the day/month/year combination.
                return datetime(int(year_text), month, int(day_text)).date().isoformat()
        raise ValueError(
            f"Unrecognised date {value!r}; expected '<day>. <German month> <year>' or YYYY-MM-DD"
        )

    def _pick_end_time(split_end, previous_end):
        # Prefer the end time found after ' bis '; otherwise keep a real,
        # previously stored end time; the 'N/A' placeholder counts as missing.
        if isinstance(split_end, str):
            return split_end
        if isinstance(previous_end, str) and previous_end != 'N/A':
            return previous_end
        return None

    # Work on a copy so the caller's frame (and re-runs of the cell) are unaffected.
    df_work = df_raw.copy()

    for column in ('Start_date', 'End_date'):
        df_work[column] = df_work[column].map(_to_iso_date)

    # n=1 caps the split at two parts; reindex() guarantees two columns even
    # when no row contains ' bis '.
    time_parts = df_work['Start_time'].str.split(' bis ', n=1, expand=True).reindex(columns=[0, 1])
    df_work['End_time'] = [
        _pick_end_time(split_end, previous_end)
        for split_end, previous_end in zip(time_parts[1], df_work['End_time'])
    ]
    df_work['Start_time'] = time_parts[0]

    df_prep = df_work[['Subject','Start_date', 'End_date', 'Start_time', 'End_time', 'Location', 'City', 'Description', 'Category', 'Music_label']]
    return df_prep

In [29]:
# Apply the preprocessing to the raw scrape and show the result
df_prep = preprocess_kiel_magazin(df_raw=df_raw)
df_prep

Unnamed: 0,Subject,Start_date,End_date,Start_time,End_time,Location,City,Description,Category,Music_label
0,A Cappella Party - A cappella,2024-12-01,2024-12-01,,,Audimax,Kiel,https://www.kiel-magazin.de/veranstaltungen/a-...,"KONZERTE, WEITERE KONZERTE",music
1,Festliches Konzert mit dem Lübschen Blech am 1...,2024-12-01,2024-12-01,17:00,,St.-Thomas-Kirche Lübeck,Lübeck,https://www.kiel-magazin.de/veranstaltungen/fe...,"KONZERTE, KLASSIK",music
2,Jugend-Sinfonieorchester Ahrensburg,2024-12-01,2024-12-01,18:00,,ATLANTIC Grand Hotel Travemünde,Travemünde,https://www.kiel-magazin.de/veranstaltungen/ju...,"KONZERTE, KLASSIK",music
3,KIWANIS BENEFIZ WEIHNACHTSKONZERT,2024-12-01,2024-12-01,17:00,,Kolosseum zu Lübeck,Lübeck,https://www.kiel-magazin.de/veranstaltungen/ki...,"KONZERTE, JAZZ, BLUES, SWING & CHANSON",music
4,Malo Moray & His Inflatable Knee (Ambiet/Kraut...,2024-12-01,2024-12-01,17:00,19:30,Atelier Paravicini,Lübeck,https://www.kiel-magazin.de/veranstaltungen/ma...,"KONZERTE, WEITERE KONZERTE",music
...,...,...,...,...,...,...,...,...,...,...
231,New York Gospel Stars,2024-12-29,2024-12-29,17:00,,Kolosseum zu Lübeck,Lübeck,https://www.kiel-magazin.de/veranstaltungen/ne...,"KONZERTE, WEITERE KONZERTE",music
232,New York Gospel Stars,2024-12-29,2024-12-29,20:00,,Kolosseum zu Lübeck,Lübeck,https://www.kiel-magazin.de/veranstaltungen/ne...,"KONZERTE, WEITERE KONZERTE",music
233,Paul Potts & Piano - The Greatest Hits,2025-12-30,2025-12-30,20:00,,Stadthalle Eckernförde,ECKERNFÖRDE,https://www.kiel-magazin.de/veranstaltungen/pa...,"KONZERTE, KLASSIK",music
234,Gemischter Chor der Travemünder Liedertafel vo...,2025-12-30,2025-12-30,19:00,,Gesellschaftshaus Travemünde,Travemünde,https://www.kiel-magazin.de/veranstaltungen/ge...,"KONZERTE, WEITERE KONZERTE",music
