<a href="https://colab.research.google.com/github/amien1410/amien-scrapers/blob/main/Events_Scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [30]:
res = requests.get('https://www.visitberlin.de/en/event-calendar-berlin?page=0')
soup = BeautifulSoup(res.text, 'html.parser')
berlinEvents = soup.find_all('li', class_='l-list__item')
len(berlinEvents)

21

In [32]:
events = []
base_url = "https://www.visitberlin.de"

for event in berlinEvents:
    # Extract event name, if available
    event_name = event.find('a', class_='teaser-search__mainlink')
    event_name = event_name.get('title', '').strip() if event_name else None

    # Extract picture link, if available
    picture_tag = event.find('img', class_='teaser-search__img fluid-img')
    picture_link = base_url + picture_tag.get('src') if picture_tag else None

    # Extract category, if available
    category_tag = event.find('div', class_='teaser-search__category')
    category = category_tag.get_text(strip=True) if category_tag else None

    # Extract address, if available
    address_tag = event.find('span', class_='teaser-search__print-address teaser-search__print-info')
    address = address_tag.get_text(strip=True) if address_tag else None

    # Extract website, if available
    website_tag = event.find('p', class_='teaser-search__print-link teaser-search__print-info')
    website = website_tag.get_text(strip=True) if website_tag else None

    # Extract time, if available
    time = None
    time_tag = event.find('p', class_='teaser-search__time me')
    if time_tag:
        time = time_tag.select_one('span.me__content').get_text(strip=True)

    # Extract start and end dates, if available
    start_date, end_date = None, None
    date_tag = event.find('p', class_='teaser-search__date')
    if date_tag:
        times = date_tag.find_all('time')
        if len(times) > 1:
            start_date = times[0].get_text(strip=True)
            end_date = times[1].get_text(strip=True)
        elif len(times) == 1:
            start_date = times[0].get_text(strip=True)

    # Create the event info dictionary
    event_info = {
        'event_name': event_name,
        'category': category,
        'date': f"{start_date} - {end_date}" if start_date and end_date else start_date or None,
        'time': time,
        'address': address,
        'picture_link': picture_link,
        'website': website
    }

    # Add the event info to the events list
    events.append(event_info)

events

[{'event_name': 'Awakening and reforms',
  'category': 'History',
  'date': '27/06/2024 - 31/12/2024',
  'time': '08:00 am',
  'address': 'Rathaus WeißenseeBerliner Allee 252-26013088 Berlin',
  'picture_link': 'https://www.visitberlin.de/system/files/styles/visitberlin_teaser_search_visitberlin_mobile_1x/private/event_images/bo-163-b7e09a11-c881-e51c-62a3-fa283f1ee10e.jpg?h=c28cfe2e&itok=VGJtxTew',
  'website': 'https://www.berlin.de/museum-pankow/aktuelles/ausstellungen/historie-ausstellungen/2020/artikel.923700.php'},
 {'event_name': 'Climate change in the green world heritage site - and what we can do',
  'category': 'Science & Technology',
  'date': '27/04/2024 - 31/10/2024',
  'time': '08:00 am',
  'address': 'Park SanssouciZur Historischen Mühle 114469 Potsdam',
  'picture_link': 'https://www.visitberlin.de/system/files/styles/visitberlin_teaser_search_visitberlin_mobile_1x/private/event_images/vb-60-ff5d39a8-8617-4fd4-a53c-a2a1b7e80cbf.jpg?h=43ec1315&itok=PACrzZn_',
  'website'

In [None]:
picture_link

'https://www.visitberlin.de/system/files/styles/visitberlin_teaser_search_visitberlin_mobile_1x/private/event_images/vb-60-ff5d39a8-8617-4fd4-a53c-a2a1b7e80cbf.jpg?h=43ec1315&itok=PACrzZn_'

In [None]:
hamburgUrl = "https://www.hamburg-travel.com/see-explore/events/events-calendar/js.api?filter[date]=02.10.2024&filter[district]=all&page=0&filter[distance]=50"
res = requests.get(hamburgUrl)
soup = BeautifulSoup(res.text, 'html.parser')
hamburgEvents = soup.find_all('article', class_='listTeaser-event')
len(hamburgEvents)

3

In [None]:
events = []
for event in hamburgEvents:
    # Extract event details
    event_name = event.find('h3').text if event.find('h3') else None
    event_type = event.find('ul', class_='listTeaser-event__text__profiling').text.strip() if event.find('ul', class_='listTeaser-event__text__profiling') else None
    date = event.find('span', class_='icon-calendar').next_sibling.strip() if event.find('span', class_='icon-calendar') else None
    time = event.find('span', class_='icon-clock').next_sibling.strip() if event.find('span', class_='icon-clock') else None
    location = event.find('span', class_='icon-located').next_sibling.strip() if event.find('span', class_='icon-located') else None
    img_url = event.find('img')['src'] if event.find('img') else None
    imo_number = None
    departure_info = None

    # Check for IMO number and departure time
    text_sections = event.find_all('p')
    for p in text_sections:
        if 'IMO number' in p.text:
            imo_number = p.text.split('IMO number: ')[1].split()[0] if 'IMO number' in p.text else None
        if 'Departure' in p.text:
            departure_info = event.find('ul').find('li').text.strip()

    event_info = {
        'event_name': event_name,
        'event_type': event_type,
        'date': date,
        'time': time,
        'location': location,
        'image_url': img_url,
        'imo_number': imo_number,
        'departure_info': departure_info
    }
    events.append(event_info)

events

[{'event_name': 'Arrival Mein Schiff 7',
  'event_type': 'Estimated Vessels',
  'date': '02.10.2024',
  'time': '05:30',
  'location': 'Cruise Center Steinwerder',
  'image_url': 'https://www.hamburg-tourism.de/images/19107UOsuxI/rs:fill-down:636:476/sharpen:1.2/cb:/g:ce/aHR0cHM6Ly93d3cuaGFtYnVyZy10cmF2ZWwuY29tL3R5cG8zY29uZi9leHQvaGh0X2Zyb250ZW5kL1Jlc291cmNlcy9QdWJsaWMvSW1hZ2VzL2ZhbGxiYWNrLXZhZGIuanBn',
  'imo_number': '9851189Departure:',
  'departure_info': 'Estimated Vessels'},
 {'event_name': 'Soltauer Wochenmarkt',
  'event_type': "Farmer's Markets",
  'date': '02.10.2024',
  'time': '07:00',
  'location': 'Georges-Lemoine-Platz Soltau',
  'image_url': 'https://www.hamburg-tourism.de/images/J-HFSVxu0p8/rs:fill-down:636:476/sharpen:1.2/cb:/g:ce/aHR0cHM6Ly9oaHQuaW5mb21heG5ldC5kZS9kYXRhL2lteHBsYXRmb3Jtai9pbWFnZXMvd29jaGVubWFya3RfNi5qcGc',
  'imo_number': None,
  'departure_info': None},
 {'event_name': 'Lauenburger Wochenmarkt',
  'event_type': "Farmer's Markets",
  'date': '02.10.20

In [None]:
muenchenUrl = "https://www.muenchen.de/en/veranstaltungen/event?page=0"
res = requests.get(muenchenUrl)
soup = BeautifulSoup(res.text, 'html.parser')
muenchenEvents = soup.find_all('li', class_='m-listing__list-item')
len(muenchenEvents)

30

In [None]:
events = []
for event in muenchenEvents:
  # Extract event name
  event_name = event.find('h3', class_='m-event-list-item__headline').get_text(strip=True)

  # Extract start and end dates
  start_date = event.find('time', class_='m-date-range__item', itemprop='startDate')['datetime']
  end_date = event.find('time', class_='m-date-range__item', itemprop='endDate')['datetime']

  # Extract event day and time
  date_time_tag = event.find('time', datetime=True)
  event_datetime = date_time_tag['datetime'] if date_time_tag else None

  # Extract location
  location_tag = event.find('p', class_='m-event-list-item__detail', itemprop='location')
  location = location_tag.get_text(strip=True) if location_tag else None

  # Extract ticket link
  ticket_link_tag = event.find('a', class_='m-button', href=True)
  ticket_link = ticket_link_tag['href'] if ticket_link_tag else None

  event_info = {
      'event_name': event_name,
      'start_date': start_date,
      'end_date': end_date,
      'event_datetime': event_datetime,
      'location': location,
      'ticket_link': ticket_link
  }
  events.append(event_info)

events

[{'event_name': 'Tanzkurs Zwiefache mit Live-Musik',
  'start_date': '2024-10-02T12:00:00Z',
  'end_date': '2024-10-16T12:00:00Z',
  'event_datetime': '2024-10-02T12:00:00Z',
  'location': 'Kulturzentrum LUISE',
  'ticket_link': 'https://www.muenchenticket.de/tickets/performances/ei8udu5xbyt7/Tanzkurs-Zwiefache-mit-Live-Musik-3-Tages-Ticket?campaign=muenchen'},
 {'event_name': 'Tintenfische, Teufelsfinger und Tentakel - die faszinierende Welt der Kopffüßer',
  'start_date': '2024-08-13T12:00:00Z',
  'end_date': '2024-12-30T12:00:00Z',
  'event_datetime': '2024-08-13T12:00:00Z',
  'location': 'Paläontologisches Museum',
  'ticket_link': None},
 {'event_name': 'Skelette – Choreografen der Bewegung',
  'start_date': '2024-07-17T12:00:00Z',
  'end_date': '2024-10-15T12:00:00Z',
  'event_datetime': '2024-07-17T12:00:00Z',
  'location': 'Museum Mensch und Natur',
  'ticket_link': None},
 {'event_name': 'Sonderausstellung Erntedank: Von Erbsenzählern und Bohnenstangen – die Vielfalt der regio