### Imports

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Helper functions

In [2]:
def parse_site(url):
    response = requests.get(url).content
    soup = BeautifulSoup(response, "html.parser")
    
    return soup

### Special Events

In [3]:
def scrape_tyrol_special(soup, event_category, event_country):
    _event_titles = []
    _event_links = []
    _event_locations = []
    _event_dates = []
    _event_descriptions = []
    _event_categories = []
    _event_countries = []

    for event_titles, event_details in zip(soup.find_all("div", class_="highlightListicleElement__titleImage"), soup.find_all("div", class_="highlightListicleElement__content h-grid-12 h-grid-fixed")):
        title = event_titles.find('a')
        event_title = title.contents[2].strip()
        event_link = title['href']
        event_location, event_date = event_details.find("h2").contents[0].strip().split(',')
        event_description = event_details.find("p").contents[0].strip()

        #print(title.__dict__)
        _event_titles.append(event_title)
        _event_links.append(event_link)
        _event_locations.append(event_location)
        _event_dates.append(event_date)
        _event_descriptions.append(event_description)
 
        if (event_category):
            _event_categories.append(event_category)
            
        if (event_country):
            _event_countries.append(event_country)
            
    return (_event_titles, _event_links, _event_locations, _event_dates, _event_descriptions, _event_categories, _event_countries)

### Uniform Events

In [4]:
def scrape_tyrol_uniform(soup, event_category, event_country):
    _event_titles = []
    _event_links = []
    _event_locations = []
    _event_dates = []
    _event_descriptions = []
    _event_categories = []
    _event_countries = []
    
    for event in soup.find_all("div", class_="listElement_wrapper"):
        event_title = event.find("a").contents[0]
        event_date_location = event.find("p", class_="subline").contents
        if len(event_date_location) == 3:
            event_location = event_date_location[0].strip()
            event_date = None
        else:
            event_date = event_date_location[0].strip()
            event_location = event_date_location[2].strip()

        event_link = event.find("a")['href']
        event_description = event.find_all('p')[1].string
        
        _event_titles.append(event_title)
        _event_links.append(event_link)
        _event_locations.append(event_location)
        _event_dates.append(event_date)
        _event_descriptions.append(event_description)
        
        if (event_category):
            _event_categories.append(event_category)
            
        if (event_country):
            _event_countries.append(event_country)
            
    return (_event_titles, _event_links, _event_locations, _event_dates, _event_descriptions, _event_categories, _event_countries)

### dictionary

In [12]:
# these values should be stored in a meta database
scraper_dict = [
        {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/extremsportevents', 'event_category': 'Extrem sports', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_uniform'}
       , {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/top-trailrunning-events', 'event_category': 'Trailrunning', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_uniform'}
       , {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/top-bikeevents', 'event_category': 'Biking', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_uniform'}
       , {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/rennrad-events', 'event_category': 'Bike racing', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_uniform'}
       , {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/top-wanderveranstaltungen', 'event_category': 'Hiking', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_uniform'}
       , {'event_url': 'https://www.tirol.at/reisefuehrer/veranstaltungen/sommersportevents', 'event_category': 'Sommer sports', 'event_country': 'Austria', 'event_function': 'scrape_tyrol_special'}
       , {'event_url': 'https://www.sport-oesterreich.at/sportkalender', 'event_category': 'Sommer sports', 'event_country': 'Austria', 'event_function': 'scrape_sportkalender'}
            ]

### single test

In [67]:
def scrape_sportkalender(soup):
    _event_titles = []
    _event_links = []
    _event_locations = []
    _event_dates = []
    _event_descriptions = []     
    
    soup_event_titles = soup.find_all("div", class_="views-field views-field-title-1")
    soup_event_dates = soup.find_all("span", class_="date-display-single")
    soup_event_locations = soup.find_all("div", class_="views-field views-field-field-veranstaltungsort")
    soup_event_descriptions = soup.find_all("div", class_="views-field views-field-body")
    
    for event_titles, event_dates, event_locations, event_descriptions in zip(soup_event_titles, soup_event_dates, soup_event_locations, soup_event_descriptions):
        title = event_titles.find('a')
        event_title = title.contents[0].strip()
        event_date = event_dates.contents[0].strip()
        location = event_locations.find('a')
        event_location = location.contents[0].strip()
        description = event_descriptions.find('p')
        
        print(description)
        
        _event_titles.append(event_title)
        _event_dates.append(event_date)
        _event_locations.append(event_location)
    
    return (_event_titles, _event_dates, _event_locations)

In [68]:
soup = parse_site('https://www.sport-oesterreich.at/sportkalender')
print(scrape_sportkalender(soup))

<p>Von 20. bis 22. Mai startet der Motorsport-Sommer 2022 am Red Bull Ring mit dem ersten großen Highlight – dem ADAC GT Masters – so richtig durch! Die Fahrer und Teams absolvieren ihren diesjährigen Österreich-Auftritt am zweiten Rennwochenende der Internationalen Deutschen GT-Meisterschaft. Einmal mehr werden Traumautos von Audi, BMW, Lamborghini, Mercedes und Porsche die Fans am Spielberg begeistern. Zudem sind zahlreiche rot-weiß-rote Piloten und attraktive Partnerserien am Start, wie der Porsche Carrera Cup Deutschland, die ADAC GT4 Germany, die ADAC TCR Germany und die Fanatec GT2 European Series.</p>
<p>Am 22. Mai 2022 findet das beliebte Open Water Stubenbergsee Freiwasser-Schwimmevent im ebenso benannten Stubenbergsee statt. Es gibt drei Bewerbe mit unterschiedlichen Distanzen zur Auswahl.</p>
None
<p>Die <a href="/swimrun">SwimRun</a> Bewerbe des BackwaterMan Waldviertel im <a href="/ottensteiner-stausee">Ottensteiner Stausee</a> finden vom 27. bis 29.5.2022 statt. Die klass

### Main

In [6]:
events_dict = {
    'event_title': []
    , 'event_link': []
    , 'event_location': []
    , 'event_date': []
    , 'event_description': []
    , 'event_category': []
    , 'event_country': []
}

for scraper_info in scraper_dict:
    event_url = scraper_info['event_url']
    event_category = scraper_info['event_category']
    event_country = scraper_info['event_country']
    event_function = scraper_info['event_function']
    
    soup = parse_site(event_url)
    
    event_results = eval(event_function + f"(soup, event_category, event_country)")
    
    for i, event_key in enumerate(events_dict.keys()):
        events_dict[event_key]+= [x for x in event_results[i]]

In [7]:
pd.DataFrame.from_dict(events_dict)

Unnamed: 0,event_title,event_link,event_location,event_date,event_description,event_category,event_country
0,Spartan Race,https://www.tirol.at/reisefuehrer/veranstaltun...,Oberndorf,,Egal ob Couch-Potatoe oder Olympiasiegerin – a...,Extrem sports,Austria
1,Red Bull Dolomitenmann,https://www.tirol.at/reisefuehrer/veranstaltun...,Lienz,10.09.2022,Einer der härtesten Staffelwettbewerbe findet ...,Extrem sports,Austria
2,XLETIX Challenge Kühtai,https://www.tirol.at/reisefuehrer/veranstaltun...,Kühtai,23.07.2022,Einer der schönsten und härtesten Hindernisläu...,Extrem sports,Austria
3,Challenge Kaiserwinkl-Walchsee,https://www.tirol.at/reisefuehrer/veranstaltun...,Walchsee,24.06.2022 - 26.06.2022,Mehr als 2.000 Athletinnen und Athleten und vi...,Extrem sports,Austria
4,Red Bull Almauftrieb,https://www.tirol.at/reisefuehrer/veranstaltun...,Mayrhofen,,"Ein Extremsport-Event, das seinesgleichen such...",Extrem sports,Austria
...,...,...,...,...,...,...,...
60,Radmarathon Tannheimer Tal,https://www.tirol.at/reisefuehrer/veranstaltun...,Tannheim,03.07.2022,Anfang Juli trägt das Tannheimer Tal alljährli...,Sommer sports,Austria
61,Generali Open Kitzbühel,https://www.tirol.at/reisefuehrer/veranstaltun...,Kitzbühel,23.07. - 30.07.2022,Tennis und der Ort Kitzbühel sind seit sieben ...,Sommer sports,Austria
62,Ötztal Radmarathon,https://www.tirol.at/reisefuehrer/veranstaltun...,Sölden,28.08.2022,Der Ötztaler Radmarathon hat sich in seiner üb...,Sommer sports,Austria
63,Red Bull Dolomitenmann,https://www.tirol.at/reisefuehrer/veranstaltun...,Lienz,10.09.2022,Der Dolomitenmann in Osttirol wird nicht umson...,Sommer sports,Austria
