[CDC - Observations Germany](https://opendata.dwd.de/climate_environment/CDC/observations_germany/)

# Wind data
[Wind historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/historical/) <br>
[Wind recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/recent/) <br><br>
[Extreme Wind historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/extreme_wind/historical/) <br>
[Extreme Wind recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/extreme_wind/recent/)

# Precipitation data
[Precipitation historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/historical/) <br>
[Precipitation recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/recent/)

In [None]:
import os
import requests
from bs4 import BeautifulSoup as bs
import zipfile


# ---------------------------------------------------------------------------
# Download configuration. Un-comment an entry to include it in the download.
# ---------------------------------------------------------------------------

# Measurement categories to fetch.
# NOTE: not every category is available in every temporal resolution!
DATA = [
    "air_temperature",
    # "cloud_type",
    # "cloudiness",
    # "dew_point",
    "extreme_wind",
    # "moisture",
    "precipitation",
    # "pressure",
    # "soil",
    # "soil_temperature",
    # "solar",
    # "sun",
    # "standard_format",
    # "visibility",
    # "weather_phenomena",
    "wind",
    # "wind_test",
    # "wind_synop",
]

# Temporal resolutions; each one becomes a path segment below ROOT_URL.
TEMPORAL_RES = [
    # "1_minute",
    "10_minutes",
    # "hourly",
    # "subdaily",
    # "daily",
    # "monthly",
    # "annual",
    # "multi_annual",
]

# Time spans to fetch. Ranges are written "<start> - <end>"; "recent" selects
# the recent/ directory instead of historical/.
PERIOD = [
    # "start - 2020",  # in hourly data

    # "1991",  # in 10_minutes data
    # "2000 - 2009",  # in 10_minutes data
    # "2010 - 2019",  # in 10_minutes data
    "2020 - 2021",  # in 10_minutes data
    "recent",
]

# Station ids as strings (leading zeros may be omitted here).
STATIONS_ID = [
    "691",  # Bremen
    "1420",  # Frankfurt a. M.
]

# Base directory of the climate observations on the DWD open-data server.
ROOT_URL = (
    "https://opendata.dwd.de/climate_environment/CDC/"
    "observations_germany/climate/"
)

# Local target directory; the empty last component makes the joined path end
# with a path separator.
DOWNLOAD_DIR = os.path.join(
    os.curdir,
    "../data",
    "DeutscherWetterdienst",
    "",
)

# Make the target directory if it doesn't exist. makedirs also creates missing
# parent directories (os.mkdir fails when "../data" is absent), and
# exist_ok=True avoids the race between checking for the directory and
# creating it.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Left-pad every station id with zeros to a width of at least five characters.
# The list is updated in place so the padded ids are what later code sees.
STATIONS_ID[:] = [station.rjust(5, '0') for station in STATIONS_ID]

# Build the directory URLs that are searched for downloadable data: one
# "recent/" and/or one "historical/" listing per (resolution, category) pair.
#
# Which listings are needed depends only on PERIOD, so it is decided once.
# Historical data is requested when at least one entry is something other
# than 'recent'. Unlike indexing PERIOD[0], this also works for an empty
# PERIOD (no URLs are produced instead of raising IndexError).
want_recent = 'recent' in PERIOD
want_historical = any(period != 'recent' for period in PERIOD)

urls_root = []
for temp_res in TEMPORAL_RES:
    for dat in DATA:
        base_url = ROOT_URL + temp_res + '/' + dat + '/'
        if want_recent:
            urls_root.append(base_url + 'recent' + '/')
        if want_historical:
            urls_root.append(base_url + 'historical' + '/')

# Derive the tokens used to pick files by name: the end year of each period
# ('2020 - 2021' -> '2021', '1991' -> '1991') and 'akt' for recent data.
#
# Splitting on a bare '-' and stripping whitespace accepts both '2000 - 2009'
# and '2000-2009'; previously the test and the value used different
# separators, so the latter raised IndexError. 'recent' is translated to
# 'akt' rather than also being kept as a literal token.
years = []
for period in PERIOD:
    if period == 'recent':
        years.append('akt')
    else:
        years.append(period.split('-')[-1].strip())

# Collect the download URL and file name of every desired archive.
#
# Each directory listing in urls_root is fetched and its anchors are filtered
# for .zip files whose name contains one of the station ids and one of the
# period tokens in `years`.
urls = []
names = []
for url in urls_root:
    # Fetch the listing with the same timeout as the file downloads so a
    # stalled connection cannot hang the run.
    r = requests.get(url, timeout=50)
    if not r.ok:
        # Not every category exists for every resolution/period: report the
        # missing listing and carry on instead of parsing an error page.
        print('skipping ' + url + ' (HTTP ' + str(r.status_code) + ')')
        continue

    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs(r.text, 'html.parser')

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href or not href.endswith('.zip'):
            continue
        if any(station in href for station in STATIONS_ID) and \
                any(year in href for year in years):
            urls.append(url + href)
            # basename keeps the local file name inside DOWNLOAD_DIR even if
            # the href carries a path component.
            names.append(os.path.basename(href))

# A list (not a one-shot zip iterator) so the pairs can be iterated again.
names_urls = list(zip(names, urls))

# Download every archive, unpack it into DOWNLOAD_DIR and remove the .zip.
for name, url in names_urls:

    file_path = os.path.join(DOWNLOAD_DIR, name)
    # Marker for "already downloaded": a .txt file named after the archive.
    # NOTE(review): this assumes the extracted file shares the archive's
    # base name - confirm against the actual archive contents.
    file_path_txt = os.path.join(DOWNLOAD_DIR, name.split('.')[0] + '.txt')

    if os.path.isfile(file_path_txt):
        # Data already present: only clear a stale archive, if any.
        if os.path.isfile(file_path):
            os.remove(file_path)
        continue

    # A leftover archive without extracted data may be incomplete, so it is
    # downloaded again and overwritten (previously it was deleted without
    # ever being extracted).
    response = requests.get(url, timeout=50)
    # Fail here on an HTTP error instead of writing the error body to disk
    # and failing later while opening it as a zip file.
    response.raise_for_status()
    print(url)

    try:
        with open(file_path, 'wb') as f:
            f.write(response.content)

        # unzip file
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(DOWNLOAD_DIR)
    finally:
        # delete .zip, also when writing or extraction failed, so a broken
        # archive is not mistaken for a finished download on the next run
        if os.path.isfile(file_path):
            os.remove(file_path)
