[CDC - Observations Germany](https://opendata.dwd.de/climate_environment/CDC/observations_germany/)

# Wind data
[Wind historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/historical/) </br>
[Wind recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/recent/) </br></br>
[Extreme Wind historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/extreme_wind/historical/) </br>
[Extreme Wind recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/extreme_wind/recent/)

# Precipitation data
[Precipitation historical](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/historical/) </br>
[Precipitation recent](https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/recent/)

## 1. Download data
Use the the following cell to get desired data.\
Since 'recent' only covers the last 520 days, we need to also download 'historical' data, thus running the cell twice:

1. PERIOD = ['2020-2022', 'recent']
2. PERIOD = ['2020 - 2020', 'historical']

...both with DATA = ['air_temperature', 'extreme_wind', 'precipitation', 'wind'], STATIONS_ID = ['691', '1420']


In [None]:
import os
import requests
from bs4 import BeautifulSoup as bs
import zipfile


# not all data are available in every temporal resolution!
DATA = [
    'air_temperature',
    # 'cloud_type',
    # 'cloudiness',
    # 'dew_point',
    'extreme_wind',
    # 'moisture',
    'precipitation',
    # 'pressure',
    # 'soil',
    # 'soil_temperature',
    # 'solar',
    # 'sun',
    # 'standard_format',
    # 'visibility',
    # 'weather_phenomena',
    'wind',
    # 'wind_test',
    # 'wind_synop',
]

TEMPORAL_RES = [
    # '1_minute',
    '10_minutes',
    # 'hourly',
    # 'subdaily',
    # 'daily',
    # 'monthly',
    # 'annual',
    # 'multi_annual',
]

PERIOD = [
    # 'start - 2020', # in hourly data

    # '1991', # in 10_minutes data
    # '2000 - 2009', # in 10_minutes data
    # '2010 - 2019', # in 10_minutes data
    '2020 - 2021', # in 10_minutes data
    'recent', 
]

STATIONS_ID = [
    '691', # Bremen
    '1420', # Frankfurt a. M.

]

ROOT_URL = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"

DOWNLOAD_DIR = os.path.join(os.curdir, "../data", "DeutscherWetterdienst", "")

# make target directory, if it doesn't exist
if not os.path.exists(DOWNLOAD_DIR):
    os.mkdir(DOWNLOAD_DIR)

# ensure that the id has 5 digits
for i, s_id in enumerate(STATIONS_ID):
    while len(s_id) < 5:
        s_id = '0' + s_id
    STATIONS_ID[i] = s_id

# get urls to search for downloadable data
urls_root = []
for temp_res in TEMPORAL_RES:
    for dat in DATA:
        if 'recent' in PERIOD:
            urls_root.append(ROOT_URL + temp_res + '/' + dat + '/' + 'recent' + '/')
        if len(PERIOD) > 1 or PERIOD[0] != 'recent':
            urls_root.append(ROOT_URL + temp_res + '/' + dat + '/' + 'historical' + '/')

# get relevant years, 'akt' for recent data 
years = [y.split(' - ')[1] if len(y.split('-')) > 1 else y.split(' - ')[0] for y in PERIOD]
if 'recent' in PERIOD:
    years.append('akt')

# get urls and names of desired files
urls = []
names = []
for url in urls_root:
    # get html of website
    r = requests.get(url)
    soup = bs(r.text)
    # find download links and filter for .zip files, station and relevant time periods
    for i, link in enumerate(soup.findAll('a')):
        if '.zip' in str(link) and \
            any([station in str(link) for station in STATIONS_ID]) and \
                any([year in str(link) for year in years]):
            url_download = url + link.get('href')
            urls.append(url_download)
            names.append(soup.select('a')[i].attrs['href'])

names_urls = zip(names, urls)

# download files
for name, url in names_urls:
    
    file_path = os.path.join(DOWNLOAD_DIR, name)
    file_path_txt = os.path.join(DOWNLOAD_DIR, name.split('.')[0] + '.txt')
    if not os.path.isfile(file_path) and not os.path.isfile(file_path_txt):
        response = requests.get(url, timeout=50)
        print(url)
        with open(file_path, 'wb') as f:
            f.write(response.content)

        # unzip file
        if os.path.isfile(file_path):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(DOWNLOAD_DIR)

    # delete .zip
    if os.path.isfile(file_path):
        os.remove(file_path)


## 2. Combine data

In [1]:
import pandas as pd
import glob

In [None]:
# Read all meteorological data for a city from 'data/DeutscherWetterdienst'
def read_city_data(city_code):
    path = r'../data/DeutscherWetterdienst' 
    all_files = glob.glob(path + "/produkt*.txt") 

    weather_metrics = {}
   
    for filename in all_files: 
        if city_code in filename:
            df = pd.read_csv(filename, sep=';')
            df['date'] = pd.to_datetime(df.MESS_DATUM, format='%Y%m%d%H')
            df.drop(['eor', 'MESS_DATUM', df.columns[df.columns.str.startswith('QN')][0]], axis=1, inplace=True)
            df = df.query("date < '2022-03' and date >= '2020-01'")
            weather_metrics[f'{filename[38:40] + filename[48]}'] = df
    return weather_metrics



# Concatenate historical and current data for each metric
def concat_city_data(metrics_dict):
    concated = []
    for metric in set([x[:-1] for x in list(metrics_dict.keys())]):
        concated.append(pd.concat([metrics_dict[f'{metric}1'], metrics_dict[f'{metric}2']], axis=0, ignore_index=True).drop_duplicates(keep='first'))
    return concated



# Merge all meterological data into one
def merge_city_data(df_list):
    df_merged = df_list[0]
    for df in df_list[1:]:
        df_merged = df_merged.merge(df, on=['date', 'STATIONS_ID'], how='left')
    df_merged.sort_values('date', ascending=True, inplace=True)
    return df_merged



In [None]:
dwd_ffm = merge_city_data(concat_city_data(read_city_data('1420')))

In [None]:
dwd_bre = merge_city_data(concat_city_data(read_city_data('691')))

In [None]:
dwd_ffm = dwd_ffm.rename(columns = {'RF_TU': 'relHumidity', 'TT_TU': 'Temperature', '  R1': 'Precipitation', 'RS_IND': 'PrecipitationIndicator', '   F': 'WindSpeed', '   D': 'WindDirection', '   P': 'AirPressureSeaLevel', '  P0': 'AirPressureStation'})
dwd_bre = dwd_bre.rename(columns = {'RF_TU': 'relHumidity', 'TT_TU': 'Temperature', '  R1': 'Precipitation', 'RS_IND': 'PrecipitationIndicator', '   F': 'WindSpeed', '   D': 'WindDirection', '   P': 'AirPressureSeaLevel', '  P0': 'AirPressureStation'})

In [None]:
dwd_bre

Unnamed: 0,STATIONS_ID,Temperature,relHumidity,date,Precipitation,PrecipitationIndicator,WRTR,AirPressureSeaLevel,AirPressureStation,WindSpeed,WindDirection
0,691,-1.8,100.0,2020-01-01 00:00:00,0.0,0.0,-999.0,1035.5,1034.8,0.7,250.0
1,691,-0.9,100.0,2020-01-01 01:00:00,0.0,0.0,0.0,1035.1,1034.4,1.2,180.0
2,691,-2.2,100.0,2020-01-01 02:00:00,0.0,0.0,0.0,1034.7,1034.0,0.8,190.0
3,691,-3.3,100.0,2020-01-01 03:00:00,0.0,0.0,-999.0,1034.3,1033.6,0.4,210.0
4,691,-2.7,100.0,2020-01-01 04:00:00,0.0,0.0,0.0,1034.6,1033.9,0.4,120.0
...,...,...,...,...,...,...,...,...,...,...,...
18955,691,3.8,71.0,2022-02-28 19:00:00,0.0,0.0,0.0,1031.9,1031.2,5.5,130.0
18956,691,2.7,76.0,2022-02-28 20:00:00,0.0,0.0,0.0,1031.7,1031.0,6.1,130.0
18957,691,1.8,79.0,2022-02-28 21:00:00,0.0,0.0,-999.0,1031.6,1030.9,5.1,130.0
18958,691,1.5,76.0,2022-02-28 22:00:00,0.0,0.0,0.0,1031.4,1030.7,5.6,130.0


In [None]:
dwd_ffm

Unnamed: 0,STATIONS_ID,Temperature,relHumidity,date,Precipitation,PrecipitationIndicator,WRTR,AirPressureSeaLevel,AirPressureStation,WindSpeed,WindDirection
0,1420,0.0,90.0,2020-01-01 00:00:00,0.0,0.0,-999.0,1036.5,1023.1,2.7,40
1,1420,0.1,90.0,2020-01-01 01:00:00,0.0,0.0,0.0,1036.3,1022.9,2.8,20
2,1420,-1.1,92.0,2020-01-01 02:00:00,0.0,0.0,0.0,1036.2,1022.8,2.2,70
3,1420,-0.4,93.0,2020-01-01 03:00:00,0.0,0.0,-999.0,1035.7,1022.3,1.6,80
4,1420,0.2,90.0,2020-01-01 04:00:00,0.0,0.0,0.0,1035.4,1022.1,2.0,50
...,...,...,...,...,...,...,...,...,...,...,...
18955,1420,5.3,46.0,2022-02-28 19:00:00,0.0,0.0,0.0,1031.4,1018.3,1.3,80
18956,1420,2.0,59.0,2022-02-28 20:00:00,0.0,0.0,0.0,1031.7,1018.5,1.0,90
18957,1420,1.8,61.0,2022-02-28 21:00:00,0.0,0.0,-999.0,1031.7,1018.5,2.0,50
18958,1420,2.0,61.0,2022-02-28 22:00:00,0.0,0.0,0.0,1031.5,1018.3,1.9,60
