In [41]:
import pandas as pd
import folium
import re
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import datetime
import matplotlib.pyplot as plt
import numpy as np

## Plotting stations on a map of Germany

In [42]:
# Rewrite the station description file so that it can be read as a
# whitespace-separated table: single blanks inside a field (presumably
# multi-word station names) are replaced by "---" so they no longer split
# the field into several columns.

in_path = "./data/ger-cl-mon-kl-historical/KL_Monatswerte_Beschreibung_Stationen.txt"
out_path = "./data/ger-cl-mon-kl-historical/MOD_KL_Monatswerte_Beschreibung_Stationen.txt"

# A single blank preceded by a letter, comma or period and followed by a letter
# or a non-zero digit. Look-arounds are used on purpose: the neighbouring
# characters are kept (only the blank is replaced) and adjacent blanks such as
# in "a b c" are all matched, which a consuming pattern would miss.
inner_blank = re.compile(r"(?<=[a-zA-Z,.ß]) (?=[a-zA-Z1-9])")

# The source is ISO-8859-1; the copy is written explicitly as UTF-8 because it
# is read back with encoding='UTF-8'. Relying on the platform default encoding
# would break umlauts on systems whose default is not UTF-8.
with open(in_path, "r", encoding="iso-8859-1") as in_file, \
     open(out_path, "w", encoding="utf-8") as out_file:
    for linecount, line in enumerate(in_file):
        # first line (header): copied unchanged
        if linecount == 0:
            out_file.write(line)
            continue
        # second line: dropped (presumably the separator row under the header)
        if linecount == 1:
            continue

        # NOTE(review): "(" is replaced by "X" as in the original code; the
        # reason is not visible here.
        line = line.replace("(", "X")
        line = inner_blank.sub("---", line)
        out_file.write(line)

In [43]:
# Load the cleaned station table. Every column is kept as a string
# (dtype="str"), so numeric fields are converted where they are used.
# on_bad_lines must be one of "error", "warn", "skip" or a callable; None is
# rejected by current pandas. "error" is the documented default: a line with
# too many fields aborts the load instead of being dropped silently.
station_inf = pd.read_csv(
    "./data/ger-cl-mon-kl-historical/MOD_KL_Monatswerte_Beschreibung_Stationen.txt",
    encoding="UTF-8",
    sep=r"\s+",
    on_bad_lines="error",
    dtype="str",
)

In [44]:
# draw the station map and collect the ids of all stations that pass the filters
def plot_map(ID, BUNDESLAND, ZEITRAUM, HOEHE, BREITE, LAENGE):
    """Show a folium map with one circle per station matching the filters.

    Stations are taken from the module-level ``station_inf`` table.

    Args:
        ID: station id to match exactly; '' matches every station.
        BUNDESLAND: state name to match exactly; "Alle" matches every state.
        ZEITRAUM: (first, last) year; kept when ZEITRAUM[0] >= the station's
            start year and ZEITRAUM[1] <= its end year.
        HOEHE: (min, max) station height, inclusive.
        BREITE: (min, max) latitude, inclusive.
        LAENGE: (min, max) longitude, inclusive.

    Returns:
        List of the "Stations_id" values of all stations drawn on the map.
    """
    matched_ids = []

    # base map; other tile styles: stamentoner, cartodbpositron, CartoDB dark_matter
    base_map = folium.Map(location=[51, 10], zoom_start=5, tiles="cartodbpositron")

    for row_no in range(station_inf.shape[0]):
        station = station_inf.loc[row_no]

        # filters are checked one after another; the first failing one skips the station
        if not (ID == station["Stations_id"] or ID == ''):
            continue
        if not (BUNDESLAND == station["Bundesland"] or BUNDESLAND == "Alle"):
            continue
        if not ZEITRAUM[0] >= int(station["von_datum"][:4]):
            continue
        if not ZEITRAUM[1] <= int(station["bis_datum"][:4]):
            continue
        if not HOEHE[0] <= int(station["Stationshoehe"]) <= HOEHE[1]:
            continue
        if not BREITE[0] <= float(station["geoBreite"]) <= BREITE[1]:
            continue
        if not LAENGE[0] <= float(station["geoLaenge"]) <= LAENGE[1]:
            continue

        # popup shown when the circle is clicked
        popup_text = """Name: {} <br>
                            Station ID: {}<br>
                            Messzeitraum: {} - {} <br>
                            Höhe: {}<br>
                            Bundesland: {}<br>""".format(
            station["Stationsname"],
            station["Stations_id"],
            station["von_datum"][:4],
            station["bis_datum"][:4],
            station["Stationshoehe"],
            station["Bundesland"],
        )

        # circle at the station's coordinates
        marker = folium.Circle(
            location=[station["geoBreite"], station["geoLaenge"]],
            fill=True,
            radius=800,
            popup=popup_text,
        )
        marker.add_to(base_map)
        matched_ids.append(station["Stations_id"])

    display(base_map)
    return matched_ids

In [45]:
# Widgets that drive plot_map. The result of the last plot_map call (the list
# of matching station ids) is available as station_map.result.
W_ID = widgets.Text(
    value='',
    placeholder='00000',
    description='Station ID:',
    disabled=False,
)
W_BUNDESLAND = widgets.Dropdown(
    options=[
        'Alle',
        'Baden-Württemberg',
        'Bayern',
        'Berlin',
        'Brandenburg',
        'Bremen',
        'Hamburg',
        'Hessen',
        'Mecklenburg-Vorpommern',
        'Niedersachsen',
        'Nordrhein-Westfalen',
        'Rheinland-Pfalz',
        'Saarland',
        'Sachsen',
        'Sachsen-Anhalt',
        'Schleswig-Holstein',
        'Thüringen',
    ],
    value='Alle',
    description='Bundesland',
    disabled=False,
)
# The initial upper bound was 2021, which lies above max=2020 and off the
# 10-year step grid; it is now stated as 2020 so the start value is one the
# slider can actually represent.
W_ZEITRAUM = widgets.IntRangeSlider(
    value=[2010, 2020],
    min=1720,
    max=2020,
    step=10,
    description="Zeitraum",
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
W_HOEHE = widgets.IntRangeSlider(
    value=[0, 2970],
    min=0,
    max=2970,
    step=10,
    description='Stationshoehe',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
W_BREITE = widgets.FloatRangeSlider(
    value=[47.3, 55.1],
    min=47.3,
    max=55.1,
    step=0.1,
    description='Breitengrad',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
)
W_LAENGE = widgets.FloatRangeSlider(
    value=[6.0, 15.0],
    min=6.0,
    max=15.0,
    step=0.1,
    description='Laengengrad',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
)

# interactive (rather than interact) is used so that the widget container is
# kept in a variable and its .result can be read elsewhere in the notebook.
station_map = interactive(
    plot_map,
    ID=W_ID,
    BUNDESLAND=W_BUNDESLAND,
    ZEITRAUM=W_ZEITRAUM,
    HOEHE=W_HOEHE,
    BREITE=W_BREITE,
    LAENGE=W_LAENGE,
)
display(station_map)

Widget Javascript not detected.  It may not be installed or enabled properly. Reconnecting the current kernel may help.


In [48]:
def _value_column(METRIK, WERTE):
    """Return the data column name for a metric / statistic combination.

    Each metric has a template in which '0' and '1' are placeholders that are
    replaced by the two letters configured for the chosen statistic
    (e.g. 'Temperatur' + 'Max' -> 'MX_TX').

    Raises:
        KeyError: if METRIK or WERTE is not one of the known options.
    """
    metrik_type = {'Temperatur': 'M0_T1',
                   'Niederschlag': 'M0_R1',
                   'Sonnenschein': 'MO_SD_S',
                   'Windstärke': 'M0_F1'}
    werte_type = {'Durchschnitt': ['O', 'T'],
                  'Max Durchschnitt': ['O', 'X'],
                  'Min Durchschnitt': ['O', 'N'],
                  'Max': ['X', 'X'],
                  'Min': ['X', 'N'],
                  'Summe': ['O', 'R'],
                  # NOTE(review): was ['O', 'S'] (-> 'MO_RS'). The DWD monthly
                  # product names the maximum daily precipitation 'MX_RS';
                  # confirm against the header of the data files.
                  'Max Summe': ['X', 'S']}
    first, second = werte_type[WERTE]
    return metrik_type[METRIK].replace('0', first).replace('1', second)


def _read_monthly_file(dataset, station):
    """Read one station's monthly product file from ./data/<dataset>/.

    Returns:
        The parsed DataFrame, or None (after printing a notice) when the file
        is missing or cannot be parsed.
    """
    folder = "monatswerte_KL_{}".format(station)
    file = "produkt_klima_monat_{}.txt".format(station)
    path = "./data/{}/{}/{}".format(dataset, folder, file)
    try:
        return pd.read_csv(path, sep=";")
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file, ValueError covers pandas'
        # parser and empty-data errors. A bare except would also swallow
        # KeyboardInterrupt.
        print("data not existing for station {}".format(path))
        return None


def _aggregate_years(data, value_type, ZEITRAUM, MONAT, WERTE):
    """Collapse monthly values to one value per year of ZEITRAUM.

    A year counts as complete when it has one value for every month of the
    selected MONAT range. Means, maxima and minima are only reported for
    complete years; sums are reported for every year that has data.

    Returns:
        DataFrame with the year in 'MESS_DATUM_BEGINN' and the aggregated
        value (NaN where not available) in ``value_type``.
    """
    # number of months per year left after the month filter (12 for 1..12)
    months_expected = MONAT[1] - MONAT[0] + 1
    years = data['MESS_DATUM_BEGINN'].dt.year

    year_dict = {}
    for year in range(ZEITRAUM[0], ZEITRAUM[1] + 1):
        values = data.loc[years == year, value_type]
        complete = values.size == months_expected
        if WERTE in ('Durchschnitt', 'Max Durchschnitt', 'Min Durchschnitt'):
            year_dict[year] = values.mean() if complete else np.nan
        elif WERTE == 'Summe':
            year_dict[year] = values.sum() if values.size > 0 else np.nan
        elif WERTE in ('Max', 'Max Summe'):
            year_dict[year] = values.max() if complete else np.nan
        elif WERTE == 'Min':
            year_dict[year] = values.min() if complete else np.nan
        else:
            year_dict[year] = np.nan

    return pd.DataFrame({'MESS_DATUM_BEGINN': list(year_dict.keys()),
                         value_type: list(year_dict.values())})


def plot_graph(ZEITRAUM, ZEIT, MONAT, METRIK, WERTE, FILTER):
    """Plot the chosen metric, averaged over the stations selected on the map.

    The station ids are read from ``station_map.result``. For every station
    the historical and (if present) recent monthly files are combined,
    restricted to the requested years and months, and averaged per date over
    all stations. The raw series and a rolling mean are plotted.

    Args:
        ZEITRAUM: (first, last) year to include.
        ZEIT: 'Jahr' to aggregate to yearly values, anything else plots months.
        MONAT: (first, last) month number to include.
        METRIK: metric name, see _value_column.
        WERTE: statistic name, see _value_column.
        FILTER: window length of the rolling mean.
    """
    # result is None until the map function has run at least once
    ids = station_map.result or []

    # resolved up front so it is defined even when no station yields data
    value_type = _value_column(METRIK, WERTE)

    # per date: sum of the station values and number of contributing stations
    data_dict = {}
    num_dict = {}

    for station in ids:
        data = _read_monthly_file("ger-cl-mon-kl-historical", station)
        if data is None:
            continue

        # recent data is an optional extension; a station without a recent
        # file still contributes its historical values
        data_rec = _read_monthly_file("ger-cl-mon-kl-recent", station)
        if data_rec is not None:
            data = pd.concat([data, data_rec])

        # historical and recent files overlap: keep one row per start date
        data = data.drop_duplicates(subset=['MESS_DATUM_BEGINN'])

        # change date columns to date datatype
        data['MESS_DATUM_BEGINN'] = pd.to_datetime(data['MESS_DATUM_BEGINN'], format='%Y%m%d')
        data['MESS_DATUM_ENDE'] = pd.to_datetime(data['MESS_DATUM_ENDE'], format='%Y%m%d')

        # filter only selected time range
        in_range = ((data['MESS_DATUM_BEGINN'].dt.year >= ZEITRAUM[0]) &
                    (data['MESS_DATUM_ENDE'].dt.year <= ZEITRAUM[1]) &
                    (data['MESS_DATUM_ENDE'].dt.month >= MONAT[0]) &
                    (data['MESS_DATUM_ENDE'].dt.month <= MONAT[1]))
        data = data[in_range]

        # -999 marks a missing measurement; drop rows without a usable value
        data = data.replace(-999.00, np.nan)
        data = data.dropna(subset=[value_type])

        # accumulate sum and count per date (dates are unique per station)
        for date, value in zip(data['MESS_DATUM_BEGINN'], data[value_type]):
            data_dict[date] = data_dict.get(date, 0) + value
            num_dict[date] = num_dict.get(date, 0) + 1

    if not data_dict:
        print("no data available for the selected stations and filters")
        return

    # average over all stations that reported a value for the date
    dates = list(data_dict.keys())
    data = pd.DataFrame({'MESS_DATUM_BEGINN': dates,
                         value_type: [data_dict[k] / num_dict[k] for k in dates]})

    # calculate year data
    if ZEIT == 'Jahr':
        data = _aggregate_years(data, value_type, ZEITRAUM, MONAT, WERTE)

    # sort rows by date
    data = data.sort_values(by=['MESS_DATUM_BEGINN'])

    # rolling mean over FILTER consecutive points (NaN until the window is full)
    x_values = list(data['MESS_DATUM_BEGINN'])
    raw_values = list(data[value_type])
    filter_values = pd.Series(raw_values).rolling(FILTER).mean().tolist()

    # plot raw data and filtered
    fig, ax = plt.subplots()
    ax.plot(x_values, raw_values, label='raw',
            marker='o', markersize=3, ls='--', color='black', alpha=0.5)
    ax.plot(x_values, filter_values, label='filtered',
            ls='--', color='blue')
    ax.set_xlabel('Year')
    ax.set_ylabel(METRIK)
    ax.grid()
    ax.legend()
    plt.show()
    
    

In [49]:
# statistics that can be selected for each metric
werte = {
    'Temperatur': ['Durchschnitt', 'Max Durchschnitt', 'Min Durchschnitt', 'Max', 'Min'],
    'Niederschlag': ['Summe', 'Max Summe'],
    'Sonnenschein': ['Summe'],
    'Windstärke': ['Max'],
}

# settings shared by all sliders of the graph
_slider_common = dict(
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# widgets for the graph parameters
W_ZEITRAUM = widgets.IntRangeSlider(
    value=[1970, 2023], min=1720, max=2023, step=1,
    description="Zeitraum", **_slider_common
)
W_ZEIT = widgets.Dropdown(
    options=['Jahr', 'Monat'],
    value='Jahr',
    description='Zeit',
    disabled=False,
)
W_MONAT = widgets.IntRangeSlider(
    value=[1, 12], min=1, max=12, step=1,
    description="Monat", **_slider_common
)
W_METRIK = widgets.Dropdown(
    # the metric names are exactly the keys of `werte`, in the same order
    options=list(werte),
    value='Temperatur',
    description='Metrik',
    disabled=False,
)
_initial_werte = werte[W_METRIK.value]
W_WERTE = widgets.Dropdown(
    options=_initial_werte,
    value=_initial_werte[0],
    description='Wert',
    disabled=False,
)
W_FILTER = widgets.IntSlider(
    value=1, min=1, max=10, step=1,
    description=" Rolling Filter", **_slider_common
)


def update_wert(*args):
    """Offer only the statistics that exist for the currently selected metric."""
    W_WERTE.options = werte[W_METRIK.value]


# keep the 'Wert' dropdown in sync whenever 'Metrik' changes
W_METRIK.observe(update_wert, 'value')

# build the controls and redraw the graph on every change
interact(
    plot_graph,
    ZEITRAUM=W_ZEITRAUM,
    ZEIT=W_ZEIT,
    MONAT=W_MONAT,
    METRIK=W_METRIK,
    WERTE=W_WERTE,
    FILTER=W_FILTER,
)

Widget Javascript not detected.  It may not be installed or enabled properly. Reconnecting the current kernel may help.


<function __main__.plot_graph(ZEITRAUM, ZEIT, MONAT, METRIK, WERTE, FILTER)>