In [None]:
# pip install git+https://www.github.com/gezzze/pydwd@testing

# Remove old files
- dwd_data

In [None]:
import pathlib

# Function to remove files and folders 
def delete_folder(pth):
    """Recursively delete the directory ``pth`` together with its contents.

    Parameters
    ----------
    pth : pathlib.Path
        Directory to remove.

    Notes
    -----
    * If ``pth`` does not exist the call is a no-op, so the notebook also
      runs on a fresh checkout where no data has been downloaded yet.
    * Symbolic links found inside ``pth`` are unlinked, never followed, so
      data living outside of ``pth`` is left untouched.
    """
    if not pth.exists():
        # Nothing to clean up (first run, or already removed).
        return

    for sub in pth.iterdir():
        # is_dir() follows symlinks, hence the explicit is_symlink() guard:
        # a link to a directory must be removed as a link, not descended into.
        if sub.is_dir() and not sub.is_symlink():
            delete_folder(sub)
        else:
            sub.unlink()
    # The directory is empty at this point, so rmdir() succeeds.
    pth.rmdir()
    
# Wipe the 'dwd_data' folder (resolved relative to the working directory) and everything in it.
delete_folder(pathlib.Path('dwd_data'))

# Import modules necessary for general functioning


In [None]:
# Modules used
try:
    import python_dwd
except ImportError:
    # The package is not importable as-is: add the parent directory to the
    # module search path and retry. Only ImportError is caught so that any
    # other failure (or a keyboard interrupt) is not silently masked.
    import sys
    sys.path.append("..")

    import python_dwd

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from python_dwd.enumerations.period_type_enumeration import PeriodType
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
from python_dwd.enumerations.parameter_enumeration import Parameter

## 1. First check of a metadata file
- for the daily historical precipitation data (`Parameter.PRECIPITATION_MORE`)
- the shape/size of the metadata is printed together with the number of available stations

In [None]:
# Station metadata for Parameter.PRECIPITATION_MORE / daily / historical.
metadata_precip_daily = python_dwd.metadata_for_dwd_data(
    parameter=Parameter.PRECIPITATION_MORE,
    time_resolution=TimeResolution.DAILY,
    period_type=PeriodType.HISTORICAL,
)
print("Size of dataframe: ", metadata_precip_daily.shape)

# Keep only the rows whose HAS_FILE flag is set and count them.
stations_with_file = metadata_precip_daily[metadata_precip_daily.HAS_FILE == True]
print("Number of available stations: ", stations_with_file.shape[0])

# Last expression of the cell: rich preview of the first rows.
metadata_precip_daily.head()

The metadata includes the station id, the time range of the measurements, the position (including height), the place and state the station is located in, and whether a data file is available for it. With the following plot we show a map of those stations:

In [None]:
from matplotlib import cm
# plt.get_cmap is used here (as in the time-series cell further down) because
# matplotlib.cm.get_cmap is deprecated and no longer available in recent Matplotlib.
cmap = plt.get_cmap('viridis')
# Colour classes are delimited by the min, quartiles and max of the station heights.
bounds = metadata_precip_daily.STATIONHEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

plot = metadata_precip_daily.plot.scatter(x="LON", y="LAT", c="STATIONHEIGHT", cmap=cmap, norm=norm)
# Trailing semicolon keeps the Text(...) repr out of the cell output.
plot.set_title("Map of daily precipitation stations in Germany\nColor refers to height of station");

## 2. The usual way of retrieving data

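We first build the list of server files for our station with `create_file_list_for_dwd_server`. The station, chosen from the above metadata, is identified by:
- its id and
    * 1048 for Dresden, Germany
- the combination of parameters
    * `parameter=Parameter.CLIMATE_SUMMARY` for the climate summary
    * `time_resolution=TimeResolution.DAILY` for daily data
    * `period_type=PeriodType.HISTORICAL` for the historical record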

In [None]:
# Station of interest: Dresden-Klotzsche (STATION_ID 1048).
# Step 1: resolve where the daily historical climate summary lives on the server.
remote_file_path = python_dwd.create_file_list_for_dwd_server(
    statid=[1048],
    parameter=Parameter.CLIMATE_SUMMARY,
    time_resolution=TimeResolution.DAILY,
    period_type=PeriodType.HISTORICAL,
)
remote_file_path

The function returns a link which leads to the server path where we can find the file. The file is downloaded with `download_dwd_data`, using the link from above:

In [None]:
# Step 2: download what `remote_file_path` points to; the result is handed to
# python_dwd.parse_dwd_data in the next cell.
station_download = python_dwd.download_dwd_data(remote_file_path)

The returned link now leads to the local file. Next we read it in with `parse_dwd_data` and, et voilà, the function gives us a DataFrame, of which we show the first rows.

In [None]:
# Step 3: parse the downloaded data and preview the first rows of the result.
station_data = python_dwd.parse_dwd_data(station_download)
station_data.head()

See that DATE is already parsed, so we can easily get some nice graphs with matplotlib.

## 3. Let's create some plots

To make the data easier to work with, we first reshape it from wide to long format: instead of one column per measured element next to the date column, we want each row to hold a single measurement, described by three columns (in addition to the station id): the date of the measurement (DATE), the measured element (ELEMENT) and the measured value (VALUE). To make this clear, look at the following table, which is already transformed.

In [None]:
# Identifier columns stay as they are; every other column is treated as a measurement.
id_vars = ["STATION_ID", "DATE"]
value_vars = [column for column in station_data.columns if column not in id_vars]

# Wide -> long: one row per (station, date, element) with its value.
station_data_transformed = station_data.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name="ELEMENT",
    value_name="VALUE",
)

station_data_transformed.head()

* we can create a timeseries/histogram of some elements to compare the distribution of the values, here for example precipitation and mean temperature:

In [None]:
# Elements compared below (precipitation and mean temperature).
elements_to_plot = ["RSK", "TMK"]

# Keep only those elements and order the rows chronologically per station and element.
station_data_filtered = (
    station_data_transformed
    .loc[station_data_transformed["ELEMENT"].isin(elements_to_plot)]
    .sort_values(["STATION_ID", "ELEMENT", "DATE"])
)

# Columns must be selected with a list: the tuple form ["DATE", "VALUE"]
# is rejected by current pandas versions.
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]

In [None]:
# One colour per plotted element, spread evenly over the colormap.
n_elements = len(elements_to_plot)
cmap = plt.get_cmap('viridis')
colors = cmap(np.linspace(0, 1, n_elements))

# Grid layout: one column per element, row 0 = time series, row 1 = histogram.
# squeeze=False keeps `axes` two-dimensional even for a single element, so the
# column-wise iteration over `axes.T` below works for any number of elements.
fig, axes = plt.subplots(2, n_elements, figsize=(10, 10), squeeze=False)

for (k, v), (ax1, ax2), color in zip(station_data_grouped, axes.T, colors):
    v.plot(x="DATE", y="VALUE", label=k, alpha=.75, ax=ax1, c=color)
    v.plot(y="VALUE", kind="hist", label=k, alpha=.75, ax=ax2)

fig.tight_layout()
# Leave room at the top for the figure title.
fig.subplots_adjust(top=0.9)
# Trailing semicolon keeps the Text(...) repr out of the cell output.
fig.suptitle("Precipitation/Mean temperature timeseries of Dresden, Germany");

We can see here that the precipitation is strongly right-skewed (most values lie close to zero, with a long tail towards high values) and not normally distributed, while the temperature is almost normally distributed! The time series also gives a glimpse of how much data is available. A sad detail is the gap around World War II.

## 4. Create yearly values

In [None]:
# Look up the station nearest to the given point for the daily historical climate summary.
# NOTE(review): the two single-element lists look like latitude and longitude (in that order) —
# confirm against the signature of python_dwd.get_nearest_station.
python_dwd.get_nearest_station([51.05089], [13.73832], Parameter.CLIMATE_SUMMARY, TimeResolution.DAILY, PeriodType.HISTORICAL)