# Deutscher Wetterdienst

In [13]:
%reset -sf
import pandas as pd

# Base URL of the DWD open-data server and the sub-paths of the daily climate
# ("kl") observations. `url`, `recent_path` and `historical_path` are reused
# by the download cell further down.
url = "https://opendata.dwd.de/"
path = 'climate_environment/CDC/observations_germany/climate/daily/kl/'
recent_path = path + 'recent/'
historical_path = path + 'historical/'
filename = 'KL_Tageswerte_Beschreibung_Stationen.txt'

# The station list is read as one single string column (it is renamed to
# 'dummy' below); the label of that column is the complete header line.
ws = pd.read_csv(url + recent_path + filename, sep="\t", header=0, skiprows=0, encoding="ISO-8859-1").dropna()
# Drop the row labelled 0 -- presumably the dashed separator line under the
# header (NOTE(review): confirm against the raw file).
ws = ws.drop(0)

# Save the original column names. split() without an argument tolerates
# repeated or trailing blanks in the header line.
colnames = ws.columns[0].split()

# Give the single column a handy name.
ws.columns = ['dummy']

# Split each line on whitespace into the first six fields; everything after
# the sixth separator (station name + Bundesland) stays together in column 6.
ws = ws['dummy'].str.split(r'\s+', n=6, expand=True)

# Convert columns 1..5 to numeric; column 0 (the station id) stays a string.
for col in ws.iloc[:, 1:6]:
    ws[col] = pd.to_numeric(ws[col], errors='coerce')

# Column 6 is fixed-width: the first 41 characters are the station name, the
# rest is the Bundesland. Strip the padding so both columns hold clean labels
# (otherwise group-by keys and displayed values carry trailing blanks).
station_name = ws[6].str.slice(0, 41).str.strip()
bundesland = ws[6].str.slice(41).str.strip()

# Concatenate everything back into one type-consistent dataframe.
wst = pd.concat([ws.iloc[:, 0:6], station_name, bundesland], axis=1)
wst.columns = colnames

## All available weather stations

In [14]:
# Show the parsed station table built in the previous cell.
wst

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationshoehe,geoBreite,geoLaenge,Stationsname,Bundesland
1,00001,19370101,19860630,478,47.8413,8.8493,Aach,Baden-Württemberg ...
2,00003,18910101,20110331,202,50.7827,6.0941,Aachen,Nordrhein-Westfalen ...
3,00011,19800901,20200930,680,47.9736,8.5205,Donaueschingen (Landeplatz),Baden-Württemberg ...
4,00044,19690101,20230309,44,52.9336,8.2370,Großenkneten,Niedersachsen ...
5,00052,19690101,20011231,46,53.6623,10.1990,Ahrensburg-Wulfsdorf,Schleswig-Holstein ...
...,...,...,...,...,...,...,...,...
1342,19364,19371201,19771231,720,50.6167,10.8167,Schmiedefeld/Rennsteig,Thüringen ...
1343,19378,19580101,19771231,505,50.8333,10.5833,Finsterbergen,Thüringen ...
1344,19433,19380401,19611231,316,50.8188,10.3443,"Liebenstein, Bad",Thüringen ...
1345,19510,19500101,19710106,159,51.1000,12.3300,Lucka,Thüringen ...


## Frankfurt/Main

In [15]:
# Keep only the rows whose station name contains "Frankfurt/Main".
wst.loc[wst["Stationsname"].str.contains("Frankfurt/Main")]

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationshoehe,geoBreite,geoLaenge,Stationsname,Bundesland
319,1420,19490101,20230309,100,50.0259,8.5213,Frankfurt/Main,Hessen ...
320,1421,19611201,19840831,125,50.1474,8.6746,Frankfurt/Main (Stadt),Hessen ...
322,1423,19720501,19871231,111,50.0496,8.5887,Frankfurt/Main (Terminal),Hessen ...
323,1424,19851101,20230309,124,50.1269,8.6694,Frankfurt/Main-Westend,Hessen ...
324,1425,18700101,19621231,103,50.1214,8.6604,Frankfurt/Main (Feldbergstr.),Hessen ...


## Offenbach/Main

In [16]:
# Keep only the rows whose station name contains "Offenbach".
wst.loc[wst["Stationsname"].str.contains("Offenbach")]

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationshoehe,geoBreite,geoLaenge,Stationsname,Bundesland
782,3775,19780101,19981216,100,50.1029,8.7468,Offenbach/Main (Zentrallabor),Hessen ...
783,3776,19571101,19951031,110,50.0946,8.7546,Offenbach/M. (Stadt),Hessen ...
1257,7341,20050715,20230309,119,50.09,8.7862,Offenbach-Wetterpark,Hessen ...


## Display number of weather stations by Bundesland

In [17]:
# Count the stations per Bundesland. The Bundesland strings can carry
# trailing padding from the fixed-width source file, so group on a stripped
# copy of the column to get clean, compact labels in the result index.
blc = wst.groupby(wst['Bundesland'].str.strip())['Bundesland'].count()
print(blc)

Bundesland
Baden-Württemberg                                                                                     231
Bayern                                                                                                274
Berlin                                                                                                 21
Brandenburg                                                                                            45
Bremen                                                                                                  5
Hamburg                                                                                                12
Hessen                                                                                                107
Mecklenburg-Vorpommern                                                                                 45
Niedersachsen                                                                                         132
Nordrhein-Westfalen                

## Download zip file from URL
Reference: [How to download a zip file from a URL using Python](https://pythonguides.com/download-zip-file-from-url-using-python/)

In [18]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from re import compile

def collectRecords(zfile):
    """Extract the daily climate product table from a zip archive.

    Parameters
    ----------
    zfile : binary file-like object
        Any object whose ``read()`` returns the raw bytes of a zip archive,
        e.g. the response object returned by ``urlopen``.

    Returns
    -------
    list of list of str
        One list of field strings per non-blank line of the data file, in
        file order. The first entry is the header line. Lines are split on
        ``;`` and the last field (the end-of-record marker) is dropped; the
        remaining fields are returned unmodified, including any padding.
        An empty data file yields an empty list.

    Raises
    ------
    AssertionError
        If the archive does not contain exactly one member whose name
        matches ``produkt_klima_tag_*.txt``.
    """
    member_re = compile(r"^produkt_klima_tag_.*\.txt$")

    with BytesIO(zfile.read()) as b, ZipFile(b) as datafile:
        dfound = [name for name in datafile.namelist() if member_re.match(name)]
        number = len(dfound)
        # Raised explicitly (not via `assert`) so the check also runs under -O.
        if number != 1:
            raise AssertionError(f"WARN: exactly one element expected, got {number} instead")
        with datafile.open(dfound[0]) as rf:
            lines = rf.readlines()

    record = []
    for bline in lines:
        # iso-8859-1 maps every byte to exactly one character and, unlike
        # 'unicode-escape', leaves backslashes in the data untouched.
        text = bline.decode('iso-8859-1').rstrip('\r\n')
        if not text:
            continue  # skip blank lines instead of emitting an empty row
        record.append(text.split(';')[:-1])  # drop last column containing only eor
    return record

In [28]:
# Directory to scan and the archive-name pattern that belongs to it.
# The dot before "zip" is escaped so it only matches a literal '.'.
zip_url = url + recent_path
twre = compile(r'tageswerte_KL_[0-9]{5}_akt\.zip')

# Alternative configuration: the historical archives.
#zip_url = url + historical_path
#twre = compile(r'tageswerte_KL_[0-9]{5}_[0-9]{8}_[0-9]{8}_hist\.zip')

print(zip_url)

# Fetch the directory listing once and release the connection before the
# individual archives are downloaded.
with urlopen(zip_url) as f:
    listing = f.read().decode('utf-8')

# A file name can occur more than once in the listing; dict.fromkeys keeps
# each name once, in order of first appearance.
for zfilename in dict.fromkeys(twre.findall(listing)):
    with urlopen(zip_url + zfilename) as z:
        record = collectRecords(z)
    # First record is the header line, the remaining ones are the data rows.
    df = pd.DataFrame(record[1:], columns=record[0])
    # NOTE(review): the original cell carried a commented-out attempt to map
    # -999 to NaN; that conversion is still not implemented here.
    print(df)
    print(df.shape)

https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/historical/
       STATIONS_ID MESS_DATUM  QN_3    FX    FM   QN_4     RSK  RSKF   SDK  \
0                1   19370101  -999  -999  -999      5     0.0     0  -999   
1                1   19370102  -999  -999  -999      5     0.0     0  -999   
2                1   19370103  -999  -999  -999      5     0.0     0  -999   
3                1   19370104  -999  -999  -999      5     0.0     0  -999   
4                1   19370105  -999  -999  -999      5     0.0     0  -999   
...            ...        ...   ...   ...   ...    ...     ...   ...   ...   
17343            1   19860626  -999  -999  -999     10     0.0     0  -999   
17344            1   19860627  -999  -999  -999     10     0.0     0  -999   
17345            1   19860628  -999  -999  -999     10     0.0     0  -999   
17346            1   19860629  -999  -999  -999     10     0.0     0  -999   
17347            1   19860630  -999  -999  -

KeyboardInterrupt: 