# Deutscher Wetterdienst

In [None]:
%reset -sf
import pandas as pd

url = "https://opendata.dwd.de/"
path = 'climate_environment/CDC/observations_germany/climate/daily/kl/'
recent_path = path + 'recent/'
historical_path = path + 'historical/'
filename = 'KL_Tageswerte_Beschreibung_Stationen.txt'

# The code below relies on the file being read as ONE column whose name is the
# header line: a tab separator is used and the single column is split later.
ws = pd.read_csv(
    url + recent_path + filename,
    sep="\t",
    header=0,
    skiprows=0,
    encoding="ISO-8859-1",
).dropna()
# Drop the row labelled 0 -- presumably the separator line that follows the
# header (TODO confirm against the file). Reassigning instead of inplace=True
# keeps the statement free of hidden mutation.
ws = ws.drop(0)

# save original column names; split() without an argument tolerates repeated
# or trailing blanks in the header line
colnames = ws.columns[0].split()

# rename the single column to 'dummy'
ws.columns = ['dummy']

# split every line on runs of whitespace into at most 7 pieces:
# six leading fields plus the unsplit remainder (raw string: '\s' is a regex
# escape, not a Python string escape)
ws = ws['dummy'].str.split(r'\s+', n=6, expand=True)

# convert columns 1..5 to numeric; column 0 is left as text
for col in ws.columns[1:6]:
    ws[col] = pd.to_numeric(ws[col], errors='coerce')

# The remainder is cut at a fixed position (41 characters) into its two parts.
# A fixed-position cut leaves the padding inside the values, so both parts are
# stripped; otherwise equal names would differ only by trailing blanks.
name_part = ws[6].str.slice(0, 41).str.strip()
state_part = ws[6].str.slice(41).str.strip()

# concat columns back to a type consistent dataframe
wst = pd.concat([ws.iloc[:, 0:6], name_part, state_part], axis=1)
wst.columns = colnames

## All available weather stations

In [None]:
# Show the station table; a bare expression on the last line of a cell is
# rendered as that cell's output.
wst

## Frankfurt/Main

In [None]:
# Keep only the rows whose station name contains "Frankfurt/Main".
is_frankfurt = wst["Stationsname"].str.contains("Frankfurt/Main")
wst[is_frankfurt]

## Offenbach/Main

In [None]:
# Keep only the rows whose station name contains "Offenbach".
is_offenbach = wst["Stationsname"].str.contains("Offenbach")
wst[is_offenbach]

## Display number of weather stations by Bundesland

In [None]:
# Group the station table by the 'Bundesland' column and count the entries of
# each group, then print the resulting Series.
by_state = wst.groupby(['Bundesland'])
blc = by_state['Bundesland'].count()
print(blc)

## Download zip file from URL
[howto](https://pythonguides.com/download-zip-file-from-url-using-python/)

In [None]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from re import compile

def collectRecords(zfile):
    """Extract the daily-climate product table from a zip archive.

    Parameters
    ----------
    zfile : binary file-like object
        Any object whose ``read()`` returns the complete zip archive as bytes
        (for example the response object returned by ``urlopen``).

    Returns
    -------
    list of list of str
        One list per non-empty line of the product file, split on ``;`` with
        the last field removed. The first entry is the header row. An empty
        list is returned when the product file contains no lines.

    Raises
    ------
    AssertionError
        If the archive does not contain exactly one member whose name matches
        ``produkt_klima_tag_*.txt``.
    """
    # Raw string: '\.' is a regex escape, not a Python string escape.
    product_re = compile(r"^produkt_klima_tag_.*\.txt$")

    with BytesIO(zfile.read()) as b, ZipFile(b) as datafile:
        dfound = list(filter(product_re.match, datafile.namelist()))
        number = len(dfound)
        assert number == 1, f"WARN: exactly one element expected, got {number} instead"
        # The context manager closes the member even if reading fails.
        with datafile.open(dfound[0]) as rf:
            lines = rf.readlines()

    record = []  # stays an empty list when the product file has no lines
    for bline in lines:
        # Decode as plain single-byte text. 'unicode-escape' would additionally
        # interpret backslash sequences found in the data.
        text = bline.decode('ISO-8859-1').rstrip('\r\n')
        if not text:
            continue  # a blank line would otherwise become an empty row
        fields = text.split(';')
        del fields[-1]  # remove last column containing only string 'eor'
        record.append(fields)
    return record

## Loop over the zip files in zip_url and extract observations

In [48]:
zip_url = url + recent_path
# The dot before "zip" is escaped so that it only matches a literal "."
twre = compile(r'tageswerte_KL_[0-9]{5}_akt\.zip')

# Alternative: process the historical archives instead of the recent ones.
#zip_url = url + historical_path
#twre = compile(r'tageswerte_KL_[0-9]{5}_[0-9]{8}_[0-9]{8}_hist\.zip')

print(zip_url)

# Read the directory listing completely first, so its connection is closed
# before the (many) archive downloads start.
with urlopen(zip_url) as f:
    listing = [bline.decode('utf-8') for bline in f.readlines()]

frames = []  # one DataFrame per downloaded archive
for text in listing:
    zfound = twre.search(text)
    if not zfound:
        continue
    zfilename = zfound.group(0)
    with urlopen(zip_url + zfilename) as z:
        record = collectRecords(z) # <-- function call
    if not record:
        continue  # no header row, nothing to build a frame from
    # All values are strings at this point; the -999 entries (presumably the
    # missing-value marker) are NOT converted here.
    df = pd.DataFrame(record[1:], columns=record[0])
    frames.append(df)
    print(df)

# Keep every station's rows instead of only the last `df`.
observations = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/
     STATIONS_ID MESS_DATUM   QN_3      FX      FM  QN_4   RSK  RSKF   SDK  \
0             11   20200213     10    18.0     4.4  -999  -999  -999  -999   
1             11   20200214     10    10.7     3.0  -999  -999  -999  -999   
2             11   20200215     10     9.6     2.8  -999  -999  -999  -999   
3             11   20200216     10    14.6     3.7  -999  -999  -999  -999   
4             11   20200217     10    16.5     4.9  -999  -999  -999  -999   
..           ...        ...    ...     ...     ...   ...   ...   ...   ...   
226           11   20200926     10    11.9     4.4  -999  -999  -999  -999   
227           11   20200927     10    11.0     4.2  -999  -999  -999  -999   
228           11   20200928     10     9.1     2.3  -999  -999  -999  -999   
229           11   20200929     10     6.2     2.5  -999  -999  -999  -999   
230           11   20200930     10     4.8     1

KeyboardInterrupt: 