# Deutscher Wetterdienst

In [None]:
%reset -sf
import pandas as pd

# Base URL of the DWD open-data server and the relative paths of the daily
# climate observations ("kl") for the recent and the historical period.
url = "https://opendata.dwd.de/"
path = 'climate_environment/CDC/observations_germany/climate/daily/kl/'
recent_path = path + 'recent/'
historical_path = path + 'historical/'
filename = 'KL_Tageswerte_Beschreibung_Stationen.txt'

# The code below relies on every line ending up in ONE string column whose
# label is the complete header line (presumably the file is space-aligned and
# contains no tabs, so sep="\t" never splits anything -- TODO confirm).
ws = pd.read_csv(
    url + recent_path + filename,
    sep="\t",
    header=0,
    skiprows=0,
    encoding="ISO-8859-1",
).dropna()
# Drop the row labelled 0, i.e. the first line after the header
# (presumably a separator line -- verify against the file).
ws.drop(0, inplace=True)

# save original column names: the single column label is the whole header line
colnames = ws.columns[0].split(' ')

# rename column for instance to 'dummy'
ws.columns = ['dummy']

# split each line on runs of whitespace; n=6 performs at most 6 splits, so
# column 6 keeps the unsplit remainder of the line.
# Raw string: '\s' in a normal string literal is an invalid escape sequence.
ws = ws['dummy'].str.split(r'\s+', n=6, expand=True)

# convert columns 1..5 to numeric; column 0 stays a string
for col in ws.columns[1:6]:
    ws[col] = pd.to_numeric(ws[col], errors='coerce')

# concat columns back to a type consistent dataframe: the remainder in column 6
# is cut at the fixed character position 41 into two separate columns
wst = pd.concat(
    [ws.iloc[:, 0:6], ws[6].str.slice(0, 41), ws[6].str.slice(41,)],
    axis=1,
)
wst.columns = colnames

## Stationsname Frankfurt/Main

In [None]:
# Show the station-list rows whose "Stationsname" contains "Frankfurt/Main".
wst.loc[wst["Stationsname"].str.contains("Frankfurt/Main")]

## Stationsname Offenbach

In [None]:
# Show the station-list rows whose "Stationsname" contains "Offenbach".
wst.loc[wst["Stationsname"].str.contains("Offenbach")]

## Display number of weather stations by Bundesland

In [None]:
# Count the station-list rows per value of the "Bundesland" column.
blc = wst['Bundesland'].groupby(wst['Bundesland']).count()
print(blc)

## Download zip file from URL
[howto](https://pythonguides.com/download-zip-file-from-url-using-python/)

In [None]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from re import compile

def collectRecords(zfile):
    """Extract the daily climate table from a station zip archive.

    Args:
        zfile: Binary file-like object whose ``read()`` returns the complete
            zip archive (for example the response object of ``urlopen``).

    Returns:
        A list of rows in file order. Each row is the list of ``;``-separated
        field strings of one line, with the last field (the end-of-record
        marker column) removed. The first row is the header line. The list
        is empty when the data file contains no lines.

    Raises:
        AssertionError: If the archive does not contain exactly one member
            whose name matches ``produkt_klima_tag_*.txt``.
    """
    with BytesIO(zfile.read()) as b, ZipFile(b) as datafile:
        r = compile(r"^produkt_klima_tag_.*\.txt$")
        dfound = [name for name in datafile.namelist() if r.match(name)]
        number = len(dfound)
        # Raised explicitly (same exception type as the former ``assert``) so
        # that the check is not stripped when Python runs with -O.
        if number != 1:
            raise AssertionError(
                f"WARN: exactly one element expected, got {number} instead"
            )
        record = []
        with datafile.open(dfound[0]) as rf:
            for bline in rf:
                # ISO-8859-1 maps every byte to one character and, unlike
                # 'unicode-escape', leaves backslashes in the data untouched.
                line = bline.decode('ISO-8859-1').rstrip('\r\n').split(';')
                del line[-1]  # remove last column containing only eor
                record.append(line)
    return record

In [None]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from re import compile

# URL of the directory listing that links the per-station zip archives.
zip_url = url + recent_path
# Archive names look like tageswerte_KL_<5 digits>_akt.zip; the dot is escaped
# so that it only matches a literal '.'.
twre = compile(r'tageswerte_KL_[0-9]{5}_akt\.zip')

with urlopen(zip_url) as f:
    # Scan the listing line by line; only the first archive name found on a
    # line is used.
    for bline in f:
        zfound = twre.search(bline.decode('utf-8'))
        if not zfound:
            continue
        zfilename = zfound.group()
        with urlopen(zip_url + zfilename) as z:
            record = collectRecords(z)
        # First row of the record is the header, the rest are data rows.
        # NOTE(review): df is rebound on every iteration, so after the loop it
        # only holds the table of the last archive in the listing.
        df = pd.DataFrame(record[1:], columns=record[0])
        print(df)
        print(df.shape)

In [None]:
# Every column after the first two is converted to numbers (the first two are
# presumably STATIONS_ID and MESS_DATUM, which are handled separately below).
cols = df.columns[2:]

# Convert column by column (the default axis) instead of row by row: one
# to_numeric call per column is far cheaper than one per row and gives each
# column its own dtype. Unparseable cells become NaN.
measurements = df[cols].apply(pd.to_numeric, errors='coerce')
# -999 is treated as the "missing value" marker and blanked out as NaN.
df[cols] = measurements.mask(measurements == -999)

df['MESS_DATUM'] = pd.to_datetime(df['MESS_DATUM'], format='%Y%m%d')

# Width of the raw STATIONS_ID string of the row labelled 0.
print(len(df['STATIONS_ID'][0]))

# Characters 6..10 of the STATIONS_ID string with blanks replaced by zeros.
# The result is not assigned; as the last expression of the cell it is only
# displayed.
df.STATIONS_ID.str.slice(6, 11).str.replace(' ', '0')
#print(df)