### Deutscher Wetterdienst

In [12]:
%reset -sf
import pandas as pd

url = "https://opendata.dwd.de/"
path = 'climate_environment/CDC/observations_germany/climate/daily/kl/'
recent_path = path + 'recent/'
historical_path = path + 'historical/'
filename = 'KL_Tageswerte_Beschreibung_Stationen.txt' 
ws = pd.read_csv(url + recent_path + filename, sep="\t", header=0, skiprows = 0, encoding = "ISO-8859-1").dropna()
ws.drop(0, inplace=True)

# save original column names
colnames = ws.columns[0].split(' ')

# rename column for instance to 'dummy'
ws.columns = ['dummy']

# split string up to 6th column
ws = ws['dummy'].str.split('\s+', n=6, expand=True)

# convert 1:6 to numeric
for col in  ws.iloc[:,1:6]:
    ws[col] = pd.to_numeric(ws[col], errors='coerce') 
    
# concat columns back to a type consistent dataframe
wst = pd.concat([ws.iloc[:,0:6], ws[6].str.slice(0,41), ws[6].str.slice(41,)], axis=1)  
wst.columns = colnames

All available weather stations:

In [18]:
wst.drop(['Stationshoehe','geoBreite','geoLaenge'], axis=1)

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationsname,Bundesland
1,00001,19370101,19860630,Aach,Baden-...
2,00003,18910101,20110331,Aachen,Nordrh...
3,00011,19800901,20231105,Donaueschingen (Landeplatz),Baden-...
4,00044,19690101,20231105,Großenkneten,Nieder...
5,00052,19690101,20011231,Ahrensburg-Wulfsdorf,Schles...
...,...,...,...,...,...
1351,19631,18810601,19691231,Salzungen,Thürin...
1352,19647,19510101,20051031,Eberbach/Neckar,Baden-...
1353,19774,19710819,19940228,Nordhausen (Umspannwerk),Thürin...
1354,19781,19410101,19531231,Ingolstadt,Bayern...


Frankfurt/Main

In [6]:
wst[wst["Stationsname"].str.contains("Frankfurt/Main")]

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationshoehe,geoBreite,geoLaenge,Stationsname,Bundesland
316,1420,19490101,20231105,100,50.0259,8.5213,Frankfurt/Main,Hessen...
317,1421,19611201,19840831,125,50.1474,8.6746,Frankfurt/Main (Stadt),Hessen...
319,1423,19720501,19871231,111,50.0496,8.5887,Frankfurt/Main (Terminal),Hessen...
320,1424,19851101,20231105,121,50.1269,8.6694,Frankfurt/Main-Westend,Hessen...
321,1425,18700101,19621231,103,50.1214,8.6604,Frankfurt/Main (Feldbergstr.),Hessen...


Offenbach/Main

In [7]:
wst[wst["Stationsname"].str.contains("Offenbach")]

Unnamed: 0,Stations_id,von_datum,bis_datum,Stationshoehe,geoBreite,geoLaenge,Stationsname,Bundesland
779,3775,19780101,19981216,100,50.1029,8.7468,Offenbach/Main (Zentrallabor),Hessen...
780,3776,19571101,19951031,110,50.0946,8.7546,Offenbach/M. (Stadt),Hessen...
1254,7341,20050715,20231105,119,50.09,8.7862,Offenbach-Wetterpark,Hessen...


Display number of weather stations by Bundesland:

In [8]:
blc = wst.groupby(['Bundesland'])['Bundesland'].count()
print(blc)

Bundesland
                                        Baden-Württemberg                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             231
                                        Bayern                                     

Download zip file from URL
[howto](https://pythonguides.com/download-zip-file-from-url-using-python/)

In [9]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
from re import compile

def collectRecords(zfile): 
    with BytesIO(zfile.read()) as b, ZipFile(b) as datafile: 
        r = compile("^produkt_klima_tag_.*\.txt$")
        dfound = list(filter(r.match, datafile.namelist()))
        number = len(dfound)
        assert  number == 1, f"WARN: exactly one element expected, got {number} instead"
        #print(dfound[0])
        #print(type(datafile))
        rf = datafile.open(dfound[0])
        lines = rf.readlines()
        rf.close()
        header = True
        for bline in lines:
            line = bline.decode('unicode-escape').rstrip('\r\n').split(';')
            del line[-1] # remove last column containing only string 'eor'
            #print(line)
            if header: # initialize list of lists
                header = not(header)
                record = [line]
            else:
                record.append(line)
    return record

Loop zip files in zip_url and extract observations

In [10]:
zip_url = url + recent_path 
twre = compile(r'tageswerte_KL_[0-9]{5}_akt.zip')

#zip_url = url + historical_path 
#twre = compile(r'tageswerte_KL_[0-9]{5}_[0-9]{8}_[0-9]{8}_hist.zip')

print(zip_url)

with urlopen(zip_url) as f:
    for bline in f.readlines():
        zfound = twre.search(bline.decode('utf-8'))
        #print(zfound)
        if zfound:
            zfilename = zfound.string[zfound.start():zfound.end()]
            #print(zfilename[14:19]) # extracted weather station id
            #print(zfilename, zfound.start(), zfound.end())
            with urlopen(zip_url + zfilename) as z:
                record = collectRecords(z) # <-- function call   
                df = pd.DataFrame(record[1:], columns=record[0])
                #df.ilock[2:] = df.iloc[2:].map({ -999 : NaN})
                print(df)
                #print(df.shape)    

https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/
     STATIONS_ID MESS_DATUM   QN_3      FX      FM  QN_4   RSK  RSKF   SDK  \
0             11   20230727      3    12.1     3.2  -999  -999  -999  -999   
1             11   20230728      3    11.8     3.1  -999  -999  -999  -999   
2             11   20230729      3    13.0     4.1  -999  -999  -999  -999   
3             11   20230730      3    17.6     4.2  -999  -999  -999  -999   
4             11   20230731      3     9.8     2.9  -999  -999  -999  -999   
..           ...        ...    ...     ...     ...   ...   ...   ...   ...   
97            11   20231101      1     8.8     3.3  -999  -999  -999  -999   
98            11   20231102      1    10.8     3.5  -999  -999  -999  -999   
99            11   20231103      1     7.2     2.4  -999  -999  -999  -999   
100           11   20231104      1    21.3     6.0  -999  -999  -999  -999   
101           11   20231105      1    17.0     6

KeyboardInterrupt: 