# Data Transformation

I have:
- data frame containing Landkreis IDs
- data frame containing Landkreis Locations
- data frame containing Weather Data

Goal:
- assign every Landkreis ONE weather station
- combine the three data frames into one

In [1]:
from pathlib import Path
import pandas as pd
import math

In [2]:
path_base = Path.cwd()

# export path
path_export = Path.joinpath(path_base, "exports")
path_export.mkdir(parents=True, exist_ok=True)

In [3]:
# import the data from Notebook 01
df_temp = pd.read_pickle(Path.joinpath(path_export, "temp.pkl"))
df_temp_stations = pd.read_pickle(Path.joinpath(path_export, "temp_stations.pkl"))
df_prec = pd.read_pickle(Path.joinpath(path_export, "prec.pkl"))
df_prec_stations = pd.read_pickle(Path.joinpath(path_export, "prec_stations.pkl"))
df_sun = pd.read_pickle(Path.joinpath(path_export, "sun.pkl"))
df_sun_stations = pd.read_pickle(Path.joinpath(path_export, "sun_stations.pkl"))

## Integrate Geodata into Landkreise Frame

In [4]:
# load RKI Covid-19 data in order to build a Landkreis-ID lookup table
df_rki = pd.read_csv("https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data")
df_landkreise = df_rki.drop_duplicates('Landkreis')[['Landkreis', 'IdLandkreis', 'Bundesland', 'IdBundesland']]
df_landkreise

Unnamed: 0,Landkreis,IdLandkreis,Bundesland,IdBundesland
0,SK Flensburg,1001,Schleswig-Holstein,1
33,SK Kiel,1002,Schleswig-Holstein,1
270,SK Lübeck,1003,Schleswig-Holstein,1
412,SK Neumünster,1004,Schleswig-Holstein,1
471,LK Dithmarschen,1051,Schleswig-Holstein,1
...,...,...,...,...
117759,LK Weimarer Land,16071,Thüringen,16
117820,LK Sonneberg,16072,Thüringen,16
117904,LK Saalfeld-Rudolstadt,16073,Thüringen,16
117959,LK Saale-Holzland-Kreis,16074,Thüringen,16


In [5]:
# load geographical data of the Landkreise in Germany
df_districts_geo = pd.read_csv("https://public.opendatasoft.com/explore/dataset/landkreise-in-germany/download/?format=csv&timezone=Europe/Berlin&lang=en&use_labels_for_header=true&csv_separator=%3B", ";")
df_districts_geo
# Our districtId is in column "Cca 2"

Unnamed: 0,Geo Point,Geo Shape,Id 0,ISO,Name 0,Id 1,Name 1,Id 2,Name 2,Hasc 2,Ccn 2,Cca 2,Type 2,Engtype 2,Nl Name 2,Varname 2
0,"47.9925229956,7.81807596197","{""type"": ""Polygon"", ""coordinates"": [[[7.790447...",86,DEU,Germany,1,Baden-Württemberg,12,Freiburg im Breisgau,DE.BW.FB,0,8311.0,Stadtkreis,District,,
1,"48.5964037974,10.527764168","{""type"": ""Polygon"", ""coordinates"": [[[10.61448...",86,DEU,Germany,2,Bayern,68,Dillingen an der Donau,DE.BY.DD,0,9773.0,Landkreis,District,,
2,"49.4362114486,11.0827553426","{""type"": ""MultiPolygon"", ""coordinates"": [[[[11...",86,DEU,Germany,2,Bayern,107,Nürnberg,DE.BY.NR,0,9564.0,Kreisfreie Stadt,District,,
3,"49.2159614099,11.5665579197","{""type"": ""Polygon"", ""coordinates"": [[[11.46063...",86,DEU,Germany,2,Bayern,110,Neumarkt in der Oberpfalz,DE.BY.NO,0,9373.0,Landkreis,District,,
4,"47.8443777181,12.1087247511","{""type"": ""Polygon"", ""coordinates"": [[[12.05431...",86,DEU,Germany,2,Bayern,122,Rosenheim,DE.BY.RH,0,9163.0,Kreisfreie Stadt,District,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,"51.239397748,13.4829006825","{""type"": ""Polygon"", ""coordinates"": [[[13.41115...",86,DEU,Germany,14,Sachsen,360,Meißen,DE.SN.MI,0,14627.0,Landkreis,District,,
399,"54.2433885939,10.3636951573","{""type"": ""Polygon"", ""coordinates"": [[[10.33194...",86,DEU,Germany,15,Schleswig-Holstein,375,Plön,DE.SH.PO,0,1057.0,Kreis,District,,
400,"53.7208005726,10.3316398811","{""type"": ""MultiPolygon"", ""coordinates"": [[[[10...",86,DEU,Germany,15,Schleswig-Holstein,380,Stormarn,DE.SH.SO,0,1062.0,Kreis,District,,
401,"50.9564246614,12.3991313423","{""type"": ""Polygon"", ""coordinates"": [[[12.35657...",86,DEU,Germany,16,Thüringen,381,Altenburger Land,DE.TH.AL,0,16077.0,Landkreis,District,,


In [6]:
df_lk = pd.merge(df_landkreise, df_districts_geo, left_on="IdLandkreis", right_on="Cca 2")[['Landkreis', 'Name 2', 'Type 2','IdLandkreis', 'Bundesland', 'Geo Point']]
df_lk = df_lk.rename(columns={'Name 2': 'Name kurz', 'Type 2': 'Typ'})
df_lk

Unnamed: 0,Landkreis,Name kurz,Typ,IdLandkreis,Bundesland,Geo Point
0,SK Flensburg,Flensburg,Kreisfreie Stadt,1001,Schleswig-Holstein,"54.7849933768,9.43852835486"
1,SK Kiel,Kiel,Kreisfreie Stadt,1002,Schleswig-Holstein,"54.3248406926,10.1322443646"
2,SK Lübeck,Lübeck,Kreisfreie Stadt,1003,Schleswig-Holstein,"53.8723167338,10.7272831058"
3,SK Neumünster,Neumünster,Kreisfreie Stadt,1004,Schleswig-Holstein,"54.0811244365,9.98448195474"
4,LK Dithmarschen,Dithmarschen,Kreis,1051,Schleswig-Holstein,"54.1329109614,9.10781447873"
...,...,...,...,...,...,...
394,LK Weimarer Land,Weimarer Land,Landkreis,16071,Thüringen,"50.9715719951,11.3735532667"
395,LK Sonneberg,Sonneberg,Landkreis,16072,Thüringen,"50.4146305494,11.1329526551"
396,LK Saalfeld-Rudolstadt,Saalfeld-Rudolstadt,Landkreis,16073,Thüringen,"50.637797959,11.3091162493"
397,LK Saale-Holzland-Kreis,Saale-Holzland-Kreis,Landkreis,16074,Thüringen,"50.904172137,11.7315307817"


In [7]:
# split up column "Geo Point" into two seperate numerical columns
df_lk['latitude'], df_lk['longitude'] = df_lk['Geo Point'].str.split(',', 1).str
df_lk[['latitude', 'longitude']] = df_lk[['latitude', 'longitude']].apply(pd.to_numeric)
df_lk.drop(columns=['Geo Point'], inplace=True)
df_lk

  


Unnamed: 0,Landkreis,Name kurz,Typ,IdLandkreis,Bundesland,latitude,longitude
0,SK Flensburg,Flensburg,Kreisfreie Stadt,1001,Schleswig-Holstein,54.784993,9.438528
1,SK Kiel,Kiel,Kreisfreie Stadt,1002,Schleswig-Holstein,54.324841,10.132244
2,SK Lübeck,Lübeck,Kreisfreie Stadt,1003,Schleswig-Holstein,53.872317,10.727283
3,SK Neumünster,Neumünster,Kreisfreie Stadt,1004,Schleswig-Holstein,54.081124,9.984482
4,LK Dithmarschen,Dithmarschen,Kreis,1051,Schleswig-Holstein,54.132911,9.107814
...,...,...,...,...,...,...,...
394,LK Weimarer Land,Weimarer Land,Landkreis,16071,Thüringen,50.971572,11.373553
395,LK Sonneberg,Sonneberg,Landkreis,16072,Thüringen,50.414631,11.132953
396,LK Saalfeld-Rudolstadt,Saalfeld-Rudolstadt,Landkreis,16073,Thüringen,50.637798,11.309116
397,LK Saale-Holzland-Kreis,Saale-Holzland-Kreis,Landkreis,16074,Thüringen,50.904172,11.731531


In [8]:
df_temp_stations

Unnamed: 0,station_id,start_date,end_date,altitude,latitude,longitude,name,state
0,3,1950-04-01,2011-03-31,202,50.7827,6.0941,Aachen,Nordrhein-Westfalen
1,44,2007-04-01,2020-04-23,44,52.9336,8.2370,Großenkneten,Niedersachsen
2,52,1976-01-01,1988-01-01,46,53.6623,10.1990,Ahrensburg-Wulfsdorf,Schleswig-Holstein
3,71,2009-12-01,2020-01-02,759,48.2156,8.9784,Albstadt-Badkap,Baden-Württemberg
4,73,2007-04-01,2020-04-23,340,48.6159,13.0506,Aldersbach-Kriestorf,Bayern
...,...,...,...,...,...,...,...,...
653,14138,2009-09-15,2015-12-31,73,52.1655,14.1224,Falkenberg (Grenzschichtmessfeld),Brandenburg
654,15000,2011-04-01,2020-04-23,231,50.7983,6.0244,Aachen-Orsbach,Nordrhein-Westfalen
655,15207,2013-11-01,2020-04-23,317,51.2835,9.3590,Schauenburg-Elgershausen,Hessen
656,15444,2014-09-01,2020-04-23,593,48.4418,9.9216,Ulm-Mähringen,Baden-Württemberg


### Landkreise that are not covered by this dataset
The RKI dataset gives data for 412 Landkreise, however, the dataset from _opendatasoft_ provides geospatial coordinates only for 399 of them.

The Landkreise for which no geospatial data exists will be neglected in the following. If we have a look at them, we see that its mostly the districts of Berlin that are special, so we treat Berlin as a whole in the future:

In [9]:
# some of the Landkreise are not covered by BOTH datasets, so they will be omitted
pd.concat([df_lk, df_landkreise]).drop_duplicates(['IdLandkreis'], keep=False)

Unnamed: 0,Landkreis,Name kurz,Typ,IdLandkreis,Bundesland,latitude,longitude,IdBundesland
4009,LK Göttingen,,,3159,Niedersachsen,,,3.0
101519,SK Berlin Mitte,,,11001,Berlin,,,11.0
104194,SK Berlin Friedrichshain-Kreuzberg,,,11002,Berlin,,,11.0
104597,SK Berlin Pankow,,,11003,Berlin,,,11.0
105120,SK Berlin Charlottenburg-Wilmersdorf,,,11004,Berlin,,,11.0
105706,SK Berlin Spandau,,,11005,Berlin,,,11.0
105915,SK Berlin Steglitz-Zehlendorf,,,11006,Berlin,,,11.0
106327,SK Berlin Tempelhof-Schöneberg,,,11007,Berlin,,,11.0
106853,SK Berlin Neukölln,,,11008,Berlin,,,11.0
107406,SK Berlin Treptow-Köpenick,,,11009,Berlin,,,11.0


## Strategy

- iterate over all Landkreise
- assign every Landkreis the weather station that is closest to it

In [10]:
def assign_weather_station_to_landkreis(df_stations, df_lk):
    """Compares the center of each Landkreis with the location of each weather station
       and finds the one station that is closest to a particular Landkreis center.
    """
    closest_station_dict = {}
    for lk_idx, lk_row in df_lk.iterrows():
        idLandkreis = lk_row['IdLandkreis']
        for idx, row in df_stations.iterrows():    
            # calculate distance between station and landkreis center
            lk_lat = lk_row['latitude']
            lk_lon = lk_row['longitude']

            station_lat = row['latitude']
            station_lon = row['longitude']

            a = station_lat - lk_lat
            b = station_lon - lk_lon
            distance = math.sqrt(a*a + b*b)

            if idLandkreis not in closest_station_dict.keys():
                closest_station_dict[idLandkreis] = {'station_id': row['station_id'], 'distance': distance}
            else:
                # check if current station is closer to landkreis
                if distance < closest_station_dict[idLandkreis]['distance']:
                    closest_station_dict[idLandkreis] = {'station_id': row['station_id'], 'distance': distance}
    df = pd.DataFrame.from_dict(closest_station_dict, orient='index').reset_index()
    df.rename(columns={'index': "IdLandkreis"}, inplace=True)
    return df

In [11]:
# these variables contain the assiciation of Landkreis to weather station
temp_lk_stations = assign_weather_station_to_landkreis(df_temp_stations, df_lk)
prec_lk_stations = assign_weather_station_to_landkreis(df_prec_stations, df_lk)
sun_lk_stations = assign_weather_station_to_landkreis(df_sun_stations, df_lk)
sun_lk_stations

Unnamed: 0,IdLandkreis,station_id,distance
0,1001,1130,0.173220
1,1002,2564,0.053728
2,1003,4602,0.072251
3,1004,3538,0.051228
4,1051,1200,0.116370
...,...,...,...
394,16071,5424,0.049946
395,16072,3248,0.024237
396,16073,4605,0.115688
397,16074,550,0.066969


## Plot Landkreis-Weather Station Assignment

In [64]:
temp_stations = temp_lk_stations.merge(df_lk, on='IdLandkreis').merge(df_temp_stations, on='station_id').drop(columns=['Name kurz', 'Typ', 'Bundesland', 'start_date', 'end_date', 'state'])
temp_stations.rename(columns={'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude'}, inplace=True)

prec_stations = prec_lk_stations.merge(df_lk, on='IdLandkreis').merge(df_prec_stations, on='station_id').drop(columns=['Name kurz', 'Typ', 'Bundesland', 'start_date', 'end_date', 'state'])
prec_stations.rename(columns={'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude'}, inplace=True)

sun_stations = sun_lk_stations.merge(df_lk, on='IdLandkreis').merge(df_sun_stations, on='station_id').drop(columns=['Name kurz', 'Typ', 'Bundesland', 'start_date', 'end_date', 'state'])
sun_stations.rename(columns={'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude'}, inplace=True)


prec_stations

Unnamed: 0,IdLandkreis,station_id,distance,Landkreis,lk_latitude,lk_longitude,altitude,station_latitude,station_longitude,name
0,1001,1130,0.173220,SK Flensburg,54.784993,9.438528,17,54.6282,9.3649,Eggebek
1,1002,2564,0.053728,SK Kiel,54.324841,10.132244,28,54.3776,10.1424,Kiel-Holtenau
2,1003,4602,0.072251,SK Lübeck,53.872317,10.727283,26,53.9385,10.6983,"Schwartau,Bad -Groß Parin"
3,1004,3538,0.051228,SK Neumünster,54.081124,9.984482,20,54.0833,9.9333,Neumünster-Wasbek
4,1051,1200,0.116370,LK Dithmarschen,54.132911,9.107814,3,54.0691,9.0105,Elpersbüttel
...,...,...,...,...,...,...,...,...,...,...
394,16071,5424,0.049946,LK Weimarer Land,50.971572,11.373553,328,51.0177,11.3544,Weimar-Schöndorf
395,16072,3248,0.024237,LK Sonneberg,50.414631,11.132953,516,50.3904,11.1324,Frankenblick-Mengersgereuth-Hämmern
396,16073,4605,0.115688,LK Saalfeld-Rudolstadt,50.637798,11.309116,277,50.6441,11.1936,Schwarzburg
397,16074,550,0.066969,LK Saale-Holzland-Kreis,50.904172,11.731531,344,50.9041,11.7985,Bobeck


In [14]:
import plotly.express as px
import plotly.graph_objects as go
token = "pk.eyJ1IjoiZnlubndpIiwiYSI6ImNrODk3YmF6MzAzcDczbWs5NXdhaGpyNzYifQ.vHweJb-1hjDeE21tTs7tGQ"
px.set_mapbox_access_token(token)
fig = px.scatter_mapbox(temp_stations, lat='lk_latitude', lon='lk_longitude', color='IdLandkreis', size='distance', size_max=12, text='Landkreis')
fig.show()

## Build Final Dataframe
Eventually, we want to have weather parameters for each day for each Landkreis. So far, the measurements are on an hourly resolution. I take the daily mean of the temperatures, and the sum of the precipitatino and sunshine hour data per day. 

Finally, all data is merged into a single dataframe that holds the temperature, precipitation and sunshine measurements for a particular day in a given Landkreis in one row.

In [37]:
#df_temp.groupby(['station_id', pd.Grouper(key='date', freq='D')]).mean().reset_index().merge(temp_lk_stations, on='station_id').merge(df_lk, on="IdLandkreis")
temp = df_temp.groupby(['station_id', pd.Grouper(key='date', freq='D')]).mean().reset_index()
prec = df_prec.groupby(['station_id', pd.Grouper(key='date', freq='D')]).sum().reset_index()
sun = df_sun.groupby(['station_id', pd.Grouper(key='date', freq='D')]).sum().reset_index()

In [53]:
temp_final = temp.merge(temp_lk_stations, on="station_id").merge(df_lk, on="IdLandkreis").merge(df_temp_stations, on="station_id")
temp_final.drop(columns=['quality', 'station_id', 'Name kurz', 'start_date', 'end_date', 'altitude', 'state', 'Typ', 'Bundesland'], inplace=True)
temp_final.rename(columns={'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude', 'name': 'station_name'}, inplace=True)
temp_final = temp_final[['date', 'temperature', 'humidity', 'IdLandkreis', 'Landkreis', 'lk_latitude', 'lk_longitude', 'station_name', 'station_latitude', 'station_longitude']]
temp_final

Unnamed: 0,date,temperature,humidity,IdLandkreis,Landkreis,lk_latitude,lk_longitude,station_name,station_latitude,station_longitude
0,2020-01-01,-1.045833,98.625000,3458,LK Oldenburg,52.988077,8.389397,Großenkneten,52.9336,8.2370
1,2020-01-02,-0.045833,97.458333,3458,LK Oldenburg,52.988077,8.389397,Großenkneten,52.9336,8.2370
2,2020-01-03,6.495833,92.666667,3458,LK Oldenburg,52.988077,8.389397,Großenkneten,52.9336,8.2370
3,2020-01-04,4.762500,88.916667,3458,LK Oldenburg,52.988077,8.389397,Großenkneten,52.9336,8.2370
4,2020-01-05,4.162500,92.625000,3458,LK Oldenburg,52.988077,8.389397,Großenkneten,52.9336,8.2370
...,...,...,...,...,...,...,...,...,...,...
35969,2020-04-19,12.916667,73.666667,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
35970,2020-04-20,10.462500,55.166667,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
35971,2020-04-21,11.279167,43.500000,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
35972,2020-04-22,11.887500,42.458333,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216


In [55]:
prec_final = prec.merge(prec_lk_stations, on="station_id").merge(df_lk, on="IdLandkreis").merge(df_prec_stations, on="station_id")
prec_final.drop(columns=['R1_IND', 'quality', 'station_id', 'WRTR', 'Name kurz', 'start_date', 'end_date', 'altitude', 'state', 'Typ', 'Bundesland'], inplace=True)
prec_final.rename(columns={'R1': 'precipitation', 'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude', 'name': 'station_name'}, inplace=True)
prec_final = prec_final[['date', 'precipitation', 'IdLandkreis', 'Landkreis', 'lk_latitude', 'lk_longitude', 'station_name', 'station_latitude', 'station_longitude']]
prec_final

Unnamed: 0,date,precipitation,IdLandkreis,Landkreis,lk_latitude,lk_longitude,station_name,station_latitude,station_longitude
0,2020-01-01,0.0,6535,LK Vogelsbergkreis,50.638119,9.271380,Alsfeld-Eifa,50.7446,9.3450
1,2020-01-02,0.0,6535,LK Vogelsbergkreis,50.638119,9.271380,Alsfeld-Eifa,50.7446,9.3450
2,2020-01-03,0.7,6535,LK Vogelsbergkreis,50.638119,9.271380,Alsfeld-Eifa,50.7446,9.3450
3,2020-01-04,2.7,6535,LK Vogelsbergkreis,50.638119,9.271380,Alsfeld-Eifa,50.7446,9.3450
4,2020-01-05,0.4,6535,LK Vogelsbergkreis,50.638119,9.271380,Alsfeld-Eifa,50.7446,9.3450
...,...,...,...,...,...,...,...,...,...
40716,2020-04-19,0.1,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
40717,2020-04-20,0.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
40718,2020-04-21,0.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
40719,2020-04-22,0.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216


In [57]:
sun_final = sun.merge(sun_lk_stations, on="station_id").merge(df_lk, on="IdLandkreis").merge(df_sun_stations, on="station_id")
sun_final.drop(columns=['quality', 'station_id', 'Name kurz', 'start_date', 'end_date', 'altitude', 'state', 'Typ', 'Bundesland'], inplace=True)
sun_final.rename(columns={'SD_SO': 'sunshine', 'latitude_x': 'lk_latitude', 'longitude_x': 'lk_longitude', 'latitude_y': 'station_latitude', 'longitude_y': 'station_longitude', 'name': 'station_name'}, inplace=True)
sun_final = sun_final[['date', 'sunshine', 'IdLandkreis', 'Landkreis', 'lk_latitude', 'lk_longitude', 'station_name', 'station_latitude', 'station_longitude']]
sun_final

Unnamed: 0,date,sunshine,IdLandkreis,Landkreis,lk_latitude,lk_longitude,station_name,station_latitude,station_longitude
0,2020-01-01,162.0,7331,LK Alzey-Worms,49.759441,8.157185,Alzey,49.7273,8.1164
1,2020-01-02,0.0,7331,LK Alzey-Worms,49.759441,8.157185,Alzey,49.7273,8.1164
2,2020-01-03,13.0,7331,LK Alzey-Worms,49.759441,8.157185,Alzey,49.7273,8.1164
3,2020-01-04,19.0,7331,LK Alzey-Worms,49.759441,8.157185,Alzey,49.7273,8.1164
4,2020-01-05,0.0,7331,LK Alzey-Worms,49.759441,8.157185,Alzey,49.7273,8.1164
...,...,...,...,...,...,...,...,...,...
14882,2020-04-19,619.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
14883,2020-04-20,369.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
14884,2020-04-21,752.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216
14885,2020-04-22,812.0,8425,LK Alb-Donau-Kreis,48.401711,9.827447,Ulm-Mähringen,48.4418,9.9216


In [69]:
sun_final.groupby('IdLandkreis').mean()

Unnamed: 0_level_0,sunshine,lk_latitude,lk_longitude,station_latitude,station_longitude
IdLandkreis,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1051,271.307018,54.132911,9.107814,54.0691,9.0105
1055,232.252336,54.189649,10.819427,54.0893,10.8773
1056,247.495575,53.718084,9.736867,53.7331,9.8776
1057,243.824561,54.243389,10.363695,54.1654,10.3519
1058,250.464912,54.289616,9.781742,54.3194,9.8051
...,...,...,...,...,...
16056,284.228070,50.989676,10.300102,51.0007,10.3621
16061,279.385965,51.383649,10.253485,51.3933,10.3123
16066,257.614035,50.630708,10.405043,50.5612,10.3771
16070,283.131579,50.737934,10.966357,50.7334,10.8815


## Export

In [67]:
temp_final.to_pickle(Path.joinpath(path_export, "temp_final.pkl"))
prec_final.to_pickle(Path.joinpath(path_export, "prec_final.pkl"))
sun_final.to_pickle(Path.joinpath(path_export, "sun_final.pkl"))

df_lk.to_pickle(Path.joinpath(path_export, "landkreise.pkl"))

temp_stations.to_pickle(Path.joinpath(path_export, "temp_stations_assigned.pkl"))
prec_stations.to_pickle(Path.joinpath(path_export, "prec_stations_assigned.pkl"))
sun_stations.to_pickle(Path.joinpath(path_export, "sun_stations_assigned.pkl"))


In [66]:
prec_stations

Unnamed: 0,IdLandkreis,station_id,distance,Landkreis,lk_latitude,lk_longitude,altitude,station_latitude,station_longitude,name
0,1001,1130,0.173220,SK Flensburg,54.784993,9.438528,17,54.6282,9.3649,Eggebek
1,1002,2564,0.053728,SK Kiel,54.324841,10.132244,28,54.3776,10.1424,Kiel-Holtenau
2,1003,4602,0.072251,SK Lübeck,53.872317,10.727283,26,53.9385,10.6983,"Schwartau,Bad -Groß Parin"
3,1004,3538,0.051228,SK Neumünster,54.081124,9.984482,20,54.0833,9.9333,Neumünster-Wasbek
4,1051,1200,0.116370,LK Dithmarschen,54.132911,9.107814,3,54.0691,9.0105,Elpersbüttel
...,...,...,...,...,...,...,...,...,...,...
394,16071,5424,0.049946,LK Weimarer Land,50.971572,11.373553,328,51.0177,11.3544,Weimar-Schöndorf
395,16072,3248,0.024237,LK Sonneberg,50.414631,11.132953,516,50.3904,11.1324,Frankenblick-Mengersgereuth-Hämmern
396,16073,4605,0.115688,LK Saalfeld-Rudolstadt,50.637798,11.309116,277,50.6441,11.1936,Schwarzburg
397,16074,550,0.066969,LK Saale-Holzland-Kreis,50.904172,11.731531,344,50.9041,11.7985,Bobeck
