# Data Preparation for Streamlit Powerplants Implementation
Our Powerplants data describes individual power plants, green and non-green, each located at a specific coordinate.

In this session we want to create a nice-looking web page that lets us easily compare statistics between individual countries.

In order to do this we have to aggregate our individual coordinate-based data to per-country data.

## Imports

In [1]:
import geopandas as gpd
import pandas as pd
import folium
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 100)

## Load Raw Data

In [2]:
# Load the country polygons and reproject them to WGS 84 (EPSG:4326).
europe = pd.read_pickle("../data/europe.p").to_crs("EPSG:4326")
# The file contains all countries intersecting our polygon from the WW2 analysis,
# so not only European countries per se (see the "continent" column).
europe.sample(5)  # NOTE: only the last expression of a cell is displayed, so this sample is not shown
# Columns of interest for us: name and geometry
europe.head()

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,142257519,Europe,Russia,RUS,3745000.0,"MULTIPOLYGON (((45.00000 42.60827, 44.53762 42.71199, 43.93121 42.55496, 43.75599 42.74083, 42.3..."
1,5320045,Europe,Norway,-99,364700.0,"POLYGON ((31.10104 69.55810, 29.39955 69.15692, 28.59193 69.06478, 29.01557 69.76649, 27.73229 7..."
2,57713,North America,Greenland,GRL,2173.0,"POLYGON ((-20.72922 73.50000, -20.76234 73.46436, -22.17221 73.30955, -23.56593 73.30663, -22.31..."
3,67106161,Europe,France,-99,2699000.0,"MULTIPOLYGON (((8.74601 42.62812, 9.39000 43.00998, 9.56002 42.15249, 9.22975 41.38001, 8.77572 ..."
4,8299706,Asia,Israel,ISR,297000.0,"POLYGON ((35.05295 33.00000, 35.09846 33.08054, 35.12605 33.09090, 35.46071 33.08904, 35.55280 3..."


In [3]:
europe.shape

(56, 6)

In [4]:
# Load the power plant data that will be aggregated per region.
# Each row is one power plant; its geometry is a coordinate POINT.
europe_data = pd.read_pickle("../data/gdf_europe.p").to_crs("EPSG:4326")
europe_data.head()

Unnamed: 0,country code,country,name of powerplant,capacity in MW,latitude,longitude,primary_fuel,start date,owner of plant,geolocation_source,estimated_generation_gwh_2020,green,continent,geometry
9,ALB,Albania,Bistrica 1,27.0,39.9116,20.1047,Hydro,1965,,GEODB,88.45,True,Europe,POINT (20.10470 39.91160)
10,ALB,Albania,Fierza,500.0,42.2514,20.0431,Hydro,1978,,GEODB,1648.24,True,Europe,POINT (20.04310 42.25140)
11,ALB,Albania,Koman,600.0,42.1033,19.8224,Hydro,1985,,GEODB,1982.72,True,Europe,POINT (19.82240 42.10330)
12,ALB,Albania,Lanabregas,5.0,41.3428,19.8964,Hydro,1951,,GEODB,15.23,True,Europe,POINT (19.89640 41.34280)
13,ALB,Albania,Shkopet,24.0,41.6796,19.8305,Hydro,1963,,GEODB,83.57,True,Europe,POINT (19.83050 41.67960)


In [5]:
europe_data.shape

(9969, 14)

## Match power plant coordinates to country polygons

In [6]:
# Intersect every power plant point with the country polygons.
res_intersect_europe = gpd.overlay(europe_data, europe, how='intersection')
# For each geometry in "europe_data" (power plant coordinates) this determines
# which geometry of "europe" (country polygon) it intersects.
# -> The result has one row per matched plant: the plant keeps its POINT geometry
#    and gains the columns of the country it lies in (name, iso_a3, pop_est, ...).
#    Columns that exist in both frames get suffixes (continent_1, continent_2).
res_intersect_europe.head()

Unnamed: 0,country code,country,name of powerplant,capacity in MW,latitude,longitude,primary_fuel,start date,owner of plant,geolocation_source,estimated_generation_gwh_2020,green,continent_1,pop_est,continent_2,name,iso_a3,gdp_md_est,geometry
0,ALB,Albania,Bistrica 1,27.0,39.9116,20.1047,Hydro,1965,,GEODB,88.45,True,Europe,3047987,Europe,Albania,ALB,33900.0,POINT (20.10470 39.91160)
1,ALB,Albania,Fierza,500.0,42.2514,20.0431,Hydro,1978,,GEODB,1648.24,True,Europe,3047987,Europe,Albania,ALB,33900.0,POINT (20.04310 42.25140)
2,ALB,Albania,Koman,600.0,42.1033,19.8224,Hydro,1985,,GEODB,1982.72,True,Europe,3047987,Europe,Albania,ALB,33900.0,POINT (19.82240 42.10330)
3,ALB,Albania,Lanabregas,5.0,41.3428,19.8964,Hydro,1951,,GEODB,15.23,True,Europe,3047987,Europe,Albania,ALB,33900.0,POINT (19.89640 41.34280)
4,ALB,Albania,Shkopet,24.0,41.6796,19.8305,Hydro,1963,,GEODB,83.57,True,Europe,3047987,Europe,Albania,ALB,33900.0,POINT (19.83050 41.67960)


In [7]:
res_intersect_europe.shape

(9969, 19)

In [8]:
# Set index to country name so we can directly insert data grouped by country name
europe.set_index("name", inplace=True, drop=False)

In [9]:
def insert_totals(target_df, data_df):
    """Add power plant totals and the share of green plants per region.

    Parameters
    ----------
    target_df : DataFrame
        One row per region, indexed by region name. It is modified in place:
        the columns ``N_powerplants``, ``N_green`` and ``green_ratio`` are
        added and every NaN in the frame is replaced by 0.
    data_df : DataFrame
        One row per power plant with a ``name`` column (the region the plant
        lies in) and a boolean ``green`` column.

    Returns
    -------
    DataFrame
        The same ``target_df`` object, for convenient re-assignment.
    """
    plants_per_region = data_df.groupby("name").size()
    green_per_region = data_df.loc[data_df["green"]].groupby("name").size()

    # Assignment aligns on the index (region name); regions that do not occur
    # in the grouped results come out as NaN.
    target_df["N_powerplants"] = plants_per_region
    target_df["N_green"] = green_per_region
    # Replace NaN (not a number) values with 0.
    target_df.fillna(0, inplace=True)

    # Derive the ratio from the two count columns instead of unstacking on the
    # "green" flag: this also works when no plant at all is green (there would
    # be no `True` column to select) and gives 0 instead of NaN for regions
    # without any plant (0 / 0).
    green_ratio = target_df["N_green"] / target_df["N_powerplants"]
    target_df["green_ratio"] = green_ratio.fillna(0)
    return target_df

In [10]:
europe = insert_totals(europe, res_intersect_europe)

In [11]:
europe.head(1)

Unnamed: 0_level_0,pop_est,continent,name,iso_a3,gdp_md_est,geometry,N_powerplants,N_green,green_ratio
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Russia,142257519,Europe,Russia,RUS,3745000.0,"MULTIPOLYGON (((45.00000 42.60827, 44.53762 42.71199, 43.93121 42.55496, 43.75599 42.74083, 42.3...",229.0,81.0,0.353712


In [12]:
# Keep only the countries located on the European continent.
# .copy() gives us an independent frame: new columns are added to it afterwards,
# which on a filtered slice would trigger pandas' SettingWithCopyWarning.
europe = europe.loc[europe["continent"] == "Europe"].copy()

In [13]:
def insert_fuel_types(target_df, data_df):
    """Add per-fuel-type statistics for every region to ``target_df``.

    For each fuel type found in ``data_df["primary_fuel"]`` four columns are
    added (in order of first appearance of the fuel type):

    * ``N_fuel_<type>``: number of plants of that type in the region
    * ``fuel_ratio_<type>``: share of the region's plants of that type
    * ``fuel_generation_ratio_<type>``: share of the region's estimated
      generation (``estimated_generation_gwh_2020``) produced by that type
    * ``fuel_generation_total_<type>``: estimated generation of that type

    Regions without matching plants (or with zero total generation) get 0.

    Parameters
    ----------
    target_df : DataFrame
        One row per region, indexed by region name; modified in place.
    data_df : DataFrame
        One row per power plant with the columns ``name`` (region),
        ``primary_fuel`` and ``estimated_generation_gwh_2020``.
        It is only read, no helper columns are added to it.

    Returns
    -------
    DataFrame
        The same ``target_df`` object, for convenient re-assignment.
    """
    generation_col = "estimated_generation_gwh_2020"
    # Missing fuel labels cannot form a meaningful column name; skip them.
    fuel_types = data_df["primary_fuel"].dropna().unique()
    if len(fuel_types) == 0:
        return target_df

    per_region = data_df.groupby("name")
    per_region_fuel = data_df.groupby(["name", "primary_fuel"])

    # Region x fuel tables, computed once instead of regrouping per fuel type.
    counts = (
        per_region_fuel.size()
        .unstack(fill_value=0)
        .reindex(columns=fuel_types, fill_value=0)
    )
    generation = (
        per_region_fuel[generation_col].sum()
        .unstack(fill_value=0)
        .reindex(columns=fuel_types, fill_value=0)
    )
    # Totals over all plants of a region (including plants without fuel label).
    total_counts = per_region.size()
    total_generation = per_region[generation_col].sum()

    for pp_type in fuel_types:
        new_columns = {
            f"N_fuel_{pp_type}": counts[pp_type],
            f"fuel_ratio_{pp_type}": counts[pp_type] / total_counts,
            f"fuel_generation_ratio_{pp_type}": generation[pp_type] / total_generation,
            f"fuel_generation_total_{pp_type}": generation[pp_type],
        }
        for column, values in new_columns.items():
            # Align on the region name; regions without plants and 0 / 0
            # shares become 0 rather than NaN.
            target_df[column] = values.reindex(target_df.index).fillna(0)
    return target_df

In [14]:
europe = insert_fuel_types(europe, res_intersect_europe).fillna(0)

In [15]:
europe.shape

(39, 65)

In [16]:
europe.head(5)

Unnamed: 0_level_0,pop_est,continent,name,iso_a3,gdp_md_est,geometry,N_powerplants,N_green,green_ratio,N_fuel_Hydro,fuel_ratio_Hydro,fuel_generation_ratio_Hydro,fuel_generation_total_Hydro,N_fuel_Other,fuel_ratio_Other,fuel_generation_ratio_Other,fuel_generation_total_Other,N_fuel_Gas,fuel_ratio_Gas,fuel_generation_ratio_Gas,fuel_generation_total_Gas,N_fuel_Wind,fuel_ratio_Wind,fuel_generation_ratio_Wind,fuel_generation_total_Wind,N_fuel_Solar,fuel_ratio_Solar,fuel_generation_ratio_Solar,fuel_generation_total_Solar,N_fuel_Waste,fuel_ratio_Waste,fuel_generation_ratio_Waste,fuel_generation_total_Waste,N_fuel_Biomass,fuel_ratio_Biomass,fuel_generation_ratio_Biomass,fuel_generation_total_Biomass,N_fuel_Coal,fuel_ratio_Coal,fuel_generation_ratio_Coal,fuel_generation_total_Coal,N_fuel_Nuclear,fuel_ratio_Nuclear,fuel_generation_ratio_Nuclear,fuel_generation_total_Nuclear,N_fuel_Oil,fuel_ratio_Oil,fuel_generation_ratio_Oil,fuel_generation_total_Oil,N_fuel_Geothermal,fuel_ratio_Geothermal,fuel_generation_ratio_Geothermal,fuel_generation_total_Geothermal,N_fuel_Wave and Tidal,fuel_ratio_Wave and Tidal,fuel_generation_ratio_Wave and Tidal,fuel_generation_total_Wave and Tidal,N_fuel_Storage,fuel_ratio_Storage,fuel_generation_ratio_Storage,fuel_generation_total_Storage,N_fuel_Cogeneration,fuel_ratio_Cogeneration,fuel_generation_ratio_Cogeneration,fuel_generation_total_Cogeneration
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1
Russia,142257519,Europe,Russia,RUS,3745000.0,"MULTIPOLYGON (((45.00000 42.60827, 44.53762 42.71199, 43.93121 42.55496, 43.75599 42.74083, 42.3...",229.0,81.0,0.353712,62.0,0.270742,0.064986,27937.63,1.0,0.004367,0.0,0.0,124.0,0.541485,0.496539,213462.81,1.0,0.004367,2.3e-05,9.89,16.0,0.069869,0.001669,717.35,0.0,0.0,0.0,0.0,2.0,0.008734,0.0,0.0,11.0,0.048035,0.060436,25981.3,7.0,0.030568,0.376087,161680.26,5.0,0.021834,0.00026,111.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Norway,5320045,Europe,Norway,-99,364700.0,"POLYGON ((31.10104 69.55810, 29.39955 69.15692, 28.59193 69.06478, 29.01557 69.76649, 27.73229 7...",288.0,285.0,0.989583,277.0,0.961806,0.97188,114691.64,0.0,0.0,0.0,0.0,3.0,0.010417,0.020032,2363.94,8.0,0.027778,0.008089,954.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
France,67106161,Europe,France,-99,2699000.0,"MULTIPOLYGON (((8.74601 42.62812, 9.39000 43.00998, 9.56002 42.15249, 9.22975 41.38001, 8.77572 ...",2038.0,2004.0,0.983317,412.0,0.202159,0.131952,60827.46,0.0,0.0,0.0,0.0,8.0,0.003925,0.040102,18486.3,702.0,0.344455,0.035496,16362.92,746.0,0.366045,0.016138,7439.21,0.0,0.0,0.0,0.0,143.0,0.070167,0.0,0.0,5.0,0.002453,0.035402,16319.68,16.0,0.007851,0.730729,336853.38,5.0,0.002453,0.010181,4693.46,0.0,0.0,0.0,0.0,1.0,0.000491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sweden,9960487,Europe,Sweden,SWE,498100.0,"POLYGON ((11.02737 58.85615, 11.46827 59.43239, 12.30037 60.11793, 12.63115 61.29357, 11.99206 6...",164.0,157.0,0.957317,144.0,0.878049,0.477205,48293.54,0.0,0.0,0.0,0.0,3.0,0.018293,0.004196,424.64,7.0,0.042683,0.014521,1469.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.036585,0.0,0.0,1.0,0.006098,0.0,0.0,2.0,0.012195,0.503234,50927.73,1.0,0.006098,0.000844,85.38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Belarus,9549747,Europe,Belarus,BLR,165400.0,"POLYGON ((28.17671 56.16913, 29.22951 55.91834, 29.37157 55.67009, 29.89629 55.78946, 30.87391 5...",24.0,7.0,0.291667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.708333,0.989825,33619.53,0.0,0.0,0.0,0.0,7.0,0.291667,0.010175,345.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
europe = europe.drop('name',axis=1)

In [19]:
# Turn the index back into a regular column.
europe = europe.reset_index()
europe = europe.fillna(0)
# Numeric row id: the Streamlit choropleth at the end of this notebook matches
# CSV rows and GeoJSON features on this id instead of on the (non-ASCII) names.
europe['id'] = europe.index # Work around German Umlaute

In [20]:
# Export the aggregated country data for the Streamlit app:
# geometry + attributes as GeoJSON, the attributes additionally as CSV.
# NOTE(review): the GeoJSON spec (RFC 7946) expects UTF-8; latin1 is kept here
# because the app reads both files with encoding='latin1' - confirm that every
# consumer of the files does the same.
europe.to_file('Europa.geojson', driver="GeoJSON",index=False,encoding='latin1')
europe.to_csv('Europa.csv',index=False,encoding='latin1')
# Read the CSV back as a sanity check and look at one country.
europe_df = pd.read_csv('Europa.csv',encoding='latin1')
# Build the mask from the frame that is being filtered (previously it came
# from `europe`, which only worked because both frames share the same index).
europe_df.loc[europe_df['name'] == 'Austria']

Unnamed: 0,index,name,pop_est,continent,iso_a3,gdp_md_est,geometry,N_powerplants,N_green,green_ratio,N_fuel_Hydro,fuel_ratio_Hydro,fuel_generation_ratio_Hydro,fuel_generation_total_Hydro,N_fuel_Other,fuel_ratio_Other,fuel_generation_ratio_Other,fuel_generation_total_Other,N_fuel_Gas,fuel_ratio_Gas,fuel_generation_ratio_Gas,fuel_generation_total_Gas,N_fuel_Wind,fuel_ratio_Wind,fuel_generation_ratio_Wind,fuel_generation_total_Wind,N_fuel_Solar,fuel_ratio_Solar,fuel_generation_ratio_Solar,fuel_generation_total_Solar,N_fuel_Waste,fuel_ratio_Waste,fuel_generation_ratio_Waste,fuel_generation_total_Waste,N_fuel_Biomass,fuel_ratio_Biomass,fuel_generation_ratio_Biomass,fuel_generation_total_Biomass,N_fuel_Coal,fuel_ratio_Coal,fuel_generation_ratio_Coal,fuel_generation_total_Coal,N_fuel_Nuclear,fuel_ratio_Nuclear,fuel_generation_ratio_Nuclear,fuel_generation_total_Nuclear,N_fuel_Oil,fuel_ratio_Oil,fuel_generation_ratio_Oil,fuel_generation_total_Oil,N_fuel_Geothermal,fuel_ratio_Geothermal,fuel_generation_ratio_Geothermal,fuel_generation_total_Geothermal,N_fuel_Wave and Tidal,fuel_ratio_Wave and Tidal,fuel_generation_ratio_Wave and Tidal,fuel_generation_total_Wave and Tidal,N_fuel_Storage,fuel_ratio_Storage,fuel_generation_ratio_Storage,fuel_generation_total_Storage,N_fuel_Cogeneration,fuel_ratio_Cogeneration,fuel_generation_ratio_Cogeneration,fuel_generation_total_Cogeneration,id
7,7,Austria,8754413,Europe,AUT,416600.0,"POLYGON ((16.979666782304037 48.123497015976305, 16.90375410326726 47.71486562762833, 16.3405843...",122.0,119.0,0.97541,102.0,0.836066,0.823896,30380.5,0.0,0.0,0.0,0.0,3.0,0.02459,0.168011,6195.26,4.0,0.032787,0.006486,239.16,13.0,0.106557,0.001608,59.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7


## German Bundeslaender

In [21]:
# Read the polygons of the German Bundesländer (analogous to the europe
# dataset, one polygon per Bundesland), reproject them to EPSG:4326 and call
# the name column "name" ("GEN" in the raw file) like in the remaining data.
bl = (
    gpd.read_file('../data/vg2500_geo84.zip')
    .to_crs("EPSG:4326")
    .rename(columns={"GEN": "name"})
)

In [22]:
bl.shape

(16, 7)

In [23]:
bl.head()

Unnamed: 0,USE,RS,RS_ALT,name,SHAPE_LENG,SHAPE_AREA,geometry
0,2,2,20000000000,Hamburg,175253.8,760539800.0,"MULTIPOLYGON (((10.19114 53.72299, 10.18210 53.71003, 10.18181 53.70962, 10.15879 53.69080, 10.1..."
1,2,3,30000000000,Niedersachsen,2016496.0,47716410000.0,"MULTIPOLYGON (((8.68962 53.87999, 8.69297 53.87830, 8.70068 53.87991, 8.70166 53.88004, 8.70243 ..."
2,2,4,40000000000,Bremen,154971.6,405480900.0,"MULTIPOLYGON (((8.52976 53.21640, 8.55399 53.20863, 8.56548 53.21331, 8.57815 53.21713, 8.59904 ..."
3,2,5,50000000000,Nordrhein-Westfalen,1352108.0,34047270000.0,"POLYGON ((8.66673 52.52149, 8.70289 52.50053, 8.69809 52.48647, 8.70334 52.44430, 8.70447 52.414..."
4,2,6,60000000000,Hessen,1105093.0,21092320000.0,"POLYGON ((9.50560 51.62777, 9.50842 51.62712, 9.51127 51.62670, 9.51398 51.62648, 9.51644 51.626..."


In [24]:
# same as before but now we want to aggregate data for individual Bundesländer
res_intersect_bl = gpd.overlay(europe_data, bl, how='intersection')

In [25]:
res_intersect_bl.sample(5)

Unnamed: 0,country code,country,name of powerplant,capacity in MW,latitude,longitude,primary_fuel,start date,owner of plant,geolocation_source,estimated_generation_gwh_2020,green,continent,USE,RS,RS_ALT,name,SHAPE_LENG,SHAPE_AREA,geometry
18,DEU,Germany,Aidenbach Solar Power Plant,1.7,48.5727,13.0717,Solar,-1,,Industry About,2.15,True,Europe,2,9,90000000000,Bayern,2046039.0,70550070000.0,POINT (13.07170 48.57270)
340,DEU,Germany,Staudinger power station,1132.0,50.0886,8.9534,Coal,1992,E.On Kraftwerke GmbH,Open Power System Data,5565.32,False,Europe,2,6,60000000000,Hessen,1105093.0,21092320000.0,POINT (8.95340 50.08860)
531,DEU,Germany,Ermlitz-Schkopau,8.5,51.3928,12.1719,Solar,-1,,Wiki-Solar,12.33,True,Europe,2,15,150000000000,Sachsen-Anhalt,949096.8,20494980000.0,POINT (12.17190 51.39280)
516,DEU,Germany,Bitterfeld-Wolfen F1,9.6,51.664,12.245,Solar,-1,,Wiki-Solar,14.47,True,Europe,2,15,150000000000,Sachsen-Anhalt,949096.8,20494980000.0,POINT (12.24500 51.66400)
876,DEU,Germany,GuD Schwarzheide,122.0,51.4762,13.8891,Gas,1994,BASF Schwarzheide GmbH,Open Power System Data,472.79,False,Europe,2,12,120000000000,Brandenburg,1295460.0,29653900000.0,POINT (13.88910 51.47620)


In [26]:
# Index the Bundesländer by name so that the per-name aggregates computed from
# res_intersect_bl end up in the matching row when they are inserted.
bl.set_index("name", inplace=True)
# Add N_powerplants, N_green and green_ratio per Bundesland ...
bl = insert_totals(bl, res_intersect_bl)
# ... and the per-fuel-type counts, shares and generation figures.
bl = insert_fuel_types(bl, res_intersect_bl)

In [27]:
# Turn the name index back into a regular column and zero-fill missing values.
bl = bl.reset_index()
bl = bl.fillna(0)
# Numeric row id, used by the Streamlit choropleth to match CSV rows and GeoJSON features.
bl['id'] = bl.index

In [28]:
# Export for the Streamlit app: geometry + attributes as GeoJSON, attributes as CSV.
bl.to_file('Bundeslaender.geojson', driver="GeoJSON",index=False,encoding='latin1')
bl.to_csv('Bundeslaender.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
bl_df = pd.read_csv('Bundeslaender.csv',encoding='latin1')
bl_df.head()

Unnamed: 0,name,USE,RS,RS_ALT,SHAPE_LENG,SHAPE_AREA,geometry,N_powerplants,N_green,green_ratio,N_fuel_Hydro,fuel_ratio_Hydro,fuel_generation_ratio_Hydro,fuel_generation_total_Hydro,N_fuel_Solar,fuel_ratio_Solar,fuel_generation_ratio_Solar,fuel_generation_total_Solar,N_fuel_Waste,fuel_ratio_Waste,fuel_generation_ratio_Waste,fuel_generation_total_Waste,N_fuel_Biomass,fuel_ratio_Biomass,fuel_generation_ratio_Biomass,fuel_generation_total_Biomass,N_fuel_Oil,fuel_ratio_Oil,fuel_generation_ratio_Oil,fuel_generation_total_Oil,N_fuel_Gas,fuel_ratio_Gas,fuel_generation_ratio_Gas,fuel_generation_total_Gas,N_fuel_Coal,fuel_ratio_Coal,fuel_generation_ratio_Coal,fuel_generation_total_Coal,N_fuel_Nuclear,fuel_ratio_Nuclear,fuel_generation_ratio_Nuclear,fuel_generation_total_Nuclear,N_fuel_Other,fuel_ratio_Other,fuel_generation_ratio_Other,fuel_generation_total_Other,N_fuel_Wind,fuel_ratio_Wind,fuel_generation_ratio_Wind,fuel_generation_total_Wind,id
0,Hamburg,2,2,20000000000,175253.8,760539800.0,"MULTIPOLYGON (((10.19113690277905 53.72299476403796, 10.182096824348678 53.71002525433905, 10.18...",7,1,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.142857,0.005312,50.5,1.0,0.142857,0.0,0.0,1.0,0.142857,0.005967,56.73,2.0,0.285714,0.060943,579.36,2.0,0.285714,0.927777,8819.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,Niedersachsen,2,3,30000000000,2016496.0,47716410000.0,"MULTIPOLYGON (((8.689616610395177 53.879992720322285, 8.69297200462773 53.87829828511829, 8.7006...",45,16,0.355556,2.0,0.044444,0.010826,434.16,11.0,0.244444,0.005848,234.54,3,0.066667,0.004801,192.53,3.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.4,0.30561,12255.88,5.0,0.111111,0.185606,7443.36,2.0,0.044444,0.463524,18588.72,1.0,0.022222,0.023785,953.84,0.0,0.0,0.0,0.0,1
2,Bremen,2,4,40000000000,154971.6,405480900.0,"MULTIPOLYGON (((8.52975665105563 53.216396506038286, 8.553992132614463 53.20862869551077, 8.5654...",8,1,0.125,1.0,0.125,0.008508,41.98,0.0,0.0,0.0,0.0,3,0.375,0.038806,191.47,0.0,0.0,0.0,0.0,1.0,0.125,0.05991,295.6,0.0,0.0,0.0,0.0,3.0,0.375,0.892777,4405.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,Nordrhein-Westfalen,2,5,50000000000,1352108.0,34047270000.0,"POLYGON ((8.666728266554577 52.52148958107465, 8.702887415877003 52.500526202144705, 8.698086755...",131,24,0.183206,5.0,0.038168,0.005946,834.4,10.0,0.076336,0.000515,72.33,15,0.114504,0.006808,955.27,9.0,0.068702,0.0,0.0,3.0,0.022901,0.002195,307.98,48.0,0.366412,0.24642,34577.24,37.0,0.282443,0.734128,103011.46,0.0,0.0,0.0,0.0,4.0,0.030534,0.003987,559.39,0.0,0.0,0.0,0.0,3
4,Hessen,2,6,60000000000,1105093.0,21092320000.0,"POLYGON ((9.505598958397917 51.62776574465611, 9.508417349910042 51.62712367038471, 9.5112743010...",42,19,0.452381,3.0,0.071429,0.092404,1196.78,13.0,0.309524,0.005394,69.86,4,0.095238,0.017562,227.46,3.0,0.071429,0.0,0.0,1.0,0.02381,0.002858,37.02,12.0,0.285714,0.291229,3771.88,5.0,0.119048,0.585524,7583.47,0.0,0.0,0.0,0.0,1.0,0.02381,0.005029,65.13,0.0,0.0,0.0,0.0,4


## German Landkreise
Let's go one administrative level below the Bundesländer: the Landkreise (districts).

In [29]:
# Read the Landkreis polygons, reproject them to EPSG:4326 and call the
# name column "name" ("GEN" in the raw file) like in the remaining data.
kreise = (
    gpd.read_file('../data/vg2500_krs.zip')
    .to_crs("EPSG:4326")
    .rename(columns={"GEN": "name"})
)
kreise.sample(5)

Unnamed: 0,USE,RS,RS_ALT,name,SHAPE_LENG,SHAPE_AREA,geometry
302,4,9677,96770000000,Main-Spessart,207409.050559,1332158000.0,"POLYGON ((9.51704 50.23365, 9.52887 50.23429, 9.53142 50.23437, 9.57531 50.23297, 9.58031 50.227..."
100,4,5758,57580000000,Herford,120295.693273,458716500.0,"POLYGON ((8.92837 52.18448, 8.92744 52.18404, 8.92634 52.18352, 8.90739 52.16981, 8.90608 52.168..."
109,4,5916,59160000000,Herne,32171.8203,55460460.0,"POLYGON ((7.29360 51.53274, 7.28719 51.52690, 7.28260 51.52505, 7.25617 51.52009, 7.22772 51.517..."
207,4,8317,83170000000,Ortenaukreis,213214.170483,1858002000.0,"POLYGON ((7.99201 48.70120, 7.97927 48.70050, 7.97462 48.69470, 7.97314 48.69213, 7.98067 48.686..."
135,4,6535,65350000000,Vogelsbergkreis,222969.562742,1447326000.0,"POLYGON ((9.44106 50.79548, 9.44347 50.78469, 9.44294 50.78052, 9.43562 50.77696, 9.43415 50.770..."


Repeat the steps from above for the Landkreise.

In [30]:
res_intersect_kreise = gpd.overlay(europe_data, kreise, how='intersection')

In [31]:
# Index the Landkreise by name so that the per-name aggregates computed from
# res_intersect_kreise end up in the matching row when they are inserted.
kreise.set_index("name", inplace=True)
# Add N_powerplants, N_green and green_ratio per Landkreis ...
kreise = insert_totals(kreise, res_intersect_kreise)
# ... and the per-fuel-type counts, shares and generation figures.
kreise = insert_fuel_types(kreise, res_intersect_kreise)

In [32]:
# Turn the name index back into a regular column and zero-fill missing values.
kreise = kreise.reset_index()
kreise = kreise.fillna(0)
# Numeric row id, used by the Streamlit choropleth to match CSV rows and GeoJSON features.
kreise['id'] = kreise.index

In [33]:
# Export for the Streamlit app: geometry + attributes as GeoJSON, attributes as CSV.
kreise.to_file('Landkreise.geojson', driver="GeoJSON",index=False,encoding='latin1')
kreise.to_csv('Landkreise.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
kreise_df = pd.read_csv('Landkreise.csv',encoding='latin1')
kreise_df.head()

Unnamed: 0,name,USE,RS,RS_ALT,SHAPE_LENG,SHAPE_AREA,geometry,N_powerplants,N_green,green_ratio,N_fuel_Hydro,fuel_ratio_Hydro,fuel_generation_ratio_Hydro,fuel_generation_total_Hydro,N_fuel_Solar,fuel_ratio_Solar,fuel_generation_ratio_Solar,fuel_generation_total_Solar,N_fuel_Gas,fuel_ratio_Gas,fuel_generation_ratio_Gas,fuel_generation_total_Gas,N_fuel_Biomass,fuel_ratio_Biomass,fuel_generation_ratio_Biomass,fuel_generation_total_Biomass,N_fuel_Coal,fuel_ratio_Coal,fuel_generation_ratio_Coal,fuel_generation_total_Coal,N_fuel_Other,fuel_ratio_Other,fuel_generation_ratio_Other,fuel_generation_total_Other,N_fuel_Waste,fuel_ratio_Waste,fuel_generation_ratio_Waste,fuel_generation_total_Waste,N_fuel_Wind,fuel_ratio_Wind,fuel_generation_ratio_Wind,fuel_generation_total_Wind,N_fuel_Oil,fuel_ratio_Oil,fuel_generation_ratio_Oil,fuel_generation_total_Oil,N_fuel_Nuclear,fuel_ratio_Nuclear,fuel_generation_ratio_Nuclear,fuel_generation_total_Nuclear,id
0,Flensburg,4,1001,10010000000,32467.034276,53248990.0,"POLYGON ((9.489823260151168 54.82296231149822, 9.504673652057674 54.81004002875278, 9.5049951708...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,958.69,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,Kiel,4,1002,10020000000,73027.302434,121865400.0,"POLYGON ((10.190574858682838 54.38909396612953, 10.183698316104081 54.38910380529061, 10.1781475...",2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.070329,120.13,0.0,0.0,0.0,0.0,1.0,0.5,0.929671,1587.98,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,Lübeck,4,1003,10030000000,104861.09422,221138600.0,"POLYGON ((10.892971221748953 53.956954865124764, 10.903374439173431 53.957047375648436, 10.90859...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,Neumünster,4,1004,10040000000,39858.818376,80923630.0,"POLYGON ((10.034541478286 54.129152216881, 10.022771700029574 54.10937690486711, 10.022703338048...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
4,Dithmarschen,4,1051,10510000000,202206.675707,1450311000.0,"MULTIPOLYGON (((9.051759426136824 54.36640406899298, 9.071925934905774 54.36253959382538, 9.0864...",3.0,3.0,1.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [34]:
kreise_df.shape

(402, 51)

In [35]:
kreise.shape

(402, 51)

In [36]:
kreise_df.isnull().sum().sum()

0

In [37]:
kreise.isnull().sum().sum()

0

In [38]:
kreise.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [39]:
# World: country polygons from the Natural Earth sample dataset bundled with geopandas.
# NOTE(review): geopandas.datasets was deprecated in geopandas 0.14 and removed in 1.0;
# with a newer geopandas the Natural Earth file has to be downloaded and read from disk.
# NOTE(review): unlike the other polygon layers, `world` is not passed through
# .to_crs("EPSG:4326") - presumably the dataset already uses that CRS; confirm.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Power plant points for the whole world, reprojected to EPSG:4326.
world_data = pd.read_pickle("../data/gdf_world.p").to_crs("EPSG:4326")
# Attach the attributes of the containing country to every power plant.
res_intersect_world = gpd.overlay(world_data, world, how='intersection')

In [40]:
# Index the countries by name so that the per-name aggregates computed from
# res_intersect_world end up in the matching row when they are inserted.
world.set_index("name", inplace=True)
# Add N_powerplants, N_green and green_ratio per country ...
world = insert_totals(world, res_intersect_world)
# ... and the per-fuel-type counts, shares and generation figures.
world = insert_fuel_types(world, res_intersect_world)

In [41]:
# Turn the name index back into a regular column and zero-fill missing values.
world = world.reset_index()
world = world.fillna(0)
# Numeric row id, used by the Streamlit choropleth to match CSV rows and GeoJSON features.
world['id'] = world.index

In [42]:
# Export for the Streamlit app: geometry + attributes as GeoJSON, attributes as CSV.
world.to_file('Welt.geojson', driver="GeoJSON",index=False,encoding='latin1')
world.to_csv('Welt.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
world_df = pd.read_csv('Welt.csv',encoding='latin1')
world_df.head()

Unnamed: 0,name,pop_est,continent,iso_a3,gdp_md_est,geometry,N_powerplants,N_green,green_ratio,N_fuel_Hydro,fuel_ratio_Hydro,fuel_generation_ratio_Hydro,fuel_generation_total_Hydro,N_fuel_Solar,fuel_ratio_Solar,fuel_generation_ratio_Solar,fuel_generation_total_Solar,N_fuel_Gas,fuel_ratio_Gas,fuel_generation_ratio_Gas,fuel_generation_total_Gas,N_fuel_Other,fuel_ratio_Other,fuel_generation_ratio_Other,fuel_generation_total_Other,N_fuel_Oil,fuel_ratio_Oil,fuel_generation_ratio_Oil,fuel_generation_total_Oil,N_fuel_Nuclear,fuel_ratio_Nuclear,fuel_generation_ratio_Nuclear,fuel_generation_total_Nuclear,N_fuel_Coal,fuel_ratio_Coal,fuel_generation_ratio_Coal,fuel_generation_total_Coal,N_fuel_Wind,fuel_ratio_Wind,fuel_generation_ratio_Wind,fuel_generation_total_Wind,N_fuel_Biomass,fuel_ratio_Biomass,fuel_generation_ratio_Biomass,fuel_generation_total_Biomass,N_fuel_Waste,fuel_ratio_Waste,fuel_generation_ratio_Waste,fuel_generation_total_Waste,N_fuel_Wave and Tidal,fuel_ratio_Wave and Tidal,fuel_generation_ratio_Wave and Tidal,fuel_generation_total_Wave and Tidal,N_fuel_Cogeneration,fuel_ratio_Cogeneration,fuel_generation_ratio_Cogeneration,fuel_generation_total_Cogeneration,N_fuel_Storage,fuel_ratio_Storage,fuel_generation_ratio_Storage,fuel_generation_total_Storage,N_fuel_Geothermal,fuel_ratio_Geothermal,fuel_generation_ratio_Geothermal,fuel_generation_total_Geothermal,N_fuel_Petcoke,fuel_ratio_Petcoke,fuel_generation_ratio_Petcoke,fuel_generation_total_Petcoke,id
0,Fiji,889953.0,Oceania,FJI,5496,"MULTIPOLYGON (((180 -16.067132663642447, 180 -16.555216566639196, 179.36414266196414 -16.8013540...",10.0,8.0,0.8,4.0,0.4,0.950302,468.86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1,0.049698,24.52,3.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,Tanzania,58005463.0,Africa,TZA,63177,"POLYGON ((33.90371119710453 -0.9500000000000001, 34.07261999999997 -1.0598199999999451, 37.69868...",10.0,7.0,0.7,6.0,0.6,0.560748,2271.39,0.0,0.0,0.0,0.0,3.0,0.3,0.40816,1653.31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.1,0.031091,125.94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,W. Sahara,603253.0,Africa,ESH,907,"POLYGON ((-8.665589565454809 27.656425889592356, -8.665124477564191 27.589479071558227, -8.68439...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,Canada,37589262.0,North America,CAN,1736425,"MULTIPOLYGON (((-122.84000000000003 49.000000000000114, -122.97421000000001 49.00253777777778, -...",1116.0,1013.0,0.907706,550.0,0.492832,0.58349,370737.2,141.0,0.126344,0.003954,2512.28,73.0,0.065412,0.105082,66767.0,3.0,0.002688,0.000903,573.89,6.0,0.005376,0.002746,1744.76,5.0,0.00448,0.148998,94670.18,15.0,0.013441,0.114284,72613.9,213.0,0.19086,0.040528,25750.64,108.0,0.096774,0.0,0.0,1.0,0.000896,1.4e-05,9.06,1.0,0.000896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
4,United States of America,328239523.0,North America,USA,21433226,"MULTIPOLYGON (((-122.84000000000003 49.000000000000114, -120 49.000000000000114, -117.03121 49, ...",9654.0,6028.0,0.624404,1421.0,0.147193,0.062877,284357.75,3263.0,0.337995,0.016162,73091.77,1789.0,0.185312,0.420094,1899867.03,15.0,0.001554,0.0024,10854.44,804.0,0.083282,0.003969,17951.32,54.0,0.005594,0.168221,760774.22,282.0,0.029211,0.250845,1134439.83,1134.0,0.117464,0.070406,318409.74,145.0,0.01502,0.0,0.0,537.0,0.055625,0.000379,1712.14,0.0,0.0,0.0,0.0,34.0,0.003522,0.0,0.0,100.0,0.010358,0.0,0.0,65.0,0.006733,0.004648,21022.26,11.0,0.001139,0.0,0.0,4


## Challenge

## Streamlit App

First we'll have to install the streamlit, folium, and streamlit-folium libraries:

```bash
conda install -c conda-forge folium

conda install -c conda-forge streamlit

conda install -c conda-forge streamlit-folium

```

Next we will create the streamlit app which displays our data in an interactive map:

Let's open a Python file in JupyterLab!

![Alt-Text](pythonfile.png)

We import the necessary libraries and create a title for the map...
```python
import streamlit as st
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import folium
from streamlit_folium import st_folium

APP_TITLE = 'A simple interactive Map'


st.set_page_config(APP_TITLE)
st.title(APP_TITLE)

```

We use Streamlit's selectbox widget to let the user choose which region dataset to display.

```python
choice = ['Welt','Europa','Bundeslaender','Landkreise']
choice_selected = st.selectbox('Gebiet auswählen:', choice)

```

We create the initial folium map and add a popup that shows the latitude and longitude of a clicked location:

```python
m = folium.Map()
m.add_child(folium.LatLngPopup())

```

Next we load the prepared Powerplants data for the selected region (Landkreise, Bundeslaender, Europe or the world) and add it to the folium map.

```python
data = gpd.read_file(f'{choice_selected}.geojson',encoding='latin1')
data_df = pd.read_csv(f'{choice_selected}.csv',encoding='latin1')
folium.GeoJson(data, name="geojson").add_to(m)

```

Next we add a choropleth map to the folium map

```python
# Colour every region by its share of green power plants.
# CSV rows and GeoJSON features are matched on the numeric 'id' column/property.
choro = folium.Choropleth(geo_data = f'{choice_selected}.geojson',
                              data=data_df,columns=('id','green_ratio'),
                              key_on='feature.properties.id',
                              fill_opacity=0.5,
                              line_opacity=0.5,
                              # border width: the Choropleth parameter is `line_weight`
                              # (`linewidth` is not a Choropleth option)
                              line_weight=1.1,
                              fill_color='YlOrRd',
                              highlight=True).add_to(m)

```

Lastly we add a tooltip to the choropleth map and plot the map.

```python
choro.geojson.add_child(folium.features.GeoJsonTooltip(['name'],labels=False))
    
data2 = st_folium(m, width = 700, height = 450)
```

We can start the app by running

```bash
streamlit run streamlit_app_name.py

```