# Data Preparation for Streamlit Powerplants Implementation
Our Powerplants data describes green and non-green energy sources at individual coordinates.

In this session we want to create a nice-looking web page that lets us easily compare statistics between individual countries.

In order to do this we have to aggregate our individual coordinate-based data to per-country data.

## Imports

In [None]:
import geopandas as gpd
import pandas as pd
import folium
# Adjust pandas' display limits (rows, columns, line width, cell width) for the
# frames shown in this notebook.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
pd.options.display.width = None
pd.options.display.max_colwidth = 100

## Load Raw Data

In [None]:
# Load the country data and convert it to the EPSG:4326 coordinate reference system.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# that come from a trusted source.
europe = pd.read_pickle("../data/europe.p").to_crs("EPSG:4326")
# The file contains all countries intersecting our polygon from the WW2
# analysis, i.e. not only European countries per se.
# NOTE(review): only the last expression of a notebook cell is displayed, so
# the result of this sample() call is presumably never shown — confirm whether
# it is still needed.
europe.sample(5)
# Columns of interest for us: name and geometry.
europe.head()

In [None]:
europe.shape

In [None]:
# Load the Powerplants data that is aggregated per region further down and
# convert it to the EPSG:4326 coordinate reference system.
# Its geometry column contains coordinate POINTS.
europe_data = pd.read_pickle("../data/gdf_europe.p").to_crs("EPSG:4326")
europe_data.head()

In [None]:
europe_data.shape

## Replace coordinates with country polygons

In [None]:
# Intersect every geometry in "europe_data" (power plant coordinates) with the
# geometries in "europe" (countries). Rows that intersect are merged, so each
# result row carries the columns of both frames; the aggregation below relies
# on the country "name" column being part of the result.
# NOTE(review): the geometry of an intersection overlay is the intersection
# itself, so for point inputs the rows presumably keep the power plant point
# rather than the country polygon — confirm with the output below.
res_intersect_europe = gpd.overlay(europe_data, europe, how='intersection')
res_intersect_europe.head()

In [None]:
res_intersect_europe.shape

In [None]:
# Index the countries by name (keeping "name" as a regular column as well) so
# that data grouped by country name aligns with the rows when it is inserted.
europe = europe.set_index("name", drop=False)

In [None]:
def insert_totals(target_df, data_df):
    """Add per-region power plant totals to ``target_df``.

    Three columns are written to ``target_df`` in place, aligned on its index
    (the index labels must match the values of ``data_df["name"]``):

    * ``N_powerplants`` - number of rows of ``data_df`` in the region
    * ``N_green``       - number of those rows whose ``green`` flag is True
    * ``green_ratio``   - ``N_green / N_powerplants``

    Regions without any row in ``data_df`` get 0 in all three columns. Only
    these three columns are touched; missing values in other columns of
    ``target_df`` are left as they are.

    Parameters:
        target_df: frame indexed by region name; receives the new columns.
        data_df: frame with a ``name`` column and a boolean ``green`` column.

    Returns:
        The same ``target_df`` object, so the call can be re-assigned.
    """
    region_index = target_df.index

    # Count per region and align to the target rows right away, so regions
    # without power plants become 0 instead of NaN.
    n_total = data_df.groupby("name").size().reindex(region_index, fill_value=0)
    n_green = (
        data_df.loc[data_df["green"]]
        .groupby("name")
        .size()
        .reindex(region_index, fill_value=0)
    )

    target_df["N_powerplants"] = n_total
    target_df["N_green"] = n_green
    # 0 / 0 is undefined: mask regions without plants before dividing and
    # report a ratio of 0 for them.
    target_df["green_ratio"] = (n_green / n_total.where(n_total > 0)).fillna(0)
    return target_df

In [None]:
europe = insert_totals(europe, res_intersect_europe)

In [None]:
europe.head(1)

In [None]:
# Keep only the European countries. The explicit copy makes the result an
# independent frame, so the columns that are added to it afterwards do not
# trigger pandas' SettingWithCopyWarning.
europe = europe.loc[europe["continent"] == "Europe"].copy()

In [None]:
def insert_fuel_types(target_df, data_df):
    """Add per-region statistics for every primary fuel type to ``target_df``.

    For each distinct, non-missing value ``<fuel>`` of
    ``data_df["primary_fuel"]`` four columns are written to ``target_df`` in
    place, aligned on its index by region name:

    * ``N_fuel_<fuel>``                - number of plants using that fuel
      (NaN where the region has none)
    * ``fuel_ratio_<fuel>``            - share of the region's plants using
      that fuel
    * ``fuel_generation_ratio_<fuel>`` - share of the region's summed
      ``estimated_generation_gwh_2020`` that comes from that fuel
    * ``fuel_generation_total_<fuel>`` - summed
      ``estimated_generation_gwh_2020`` for that fuel

    Regions without any row in ``data_df`` receive NaN in these columns, so
    callers that need zeros have to fill them afterwards. ``data_df`` itself
    is not modified.

    Parameters:
        target_df: frame indexed by region name; receives the new columns.
        data_df: frame with the columns ``name``, ``primary_fuel`` and
            ``estimated_generation_gwh_2020``.

    Returns:
        The same ``target_df`` object, so the call can be re-assigned.
    """
    generation_col = "estimated_generation_gwh_2020"

    # Per-region denominators are the same for every fuel type: compute once.
    by_region = data_df.groupby("name")
    plants_per_region = by_region.size()
    generation_per_region = by_region[generation_col].sum()
    regions = plants_per_region.index

    # A missing fuel type never compares equal to anything, so it cannot form
    # a group of its own; skip it instead of failing on it.
    for pp_type in data_df["primary_fuel"].dropna().unique():
        of_type = data_df.loc[data_df["primary_fuel"] == pp_type].groupby("name")
        n_of_type = of_type.size()
        # Regions that have plants, but none of this type, generate 0 with it.
        generation_of_type = of_type[generation_col].sum().reindex(regions, fill_value=0)

        target_df[f"N_fuel_{pp_type}"] = n_of_type
        target_df[f"fuel_ratio_{pp_type}"] = (
            n_of_type.reindex(regions, fill_value=0) / plants_per_region
        )
        target_df[f"fuel_generation_ratio_{pp_type}"] = (
            generation_of_type / generation_per_region
        )
        target_df[f"fuel_generation_total_{pp_type}"] = generation_of_type
    return target_df

In [None]:
europe = insert_fuel_types(europe, res_intersect_europe).fillna(0)

In [None]:
europe.shape

In [None]:
europe.head(5)

In [None]:
europe = europe.drop('name',axis=1)

In [None]:
# Turn the name index back into a column, zero-fill whatever is still missing
# and add the row position as a numeric "id" (workaround for German umlauts in
# the names).
europe = (
    europe.reset_index()
    .fillna(0)
    .assign(id=lambda frame: frame.index)
)

In [None]:
# Export the per-country result as GeoJSON and as CSV. Both files are written
# as latin1 because the Streamlit snippets at the end of this notebook read
# them with encoding='latin1'.
# NOTE(review): JSON exchanged between systems is normally expected to be
# UTF-8 (RFC 8259); if the encoding is ever changed, writer and readers have to
# be changed together.
europe.to_file('Europa.geojson', driver="GeoJSON",index=False,encoding='latin1')
europe.to_csv('Europa.csv',index=False,encoding='latin1')
# Read the CSV back and look at one country to check the export.
europe_df = pd.read_csv('Europa.csv',encoding='latin1')
# Build the mask from the re-read frame itself instead of from `europe`, so the
# check does not depend on both frames sharing the same index.
europe_df.loc[europe_df['name'] == 'Austria']

## German Bundesländer

In [None]:
# Load the German Bundesländer. Like the europe dataset, the file contains
# polygons mapping the individual Bundesländer. The "GEN" column holds the
# name and is renamed to "name" so it matches the remaining data.
bl = (
    gpd.read_file('../data/vg2500_geo84.zip')
    .to_crs("EPSG:4326")
    .rename(columns={"GEN": "name"})
)

In [None]:
bl.shape

In [None]:
bl.head()

In [None]:
# Same as before, but the power plant coordinates are now intersected with the
# Bundesländer polygons so the data can be aggregated per Bundesland.
res_intersect_bl = gpd.overlay(europe_data, bl, how='intersection')

In [None]:
res_intersect_bl.sample(5)

In [None]:
# Index the Bundesländer by name, then add the totals followed by the
# per-fuel-type statistics.
bl = bl.set_index("name")
bl = insert_fuel_types(insert_totals(bl, res_intersect_bl), res_intersect_bl)

In [None]:
# Turn the name index back into a column, zero-fill whatever is still missing
# and add the row position as a numeric "id".
bl = (
    bl.reset_index()
    .fillna(0)
    .assign(id=lambda frame: frame.index)
)

In [None]:
# Export the Bundesländer result as GeoJSON and as CSV (latin1, the encoding
# the Streamlit snippets at the end of this notebook use to read the files).
bl.to_file('Bundeslaender.geojson', driver="GeoJSON",index=False,encoding='latin1')
bl.to_csv('Bundeslaender.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
bl_df = pd.read_csv('Bundeslaender.csv',encoding='latin1')
bl_df.head()

## German Landkreise
Let's go one level below the Bundesländer.

In [None]:
# Load the Landkreise geometries, convert them to EPSG:4326 and rename the
# "GEN" column to "name" so it matches the remaining data.
kreise = (
    gpd.read_file('../data/vg2500_krs.zip')
    .to_crs("EPSG:4326")
    .rename(columns={"GEN": "name"})
)
kreise.sample(5)

Repeat the steps:

In [None]:
# Intersect the power plant coordinates with the Landkreise geometries so the
# data can be aggregated per Landkreis.
res_intersect_kreise = gpd.overlay(europe_data, kreise, how='intersection')

In [None]:
# Index the Landkreise by name, then add the totals followed by the
# per-fuel-type statistics.
kreise = kreise.set_index("name")
kreise = insert_fuel_types(
    insert_totals(kreise, res_intersect_kreise),
    res_intersect_kreise,
)

In [None]:
# Turn the name index back into a column, zero-fill whatever is still missing
# and add the row position as a numeric "id".
kreise = (
    kreise.reset_index()
    .fillna(0)
    .assign(id=lambda frame: frame.index)
)

In [None]:
# Export the Landkreise result as GeoJSON and as CSV (latin1, the encoding the
# Streamlit snippets at the end of this notebook use to read the files).
kreise.to_file('Landkreise.geojson', driver="GeoJSON",index=False,encoding='latin1')
kreise.to_csv('Landkreise.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
kreise_df = pd.read_csv('Landkreise.csv',encoding='latin1')
kreise_df.head()

In [None]:
kreise_df.shape

In [None]:
kreise.shape

In [None]:
kreise_df.isnull().sum().sum()

In [None]:
kreise.isnull().sum().sum()

In [None]:
kreise.crs

In [None]:
# World-wide variant: country geometries from the "naturalearth_lowres" dataset
# that GeoPandas locates via gpd.datasets.
# NOTE(review): gpd.datasets was deprecated in GeoPandas 0.14 and removed in
# 1.0, so this line fails on current versions; there the Natural Earth file has
# to be downloaded and read from disk instead.
# NOTE(review): unlike the other geometry layers, `world` is not passed through
# to_crs — presumably it already is in EPSG:4326; confirm.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# World-wide Powerplants data, converted to EPSG:4326.
world_data = pd.read_pickle("../data/gdf_world.p").to_crs("EPSG:4326")
# Intersect the power plant coordinates with the country geometries.
res_intersect_world = gpd.overlay(world_data, world, how='intersection')

In [None]:
# Index the countries by name, then add the totals followed by the
# per-fuel-type statistics.
world = world.set_index("name")
world = insert_fuel_types(
    insert_totals(world, res_intersect_world),
    res_intersect_world,
)

In [None]:
# Turn the name index back into a column, zero-fill whatever is still missing
# and add the row position as a numeric "id".
world = (
    world.reset_index()
    .fillna(0)
    .assign(id=lambda frame: frame.index)
)

In [None]:
# Export the world result as GeoJSON and as CSV (latin1, the encoding the
# Streamlit snippets at the end of this notebook use to read the files).
world.to_file('Welt.geojson', driver="GeoJSON",index=False,encoding='latin1')
world.to_csv('Welt.csv',index=False,encoding='latin1')
# Read the CSV back to check the export.
world_df = pd.read_csv('Welt.csv',encoding='latin1')
world_df.head()

## Challenge

## Streamlit App

First we'll have to install the streamlit, folium, and streamlit-folium libraries:

```bash
conda install -c conda-forge folium

conda install -c conda-forge streamlit

conda install -c conda-forge streamlit-folium

```

Next we will create the streamlit app which displays our data in an interactive map:

Let's open a Python file in JupyterLab!

![Alt-Text](pythonfile.png)

We import the necessary libraries and create a title for the map...
```python
import streamlit as st
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import folium
from streamlit_folium import st_folium

APP_TITLE = 'A simple interactive Map'


st.set_page_config(APP_TITLE)
st.title(APP_TITLE)

```

We use streamlit's selectbox feature to let the user choose which area's data to display.

```python
choice = ['Welt','Europa','Bundeslaender','Landkreise']
choice_selected = st.selectbox('Gebiet auswählen:', choice)

```

We create the initial folium map and add a latitude/longitude popup:

```python
m = folium.Map()
m.add_child(folium.LatLngPopup())

```

Next we load the Powerplants data for the selected area (Landkreise, Bundeslaender, Europe or the world) and add it to the folium map.

```python
data = gpd.read_file(f'{choice_selected}.geojson',encoding='latin1')
data_df = pd.read_csv(f'{choice_selected}.csv',encoding='latin1')
folium.GeoJson(data, name="geojson").add_to(m)

```

Next we add a choropleth map to the folium map

```python
choro = folium.Choropleth(geo_data = f'{choice_selected}.geojson',
                              data=data_df,columns=('id','green_ratio'),
                              key_on='feature.properties.id',
                              fill_opacity=0.5,
                              line_opacity=0.5,
                              linewidth=1.1,
                              fill_color='YlOrRd',
                              highlight=True).add_to(m)

```

Lastly we add a tooltip to the choropleth map and plot the map.

```python
choro.geojson.add_child(folium.features.GeoJsonTooltip(['name'],labels=False))
    
data2 = st_folium(m, width = 700, height = 450)
```

We can start the app by running

```bash
streamlit run streamlit_app_name.py

```