<a href="https://colab.research.google.com/github/akvo/usaid-wssh-tool-3/blob/develop/scripts/data-processing-collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
#@title Install Dependencies
%%capture
!pip install ipyleaflet==0.17.2

In [28]:
# @title Prologue
# 1. Import Dependencies
import geopandas as gpd
import pandas as pd
import requests
import json
from io import StringIO
import os
from sklearn.preprocessing import MinMaxScaler
import ipywidgets as widgets
import matplotlib.pyplot as plt
import folium
from IPython.display import display, clear_output, HTML
from folium import Choropleth, FeatureGroup, LayerControl, Map, GeoJson
from folium.plugins import Draw, FloatImage
from branca.colormap import linear
from branca.element import Figure
import ipyleaflet.leaflet as Lf

# 2. Clone Repository
if os.path.exists("usaid-wssh-tool-3"):
  !rm -rf usaid-wssh-tool-3
!git clone https://github.com/akvo/usaid-wssh-tool-3.git

geojson_path = "usaid-wssh-tool-3/data/output"
file_list = [f for f in os.listdir(geojson_path) if os.path.isfile(os.path.join(geojson_path, f))]
file_list = list(filter(lambda x: x.endswith(".geojson"), file_list))
country_list = list(map(lambda x: x.replace(".geojson", ""), file_list))
country_list.sort()
country_list = [c.capitalize() for c in country_list]

# 3. Column Definitions
# Indicator column name -> human-readable label shown in the selection widgets.
column_definitions = dict([
    ('drr', 'Drought Risk'),
    ('rfr', 'Riverine Flood Risk'),
    ('bws', 'Base Water Stress'),
    ('Open_defecation_estimates_mean', 'Open Defecation'),
    ('No_Improved_water_premise_estimates_mean', 'No Improve Water Premises'),
    ('No_basic_water_estimates_mean', 'No Basic Water'),
    ('no_basic_sanitation_estimates_mean', 'No Basic Sanitation'),
])
# Indicator column names, in the same order as the definitions above.
column_list = [*column_definitions]
# Weight applied to an indicator according to its core/secondary classification.
weight_mapping = dict(core=0.7, secondary=0.3)

# 4. override get data

def _get_data(self):
    """
    Get the data for the choropleth.

    Returns a styled copy of ``self.geo_data``: every feature gets a
    ``properties['style']`` entry produced by ``self.style_callback(feature,
    colormap, value)``, where ``value`` is looked up in ``self.choro_data``
    using the feature property named by the second dot-separated component of
    ``self.key_on`` (e.g. ``'properties.ADM2_EN'`` -> ``'ADM2_EN'``).

    ``self.geo_data`` itself is never modified. It may be a GeoJSON-like dict
    or a GeoJSON string; empty/missing geo data yields ``{}``.

    Raises:
        IndexError: if ``self.key_on`` contains no ``'.'`` (and there is at
            least one feature).
        KeyError: if a feature's key value is missing from ``self.choro_data``.
    """
    import copy  # local import so this block does not depend on the file's import cell

    colormap = self.colormap if self.colormap else linear.YlGnBu_09
    data = self.geo_data
    if not data:
        # Nothing to style yet (e.g. geo_data not assigned).
        return {}
    if isinstance(data, str):
        # A GeoJSON string has to be parsed; a dumps/loads round-trip would
        # just hand the same string back.
        data = json.loads(data)
    else:
        # Deep copy: a shallow dict copy shares the nested feature dicts, so
        # writing the style below would leak into self.geo_data.
        data = copy.deepcopy(data)
    for feature in data['features']:
        property_name = self.key_on.split('.')[1]
        value = self.choro_data[feature['properties'][property_name]]
        feature['properties']['style'] = self.style_callback(feature, colormap, value)
    return data

# Monkey-patch ipyleaflet's Choropleth so it uses the module-level _get_data function.
Lf.Choropleth._get_data = _get_data

# Shared MinMaxScaler instance; it is re-fitted each time fit_transform is called on it.
scaler = MinMaxScaler()

Cloning into 'usaid-wssh-tool-3'...
remote: Enumerating objects: 94, done.[K
remote: Counting objects: 100% (94/94), done.[K
remote: Compressing objects: 100% (78/78), done.[K
remote: Total 94 (delta 33), reused 42 (delta 8), pack-reused 0 (from 0)[K
Receiving objects: 100% (94/94), 20.58 MiB | 29.47 MiB/s, done.
Resolving deltas: 100% (33/33), done.


In [29]:
# @title Precalculate Data

def pre_calculate_data():
    """Load every country's GeoJSON once and prepare it for index calculation.

    For each name in ``country_list`` the file
    ``{geojson_path}/{name.lower()}.geojson`` is read with geopandas. Any
    indicator column from ``column_list`` that the file lacks is added and
    filled with 0 (a message is printed for each), and an ``index`` column
    initialised to 0 is attached.

    Returns:
        dict: country name -> prepared GeoDataFrame.
    """
    loaded = {}
    for country in country_list:
        frame = gpd.read_file(f"{geojson_path}/{country.lower()}.geojson")
        # Snapshot the columns present in the file before adding any filler columns.
        present = set(frame.columns)
        for col in column_list:
            if col in present:
                continue
            print(f"Adding missing column: {col}")
            frame[col] = 0
        # Initialize the Index
        frame['index'] = 0
        loaded[country] = frame
    return loaded

countries_data = pre_calculate_data()

In [30]:
# @title Init Map Functions

# Apply Directionality
def apply_directionality(data, indicators, directionality):
    """Scale the indicator columns and orient them by direction.

    The ``indicators`` columns of ``data`` are overwritten in place with the
    output of the shared ``scaler.fit_transform``. Indicators whose
    ``directionality`` entry is ``'positive'`` are kept as scaled; every other
    indicator is inverted as ``1 - value``.

    Returns:
        dict: indicator name -> directed column.
    """
    data[indicators] = scaler.fit_transform(data[indicators])
    return {
        name: data[name] if directionality[name] == 'positive' else 1 - data[name]
        for name in indicators
    }

# Get center of the maps
def get_center_cordinates(geojson):
    """Return ``[center_y, center_x]``, the midpoint of the overall bounding box.

    The box spans the smallest ``minx``/``miny`` and the largest
    ``maxx``/``maxy`` found in ``geojson.bounds``.
    """
    west = geojson.bounds['minx'].min()
    east = geojson.bounds['maxx'].max()
    south = geojson.bounds['miny'].min()
    north = geojson.bounds['maxy'].max()
    mid_x = (west + east) / 2
    mid_y = (south + north) / 2
    return [mid_y, mid_x]

# Function to calculate the weighted index and show on the map
def _index_choropleth(geojson, column, name, **layer_kwargs):
    """Build a folium Choropleth that colours districts (ADM2_EN) by ``column``.

    ``name`` is used both as the layer name and as the legend title. Extra
    keyword arguments are forwarded to ``Choropleth`` unchanged.
    """
    return Choropleth(
        geo_data=geojson,
        name=name,
        data=geojson,
        columns=['ADM2_EN', column],
        key_on='feature.properties.ADM2_EN',
        fill_color='YlGnBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=name,
        legend_kwds={'position': 'bottomleft'},
        **layer_kwargs
    )


def _ranking_table_html(frames, ascending):
    """Concatenate per-country ranking frames, sort by index and render as an HTML table."""
    ranked = pd.concat(frames)
    ranked = ranked.sort_values(by='index', ascending=ascending)
    ranked = ranked.reset_index(drop=True)
    ranked = ranked[['ADM0_EN', 'ADM1_EN', 'ADM2_EN', 'index']]
    ranked.columns = ['Country', 'State', 'District', 'Index']
    return ranked.to_html(index=False, escape=False)


# Function to calculate the weighted index and show on the map
def calculate_index(countries, selected_index, core_secondary, directionality):
    """Compute the weighted index for the selected countries and display the result.

    For every country a copy of its pre-loaded GeoDataFrame is taken, the
    selected indicators are scaled/directed with ``apply_directionality`` and
    added to the ``index`` column, each multiplied by the weight of its
    core/secondary class. When the summed index is positive it is divided by
    the number of selected indicators. A 1-based ``quartile`` column is derived
    with ``pd.qcut``.

    The merged data is shown as a folium map with an "Index" layer, a hidden
    "Quartile" layer, a transparent tooltip layer, a layer control and a draw
    control, followed by two HTML tables holding the five largest and five
    smallest index rows of each country.

    Args:
        countries: names of the countries to include (keys of ``countries_data``).
        selected_index: indicator column names to combine.
        core_secondary: indicator -> ``'core'`` or ``'secondary'``.
        directionality: indicator -> ``'positive'`` or anything else (inverted).
    """
    with map_area:
        clear_output(wait=True)

    # Loading
    loading_widget = widgets.HTML("Loading...")
    display(loading_widget)

    try:
        merged_geojson = []
        top_five = []
        bottom_five = []
        for country in countries:
            # Work on a copy so the cached frame keeps its zero-initialised index.
            geojson = countries_data[country].copy()
            directed_data = apply_directionality(geojson, selected_index, directionality)
            for indicator in selected_index:
                weight = weight_mapping[core_secondary[indicator]]
                geojson['index'] += directed_data[indicator] * weight
            if geojson['index'].sum() > 0:
                geojson['index'] = geojson['index'] / len(selected_index)
            geojson['quartile'] = pd.qcut(geojson['index'], 4, labels=False, duplicates='drop') + 1
            merged_geojson.append(geojson)
            top_five.append(geojson.nlargest(5, 'index'))
            bottom_five.append(geojson.nsmallest(5, 'index'))

        geojson = gpd.GeoDataFrame(pd.concat(merged_geojson, ignore_index=True))

        center_cordinate = get_center_cordinates(geojson)

        # Full-width figure, 600 units high
        fig = Figure(width='100%', height=600)
        # Map Creation
        m = Map(
            location=center_cordinate,
            zoom_start=7,
            scrollWheelZoom=False
        )
        fig.add_child(m)

        # Visible index layer plus an initially hidden quartile layer.
        _index_choropleth(geojson, 'index', 'Index').add_to(m)
        _index_choropleth(geojson, 'quartile', 'Quartile', show=False).add_to(m)

        # Fully transparent overlay whose only purpose is to provide hover tooltips.
        GeoJson(
            geojson.to_json(),
            style_function=lambda x: {'fillColor': '#ffffff00', 'color': '#00000000', 'weight': 0},
            tooltip=folium.GeoJsonTooltip(
                fields=["ADM2_EN", "index"],
                aliases=["Region: ", "Index Score: "],
                localize=True
            )
        ).add_to(m)

        LayerControl().add_to(m)

        # Create draw control
        draw = Draw(
            export=True,
            filename='my_data.geojson',
            position='topleft',
            draw_options={
              'polyline': {'allowIntersection': False},
              'polygon': {'allowIntersection': False},
              'circle': False,  # Turns off this drawing tool
              'rectangle': {'repeatMode': False, 'allowIntersection': False},
              'marker': False,  # Turns off this drawing tool
              'circlemarker': False  # Turns off this drawing tool
            },
            edit_options={'edit': False}
        )

        draw.add_to(m)

        # Highest and lowest index rows (five per country) as HTML tables
        top_five_html = _ranking_table_html(top_five, ascending=False)
        bottom_five_html = _ranking_table_html(bottom_five, ascending=True)

        # top 5 and bottom 5 side by side, both 50 % width
        both_titles = widgets.HTML("""
        <div style='float:left;width:50%;'><b>Top 5 Countries</b></div><div style='float:left;width:50%;'><b>Bottom 5 Countries</b></div>
        <hr>
        <div style='clear:both;'></div>
        """)

        # The style attribute is injected into the opening <table> tag; the
        # semicolon must sit inside the quotes to keep the markup valid.
        table_open_tag = "<table style='width:100%;'"
        both_tables = widgets.HTML("""
        <div style='float:left;width:48%;border-right:1px solid black;'>
            {}
        </div><div style='float:left;width:50%;'>
            {}
        </div>
        <div style='clear:both;'></div>
        <hr>
        """.format(
                top_five_html.replace("<table", table_open_tag),
                bottom_five_html.replace("<table", table_open_tag)
            )
        )

        # Clear the map area to prevent stacking maps
        with map_area:
            clear_output(wait=True)
        display(
            widgets.HTML("<hr>"),
            fig,
            both_titles,
            both_tables
        )
    finally:
        # Remove the loading indicator even when the calculation fails.
        loading_widget.close()

In [32]:
# @title Select Countries & Indicators
# 1. Init Container
# output_area is meant for buttons and widgets, map_area for the map.
output_area, map_area = widgets.Output(), widgets.Output()

# Per-indicator widget registries, keyed by indicator column name.
core_secondary_title_widgets, core_secondary_widgets, directionality_widgets = {}, {}, {}

# VBoxes (one per selected indicator) shown next to the selection lists.
output_container = []

# 2. Country selection widget
country_options = [name.capitalize() for name in country_list]
country_layout = widgets.Layout(width='250px', height='100px')  # Set custom width and height
country_selection = widgets.SelectMultiple(
    options=country_options,
    description='Countries',
    value=[country_list[0]],  # first country is pre-selected
    disabled=False,
    layout=country_layout
)

# 3. Indicator Selection
# Each option is a (label, column name) pair; the widget value holds the column names.
indicator_options = [(column_definitions[column], column) for column in column_list]
indicator_layout = widgets.Layout(width='420px', height='100px')  # Set custom width and height
indicator_selection = widgets.SelectMultiple(
    options=indicator_options,
    description='Indicators',
    disabled=False,
    layout=indicator_layout
)

# 5. Function to handle the button click event for calculating the index
def on_calculate_button_click(event):
    """Handle a click on the Calculate button.

    Reads the selected indicators and countries, collects the core/secondary
    and directionality choice of every selected indicator from its dropdown,
    and hands everything to ``calculate_index``. Nothing is calculated (a
    message is printed instead) when no indicator or no country is selected.

    Args:
        event: the object passed by the button's click callback (unused).
    """
    selected_indicators = list(indicator_selection.value)
    if not selected_indicators:
        print("Please select at least one indicator.")
        return
    selected_countries = list(country_selection.value)
    if not selected_countries:
        # Without this guard calculate_index would try to concatenate an empty list.
        print("Please select at least one country.")
        return

    # Collect core/secondary and directionality choices
    core_sec = {indicator: core_secondary_widgets[indicator].value for indicator in selected_indicators}
    directionality = {indicator: directionality_widgets[indicator].value for indicator in selected_indicators}

    calculate_index(selected_countries, selected_indicators, core_sec, directionality)

# Button that runs on_calculate_button_click when clicked.
button = widgets.Button(description="Calculate")
button.on_click(on_calculate_button_click)

# 6. Function on change country
def on_change_country(change):
    """React to a change of the country selection.

    Re-renders the selection controls through ``on_change_indicator`` so that
    the per-indicator dropdown registries always match what is displayed.
    (Clearing the registries here without rebuilding them would make the next
    Calculate click fail with a ``KeyError``.)

    Args:
        change: the change notification passed by the widget observer.
    """
    on_change_indicator(change)

# 7. Function on change indicator
def on_change_indicator(change):
    """Rebuild the per-indicator controls after the indicator selection changed.

    Clears the current cell output and redisplays the two selection lists
    together with, for every selected indicator, a core/secondary dropdown and
    a direction dropdown, followed by the Calculate button. Dropdowns of
    indicators that remain selected are reused, so choices already made are
    kept; dropdowns of deselected indicators are dropped.

    Args:
        change: the change notification passed by the widget observer (unused).

    Returns:
        Whatever ``display`` returns for the rendered controls.
    """
    clear_output(wait=True)
    output_container.clear()
    selected_indicators = list(indicator_selection.value)

    # Forget only the controls of indicators that are no longer selected;
    # wiping everything would reset the user's earlier choices.
    for registry in (core_secondary_widgets, directionality_widgets):
        for stale in [key for key in registry if key not in selected_indicators]:
            del registry[stale]

    # line widget
    for indicator in selected_indicators:
        indicator_name = column_definitions[indicator]
        if indicator not in core_secondary_widgets:
            core_secondary_widgets[indicator] = widgets.Dropdown(
                options=['core', 'secondary'],
                description=f'{indicator_name} :',
                value='core',
                layout=widgets.Layout(width='670px'),
                style={'description_width': 'initial'}
            )

        if indicator not in directionality_widgets:
            directionality_widgets[indicator] = widgets.Dropdown(
                options=['positive', 'negative'],
                description=f'{indicator_name} - Direction:',
                value='positive',
                layout=widgets.Layout(width='670px'),
                style={'description_width': 'initial'}
            )

        # Add both widgets (Core/Secondary and Directionality) to a VBox
        output_container.append(widgets.VBox([core_secondary_widgets[indicator], directionality_widgets[indicator]]))
    return display(widgets.HBox([country_selection, indicator_selection, widgets.VBox(output_container)]), button)

# 8. Attach an observer to capture changes in selection
for selector, handler in (
    (country_selection, on_change_country),
    (indicator_selection, on_change_indicator),
):
    selector.observe(handler, names="value")

# Initial view: only the two selection lists, side by side.
initial_controls = widgets.HBox([country_selection, indicator_selection])
display(initial_controls)

HBox(children=(SelectMultiple(description='Countries', index=(0,), layout=Layout(height='100px', width='250px'…