<a href="https://colab.research.google.com/github/akvo/usaid-wssh-tool-3/blob/develop/scripts/data-processing-collab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install Dependencies
%%capture
# ipyleaflet is pinned to an exact version. NOTE(review): presumably because the
# Prologue cell replaces Choropleth._get_data — confirm before upgrading.
!pip install ipyleaflet==0.17.2

In [2]:
# @title Prologue
# 1. Import Dependencies
import geopandas as gpd
import pandas as pd
import requests
import json
from io import StringIO
import os
from sklearn.preprocessing import MinMaxScaler
import ipywidgets as widgets
import matplotlib.pyplot as plt
import folium
from IPython.display import display, clear_output, HTML
from folium import Choropleth, FeatureGroup, LayerControl, Map, GeoJson
from folium.plugins import Draw, FloatImage
from branca.colormap import linear
from branca.element import Figure
import ipyleaflet.leaflet as Lf

# 2. Clone Repository
# Remove any checkout left over from a previous run first, so the clone below
# always starts from a directory that does not exist yet.
if os.path.exists("usaid-wssh-tool-3"):
  !rm -rf usaid-wssh-tool-3
!git clone https://github.com/akvo/usaid-wssh-tool-3.git

geojson_path = "usaid-wssh-tool-3/data/output"
# Regular files in the output directory whose name ends with ".geojson".
file_list = [
    entry
    for entry in os.listdir(geojson_path)
    if os.path.isfile(os.path.join(geojson_path, entry)) and entry.endswith(".geojson")
]
# Country names: file names with the extension removed, sorted, then capitalised.
country_list = [
    stem.capitalize()
    for stem in sorted(entry.replace(".geojson", "") for entry in file_list)
]

# 3. Column Definitions
# Maps each indicator column name to the label shown for it on the map.
column_definitions = dict([
    ('drr', 'Drought Risk'),
    ('rfr', 'Riverine Flood Risk'),
    ('bws', 'Base Water Stress'),
    ('Open_defecation_estimates_mean', 'Open Defecation'),
    ('No_Improved_water_premise_estimates_mean', 'No Improve Water Premises'),
    ('No_basic_water_estimates_mean', 'No Basic Water'),
    ('no_basic_sanitation_estimates_mean', 'No Basic Sanitation'),
])
# Indicator column names, in definition order.
column_list = [*column_definitions]
# Multiplier applied to an indicator according to its 'weight' class.
weight_mapping = dict(core=0.7, secondary=0.3)

# 4. override get data

def _get_data(self):
    """
    Get the data for the choropleth.

    Returns a copy of ``self.geo_data`` in which every feature's
    ``properties['style']`` is set to the result of
    ``self.style_callback(feature, colormap, value)``. ``value`` is looked up
    in ``self.choro_data`` using the feature property named by the second
    dot-separated segment of ``self.key_on``.

    ``self.geo_data`` itself is never modified.
    """
    import copy  # local import: `copy` is not part of the notebook's import cell

    colormap = self.colormap if self.colormap else linear.YlGnBu_09
    data = self.geo_data
    if isinstance(data, dict):
        # dict.copy() is shallow: the nested feature/properties dicts would be
        # shared with self.geo_data and the 'style' assignment below would leak
        # into the source data. Deep-copy so the caller's data stays untouched.
        data = copy.deepcopy(data)
    else:
        # Non-dict input: copy it by round-tripping through JSON.
        data = json.loads(json.dumps(data))
    for feature in data['features']:
        properties = feature['properties']
        # NOTE(review): index 1 assumes key_on looks like "properties.<name>";
        # a "feature.properties.<name>" value would select "properties" — confirm.
        lookup_key = properties[self.key_on.split('.')[1]]
        properties['style'] = self.style_callback(feature, colormap, self.choro_data[lookup_key])
    return data

# Replace ipyleaflet's Choropleth._get_data with the override defined above.
Lf.Choropleth._get_data = _get_data

# Shared scaler instance; apply_directionality calls fit_transform on it, so it
# is re-fitted on every call.
scaler = MinMaxScaler()

Cloning into 'usaid-wssh-tool-3'...
remote: Enumerating objects: 132, done.[K
remote: Counting objects: 100% (132/132), done.[K
remote: Compressing objects: 100% (113/113), done.[K
remote: Total 132 (delta 50), reused 49 (delta 11), pack-reused 0 (from 0)[K
Receiving objects: 100% (132/132), 26.22 MiB | 15.87 MiB/s, done.
Resolving deltas: 100% (50/50), done.


In [3]:
# @title Precalculate Data

def pre_calculate_data():
    """
    Load the GeoJSON file of every country in ``country_list``.

    Each frame gets a zero-filled column for every entry of ``column_list`` it
    lacks (a message is printed per added column) and an ``index`` column
    initialised to 0.

    Returns a dict mapping country name -> loaded frame.
    """
    loaded = {}
    for country in country_list:
        frame = gpd.read_file(f"{geojson_path}/{country.lower()}.geojson")
        for col in column_list:
            if col in frame.columns:
                continue
            print(f"Adding missing column: {col}")
            frame[col] = 0
        # Initialize the Index
        frame['index'] = 0
        loaded[country] = frame
    return loaded

countries_data = pre_calculate_data()

In [4]:
# @title Init Map Functions
# Output widget that receives the rendered map and tables; the Calculate button
# handler clears it and writes into it.
map_area = widgets.Output()
map_area.clear_output(wait=True)

# Apply Directionality
def apply_directionality(data, selected_index):
    """
    Scale the selected indicator columns and orient them by direction.

    The columns named by ``selected_index`` are passed through the shared
    ``scaler`` on a copy of ``data``. Indicators whose ``'direction'`` is
    ``'positive'`` keep the scaled value; every other direction is inverted
    as ``1 - value``.

    Returns a dict mapping indicator name -> resulting column.
    """
    names = [entry['indicator'] for entry in selected_index]
    scaled = data.copy()
    scaled[names] = scaler.fit_transform(scaled[names])
    return {
        entry['indicator']: (
            scaled[entry['indicator']]
            if entry['direction'] == 'positive'
            else 1 - scaled[entry['indicator']]
        )
        for entry in selected_index
    }

# Get center of the maps
def get_center_cordinates(geojson):
    """
    Return the midpoint of the overall bounding box as ``[y, x]``.

    The box spans the smallest ``minx``/``miny`` to the largest ``maxx``/``maxy``
    found in ``geojson.bounds``.
    """
    bounds = geojson.bounds
    west, east = bounds['minx'].min(), bounds['maxx'].max()
    south, north = bounds['miny'].min(), bounds['maxy'].max()
    return [(south + north) / 2, (west + east) / 2]

# Function to calculate the weighted index and show on the map
def _drop_color_map_children(layer):
    """Delete every child of ``layer`` whose key starts with ``color_map``."""
    # Iterate over a snapshot of the keys: deleting from the mapping while
    # iterating over it directly can raise RuntimeError.
    for child_name in list(layer._children):
        if child_name.startswith("color_map"):
            del layer._children[child_name]


def _add_choropleth_layer(m, geojson, column, label, **extra):
    """Add a YlGnBu choropleth of ``column`` (keyed on ADM2_EN) to ``m`` and return it."""
    layer = Choropleth(
        geo_data=geojson,
        name=label,
        data=geojson,
        columns=['ADM2_EN', column],
        key_on='feature.properties.ADM2_EN',
        fill_color='YlGnBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=label,
        **extra,
    )
    layer.add_to(m)
    return layer


def calculate_index(countries, selected_index):
    """
    Compute the weighted index for the given countries and render it.

    Parameters
    ----------
    countries : list of str
        Keys of the module-level ``countries_data`` mapping.
    selected_index : list of dict
        Indicator entries. Each one is read for its ``'indicator'`` and
        ``'weight'`` (a key of ``weight_mapping``) here, and for its
        ``'direction'`` inside ``apply_directionality``.

    Returns
    -------
    tuple
        ``(fig, tables)``: the ``Figure`` containing the map, and a
        ``widgets.HTML`` holding the top-5 / bottom-5 tables.
    """
    merged_geojson = []
    top_five = []
    bottom_five = []
    for country in countries:
        # Work on a copy so the frame cached in countries_data is left as loaded.
        geojson = countries_data[country].copy()
        directed_data = apply_directionality(geojson, selected_index)
        for i, indicator in enumerate(selected_index):
            weight = weight_mapping[indicator['weight']]
            geojson['index'] += directed_data[indicator['indicator']] * weight
            # changes in ranking from each additional indicator:
            # index_<k> is the running index averaged over the first k indicators
            changes = i + 1
            geojson[f'index_{changes}'] = geojson['index'] / changes
        if geojson['index'].sum() > 0:
            geojson['index'] = geojson['index'] / len(selected_index)
        geojson['quartile'] = pd.qcut(geojson['index'], 4, labels=False, duplicates='drop') + 1
        merged_geojson.append(geojson)
        # rank each additional indicator (rank 1 = highest running index)
        for changes in range(1, len(selected_index) + 1):
            geojson[f'Rank_{changes}'] = geojson[f'index_{changes}'].rank(ascending=False).astype(int)
        top_five.append(geojson.nlargest(5, 'index'))
        bottom_five.append(geojson.nsmallest(5, 'index'))

    geojson = gpd.GeoDataFrame(pd.concat(merged_geojson, ignore_index=True))

    center_cordinate = get_center_cordinates(geojson)

    # Full-width figure, 600px tall
    fig = Figure(width='100%', height=600)
    # Map Creation
    m = Map(
        location=center_cordinate,
        zoom_start=7,
        scrollWheelZoom=False,
        tiles='cartodbpositron'
    )

    fig.add_child(m)

    # Computed layers: only "index" is shown initially, and only "index" keeps
    # its color-map child.
    for d in ["index", "quartile"]:
        feature_layer = _add_choropleth_layer(
            m, geojson, d, d.capitalize(),
            legend_kwds={'position': 'bottomleft'},
            show=d == "index",
        )
        if d == "quartile":
            _drop_color_map_children(feature_layer)

    # One hidden layer per raw indicator, each without its color-map child.
    for indicator in column_list:
        feature_layer = _add_choropleth_layer(
            m, geojson, indicator, column_definitions[indicator],
            show=False,
        )
        _drop_color_map_children(feature_layer)

    # Transparent overlay whose only purpose is to carry the tooltip.
    GeoJson(
        geojson.to_json(),
        name="Tooltip",
        style_function=lambda x: {'fillColor': '#ffffff00', 'color': '#00000000', 'weight': 0},
        tooltip=folium.GeoJsonTooltip(
            fields=["ADM2_EN", "index"] + column_list,
            aliases=["Region: ", "Index Score: "] + [column_definitions[indicator] for indicator in column_list] ,
            localize=True
        )
    ).add_to(m)

    LayerControl(
        collapsed=False,
        position='bottomright'
    ).add_to(m)

    # Create draw control
    draw = Draw(
        export=True,
        filename='my_data.geojson',
        position='topleft',
        draw_options={
          'polyline': {'allowIntersection': False},
          'polygon': {'allowIntersection': False},
          'circle': False,  # Turns off this drawing tool
          'rectangle': {'repeatMode': False, 'allowIntersection': False},
          'marker': False,  # Turns off this drawing tool
          'circlemarker': False  # Turns off this drawing tool
        },
        edit_options={'edit': False}
    )

    draw.add_to(m)

    additional_index = [f"Rank_{i + 1}" for i in range(len(selected_index))]

    # Top 5 districts (per country, concatenated) as an HTML table
    top_five = pd.concat(top_five)
    top_five = top_five.sort_values(by='index', ascending=False)
    top_five = top_five.reset_index(drop=True)
    top_five = top_five[['ADM1_EN','ADM2_EN', 'index'] + additional_index]
    top_five.columns = ['State', 'District', 'Index'] + additional_index
    top_five_html = top_five.to_html(index=False, escape=False)

    # Bottom 5 districts (per country, concatenated) as an HTML table
    bottom_five = pd.concat(bottom_five)
    bottom_five = bottom_five.sort_values(by='index', ascending=True)
    bottom_five = bottom_five.reset_index(drop=True)
    bottom_five = bottom_five[['ADM1_EN','ADM2_EN', 'index'] + additional_index]
    bottom_five.columns = ['State', 'District', 'Index'] + additional_index
    # sort by last additional rank
    bottom_five = bottom_five.sort_values(by=additional_index[-1], ascending=False)
    bottom_five_html = bottom_five.to_html(index=False, escape=False)

    # top 5 and bottom 5 side by side
    tables = widgets.HTML("""
        <div style='float:left;width:50%;'><b>Top 5 Districts</b></div><div style='float:left;width:50%;'><b>Bottom 5 Districts</b></div>
        <hr>
        <div style='clear:both;'></div>
        <div style='float:left;width:48%;border-right:1px solid black;'>
            {}
        </div><div style='float:left;width:50%;'>
            {}
        </div>
        <div style='clear:both;'></div>
        <hr>
        """.format(
            # Both tables get the same, well-formed style attribute.
            top_five_html.replace("<table","<table style='width:100%'"),
            bottom_five_html.replace("<table","<table style='width:100%'")
        )
    )
    return fig, tables

In [5]:
# @title Dropdown Config
# Index presets: preset name -> list of indicator entries. Each entry names the
# data column ("indicator"), its display label, its weight class and its direction.
indicator_config = {
    "Climate-resilient WASH": [
        dict(
            indicator="rfr",
            label="Riverine Flood Risk",
            weight="core",
            direction="positive",
        ),
        dict(
            indicator="No_Improved_water_premise_estimates_mean",
            label="No Improve Water Premises",
            weight="core",
            direction="positive",
        ),
        dict(
            indicator="Open_defecation_estimates_mean",
            label="Open Defecation",
            weight="core",
            direction="positive",
        ),
        dict(
            indicator="drr",
            label="Drought Risk",
            weight="secondary",
            direction="positive",
        ),
    ],
}

In [6]:
# @title Select Countries & Indicators
# 1. Init Container
output_area = widgets.Output()  # For buttons and widgets

# 2. Country selection widget (first country preselected)
country_selection = widgets.Dropdown(
    description='Country',
    options=[name.capitalize() for name in country_list],
    value=country_list[0],
    layout=widgets.Layout(width='250px'),  # Set custom width
    disabled=False,
)

# 3. Indicator Selection (first preset of indicator_config preselected)
indicator_selection = widgets.Dropdown(
    description='Indicator',
    options=list(indicator_config),
    value=list(indicator_config)[0],
    layout=widgets.Layout(width='300px'),  # Set custom width
    disabled=False,
)

# 4. Function to handle the button click event for calculating the index
def on_calculate_button_click(event):
    """
    Recompute the index for the current selections and show the result.

    Clears ``map_area``, shows a "Loading..." placeholder, calls
    ``calculate_index`` with the selected country and indicator preset, then
    displays the returned figure and tables inside ``map_area``. The
    placeholder is closed whether or not the calculation succeeds; any
    exception still propagates to the caller.

    ``event`` is the argument passed by the button callback; it is not used.
    """
    map_area.clear_output(wait=True)

    loading_widget = widgets.HTML("Loading...")
    display(loading_widget)
    try:
        selected_indicators = indicator_config[indicator_selection.value]
        selected_countries = country_selection.value
        # calculate_index iterates over countries, so wrap a single selection.
        if not isinstance(selected_countries, list):
            selected_countries = [selected_countries]

        fig, tables = calculate_index(selected_countries, selected_indicators)
        with map_area:
            display(widgets.HTML("<hr>"), fig, tables)
    finally:
        # Always remove the placeholder, otherwise a failed calculation would
        # leave "Loading..." on screen.
        loading_widget.close()

# Button that triggers the index calculation via on_calculate_button_click.
button = widgets.Button(description="Calculate")
button.on_click(on_calculate_button_click)


# Show the two dropdowns and the button in one row, then the map output area.
display(widgets.HBox([country_selection, indicator_selection, button]))
display(map_area)
# disable button
# button.disabled = True

HBox(children=(Dropdown(description='Country', layout=Layout(width='250px'), options=('Madagascar', 'Rwanda', …

Output()