# Settlement Extent data

* Load and explore GRID3 settlement extent data
* Ensure admin. info (county names) matches, or build a mapping
* Trim to select primary/secondary counties (start with sample)
* Visualize
* Re-export the trimmed (and, if necessary, cleaned-up) version

In [1]:
import pandas as pd
import geopandas as gpd
import plotly.express as px

In [2]:
from config import primary_counties, secondary_counties

In [3]:
# Show the primary county list imported from config
primary_counties

['Nakuru', 'Mombasa', 'Kiambu', 'Uasin Gishu', 'Nyeri']

In [4]:
# Show the secondary county list imported from config
secondary_counties

['Kirinyaga',
 'Embu',
 'Nyandarua',
 'Laikipia',
 'Meru',
 "Murang'a",
 'Nandi',
 'Elgeyo-Marakwet',
 'Kilifi',
 'Kwale',
 'Taita Taveta']

In [None]:
# Flip to True to select all primary + secondary counties; the default
# short-list is for testing and / or baseline investigation
USE_ALL_COUNTIES = False

sel_counties = (primary_counties + secondary_counties) if USE_ALL_COUNTIES else ["Nandi"]

sel_counties

## Load and Filter SE data

In [None]:
# Path to the GRID3 settlement extents v1.1 geodatabase
se_file = (
    "./data/GRID3_Kenya_Settlement_Extents_Version_1.1/"
    "GRID3_Kenya_Settlement_Extents_Version_1.1.gdb/"
)

In [None]:
# Read the settlement-extent data at se_file and report its (rows, columns)
se_df = gpd.read_file(se_file)
se_df.shape

In [None]:
# Check county name mapping: every configured county must appear in adm1_name.
# The names are collected once, and the assertion message lists the exact
# counties that are missing so a mismatch can be fixed without extra digging.
se_county_names = set(se_df["adm1_name"].unique())
missing_primary = set(primary_counties) - se_county_names
missing_secondary = set(secondary_counties) - se_county_names
assert not missing_primary, f"Missing primary counties: {sorted(missing_primary)}"
assert not missing_secondary, f"Missing secondary counties: {sorted(missing_secondary)}"

In [None]:
# Keep the selected counties plus every "crosses boundary" extent (for accuracy)
wanted_adm1 = sel_counties + ["crosses boundary"]
sel_df = se_df.loc[se_df["adm1_name"].isin(wanted_adm1)].copy()
print("Drop counties", sel_df.shape)

# Keep extents whose UN-adjusted population is strictly below 200,000
# (an extent with exactly 200,000 is dropped as well)
below_cap = sel_df["pop_un_adj"] < 200000
sel_df = sel_df.loc[below_cap].copy()
print("Drop UN Adj Pop > 200000", sel_df.shape)

In [None]:
# Show the first row of the filtered extents
sel_df.head(1)

In [None]:
# Row counts per value of the "type" column
sel_df["type"].value_counts()

In [None]:
# Histogram of UN-adjusted population over the full dataset, one facet per "type";
# axes are un-linked so each facet gets its own range and tick labels
f = px.histogram(se_df, x="pop_un_adj", facet_col="type")
f = f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Histogram of UN-adjusted population for the selected counties, one facet per "type";
# axes are un-linked so each facet gets its own range and tick labels
f = px.histogram(sel_df, x="pop_un_adj", facet_col="type")
f = f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Comparison for the selected counties: histogram of the "population" estimate
# (not UN adjusted), drawn in salmon, one facet per "type"
f = px.histogram(
    sel_df,
    x="population",
    facet_col="type",
    color_discrete_sequence=["salmon"],
)
f = f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Number of extents per adm1_name value, including the "crosses boundary" group
sel_df["adm1_name"].value_counts()

## Plot SEs

In [None]:
# Plot only extents with 500 <= UN-adjusted pop <= 100000 (between() includes both ends).
# "crosses boundary" extents are kept; use the line below instead to exclude them:
# plt_df = sel_df[(sel_df["pop_un_adj"].between(500, 100000)) & (sel_df["adm1_name"] != "crosses boundary")]
plt_df = sel_df[sel_df["pop_un_adj"].between(500, 100000)]
plt_df.shape

In [None]:
# Render the filtered extents on an interactive map
plt_df.explore()

In [None]:
# Row counts per value of the "type" column among the plotted extents
plt_df["type"].value_counts()

In [None]:
# Row counts per adm1_name value among the plotted extents
plt_df["adm1_name"].value_counts()

# Explore later versions
- Population inclusion
- Better shapes than v.1.1 (See Nandi / Western Kenya)

## Load ADM data for intersection filtering

In [None]:
# Read the ADM1 boundary file, then reproject it to EPSG:21037
adm1_gadm = "./data/gadm41_KEN_1.json"
adm1_df = gpd.read_file(adm1_gadm)
adm1_df = adm1_df.to_crs(crs="EPSG:21037")
adm1_df.shape

In [None]:
# Carve out the ADM1 boundaries for only the selected counties.
# NAME_1 is matched against each county name both as configured and with its
# spaces removed.
name_variants = set(sel_counties) | {name.replace(" ", "") for name in sel_counties}
sel_adm1_df = adm1_df[adm1_df["NAME_1"].isin(name_variants)].copy()
# Exactly one boundary row is expected per selected county; on a mismatch,
# report what was actually matched so the offending name is easy to spot.
assert sel_adm1_df.shape[0] == len(sel_counties), (
    f"Expected {len(sel_counties)} ADM1 rows for {sel_counties}, "
    f"matched {sel_adm1_df.shape[0]}: {sorted(sel_adm1_df['NAME_1'])}"
)
sel_adm1_df.shape

In [None]:
# Show the selected ADM1 boundary rows
sel_adm1_df

## Load GRID3 SE v2.0

In [None]:
# Path to the GRID3 settlement extents v2.0 geodatabase (replaces the v1.1 path used earlier)
se_file = (
    "./data/GRID3_Kenya_Settlement_Extents_Version_2.0/"
    "GRID3_Kenya_Settlement_Extents_Version_02.gdb/"
)

In [None]:
# Read the settlement extents at se_file, then reproject them to EPSG:21037
# (the same CRS the ADM1 boundaries were converted to)
se_df = gpd.read_file(se_file)
se_df = se_df.to_crs(crs="EPSG:21037")
se_df.shape

In [None]:
# Drop any shapes that do NOT intersect with the selected counties.
# Each county is tested against all extents at once with the vectorised
# GeoDataFrame.intersects and the per-county masks are OR-ed together; this
# avoids running a Python lambda for every settlement-extent row.
intersects_any = pd.Series(False, index=se_df.index)
for county_geom in sel_adm1_df.geometry:
    intersects_any |= se_df.intersects(county_geom)
sel_se_df = se_df[intersects_any].copy()
print("Filter out non-intersecting SEs", sel_se_df.shape)

In [None]:
# Show the first row of the county-intersecting extents
sel_se_df.head(1)

In [None]:
# Histogram of the bld_count column for the county-intersecting extents
px.histogram(sel_se_df["bld_count"])

In [None]:
# Row counts per value of the dou_level1 column
sel_se_df["dou_level1"].value_counts()

In [None]:
# Row counts per value of the dou_level2 column
sel_se_df["dou_level2"].value_counts()

In [None]:
# Base layer: selected county outlines in red with a faint fill and no interactivity
county_style = dict(color="red", weight=2, opacity=0.75, fill=True, fillOpacity=0.05)
m = sel_adm1_df.explore(
    style_kwds=county_style,
    tooltip=False,
    popup=False,
    highlight=False,
)
# Overlay the settlement extents on the same map, coloured by dou_level2
m = sel_se_df.explore(column="dou_level2", cmap="tab20", legend=True, m=m)

In [None]:
# Display the combined county + settlement-extent map
m

## Load GRID3 SE v3.0

In [None]:
# Path to the GRID3 settlement extents v3.0 GeoPackage
se_file = (
    "./data/GRID3_Kenya_Settlement_Extents_Version_3.0/"
    "GRID3_KEN_settlement_extents_v3_0.gpkg"
)

In [None]:
# Read the settlement extents at se_file, then reproject them to EPSG:21037
# (the same CRS the ADM1 boundaries were converted to)
se_df = gpd.read_file(se_file)
se_df = se_df.to_crs(crs="EPSG:21037")
se_df.shape

In [None]:
# Show the first row of the loaded extents
se_df.head(1)

In [None]:
# Drop any shapes that do NOT intersect with the selected counties.
# Each county is tested against all extents at once with the vectorised
# GeoDataFrame.intersects and the per-county masks are OR-ed together; this
# avoids running a Python lambda for every settlement-extent row.
intersects_any = pd.Series(False, index=se_df.index)
for county_geom in sel_adm1_df.geometry:
    intersects_any |= se_df.intersects(county_geom)
sel_se_df = se_df[intersects_any].copy()
print("Filter out non-intersecting SEs", sel_se_df.shape)

In [None]:
# Row counts per value of the "type" column among the county-intersecting extents
sel_se_df["type"].value_counts()

In [None]:
# Base layer: selected county outlines in red with a faint fill and no interactivity
county_style = dict(color="red", weight=2, opacity=0.75, fill=True, fillOpacity=0.05)
m = sel_adm1_df.explore(
    style_kwds=county_style,
    tooltip=False,
    popup=False,
    highlight=False,
)
# Overlay the settlement extents on the same map, coloured by "type".
# Alternative that leaves out the "Built-up Area" rows:
#m = sel_se_df[sel_se_df["type"]!="Built-up Area"].explore("type", cmap="tab20", legend=True, m=m)
m = sel_se_df.explore(column="type", cmap="tab20", legend=True, m=m)