## Settlement Extent data

* Load and explore GRID3 settlement extent data
* Ensure admin info (county names) matches, or build a mapping
* Trim to select primary/secondary counties (start with sample)
* Visualize
* Re-export the trimmed (and, if necessary, cleaned-up) version

In [None]:
import pandas as pd
import geopandas as gpd
import plotly.express as px

In [None]:
from config import primary_counties, secondary_counties

In [None]:
# Display the primary counties imported from config
primary_counties

In [None]:
# Display the secondary counties imported from config
secondary_counties

In [None]:
# Counties used to filter the settlement extents further down.
# Start with a short list for testing and / or baseline investigation.
sel_counties = ["Kilifi", "Nyeri"]

# To run on the full set instead, uncomment the line below
# (all primary + secondary counties from config).
# sel_counties = primary_counties + secondary_counties

# Display the active selection
sel_counties

## Load and Filter SE data

In [None]:
# Path (relative to the working directory) to the GRID3 Kenya Settlement
# Extents v1.1 geodatabase (.gdb) directory
se_file = "./data/GRID3_Kenya_Settlement_Extents_Version_1.1/GRID3_Kenya_Settlement_Extents_Version_1.1.gdb/"

In [None]:
# Load the settlement extents and show the (rows, columns) shape.
# NOTE(review): no layer is named, so this relies on read_file's default
# layer choice for the .gdb — confirm it is the intended layer.
se_df = gpd.read_file(se_file)
se_df.shape

In [None]:
# Check county name mapping: every configured county must appear verbatim in
# the settlement extents' "adm1_name" column. On failure, the message lists
# the offending names so a mapping can be built for exactly those counties.
se_county_names = set(se_df["adm1_name"].unique())
missing_primary = set(primary_counties) - se_county_names
missing_secondary = set(secondary_counties) - se_county_names

assert not missing_primary, (
    f"Missing primary counties: {sorted(map(str, missing_primary))}"
)
assert not missing_secondary, (
    f"Missing secondary counties: {sorted(map(str, missing_secondary))}"
)

In [None]:
# Restrict to the selected counties. Rows labelled "crosses boundary" are
# kept too, for accuracy.
county_mask = se_df["adm1_name"].isin(sel_counties + ["crosses boundary"])
sel_df = se_df.loc[county_mask].copy()
print("Drop counties", sel_df.shape)

# Keep only extents whose UN-adjusted population is strictly below 200,000
under_cap = sel_df["pop_un_adj"] < 200000
sel_df = sel_df.loc[under_cap].copy()
print("Drop UN Adj Pop > 200000", sel_df.shape)

In [None]:
# Peek at the first row of the filtered data to inspect its columns
sel_df.head(1)

In [None]:
# Number of filtered settlement extents per "type" value
sel_df["type"].value_counts()

In [None]:
# Histogram of UN-adjusted population over the full (unfiltered) dataset,
# one facet per settlement type.
f = px.histogram(data_frame=se_df, x="pop_un_adj", facet_col="type")
# Give every facet its own axis ranges and show tick labels on each y axis
f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Histogram of UN-adjusted population for the selected counties only,
# one facet per settlement type.
f = px.histogram(data_frame=sel_df, x="pop_un_adj", facet_col="type")
# Give every facet its own axis ranges and show tick labels on each y axis
f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Comparison plot for the selected counties: the plain population estimate
# (not UN adjusted), drawn in a different colour, one facet per type.
f = px.histogram(
    data_frame=sel_df,
    x="population",
    facet_col="type",
    color_discrete_sequence=["salmon"],
)
# Give every facet its own axis ranges and show tick labels on each y axis
f.update_yaxes(matches=None, showticklabels=True)
f.update_xaxes(matches=None)

In [None]:
# Count the filtered settlement extents per county; "crosses boundary" rows
# show up as their own category
sel_df["adm1_name"].value_counts()

## Plot SEs

In [None]:
# Subset to plot: 500 <= UN-adjusted population <= 100000 (between() is
# inclusive on both ends by default). "crosses boundary" rows are kept.
# Alternative that also excludes the boundary-crossing rows:
# plt_df = sel_df[(sel_df["pop_un_adj"].between(500, 100000)) & (sel_df["adm1_name"] != "crosses boundary")]
pop_in_range = sel_df["pop_un_adj"].between(500, 100000)
plt_df = sel_df.loc[pop_in_range]
plt_df.shape

In [None]:
# Show the plotting subset on an interactive map
plt_df.explore()

In [None]:
# Number of plotted settlement extents per "type" value
plt_df["type"].value_counts()

In [None]:
# Number of plotted settlement extents per county ("adm1_name")
plt_df["adm1_name"].value_counts()