# Populated Places data

* Load populated places (PP) data from both sources: UNOCHA and HOTOSM
* Ensure administrative names match the configured counties, or develop a mapping
* Trim to the selected primary/secondary counties (start with a sample of two)
* Visualize (pre- and post-trim)
* Re-export trimmed, cleaned version

In [None]:
import pandas as pd
import geopandas as gpd

In [None]:
from config import primary_counties, secondary_counties

In [None]:
# Echo the primary_counties value imported from config as a quick visual check.
primary_counties

## UNOCHA COD-PP

In [None]:
# Path to the UNOCHA COD-PP populated-places dataset, later passed to gpd.read_file.
# NOTE(review): no file extension — presumably a directory of shapefile parts; confirm.
codpp_file = "./data/KEN_Populated places_2002_DEPHA"

In [None]:
# Columns kept from the COD-PP layer when it is loaded.
codpp_cols = [
    "NEWDLAT",
    "NEWDLONG",
    "FULL_NAME",
    "ADM1",
    "DISTRICT",
    "REGION",
    "LOCATION",
    "SUB_LOCATI",
    "geometry",
]

# Replacement values applied to the DISTRICT column: spelling found in COD-PP -> spelling to use.
codpp_county_map = {
    "E. Marakwet": "Elgeyo-Marakwet",
    "Muranga": "Murang'a",
    "Taita Tavet": "Taita Taveta",
}

In [None]:
# Load the COD-PP layer and keep only the columns listed in codpp_cols.
codpp_df = gpd.read_file(codpp_file)
codpp_df = codpp_df[codpp_cols].copy()

# Rewrite DISTRICT spellings via codpp_county_map so they can be compared with the
# county names imported from config.
codpp_df["DISTRICT"] = codpp_df["DISTRICT"].replace(codpp_county_map)

# Every configured county must appear in DISTRICT after the renaming. The failure
# message lists the missing names so the mapping above can be extended.
codpp_districts = set(codpp_df["DISTRICT"].unique())
missing_primary = set(primary_counties) - codpp_districts
missing_secondary = set(secondary_counties) - codpp_districts
assert not missing_primary, f"Primary counties missing: {sorted(missing_primary)}"
assert not missing_secondary, f"Secondary counties missing: {sorted(missing_secondary)}"
codpp_df.shape

In [None]:
# Trim COD-PP to rows whose DISTRICT is one of the configured primary or secondary counties.
selected_counties = primary_counties + secondary_counties
in_selected_county = codpp_df["DISTRICT"].isin(selected_counties)
sel_codpp_df = codpp_df[in_selected_county].copy()
sel_codpp_df.shape

In [None]:
# Preview the first two rows of the full (untrimmed) COD-PP frame.
codpp_df.head(2)

In [None]:
# Interactive map of every COD-PP point (untrimmed frame), drawn in dodgerblue.
# The map object is kept in `m` because a later cell passes it back to explore()
# to draw the HOTOSM points on the same map.
m = codpp_df.explore(color="dodgerblue")
m

## HOTOSM PPs

> NB: Does not contain ADM-level information, i.e. no counties. County membership must be derived via spatial inclusion within county shapes.

In [None]:
# Path to the zipped HOTOSM populated-places point shapefile, later passed to gpd.read_file.
hotosm_file = "./data/hotosm_ken_populated_places_points_shp.zip"

In [None]:
# Load the HOTOSM populated-places points and drop rows whose `place` value is
# "isolated_dwelling".
hotosm_df = gpd.read_file(hotosm_file)
# .copy() mirrors the COD-PP cells: the filtered frame is an independent object, so
# assigning columns to it later does not raise SettingWithCopyWarning.
hotosm_df = hotosm_df[~hotosm_df["place"].isin(["isolated_dwelling"])].copy()
hotosm_df.shape

In [None]:
# Preview the first five rows of the filtered HOTOSM frame.
hotosm_df.head(5)

In [None]:
# Count rows per `place` category remaining after the isolated_dwelling filter.
hotosm_df["place"].value_counts()

In [None]:
# Draw the HOTOSM points in salmon on `m`, the map that already holds the COD-PP
# points, so both sources can be compared on one map.
hotosm_df.explore(m=m, color="salmon")