# Brief notebook to explore given WaterTRACS EA waterpoint testing data

@auth dpb

Data from Anna / Karen / WaterTRACS c. Oct 2024

In [1]:
import os
import pandas as pd
import geopandas as gpd

In [2]:
# Directory holding the WaterTRACS CSV export.
# Set the WATERTRACS_DATA_DIR environment variable to point at your own copy of the
# data; when it is unset we fall back to the original author's local Dropbox folder,
# so existing behaviour is unchanged.
data_dir = os.environ.get(
    "WATERTRACS_DATA_DIR",
    "/Users/datascience/Aquaya Dropbox/Duncan Penfold-Brown/Project W II/Data/WaterTRACS Data Vis/",
)

# Full path of the combined waterpoint CSV that is loaded further down.
data_file = os.path.join(data_dir, "Hilton_WP_data_Combined_2024-10-02.csv")

In [27]:
# Columns to read from the raw CSV, listed in the order we want them to appear
# in the working DataFrame (names are the raw CSV headers, before renaming).
keep_cols = [
    "country_wp",
    "district_wp",
    "Clean_Date_wp",
    "ea_wp",
    "ea_code_wp",
    "wp_site_id_code1",
    "wptype",
    "institutional_wp",
    "wateravailable",
    "collectsample",
    "survey_round_wp",
    "wet_season_wp",
    "ph",
    "conductivity",
    "temperature",
    "turbidity",
    "ecoli_wp",
    "ecoli_wp_cat",
    # Coordinates, used later to build point geometry
    "latitude_wp",
    "longitude_wp",
]

# Raw header -> friendlier column name, applied after loading.
col_rename = {
    "Clean_Date_wp": "sample_date",
    "wp_site_id_code1": "wp_site_id",
    "wptype": "wp_type",
}

### Load and basic clean

In [79]:
# Load -> order rows -> order columns -> rename -> drop untested samples, as one pipeline.
df = (
    pd.read_csv(data_file, encoding="ISO-8859-1", usecols=keep_cols)
    # Sort on the raw column names (the rename happens further down the chain).
    .sort_values(["country_wp", "district_wp", "ea_wp", "wp_site_id_code1", "survey_round_wp"])
    # `usecols` does not control column order, so re-select to get the keep_cols order.
    .loc[:, keep_cols]
    .rename(columns=col_rename)
    # Drop all rows (samples) where the E Coli results are Null (ie, a test wasn't done)
    # - we'll just do this for now
    .dropna(how="any", subset=["ecoli_wp", "ecoli_wp_cat"])
)
df.shape

(3980, 20)

In [80]:
# Peek at the first two cleaned rows to sanity-check columns and values.
df.head(n=2)

Unnamed: 0,country_wp,district_wp,sample_date,ea_wp,ea_code_wp,wp_site_id,wp_type,institutional_wp,wateravailable,collectsample,survey_round_wp,wet_season_wp,ph,conductivity,temperature,turbidity,ecoli_wp,ecoli_wp_cat,latitude_wp,longitude_wp
1923,Ethiopia,Dera,7/28/2022,Agar,ea_03020988802001,430484593,12,0,1,1,0,wet,6.79,102.0,18.1,102.0,101.0,3.0,11.641217,37.673716
3201,Ethiopia,Dera,1/24/2023,Agar,ea_03020988802001,430484593,10,0,1,1,1,dry,7.2,142.0,13.1,2.07,101.0,3.0,11.64127,37.673826


### Transform to GeoDataFrame (load geom from lat/lon)

In [81]:
# Build one point per sample from its coordinates (x = longitude, y = latitude).
wp_points = gpd.GeoSeries.from_xy(df["longitude_wp"], df["latitude_wp"])

# Attach the points as the geometry column; crs=4326 is the EPSG code passed for the CRS.
gdf = gpd.GeoDataFrame(df, geometry=wp_points, crs=4326)
gdf.shape

(3980, 21)

### Filter to select EA

In [83]:
# Value of `ea_code_wp` to map. Kept as a string because it is compared directly
# against the `ea_code_wp` column in the filter cell below.
select_ea_code: str = "204"

In [84]:
# Keep only the samples whose EA code matches the selection; copy so that later
# changes to ea_gdf never touch gdf.
in_selected_ea = gdf["ea_code_wp"] == select_ea_code
ea_gdf = gdf.loc[in_selected_ea].copy()
ea_gdf.shape

(27, 21)

## Explore on Map

- Available tiles: `"OpenStreetMap"`, `"CartoDB positron"`, `"CartoDB dark_matter"`
- Marker types (for now): `"marker"`, `"circle"`, `"circle_marker"`


**Documentation for further map customization** (see icons, colors, marker styling, etc): https://geopandas.org/en/stable/docs/reference/api/geopandas.GeoDataFrame.explore.html

In [85]:
# Columns passed as `tooltip` to the map call (the fuller list).
tooltip_cols = [
    "country_wp",
    "district_wp",
    "sample_date",
    "ea_wp",
    "ea_code_wp",
    "wp_site_id",
    "wp_type",
    "institutional_wp",
    "wateravailable",
    "collectsample",
    "survey_round_wp",
    "wet_season_wp",
    "ph",
    "conductivity",
    "temperature",
    "turbidity",
    "ecoli_wp",
    "ecoli_wp_cat",
]

# Columns passed as `popup` to the map call (a shorter subset of the above).
popup_cols = [
    "country_wp",
    "district_wp",
    "sample_date",
    "ea_wp",
    "wp_site_id",
    "wp_type",
    "wet_season_wp",
    "ecoli_wp",
    "ecoli_wp_cat",
]

In [95]:
# Interactive map of the selected EA's samples, coloured by the `ecoli_wp` value.
marker_style = {"radius": 10, "fill": True}

fig = ea_gdf.explore(
    column="ecoli_wp",
    cmap="coolwarm",
    tiles="OpenStreetMap",
    tooltip=tooltip_cols,
    popup=popup_cols,
    marker_type="circle_marker",
    marker_kwds=marker_style,
)
fig

In [97]:
# Write the map to an HTML file (relative path, so it lands in the current working directory).
fig.save("sample_map.html")