<a href="https://colab.research.google.com/github/acoiman/pdt/blob/main/asthma_mortality/notebooks/colab/10_Asthma_Mortality_DEM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ⛰️ Elevation Data

In this notebook, we will calculate the mean elevation (in meters) of each department using the [NASADEM: NASA 30m Digital Elevation Model](https://developers.google.com/earth-engine/datasets/catalog/NASA_NASADEM_HGT_001#citations). We will begin by calculating and mapping elevation in Córdoba province as an example, and then extend the analysis to all departments in Argentina. This variable will later be included as a confounding factor in the modeling process.

## 📦 Import Required Libraries

In [None]:
# data.frame handling
import pandas as pd

# geospatial data handling
import geopandas as gpd
import geemap
import ee
import folium

# other libraries
import branca.colormap as cm
import os
from itables import init_notebook_mode

In [None]:
# Set the PROJ_LIB path to the conda environment's PROJ data directory, but
# only when that directory is present on this machine. On other setups
# (e.g. Colab) the directory is absent, and a path that does not exist cannot
# supply the PROJ data files, so the environment is left untouched there.
PROJ_DATA_DIR = "/opt/conda/envs/gds/share/proj"
if os.path.isdir(PROJ_DATA_DIR):
    os.environ['PROJ_LIB'] = PROJ_DATA_DIR

In [None]:
# Change into the `work` directory (relative to the current working
# directory); the data paths used later in this notebook are relative and
# start with `pdt/`.
%cd work

## 🌍 Connect to Google Earth Engine (GEE)

In [None]:
# Trigger the Earth Engine authentication flow
ee.Authenticate()

In [None]:
# Initialize the Earth Engine client library for the 'ee-pdt' project, then
# fetch a test string with getInfo() and print it
ee.Initialize(project='ee-pdt')
print(ee.String('Hello from the Earth Engine servers!').getInfo())

## Computing Elevation Data for Cordoba Province

In [None]:
# Province code of Córdoba; IDPROV is compared against it as a string
CORDOBA_IDPROV = "14"

# Department boundaries for the whole country, then only those of Córdoba
all_dptos = ee.FeatureCollection("projects/ee-pdt/assets/argentina/ar_prov_dpto")
is_cordoba = ee.Filter.eq('IDPROV', CORDOBA_IDPROV)
cordoba_dptos = all_dptos.filter(is_cordoba)

In [None]:
# Load the NASADEM image and keep only its "elevation" band
nasadem = ee.Image("NASA/NASADEM_HGT/001").select("elevation")

In [None]:
# Clip the elevation image to the geometry of the Córdoba department collection
cor_extent = cordoba_dptos.geometry()
nasadem_cor = nasadem.clip(cor_extent)

In [None]:
# Compute the mean elevation of each Córdoba department with a mean reducer;
# the next cell reads the result from each feature's "mean" property
mean_elev_cor = nasadem_cor.reduceRegions(
    collection=cordoba_dptos,
    reducer=ee.Reducer.mean(),
    scale=30  # NASADEM native resolution
)

In [None]:
# Expose the reducer output under the name ELEV_2001 and keep only the
# department id, the elevation value and the geometry
def _mean_to_elev(feature):
    """Copy the feature's "mean" property to "ELEV_2001" and trim its properties."""
    renamed = feature.set("ELEV_2001", feature.get("mean"))
    return renamed.select(["IDDPTO", "ELEV_2001", "geometry"])

mean_elev_cor = mean_elev_cor.map(_mean_to_elev)

In [None]:
# Convert the feature collection to a GeoDataFrame (values rounded to 2
# decimals), then keep a plain two-column table: department id and elevation
gdf_elev_cor = geemap.ee_to_gdf(mean_elev_cor).round(2)
df_elev_cor  = gdf_elev_cor[['IDDPTO', 'ELEV_2001']]

In [None]:
# Turn on itables' interactive mode for all DataFrames, then display the
# Córdoba elevation table (last expression of the cell)
init_notebook_mode(all_interactive=True)
df_elev_cor

####  Mapping the Elevation (m) in Cordoba Province

In [None]:
# Class breaks (m) for the choropleth: 5 edges define 4 intervals
bins = [106, 190, 451, 593, 1087]
# One colour per interval plus a fifth entry; get_color (defined below)
# returns that last colour for values that fall outside every interval
colors = [
    "#1a9850",
    "#fee08b",
    "#fc8d59",
    "#d73027",
    "#f7f7f7"
]
# Create a step colormap for the legend, spanning the first to the last break
colormap = cm.StepColormap(colors=colors, vmin=bins[0], vmax=bins[-1], index=bins,
                           caption='Elevation (m)')

# Function to assign a color to each feature based on its ELEV_2001 value
def get_color(elev, edges=None, palette=None):
    """Return the fill colour for an elevation value.

    Parameters
    ----------
    elev : number or None
        Elevation value of the feature.
    edges : sequence of numbers, optional
        Ascending class breaks; interval ``i`` is ``edges[i] <= elev < edges[i + 1]``.
        Defaults to the module-level ``bins``.
    palette : sequence of str, optional
        Colours; ``palette[i]`` is used for interval ``i``.
        Defaults to the module-level ``colors``.

    Returns
    -------
    str
        ``'gray'`` for a missing value (``None`` or NaN), the colour of the
        matching interval, or ``palette[-1]`` when the value lies outside
        every interval (below the first edge or at/above the last one).
    """
    if edges is None:
        edges = bins
    if palette is None:
        palette = colors
    # NaN is the only value that differs from itself; treat it like None so a
    # missing elevation is never painted with a real class colour
    if elev is None or elev != elev:
        return 'gray'
    for i in range(len(edges) - 1):
        if edges[i] <= elev < edges[i + 1]:
            return palette[i]
    return palette[-1]

# Initialize map
m = folium.Map(location=[-31.3, -64.2], zoom_start=7, control_scale=True)

# Add the styled elevation GeoJson layer: each department is filled with the
# colour get_color assigns to its ELEV_2001 value
folium.GeoJson(
    gdf_elev_cor,
    name='Elevation (Custom Choropleth)',
    style_function=lambda feature: {
        'fillColor': get_color(feature['properties']['ELEV_2001']),
        'color': 'black',
        'weight': 0.3,
        'fillOpacity': 0.6,
    },
    # Tooltip label uses the same unit notation as the legend caption
    tooltip=folium.GeoJsonTooltip(
        fields=['IDDPTO', 'ELEV_2001'],
        aliases=['Department ID:', 'Elevation (m):'],
        localize=True
    )
).add_to(m)

# Add the colormap legend
colormap.add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

In [None]:
# Display the map (last expression of the cell)
m

#### Computing Elevation for all provinces

In [None]:
# Load the department shapefile to obtain the list of province ids.
# The path points at the main .shp file; the .shx file is only its index
# sidecar and is picked up automatically from the same directory.
shapefile_path = 'pdt/asthma_mortality/data/shp/ar_prov_dpto.shp'
gdf = gpd.read_file(shapefile_path)

# Unique values of IDPROV, in order of first appearance
province_ids = gdf['IDPROV'].unique().tolist()

print(province_ids)

In [None]:
# Load all province-department features (same asset as used for Córdoba above)
all_dptos = ee.FeatureCollection("projects/ee-pdt/assets/argentina/ar_prov_dpto")

In [None]:
# Load the NASADEM image and keep only its "elevation" band
nasadem = ee.Image("NASA/NASADEM_HGT/001").select("elevation")

In [None]:
# Accumulator for the per-province DataFrames built by the loop below
df_list = []

In [None]:
# Start from an empty list every time this cell runs, so that re-running the
# cell does not append every province a second time
df_list = []

# Loop through each province ID and collect one table per province
for prov_id in province_ids:
    print("Processing province: ", prov_id)

    # Filter departments by province
    prov_dptos = all_dptos.filter(ee.Filter.eq('IDPROV', prov_id))

    # Clip elevation to province extent
    prov_extent = prov_dptos.geometry()
    nasadem_prov = nasadem.clip(prov_extent)

    # Compute mean elevation per department in the province
    mean_elev_prov = nasadem_prov.reduceRegions(
        collection=prov_dptos,
        reducer=ee.Reducer.mean(),
        scale=30  # NASADEM native resolution
    )

    # Expose the reducer's "mean" output as ELEV_2001 and keep only the
    # department id and the elevation value
    mean_elev_prov = mean_elev_prov.map(
        lambda f: f.set(
            "ELEV_2001", f.get("mean")
        ).select(["IDDPTO", "ELEV_2001"])
    )

    # Convert to GeoDataFrame (rounded to 2 decimals) and keep only the
    # required columns; loop-local names avoid overwriting `gdf` / `df`
    prov_gdf = geemap.ee_to_gdf(mean_elev_prov).round(2)
    prov_df = prov_gdf[['IDDPTO', 'ELEV_2001']]

    # Append to list
    df_list.append(prov_df)

In [None]:
# Stack the per-province tables into one DataFrame with a fresh 0..n-1 index
df_elev_all = pd.concat(df_list, ignore_index=True)

In [None]:
# Turn on itables' interactive mode for all DataFrames, then display the
# national elevation table (last expression of the cell)
init_notebook_mode(all_interactive=True)
df_elev_all

In [None]:
# Print a summary of the DataFrame with DataFrame.info()
df_elev_all.info()

In [None]:
# Sanity check: the Córdoba rows of df_elev_all must reproduce df_elev_cor

# Keep the rows of df_elev_all whose department id starts with '14' (Córdoba)
df_elev_all_cordoba = df_elev_all[df_elev_all['IDDPTO'].str.startswith('14')]

# Sort both dataframes by 'IDDPTO' and reset the index so rows line up
df_elev_all_cordoba = df_elev_all_cordoba.sort_values(by='IDDPTO').reset_index(drop=True)
df_elev_cor_sorted = df_elev_cor.sort_values(by='IDDPTO').reset_index(drop=True)

# The department ids must match first: this guarantees equal length (an
# element-wise `==` between Series of different length raises) and that each
# elevation is compared with the value of the same department
same_ids = df_elev_all_cordoba['IDDPTO'].tolist() == df_elev_cor_sorted['IDDPTO'].tolist()

# Test if the 'ELEV_2001' columns are the same; a pair of missing values
# counts as equal (NaN == NaN is False under plain `==`)
elev_all = df_elev_all_cordoba['ELEV_2001']
elev_cor = df_elev_cor_sorted['ELEV_2001']
are_equal = same_ids and bool(
    ((elev_all == elev_cor) | (elev_all.isna() & elev_cor.isna())).all()
)

print(f"Are the 'ELEV_2001' values for Cordoba province the same?: {are_equal}")

#### Merge computed elevation data with other features

In [None]:
# Load the GeoPackage holding PM2.5, burned areas, LULC and the other
# previously computed datasets
gdf = gpd.read_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_pdpm25_agrt_nwvt_nbut_2001_2022.gpkg", driver="GPKG")

In [None]:
# Left merge on IDDPTO: every row of gdf is preserved and gains the
# ELEV_2001 column from df_elev_all
gdf_elev_2001 = gdf.merge(df_elev_all, on='IDDPTO', how='left')

In [None]:
# Turn on itables' interactive mode for all DataFrames, then display the
# first rows of the merged GeoDataFrame
init_notebook_mode(all_interactive=True)
gdf_elev_2001.head()

In [None]:
# Check the (rows, columns) shape of the merged GeoDataFrame
gdf_elev_2001.shape

In [None]:
# Save the merged dataset (existing features plus ELEV_2001) as a GeoPackage file
gdf_elev_2001.to_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_pdpm25_agrt_nwvt_nbut_elev_2001_2022.gpkg", driver="GPKG")

To facilitate subsequent analysis, we will shorten the name of the spatial data file; from this point onward, it will simply be referred to as **data**.

In [None]:
# Save the same merged dataset again under the shorter file name data.gpkg
gdf_elev_2001.to_file("pdt/asthma_mortality/data/gpkg/data.gpkg", driver="GPKG")