<a href="https://colab.research.google.com/github/acoiman/pdt/blob/main/asthma_mortality/notebooks/colab/06_Asthma_Mortality_PD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 👨‍👩‍👧‍👦 Population Density (PD)

In this notebook we will calculate the **population density (PD)** and the **product of PD and PM2.5** for each department in the study area from 2001 to 2022, based on our annual population estimates, the area of each department, and the annual concentration of PM2.5.

## 📦 Import Required Libraries

In [None]:
# geospatial data handling
import geopandas as gpd

# other libraries
from itables import init_notebook_mode

## 🗂 Load Dataset

In [None]:
# Change into the "work" directory (resolved relative to the current working
# directory) so that the relative "pdt/..." paths used by the cells below resolve.
# NOTE(review): presumably the repository is checked out under work/ — confirm.
%cd work

In [None]:
# Load dataset with data per department
# gdf = gpd.read_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_2001_2022.gpkg")

In [None]:
# Load the per-department dataset (the "_2" file variant), in which BA was
# calculated from MODIS data at 500 m spatial resolution
gdf = gpd.read_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_2001_2022_2.gpkg")

In [None]:
# Turn on itables interactive rendering for DataFrames, then preview the first rows
# of the loaded dataset
init_notebook_mode(all_interactive=True)
gdf.head()

## 👪 Calculate Population Density

In [None]:
def add_population_density(gdf):
    """
    Adds population density columns to a GeoDataFrame.

    Parameters:
    - gdf: GeoDataFrame with a defined CRS (EPSG:4326 expected) and population
      columns named 'A_<year>', e.g. 'A_2001', 'A_2002', ...

    Returns:
    - A copy of `gdf` with new columns 'PD_2001', 'PD_2002', ... (people per km²,
      rounded to 2 decimals). Geometries and CRS are exactly those of the input;
      the input frame itself is not modified. Rows whose geometry has no
      positive area get NaN instead of an infinite density.
    """
    # Measure areas on an equal-area reprojection (EPSG:6933) of the geometry
    # column only. Reprojecting the whole frame and converting it back would
    # return geometries that went through a lossy round trip.
    area_km2 = gdf.geometry.to_crs(epsg=6933).area / 1e6

    # Mask non-positive areas so the division yields NaN rather than inf
    area_km2 = area_km2.where(area_km2 > 0)

    # Work on a copy so the caller's frame is left untouched; the area stays a
    # local Series, so an existing 'area_km2' column (if any) is not clobbered
    result = gdf.copy()

    prefix = "A_"
    for col in list(gdf.columns):
        year = col[len(prefix):]
        # Only columns of the exact form 'A_<digits>' are population columns
        if col.startswith(prefix) and year.isdigit():
            result[f"PD_{year}"] = (gdf[col] / area_km2).round(2)

    return result

### Apply Population Density Function


In [None]:
# Add a PD_<year> population-density column for every A_<year> population column
gdf_with_density = add_population_density(gdf)

### Preview Resulting GeoDataFrame and save results

In [None]:
# Display the first few rows of the GeoDataFrame that now carries the PD_<year> columns
init_notebook_mode(all_interactive=True)
gdf_with_density.head()

In [None]:
# Save the GeoDataFrame to a GeoPackage file
# gdf_with_density.to_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_2001_2022.gpkg", driver="GPKG")

In [None]:
# Save the GeoDataFrame with the population-density columns to a GeoPackage file
gdf_with_density.to_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_2001_2022_2.gpkg", driver="GPKG")

## 👪 🏭 Calculate PD * PM25

In [None]:
# Reload the per-department dataset from the GeoPackage written in the previous
# section, so it includes the PD_<year> columns. This rebinds `gdf`.
gdf = gpd.read_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_2001_2022_2.gpkg")

In [None]:
# Preview the first rows of the reloaded dataset with interactive table rendering
init_notebook_mode(all_interactive=True)
gdf.head()

In [None]:
def add_pdpm25(gdf):
    """
    Adds PDPM25_year = PD_year * PM25_year for each year where both columns exist.

    Parameters:
    - gdf: GeoDataFrame with columns like 'PD_2001', 'PM25_2001', ...

    Returns:
    - Copy of `gdf` with new columns 'PDPM25_YYYY'; the input frame is not
      modified. A PD column without a matching PM25 column is skipped.
    """
    # Work on a copy to avoid modifying the original dataframe
    result = gdf.copy()

    prefix = "PD_"
    # Snapshot the column names first: the loop adds new columns to `result`
    for col in list(result.columns):
        if not col.startswith(prefix):
            continue

        # Everything after the prefix must be digits. Slicing at len(prefix)
        # (3) rather than 4 stops names such as 'PD_x001' from being accepted.
        year = col[len(prefix):]
        if not year.isdigit():
            continue

        pm25_col = f"PM25_{year}"
        if pm25_col in result.columns:
            result[f"PDPM25_{year}"] = result[col] * result[pm25_col]

    return result


### Apply PD × PM2.5 Function


In [None]:
# Add a PDPM25_<year> column (PD * PM25) for every year that has both inputs
gdf_with_pdpm25 = add_pdpm25(gdf)

### Preview Resulting GeoDataFrame and save results

In [None]:
# Display the first few rows of the GeoDataFrame that now carries the PDPM25_<year> columns
init_notebook_mode(all_interactive=True)
gdf_with_pdpm25.head()

In [None]:
# Save the GeoDataFrame to a GeoPackage file
# gdf_with_pdpm25.to_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_pdpm25_2001_2022.gpkg", driver="GPKG")

In [None]:
# Save the GeoDataFrame with the PD and PDPM25 columns to a GeoPackage file
gdf_with_pdpm25.to_file("pdt/asthma_mortality/data/gpkg/tma_pm25_ba_pd_pdpm25_2001_2022_2.gpkg", driver="GPKG")