### Calculate total population for each age group

Using the gridded population (density) dataset and, for each country, the ratio of each age group's population to the country's total population, we can calculate the population of each age group in each grid cell.

In [52]:
import geopandas as gpd
import xarray as xr
from cartopy import crs as ccrs
import seaborn as sns; sns.set_theme()
import os
import fiona
import country_converter as coco
import dask
import dask.array as da
import netCDF4 as nc
import regionmask
from matplotlib import cm
import numpy as np
from matplotlib import pyplot as plt
import country_converter as coco
import pyogrio
#pyogrio.set_gdal_config_options({"SHAPE_RESTORE_SHX": "YES"})
import pandas as pd
from cartopy.util import add_cyclic_point
import nc_time_axis
import glob
import cdo
import pandas as pd
import cartopy.feature as cfeature
import plotly.express as px


In [53]:
import warnings
warnings.filterwarnings('ignore')

In [54]:
# Load the inputs used by the cells below.

# Verified mortality table (values are mortality per thousand).
mortality = pd.read_csv('./Mortality_data/IFs/mortality_all_new.csv')

# Country-level age-group tables. `demo_frac` is the one read when the gridded
# age-group populations are built; presumably it holds each age group's share of
# the country total and `demographic` the matching counts -- TODO confirm.
demo_frac = xr.open_mfdataset('./Mortality_data/demographic/countries/pop_by_age_frac.nc')
demographic = xr.open_mfdataset('./Mortality_data/demographic/countries/pop_by_age.nc')

# Gridded SSP2 total population. Its lat/lon coordinates also define the grid on
# which the country mask is built.
population = xr.open_mfdataset(
    './Mortality_data/population/SSP2/Total/NetCDF/*.nc',
    combine='nested',
    parallel=True,
)


In [55]:
# Country borders: Gridded Population of the World, Version 4 (GPWv4),
# National Identifier Grid, Revision 11.
# Download: https://www.earthdata.nasa.gov/data/catalog/sedac-ciesin-sedac-gpwv4-natiden-r11-4.11
# Citation: Center For International Earth Science Information Network-CIESIN-Columbia
#   University. (2018). Gridded Population of the World, Version 4 (GPWv4): National
#   Identifier Grid, Revision 11 (Version 4.11) [Data set]. Palisades, NY: NASA
#   Socioeconomic Data and Applications Center (SEDAC). https://doi.org/10.7927/H4TD9VDP
shapefile_path = "./Mortality_data/country_borders/gpw_v4_national_identifier_grid_rev11_15_min.shp"

# Companion table; its ISOCODE column is later enumerated to map region index -> ISO code.
gdf_list = pd.read_csv('./Mortality_data/country_borders/gpw_new.csv')

# Country geometries read from the shapefile.
gdf = gpd.read_file(shapefile_path)

In [56]:
# Build a 3D boolean country mask (one lat/lon slice per row of `gdf`) on the
# population grid.
lon = population['lon']
lat = population['lat']
print(lat.shape)

# drop=False keeps a slice for every region, including regions that contain no
# grid point. With the default (drop=True) such regions are removed, so the
# positional lookup `mask[region]` used later would silently point at a
# different country than row `region` of the shapefile.
mask = regionmask.mask_3D_geopandas(gdf, lon, lat, drop=False)

(1117,)


In [62]:
# Position on the age axis -> age-group label.
# NOTE(review): these labels ('25to30', ...) are spelled differently from the
# keys of `age_theta` ('25to29', ...); the mapping is not used by the active
# code in the cells shown here.
age_mapping = dict(enumerate([
    '25to30', '30to35', '35to40', '40to45', '45to50', '50to55',
    '55to60', '60to65', '65to70', '70to75', '75to80', '80to85',
]))

# Per-age-group theta value, keyed by age-group name. The key order defines the
# order of the age axis, and the keys are the variable names looked up in
# `demo_frac` further down.
age_theta = dict(zip(
    ['25to29', '30to34', '35to39', '40to44', '45to49', '50to54',
     '55to59', '60to64', '65to69', '70to74', '75to79', '80to84'],
    [0.1585, 0.1577, 0.1570, 0.1558, 0.1532, 0.1499,
     0.1462, 0.1421, 0.1374, 0.1319, 0.1253, 0.1141],
))

# Row position in the ISO table -> ISO code. The integer keys are used below as
# region indices into the country mask, which assumes the table rows are in the
# same order as the shapefile rows -- TODO confirm.
region_to_iso = {row: code for row, code in enumerate(gdf_list['ISOCODE'])}
# (Disabled) relabel the integer age coordinate of `combined` with `age_mapping`:
#combined['age'] = [age_mapping[age] for age in combined.age.values]

### Multiply the original population grid with the ratio of the population for each age group to the total population for each country

In [63]:
# For every region index, build an (age, lat, lon) grid of population per age
# group: gridded 2020 population x the region's age-group fraction x the
# region's country mask. `demo_grid` maps region index -> (dims, grid).

age_labels = list(age_theta)  # names of the per-age-group variables in demo_frac
grid_dims = ('age', 'lat', 'lon')
grid_shape = (len(age_labels), population['lat'].size, population['lon'].size)

# Regions without usable data all share ONE read-only NaN grid. It is a
# zero-stride broadcast view, so it costs no memory, whereas allocating a full
# NaN array per missing region costs ~309 MB each on the 12 x 1117 x 2880 grid.
nan_grid = np.broadcast_to(np.nan, grid_shape)

# The fractions are read from `demo_frac`, so availability is checked against
# that dataset's regions (not against `demographic`).
frac_regions = set(demo_frac['region'].values.tolist())
# Region labels actually present in the mask; a region can be absent when the
# mask was built with regionmask's default drop=True.
mask_regions = set(mask['region'].values.tolist())

# Size-1 'age' axis in front; broadcasting against the (12, 1, 1) fractions
# expands it to one slice per age group.
base_population = population['ssp2_2020'].expand_dims(dim='age')

iso_demographic = np.zeros(len(age_labels))
demo_grid = {}

for region, iso in region_to_iso.items():
    if iso not in frac_regions or region not in mask_regions:
        demo_grid[region] = (grid_dims, nan_grid)
        continue

    # A fresh array per region: the product below may be evaluated lazily, so it
    # must not reference a scratch buffer that later iterations overwrite.
    iso_demographic = np.array(
        [demo_frac[label].sel(region=f'{iso}').values for label in age_labels],
        dtype=float,
    ).reshape(len(age_labels))

    # Select the mask slice by region label rather than by position, so a mask
    # with dropped regions cannot shift the lookup onto another country.
    # Assumes the mask labels are the shapefile row numbers -- TODO confirm.
    region_mask = mask.sel(region=region).to_numpy()[np.newaxis]

    demo_grid[region] = (
        grid_dims,
        base_population * iso_demographic[:, np.newaxis, np.newaxis] * region_mask,
    )
        


KeyboardInterrupt



In [None]:
# Split `demo_grid` into two parallel lists that share the dict's insertion
# order: the region keys and the per-region grids (the dims tuple is dropped).
regions = list(demo_grid)
demo_arrays = [grid for _dims, grid in demo_grid.values()]

In [None]:
# Stack the per-region grids along a new leading 'region' axis WITHOUT loading
# them: each (12, 1117, 2880) float64 grid is ~309 MB, so an eager np.stack of
# every region at once needs that much memory per region. A dask stack stays
# lazy and is only evaluated when the result is reduced / written out.
demo_pop = da.stack(
    [grid.data if isinstance(grid, xr.DataArray) else grid for grid in demo_arrays],
    axis=0,
)

In [None]:
# Wrap the stacked per-region grids in a labelled Dataset.
# The lat/lon coordinates come from `population`, the grid the data was built
# on; the previous reference to `C` is not defined anywhere in this notebook and
# fails on a fresh kernel.
ds_p = xr.Dataset(
    {
        "demo_pop": (("region", "age", "lat", "lon"), demo_pop)
    },
    coords={
        "region": regions,                      # region indices (keys of demo_grid)
        "age": np.arange(demo_pop.shape[1]),    # integer position of each age group
        "lat": population['lat'],
        "lon": population['lon'],
    }
)

In [None]:
# Collapse the region axis: NaN cells count as zero, then the per-region grids
# are summed into a single (age, lat, lon) field.
per_region_pop = ds_p['demo_pop']
combined_pop = per_region_pop.fillna(0).sum(dim='region')

# Relabel the integer age positions with the age-group names (order of age_theta).
age_labels = list(age_theta)
combined_pop['age'] = [age_labels[position] for position in combined_pop.age.values]

In [None]:
# Write the gridded age-group population to disk. Create the output directory
# first so a missing ./processed folder does not abort the write after the
# expensive computation above.
output_path = './processed/2020_demo_frac.nc'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
combined_pop.to_netcdf(output_path)