### Process baseline mortality rates (BMR) from the IFs dataset

The IFs dataset provides mortality rates (deaths per 1,000 people) for each disease type, age group, and country. This notebook maps each country's rate uniformly onto every grid cell that belongs to that country.

In [1]:
import geopandas as gpd
import xarray as xr
from cartopy import crs as ccrs
import seaborn as sns; sns.set_theme()
import os
import fiona
import country_converter as coco
import dask
import dask.array as da
import netCDF4 as nc
import regionmask
from matplotlib import cm
import numpy as np
from matplotlib import pyplot as plt
import country_converter as coco
import pyogrio
#pyogrio.set_gdal_config_options({"SHAPE_RESTORE_SHX": "YES"})
import pandas as pd
from cartopy.util import add_cyclic_point
import nc_time_axis
import glob
import cdo
import pandas as pd
import cartopy.feature as cfeature
import plotly.express as px


In [2]:
import warnings

# Silence every warning category for the rest of the notebook session.
warnings.simplefilter('ignore')

In [3]:
# Country borders: Gridded Population of the World, Version 4 (GPWv4),
# National Identifier Grid, Revision 11.
# Download: https://www.earthdata.nasa.gov/data/catalog/sedac-ciesin-sedac-gpwv4-natiden-r11-4.11
# Citation: Center For International Earth Science Information Network-CIESIN-Columbia University. (2018).
#   Gridded Population of the World, Version 4 (GPWv4): National Identifier Grid, Revision 11 (Version 4.11) [Data set].
#   Palisades, NY: NASA Socioeconomic Data and Applications Center (SEDAC). https://doi.org/10.7927/H4TD9VDP
shapefile_path = "./Mortality_data/country_borders/gpw_v4_national_identifier_grid_rev11_15_min.shp"

# Companion table; its ISOCODE column is enumerated later to map region index -> ISO code.
gdf_list = pd.read_csv('./Mortality_data/country_borders/gpw_new.csv')

# Country geometries; passed to regionmask later to rasterise the borders.
gdf = gpd.read_file(shapefile_path)

In [5]:
# Verified IFs mortality table (rates are per thousand people).
mortality = pd.read_csv('./Mortality_data/IFs/mortality_all_new.csv')

# SSP2 total-population NetCDF files opened as one dataset.
# Only its lat/lon grid is used here, to build the country mask.
population = xr.open_mfdataset(
    './Mortality_data/population/SSP2/Total/NetCDF/*.nc',
    combine='nested',
    parallel=True,
)


In [6]:
# Rasterise the country polygons onto the population grid.
lon, lat = population['lon'], population['lat']
print(lat.shape)

# One boolean (lat, lon) layer per country, stacked along a `region` dimension.
# regionmask drops regions that contain no grid cell by default, so a layer's
# position along `region` is not guaranteed to equal its region number.
mask = regionmask.mask_3D_geopandas(gdf, lon, lat)

(1117,)


In [7]:
def global_mean_xarray(ds_XXLL):
    """
    Return the cos(latitude)-weighted global mean of a lat/lon field.

    The input must carry both ``lat`` and ``lon`` dimensions and should
    not contain NaN values.

    Parameters
    ----------
    ds_XXLL   : xarray object with lat and lon dimensions.
                ``ds_XXLL.coords['lat']`` (degrees) supplies the weights.

    Returns
    ----------
    tmp_XX    : xarray object with the lat and lon dimensions averaged out.

    """
    # cos(lat), normalised so that the weights average to one
    cos_lat = np.cos(np.deg2rad(ds_XXLL.coords['lat']))
    lat_weights = cos_lat / np.mean(cos_lat)

    # average along longitude first, then apply the latitude weights
    zonal_mean = ds_XXLL.mean(dim=['lon'])
    return (zonal_mean * lat_weights).mean(dim=['lat'])

def weighted_temporal_mean_l(ds, var=None):
    """
    Compute annual means weighted by the number of days in each month.

    Parameters
    ----------
    ds   : xarray Dataset or DataArray with a ``time`` coordinate that
           supports the ``.dt`` accessor.
    var  : optional variable name. When given, ``ds[var]`` is averaged;
           otherwise ``ds`` itself is used.

    Returns
    ----------
    xarray object resampled to one value per year (labelled at the start
    of the year). Missing values are excluded: the weighted sum is divided
    by the summed weights of the non-null samples only.

    Raises
    ----------
    AssertionError if the per-year weights do not sum to 1.
    """
    # Determine the month length
    month_length = ds.time.dt.days_in_month

    # Each time step's share of the days in its calendar year
    wgts = month_length.groupby("time.year") / month_length.groupby("time.year").sum()

    # Make sure the weights in each year add up to 1
    np.testing.assert_allclose(wgts.groupby("time.year").sum(xr.ALL_DIMS), 1.0)

    # Subset our dataset for our variable
    obs = ds if var is None else ds[var]

    # 1 where data is present, 0 where it is missing
    cond = obs.isnull()
    ones = xr.where(cond, 0.0, 1.0)

    # "YS" (year start) replaces the deprecated "AS" alias; both denote the
    # same annual frequency.
    # Calculate the numerator
    obs_sum = (obs * wgts).resample(time="YS").sum(dim="time")

    # Calculate the denominator
    ones_out = (ones * wgts).resample(time="YS").sum(dim="time")

    # Return the weighted average
    return obs_sum / ones_out

In [8]:
# PM2.5 calculations use non-communicable diseases (NCDs) plus lower
# respiratory infections (LRIs).
# NOTE(review): this assumes the female and male columns are additive - confirm
# against the IFs column definitions.
ncd_total = mortality['MortPerFeMalesNonComDis'].add(mortality['MortPerMalesNonComDis'])
lri_total = mortality['RespInfFemales'].add(mortality['RespInfMales'])

mortality['NCDS_total'] = ncd_total
mortality['LRIS_total'] = lri_total
mortality['NCDS+LRIS'] = lri_total.add(ncd_total)

### Repeat the steps below for each year

In [9]:
# Keep only the rows for the year being processed.
# NOTE: despite the `_2020` suffix this frame holds the 2070 rows; the year
# here must agree with the output file name used when saving.
mortality_2020 = mortality.loc[mortality['TimeId'] == 2070]

In [10]:
# Step 1: Keep cohorts 6-17 (inclusive) and index the combined NCD+LRI rate by ISO3.
# Rows are sorted by CohortId (stable sort) so that the values returned for
# each ISO code are always in ascending cohort order, independent of the CSV
# row order; downstream code treats position 0..11 as the age axis.
cohort_rows = mortality_2020[mortality_2020['CohortId'].between(6, 17)]
iso_to_mortality = (
    cohort_rows
    .sort_values('CohortId', kind='stable')
    .set_index('ISO3')['NCDS+LRIS']
)

# Step 2: Map mask region indices to ISOCODEs
# Assumes the rows of gdf_list are in the same order as the shapefile regions.
region_to_iso = dict(enumerate(gdf_list['ISOCODE']))

# Step 3 (next cell): Create a mortality rate grid

In [None]:
mortality_grid = {}

# Number of age cohorts kept per country (CohortId 6..17 inclusive).
n_age = 12
# Take the grid size from the mask instead of hard-coding it; the mask is
# laid out as (region, lat, lon).
n_lat, n_lon = mask.shape[1:]
grid_shape = (n_age, n_lat, n_lon)
grid_dims = ('age', 'lat', 'lon')

# regionmask drops regions without any grid cell by default, so the position
# along `region` may differ from the region number. Look layers up by the
# `region` coordinate instead of by position.
regions_in_mask = set(mask['region'].values.tolist())

# NOTE(review): every entry is a dense (age, lat, lon) array, so memory grows
# linearly with the number of regions; accumulating into a single grid would
# be far cheaper if only the sum over regions is needed.

# Loop through each region and corresponding ISO code
for region, iso in region_to_iso.items():
    if iso not in iso_to_mortality.index or region not in regions_in_mask:
        # No mortality data for this country, or no grid cell belongs to it.
        mortality_grid[region] = (grid_dims, np.full(grid_shape, np.nan))
        continue

    # One rate per age cohort for the current ISO code
    rates = np.atleast_1d(iso_to_mortality.loc[iso])
    if rates.shape != (n_age,):
        raise ValueError(
            f"expected {n_age} cohort values for {iso!r}, got {rates.size}"
        )

    # Boolean (lat, lon) layer of the current region
    region_mask = mask.sel(region=region).to_numpy()

    # (age, 1, 1) * (1, lat, lon) -> (age, lat, lon): the country's rate inside
    # its borders, zero elsewhere
    mortality_grid[region] = (
        grid_dims,
        rates[:, np.newaxis, np.newaxis] * region_mask[np.newaxis],
    )

In [None]:
# Split the {region: (dims, data)} mapping into two parallel lists that share
# the dict's insertion order.
mortality_arrays = [grid for _dims, grid in mortality_grid.values()]
regions = list(mortality_grid)

In [None]:
# Stack the per-region grids along a new leading axis -> (region, age, lat, lon).
BMR = np.stack(mortality_arrays)

In [None]:
# Wrap the stacked array in a labelled Dataset.
# The lat/lon coordinates are the ones the country mask was built on
# (taken from the population grid); the previously referenced `C` is not
# defined anywhere in this notebook.
ds = xr.Dataset(
    {
        "mortality": (("region", "age", "lat", "lon"), BMR)
    },
    coords={
        "region": regions,      # integer region indices (keys of mortality_grid)
        "age": np.arange(12),   # 12 age cohorts, numbered 0..11
        "lat": lat,
        "lon": lon,
    }
)

In [None]:
# Collapse the region axis: missing values count as zero, so every grid cell
# ends up with the summed rate of the regions that cover it.
combined = ds.mortality.fillna(0).sum('region')

In [None]:
# Write the gridded rates for this year. The output folder is created first
# so the write cannot fail on a missing directory after the long computation.
output_path = './processed/BMR/2070.nc'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
combined.to_netcdf(output_path)