In [2]:
import xarray as xr
import numpy as np
import earthkit.data
from earthkit.transforms import aggregate
from dask.distributed import LocalCluster

In [None]:
# Start a dask LocalCluster with its default settings and obtain a client
# connected to it. Presumably this is what executes the chunked (dask-backed)
# xarray computations elsewhere in the notebook -- confirm.
cluster = LocalCluster()
client = cluster.get_client()
# Bare trailing expression: the notebook renders the client's repr as the cell output.
client

In [4]:
# Population-weighted PM2.5 per district, computed one calendar year at a time.
#
# For every year present in the PM time axis:
#   1. select that year's PM time steps,
#   2. load the matching population raster and resample it onto the PM grid,
#   3. sum (PM * population) and population over each district polygon,
#   4. divide the two sums to obtain the population-weighted mean.
# The per-year results are collected and combined into a single table, so
# `agg_df` ends up holding every year rather than only the last one processed.

pm_file = "./data/pm_final_srilanka_linearp.nc"
# Chunk along time so the PM cube is dask-backed and evaluated lazily.
pm_ds = xr.open_dataset(pm_file, chunks={"time": 100})
pm_data = pm_ds["__xarray_dataarray_variable__"]

# District polygons used as aggregation regions.
district_file = "./data/geoBoundaries-LKA-ADM2.geojson"
features = earthkit.data.from_source("file", district_file)

# Years as strings: used both for partial-date selection on the time axis and
# for building the population raster file name.
years = np.unique(pm_data['time'].dt.year).astype(str)

yearly_results = []

for year in years:
    print(f"Calculating {year}")
    pm_year = pm_data.sel(time=year)

    pop_file = f"./data/lka_pop_{year}_CN_1km_R2025A_UA_v1.tif"
    pop_ds = xr.open_dataset(pop_file)
    # Rename the raster axes so they match the PM grid's coordinate names.
    pop_data = pop_ds['band_data'].rename({"x": "lon", "y": "lat"})
    # NOTE(review): nearest-neighbour sampling picks one population cell per PM
    # cell; if the PM grid is coarser than the raster this does not conserve
    # total population -- confirm this is acceptable for the weighting.
    pop_aligned = pop_data.interp(lon=pm_year.lon, lat=pm_year.lat, method="nearest")

    pm_weighted = pm_year * pop_aligned

    # NOTE(review): if PM has missing cells where population is present, those
    # cells would be skipped in the numerator but still counted in the
    # denominator (assuming the sums skip NaN) -- verify the PM field is gap-free.
    agg_num = aggregate.spatial.reduce(pm_weighted, features, how="sum", mask_dim="shapeName")
    agg_den = aggregate.spatial.reduce(pop_aligned, features, how="sum", mask_dim="shapeName")

    pw = agg_num / agg_den
    pw.name = "pm25_popweighted"

    # Keep this year's result; previously it was overwritten by the next year.
    yearly_results.append(pw)

# Stitch the yearly pieces back together along the time axis and tabulate once.
pw_all = xr.concat(yearly_results, dim="time")
pw_all.name = "pm25_popweighted"  # to_dataframe() requires a named DataArray

agg_df = pw_all.to_dataframe().reset_index()

Calculating 2020
Calculating 2021
Calculating 2022
Calculating 2023
