In [12]:
#!/usr/bin/env python
from pathlib import Path
import pandas as pd

# 1) where your raw climate folders live
CLIM_ROOT = Path("/shared/share_hle/data/climate_raw")

# 2) the folders you expect to exist under CLIM_ROOT
products = ["GMFD", "ERA5-025", "MERRA2", "JRA-3Q"]

rows = []
for prod in products:
    prod_dir = CLIM_ROOT / prod
    if not prod_dir.exists():
        print(f"⚠️  Missing directory for {prod}: {prod_dir}")
        rows.append({"product": prod, "tas_filepath": "", "precip_filepath": ""})
        continue

    # look only for the daily files in that directory
    tas_hits = list(prod_dir.glob("tas_day*.zarr")) + list(prod_dir.glob("tas_day*.nc"))
    pr_hits  = list(prod_dir.glob("pr_day*.zarr"))  + list(prod_dir.glob("pr_day*.nc"))

    if not tas_hits:
        print(f"⚠️  No tas_day file found for {prod}")
    if not pr_hits:
        print(f"⚠️  No pr_day file found for {prod}")

    rows.append({
        "product": prod,
        "tas_filepath": str(tas_hits[0]) if tas_hits else "",
        "precip_filepath": str(pr_hits[0]) if pr_hits else ""
    })

# write it out
df = pd.DataFrame(rows)
df.to_csv("car_paths.csv", index=False)
print("\n✔️  Generated car_paths.csv:\n")
print(df.to_string(index=False))


⚠️  No pr_day file found for MERRA2

✔️  Generated car_paths.csv:

 product                                                                                              tas_filepath                                                                                          precip_filepath
    GMFD         /shared/share_hle/data/climate_raw/GMFD/tas_day_GMFD_historical_reanalysis_19810101-20101231.zarr         /shared/share_hle/data/climate_raw/GMFD/pr_day_GMFD_historical_reanalysis_19810101-20101231.zarr
ERA5-025 /shared/share_hle/data/climate_raw/ERA5-025/tas_day_ERA5-025_historical_reanalysis_19810101-20241231.zarr /shared/share_hle/data/climate_raw/ERA5-025/pr_day_ERA5-025_historical_reanalysis_19810101-20231231.zarr
  MERRA2       /shared/share_hle/data/climate_raw/MERRA2/tas_day_MERRA2_historical_reanalysis_19800101-20240801.nc                                                                                                         
  JRA-3Q       /shared/share_hle/data/climate_raw/JRA

In [13]:
import geopandas as gpd

shape_path = (
    "/shared/share_hle/data/1_estimation/3_regions/"
    "insample_shp/mortality_insample_world.shp"
)
gdf = gpd.read_file(shape_path, engine="fiona").to_crs("EPSG:4326")

# Quick diagnostic: how many unique polygons per adm1 vs. adm2?
print("unique ADM1:", gdf["adm1_id"].nunique())
print("unique ADM2:", gdf["adm2_id"].nunique())


unique ADM1: 178
unique ADM2: 11010


In [9]:
from pathlib import Path

CLIM_ROOT = Path("/shared/share_hle/data/climate_raw")

print("Top‑level directories:")
for p in CLIM_ROOT.iterdir():
    if p.is_dir():
        print("  ", p.name)
print("\nSome .zarr & .nc stores found (first 10):")
for p in CLIM_ROOT.rglob("*.*"):
    if p.suffix in (".zarr", ".nc"):
        print("  ", p.relative_to(CLIM_ROOT))

Top‑level directories:
   GMFD
   ERA5-025
   JRA-3Q
   MERRA2

Some .zarr & .nc stores found (first 10):
   GMFD/pr_day_GMFD_historical_reanalysis_19810101-20101231.zarr
   GMFD/tas_day_GMFD_historical_reanalysis_19810101-20101231.zarr
   GMFD/pr_Amon_GMFD_historical_reanalysis_19810131-20101231.zarr
   ERA5-025/tasmax_day_ERA5-025_historical_reanalysis_19810101-20241231_CONUS.zarr
   ERA5-025/pr_Amon_ERA5-025_historical_reanalysis_19810101-20221201.zarr
   ERA5-025/tas_day_ERA5-025_historical_reanalysis_19810101-20241231.zarr
   JRA-3Q/pr_Amon_JRA-3Q_historical_reanalysis_19810101-20241231.nc
   JRA-3Q/tas_day_JRA-3Q_historical_reanalysis_19810101-20241231.nc
   JRA-3Q/pr_day_JRA-3Q_historical_reanalysis_19810101-20241231.nc
   JRA-3Q/tasmax_day_JRA-3Q_historical_reanalysis_19810101-20241231.nc
   MERRA2/tasmin_day_MERRA2_historical_reanalysis_19800101-20241231.nc
   MERRA2/prcorr_Amon_MERRA2_historical_reanalysis_19800101-20250530.nc
   MERRA2/pr_Amon_MERRA2_historical_reanalysis_19