In [None]:
import xagg as xa
import geopandas as gpd 
import xarray as xr
import pandas as pd
import pyreadstat

In [None]:
# Stata panel whose `year` column determines which calendar years the climate
# data is aggregated for (no other column of it is used in the visible cells).
stata_path = "/shared/share_hle/data/aux_data/global_mortality_panel_public.dta"

panel_df, meta = pyreadstat.read_dta(stata_path)

# Drop missing years *before* sorting: NaN does not compare with other values,
# so sorting a sequence that contains NaN gives an unreliable order. Cast to
# plain Python ints so the years format cleanly into date strings later on.
years_list = sorted(panel_df["year"].dropna().astype(int).unique().tolist())

# `years` used to be computed separately from the raw (possibly NaN-containing)
# column; keep the name defined, but point it at the cleaned list.
years = years_list

print(years_list)

In [None]:
# Table of inputs to process, read with every column as text. The processing
# loop reads its `product` and `filepath` columns.
paths_csv = "car_paths.csv"
df_paths = pd.read_csv(paths_csv, dtype=str)

# Preview the first rows.
df_paths.head()

In [None]:
# Work on an independent (deep) copy so `df_paths` keeps the table as loaded.
df = df_paths.copy(deep=True)

In [None]:
# Polygon layer that the gridded climate data is aggregated onto.
shape_path = "/shared/share_hle/data/aux_data/geo_data/impact-region.shp"

# Load the shapefile into a GeoDataFrame.
gdf = gpd.read_file(filename=shape_path)


In [None]:
# NOTE(review): no cell above this one assigns `ds`, so on a fresh kernel
# (Restart & Run All) this cell raises NameError. It only works if `ds` is
# already present in the kernel from running cells out of order.
# The per-year loop later in the notebook builds the same five reductions
# itself, and T1_sum..T4_sum / Tmean are not referenced by any other visible
# cell -- this looks like leftover scratch code; confirm before relying on it.

# Add the first four powers of `tas` to the dataset as variables T1..T4.
ds = ds.assign(T1 = ds.tas, T2 = ds.tas**2, T3 = ds.tas**3,T4 = ds.tas**4)

# Reduce along the "time" dimension: sum of each power, and the mean of `tas`.
# (presumably one timestep per day -- TODO confirm against the input data)
T1_sum = ds.T1.sum("time")
T2_sum = ds.T2.sum("time")
T3_sum = ds.T3.sum("time")
T4_sum = ds.T4.sum("time")
Tmean  = ds.tas.mean("time")

In [None]:
def open_climate(ds_path):
    """Open a climate dataset with xarray, choosing the reader from the path suffix.

    Parameters
    ----------
    ds_path : str or path-like
        Location of the dataset. A path ending in ``.zarr`` (optionally
        followed by trailing path separators, e.g. ``"store.zarr/"``) is
        opened with ``xr.open_zarr``; anything else is passed to
        ``xr.open_dataset``.

    Returns
    -------
    The object returned by ``xr.open_zarr`` or ``xr.open_dataset``.
    """
    # Zarr stores are directories, so their paths are often written with a
    # trailing separator. Strip it for the suffix test only; the path handed to
    # xarray is left exactly as the caller supplied it. `str()` also lets
    # path-like objects through, which have no `.endswith`.
    suffix_probe = str(ds_path).rstrip("/\\")
    if suffix_probe.endswith(".zarr"):
        # Read metadata directly from the store rather than from a
        # consolidated metadata file.
        return xr.open_zarr(ds_path, consolidated=False)
    return xr.open_dataset(ds_path)



In [None]:
# Aggregate every climate product onto the polygons in `gdf`, one calendar year
# at a time, and stack the per-(product, year) tables into `big`.
#
# Inputs expected from earlier cells:
#   df         -- one row per product, with `product` and `filepath` columns
#   years_list -- calendar years to process
#   gdf        -- polygons to aggregate onto
# Result:
#   big        -- concatenation of all per-(product, year) DataFrames

# Use xagg's numba implementation and silence its status messages.
xa.set_options(impl="numba", silent=True)

all_rows = []

years = years_list

for _, row in df.iterrows():
    product = row["product"]
    path = row["filepath"]
    print(f"Processing {product}")

    # Open each product exactly once, and close it when its years are done.
    # Previously every product was opened twice and never closed, so file
    # handles accumulated across the whole run.
    with open_climate(path) as ds_src:
        ds_all = ds_src.chunk({"time": 30})

        # The pixel/polygon overlap weights are computed from a single time
        # slice of this product and reused for every year.
        ds_grid = ds_all.isel(time=0, drop=True)
        wm = xa.pixel_overlaps(ds_grid, gdf)

        for yr in years:
            ds_yr = ds_all.sel(time=slice(f"{yr}-01-01", f"{yr}-12-31"))

            # A year outside this product's time range selects nothing.
            # Aggregating it would emit rows built from sums over zero
            # timesteps and a mean of nothing, which would look like real
            # observations in the output. Skip those years instead.
            if ds_yr.sizes["time"] == 0:
                print(f"  Year {yr}: no data for this product, skipped")
                continue

            print(f"  Year {yr}")

            tas = ds_yr.tas
            # Sum of tas**k over the year for k = 1..4, plus the annual mean.
            poly_vars = {f"T{k}_sum": (tas**k).sum("time") for k in range(1, 5)}
            poly_vars["Tmean"] = tas.mean("time")
            ds_poly = xr.Dataset(poly_vars)

            agg = xa.aggregate(ds_poly, wm)
            # The result is materialised into pandas here, inside the `with`
            # block, so nothing depends on the source after it is closed.
            df_reg = agg.to_dataframe().reset_index()

            df_reg["product"] = product
            df_reg["year"] = yr

            all_rows.append(df_reg)

if not all_rows:
    # pd.concat would raise a less helpful ValueError on an empty list.
    raise ValueError(
        "No (product, year) combination produced any rows; "
        "check the product table and the requested years."
    )

big = pd.concat(all_rows, ignore_index=True)

In [None]:
# Tidy the stacked table: call the region key `region`, and discard a `time`
# column if one is present.
big = big.rename(columns={"region_id": "region"})
big = big.drop(columns="time", errors="ignore")

# Put the identifying columns first; every other column keeps its order.
key_cols = ["product", "region", "year"]
cols = key_cols + [name for name in big.columns if name not in key_cols]
big = big[cols]

# Write the result as a Stata file (format version 118), without the index.
out_path = "/mnt/data/all_products_by_region_year.dta"
big.to_stata(out_path, write_index=False, version=118)
print(f"Wrote {out_path}")