In [6]:
import fsspec
import s3fs
import re
import xarray as xr

def decade_month_calc(ds: xr.Dataset, time_dim: str = "time") -> xr.Dataset:
    """Calculates the climatological mean by decade and month.

    This function computes the decade-by-decade average for each month in the provided dataset.
    The process involves averaging values across each decade for each month separately.
    For instance, for the 2050s, the function calculates the average values for January, February,
    March, and so on, resulting in 12 averaged values corresponding to each month of the 2050s.
    This approach preserves seasonal variability while smoothing out interannual variability
    within each decade.

    The function performs the following steps:
    1. Assigns new coordinates to the dataset:
       - `decade`: Represents the decade (e.g., 2050 for the 2050s).
       - `month`: Represents the month (1 for January, 2 for February, etc.).
    2. Creates a combined `decade_month` coordinate, formatted as "YYYY-MM",
       where "YYYY" is the starting year of the decade, and "MM" is the month.
    3. Groups the dataset by the `decade_month` coordinate and takes the mean
       of every group.

    Args:
        ds: Dataset holding a datetime-like coordinate named `time_dim`
            (it must support the `.dt` accessor).
        time_dim: Name of the time dimension/coordinate. It is used both to
            derive `decade`/`month` and as the dimension that carries the
            `decade_month` labels.

    Returns:
        The dataset averaged within each distinct `decade_month` label, so the
        result is indexed by `decade_month` rather than by `time_dim`.
    """
    # Read the calendar fields from the caller-supplied dimension instead of a
    # hard-coded "time", so a non-default `time_dim` is honoured everywhere.
    time_coord = ds[time_dim]
    ds = ds.assign_coords(
        decade=(time_coord.dt.year // 10) * 10, month=time_coord.dt.month
    )

    # One "YYYY-MM" label per time step; identical labels form one group below.
    ds = ds.assign_coords(
        decade_month=(
            time_dim,
            [
                f"{decade}-{month:02d}"
                for decade, month in zip(ds["decade"].values, ds["month"].values)
            ],
        )
    )

    ds = ds.groupby("decade_month").mean()

    return ds

def extract_model_from_path(path):
    """Return the path segment that sits directly before an ``ssp<digits>`` segment.

    The segment must be preceded by a ``/`` and the ``ssp<digits>`` segment must
    be followed by a ``/``. When no such pair of segments is found, the
    placeholder ``"unknown_model"`` is returned instead.
    """
    found = re.search(r"/([^/]+)/ssp\d+/", path)
    if found is None:
        return "unknown_model"
    return found.group(1)

s3_bucket = "uw-crl"
s3_prefix = "climate-risk-map/backend/climate/scenariomip/NEX-GDDP-CMIP6"
# Numeric scenario code only: the "ssp" prefix is added when the glob patterns
# are built, so use "126", "245", "370", ... (not "ssp126").
ssp = "126"

# NOTE(review): `anon=True` is not passed, so this is not an anonymous
# connection; s3fs falls back to its default credential lookup. Confirm whether
# the bucket is public and anonymous access was intended.
fs = s3fs.S3FileSystem()


def _zarr_glob_pattern(model: str) -> str:
    """Build the bucket-relative wildcard matching every Zarr store of `model`."""
    return f"{s3_bucket}/{s3_prefix}/{model}/ssp{ssp}/*/*.zarr"


def _require_stores(paths: list, pattern: str) -> list:
    """Return `paths` unchanged, failing early (naming the pattern) when it is empty."""
    if not paths:
        # FileNotFoundError subclasses OSError, so code that already catches
        # OSError around the open step keeps working.
        raise FileNotFoundError(f"No Zarr stores found for pattern: {pattern}")
    return paths


def _open_model_dataset(zarr_uris: list, model: str) -> xr.Dataset:
    """Open the stores of one model and tag the result with a length-1 `model` dimension."""
    # `preprocess` runs `decade_month_calc` on each store before they are combined.
    # NOTE(review): `data_vars=["fwi"]` tells xarray which variables to
    # concatenate; it does not drop other variables — confirm that is the intent.
    dataset = xr.open_mfdataset(zarr_uris, engine="zarr", combine="by_coords", data_vars=["fwi"], parallel=True, preprocess=decade_month_calc)
    return dataset.assign_coords(model=model).expand_dims("model")


model_name_1 = "ACCESS-CM2"
model_name_2 = "ACCESS-ESM1-5"

# Build a wildcard pattern per model to find all relevant Zarr stores
pattern_1 = _zarr_glob_pattern(model_name_1)
pattern_2 = _zarr_glob_pattern(model_name_2)

# Use fs.glob to list all Zarr stores matching each pattern
zarr_paths_1 = _require_stores(fs.glob(pattern_1), pattern_1)
zarr_paths_2 = _require_stores(fs.glob(pattern_2), pattern_2)

# Use the full S3 URIs for Xarray
zarr_uris_1 = [f"s3://{path}" for path in zarr_paths_1]
zarr_uris_2 = [f"s3://{path}" for path in zarr_paths_2]

# Open multiple Zarr datasets, one combined dataset per model
ds_1 = _open_model_dataset(zarr_uris_1, model_name_1)
ds_2 = _open_model_dataset(zarr_uris_2, model_name_2)


In [14]:
# Join the per-model datasets along the "model" dimension.
per_model_datasets = [ds_1, ds_2]
ds_combined = xr.combine_nested(per_model_datasets, concat_dim=["model"])

In [29]:
# Pick the "2050-07" decade-month label, then the grid cell nearest to
# lat=47, lon=290, and evaluate the result.
fwi_2050_07 = ds_combined.fwi.sel(decade_month="2050-07")
fwi_2050_07.sel(lat=47, lon=290, method="nearest").compute()

In [37]:
# Same "2050-07" label on the latitude nearest 47; no longitude is selected,
# so the maximum across models is taken for everything that remains.
fwi_2050_07_lat_row = ds_combined.fwi.sel(decade_month="2050-07").sel(lat=47, method="nearest")
fwi_2050_07_lat_row.max(dim="model").compute()

In [31]:
# Scratch check: arithmetic mean of two hand-copied values.
# NOTE(review): presumably the two per-model FWI values from an earlier query — confirm.
(0.47432232 + 0.41958573) /2

0.44695402500000003

In [70]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
from shapely import wkt

In [67]:
# Toy vector layer keyed by osm_id: two points, one unit-square polygon and
# one four-vertex line.
square_corners = [(2, 2), (2, 3), (3, 3), (3, 2)]
line_vertices = [(0, 0), (1, 1), (2, 2), (2, 3)]
geometries = [
    Point(1, 1),
    Point(4, 4),
    Polygon(square_corners),
    LineString(line_vertices),
]
osm_ids = [1, 2, 3, 4]
df = pd.DataFrame({"osm_id": osm_ids, "geometry": geometries})
gdf = gpd.GeoDataFrame(df, geometry="geometry").set_index("osm_id")

In [79]:
# Toy raster: a single "2020-01" slice on a 5x5 grid. Each row uses a distinct
# value range so a sampled cell can be traced back to its row.
grid_rows = [
    [10.0, 20.0, 30.0, 40.0, 50.0],
    [100.0, 200.0, 300.0, 400.0, 500.0],
    [1000.0, 2000.0, 3000.0, 4000.0, 5000.0],
    [8.0, 9.0, 10.0, 11.0, 12.0],
    [13.0, 14.0, 15.0, 16.0, 17.0],
]
data = np.array([grid_rows])  # shape (1, 5, 5): one slice, then rows, then columns

times = ["2020-01"]
x = np.array([0, 1, 2, 3, 4])
y = np.array([0, 1, 2, 3, 4])
dims = ["decade_month", "y", "x"]

# Every statistic variable is backed by the same array.
stat_names = [
    "value_mean",
    "value_median",
    "value_stddev",
    "value_min",
    "value_max",
    "value_q1",
    "value_q3",
]
ds = xr.Dataset(
    data_vars={stat_name: (dims, data) for stat_name in stat_names},
    coords={"decade_month": times, "y": y, "x": x},
)

In [76]:
# First slice, row index 2, column index 3.
data[0, 2, 3]

np.float64(4000.0)

In [80]:
# Label-based lookup of the cell at x=2, y=3.
ds.sel({"x": 2, "y": 3})

In [39]:
# Sample the first slice of `data` at each point; the leading 0 selects that slice.
# NOTE(review): each point is used as (row, column) = (y, x) given
# dims ["decade_month", "y", "x"], whereas the same tuples are the (x, y)
# vertices of the LineString fixture — confirm which order is intended.
sample_points = [(0, 0), (1, 1), (2, 2), (2, 3)]
values = [data[(0,) + sample_point] for sample_point in sample_points]

# Convert the list to a NumPy array
arr = np.array(values)


In [58]:
# First slice, row index 3, column index 2.
data[0, 3, 2]

np.float64(10.0)

In [42]:
# Display the array built from the sampled `values`.
arr

array([  10.,  200., 3000., 4000.])

In [46]:
# Scratch arithmetic: 7210 equals the sum of the displayed `arr` values
# (10 + 200 + 3000 + 4000). The divisor 9 is unexplained — TODO confirm intent.
7210/9

801.1111111111111