aaronspring · aaronspring · Oct 18, 2022 · Oct 18, 2022 · Oct 18, 2022 · Oct 18, 2022
diff --git a/catalogs/shapefiles.yaml b/catalogs/shapefiles.yaml
@@ -61,3 +61,70 @@ sources:
       storage_options:
         simplecache:
           same_names: true
+
+  GeigerKoeppen_shp:
+    description: >-
+      Köppen-Geiger climate classification shapefile
+
+      Note: combine all areas of a certain region with `dissolve("GRIDCODE")`
+    metadata:
+      url: http://koeppen-geiger.vu-wien.ac.at/
+      doi: http://dx.doi.org/10.1127/0941-2948/2006/0130
+    driver: intake_geopandas.geopandas.ShapefileSource
+    parameters:
+      period:
+        description: period start-end
+        type: str
+        default: "1976-2000"
+        allowed:
+          - "1901-1925"
+          - "1926-1950"
+          - "1951-1975"
+          - "1976-2000" # observed
+          - "2001-2025_A1FI" # future scenario A1FI
+          - "2001-2025_A2"
+          - "2001-2025_B1"
+          - "2001-2025_B2"
+          - "2026-2050_A1FI"
+          - "2026-2050_A2"
+          - "2026-2050_B1"
+          - "2026-2050_B2"
+          - "2051-2075_A1FI"
+          - "2051-2075_A2"
+          - "2051-2075_B1"
+          - "2051-2075_B2"
+          - "2076-2100_A1FI"
+          - "2076-2100_A2"
+          - "2076-2100_B1"
+          - "2076-2100_B2"
+    args:
+      urlpath: simplecache::http://koeppen-geiger.vu-wien.ac.at/data/{{period}}_GIS.zip
+      use_fsspec: true
+      storage_options:
+        simplecache:
+          same_names: true
+
+  GeigerKoeppen_xr:
+    description: >-
+      Köppen-Geiger climate classification xarray.DataArray
+
+      Note:
+          Use `transform_kwargs` to pass keyword arguments to the transform function.
+          Use `target_kwargs` (a dict keyed by target name) to pass parameters to the target.
+          See https://github.com/intake/intake/issues/638
+          ```python
+          cat.shapefiles.GeigerKoeppen_xr(
+            transform_kwargs=dict(res=5),
+            target_kwargs=dict(GeigerKoeppen_shp=dict(period="1976-2000"))
+          ).read().plot()
+          ```
+
+    metadata:
+      url: http://koeppen-geiger.vu-wien.ac.at/
+      doi: http://dx.doi.org/10.1127/0941-2948/2006/0130
+    driver: intake.source.derived.GenericTransform
+    args:
+      targets:
+        - GeigerKoeppen_shp
+      transform: "remote_climate_data.utils.geigerkoeppen.dissolve_to_xrDataArray"
+      transform_kwargs: {res: 1}
diff --git a/remote_climate_data/utils/geigerkoeppen.py b/remote_climate_data/utils/geigerkoeppen.py
@@ -0,0 +1,145 @@
+import intake
+import xarray as xr
+from matplotlib.colors import ListedColormap
+
+cat = intake.open_catalog("master.yaml").shapefiles  # NOTE(review): opened at import time; relative path, presumably resolved against the CWD
+
+cmap = ListedColormap(  # 32 hex colours; presumably one per classification class -- TODO confirm order against legend
+    [
+        "#960000",
+        "#FF0000",
+        "#FF6E6E",
+        "#FFCCCC",
+        "#CC8D14",
+        "#CCAA54",
+        "#FFCC00",
+        "#FFFF64",
+        "#007800",
+        "#005000",
+        "#003200",
+        "#96FF00",
+        "#00D700",
+        "#00AA00",
+        "#BEBE00",
+        "#8C8C00",
+        "#5A5A00",
+        "#550055",
+        "#820082",
+        "#C800C8",
+        "#FF6EFF",
+        "#646464",
+        "#8C8C8C",
+        "#BEBEBE",
+        "#E6E6E6",
+        "#6E28B4",
+        "#B464FA",
+        "#C89BFA",
+        "#C8C8FF",
+        "#6496FF",
+        "#64FFFF",
+        "#F5FFFF",
+    ]
+)
+
+
+def attach_abbrevs(f):
+    """Add abbrevs to GeoDataFrame or xr.DataArray/set."""
+    import geopandas as gpd
+    import pandas as pd
+
+    table = pd.read_fwf(
+        "http://koeppen-geiger.vu-wien.ac.at/data/legend.txt",
+        header=None,
+        names=["number", "nothing", "abbrev"],
+        index=1,
+    )
+    del table["nothing"]
+    table = table.set_index("number")
+
+    if isinstance(f, gpd.geodataframe.GeoDataFrame):
+        f = f.merge(table, left_index=True, right_index=True)
+    if isinstance(f, (xr.DataArray, xr.Dataset)):
+        f.attrs["abbrevs"] = table.abbrev.to_dict()
+    else:
+        raise NotImplementedError
+    return f
+
+
+def dissolve_to_xrDataArray(gdf, res=1):
+    """Dissolve by GRIDCODE and convert with regionmask to res degree grid."""
+    gdf = gdf.dissolve("GRIDCODE").drop("ID", axis=1)
+
+    import numpy as np
+
+    assert 180 / res == int(180 / res), "res must divide 180 without remainder"
+    grid = xr.DataArray(
+        dims=["lat", "lon"],
+        coords={
+            "lat": np.linspace(-90 + res / 2, 90 - res / 2, int(180 / res)),
+            "lon": np.linspace(-180 + res / 2, 180 - res / 2, int(360 / res)),
+        },
+    )
+
+    import regionmask
+
+    ds = regionmask.mask_geopandas(gdf, grid, wrap_lon=False)
+    ds.name = "ID"
+    ds.attrs["long_name"] = "Geiger Koeppen Classification ID"
+    ds.lon.attrs.update(
+        {
+            "standard_name": "longitude",
+            "long_name": "Longitude",
+            "units": "degrees_east",
+            "axis": "X",
+        }
+    )
+    ds.lat.attrs.update(
+        {
+            "standard_name": "latitude",
+            "long_name": "Latitude",
+            "units": "degrees_north",
+            "axis": "Y",
+        }
+    )
+    ds.attrs["processed"] = {
+        "1.": "gdf.dissolve('GRIDCODE')",
+        "2.": "create grid",
+        "3.": "regionmask.mask_geopandas(gdf, grid, wrap_lon=False)",
+    }
+    return ds
+
+
+def get_all_observed(res=1):
+    """Load all observed/historical Geiger Koeppen Classifications as xr.DataArray."""
+    obs_periods = ["1901-1925", "1926-1950", "1951-1975", "1976-2000"]
+
+    obs = []
+    for p in obs_periods:
+        gdf = cat.GeigerKoeppen_shp(period=p).read()
+        obs.append(dissolve_to_xrDataArray(gdf, res=res))
+    obs = xr.concat(obs, "period")
+    obs = obs.assign_coords(period=obs_periods)
+    obs = attach_abbrevs(obs)
+    return obs
+
+
+def get_all_future(res=1):
+    """Load all future scenario Geiger Koeppen Classifications as xr.DataArray."""
+    scenarios = ["A1FI", "A2", "B1", "B2"]
+    periods = ["2001-2025", "2026-2050", "2051-2075", "2076-2100"]
+
+    fut = []
+    for scenario in scenarios:
+        scenario_ds = []
+        for period in periods:
+            scenario_ds.append(
+                cat.GeigerKoeppen_xr(
+                    transform_kwargs=dict(res=1),
+                    target_kwargs=dict(
+                        GeigerKoeppen_shp=dict(period=f"{period}_{scenario}")
+                    ),
+                ).read()
+            )
+        fut.append(xr.concat(scenario_ds, "period"))
+    fut = xr.concat(fut, "scenario").assign_coords(scenario=scenarios, period=periods)
+    return fut
diff --git a/tests/test_geigerkoeppen.py b/tests/test_geigerkoeppen.py
@@ -0,0 +1,10 @@
+import pytest
+import xarray as xr
+
+from remote_climate_data.utils.geigerkoeppen import get_all_future, get_all_observed
+
+
+@pytest.mark.parametrize("get", [get_all_observed, get_all_future])
+def test_get_all(get):
+    """Test get_all_* returns xr.DataArray."""
+    assert isinstance(get(), xr.DataArray)