Add final calibration score

inbo · Mar 28, 2022 · 7fd2f27 · 7fd2f27
1 parent 5846437
commit 7fd2f27
Show file tree

Hide file tree

Showing 2 changed files with 98 additions and 50 deletions.
diff --git a/niche_vlaanderen/bwk_overlay.py b/niche_vlaanderen/bwk_overlay.py
@@ -5,40 +5,52 @@
     warnings.filterwarnings("ignore", category=DeprecationWarning)
     warnings.filterwarnings("ignore", category=UserWarning)
     import geopandas as gpd
+
 import pandas as pd
+import numpy as np
 from rasterstats import zonal_stats
 
 from .niche import Niche
 
 class NicheOverlay(object):
-    """ Creates a new NicheOverlay object
+    """Creates a new NicheOverlay object
 
     Overlays the BWK (Biologische waarderingskaart) with niche in order to
-    obtain the accuracy of the model
+    obtain the accuracy of the model.
 
     Parameters:
         niche: niche_vlaanderen.Niche
             Niche object containing predicted variables types according to niche.
             The model must have run prior to the overlay
         map: Path
-            Path to a file containing the BWK map in one of the formats supported by fiona.
+            Path to a file containing the BWK map in one of the formats supported
+            by fiona, eg shape.
+            must contain these attributes:
+            * HABx:
+            * pHABx:
+
         mapping_file: Path | None
-            optional file containing the mapping
-        mapping_columns: list(tuples) | None
+            optional file containing the mapping between habitat (HAB) code on BWK
+            and Niche vegetation types. An example mapping type is part of the
+            package at niche_vlaanderen/system_tables/hab_nich_join.csv
+        mapping_columns: list(dict) | None
             Optional list containing the different mappings between columns.
             If not specified, the default mapping will be used.
-            Custom mappings are defined as tuples
-            (source column in map, source field in mapping_file, to_field in mapping_file, resulting_column)
+            Custom mappings are defined as dicts:
+            {
+                map_key: source column in map,
+                join_key: source field in mapping_file,
+                join_value: to_field in mapping_file,
+                new_column: resulting_column
+            }
             eg:
-            [('HAB1', 'HAB', 'NICHE_C', 'NICH_1_1']
-
-        ct_* lnk_*: path
-          Optionally, paths to codetables can be provided. These will override
-          the standard codetables used by Niche.
+            [{'map_key': 'HAB1',
+              'join_key': 'HAB',
+              'join_value': 'NICHE_C1',
+              'new_column': 'NICH_1_1'}, ...]
 
     """
 
-
     def __init__(self, niche, map, mapping_file=None, mapping_columns=None):
 
         if type(niche) is Niche:
@@ -50,14 +62,15 @@ def __init__(self, niche, map, mapping_file=None, mapping_columns=None):
             self.mapping_columns = mapping_columns
         else:
             self.mapping_columns = {
-                'HAB1': 'HAB',
-                'HAB2': 'HAB',
-                'HAB3': 'HAB',
-                'HAB4': 'HAB'
+                "HAB1": "HAB",
+                "HAB2": "HAB",
+                "HAB3": "HAB",
+                "HAB4": "HAB",
             }
         if mapping_file is None:
             mapping_file = resource_filename(
-             "niche_vlaanderen", "system_tables/hab_niche_join.csv")
+                "niche_vlaanderen", "system_tables/hab_niche_join.csv"
+            )
 
         self.mapping = pd.read_csv(mapping_file)
 
@@ -67,52 +80,81 @@ def __init__(self, niche, map, mapping_file=None, mapping_columns=None):
             self.mapping_columns = mapping_columns
         else:
             self.mapping_columns = []
-            for i in range(1,6):
-                for j in range(1,3):
+            for i in range(1, 6):
+                for j in range(1, 3):
                     self.mapping_columns.append(
-                        {'map_key': f"HAB{i}",
-                         'join_key': "HAB",
-                         'join_value': f"NICHE_C{j}",
-                         'new_column':f"NICH_{i}_{j}"}
+                        {
+                            "map_key": f"HAB{i}",
+                            "join_key": "HAB",
+                            "join_value": f"NICHE_C{j}",
+                            "new_column": f"NICH_{i}_{j}",
+                        }
                     )
 
         # TODO: geopandas allows using a bbox or mask
+        # what should be done if file does not overlap with grid?
         self.map = gpd.read_file(map)
         self._check_mapping_columns()
 
     def _check_mapping_columns(self):
         """Checks whether the mapping columns are present in the dataset"""
-        # TODO
+        for item in self.mapping_columns:
+            print(item["map_key"])
+            if item["map_key"] not in self.map.columns:
+                raise(NicheOverlayException(f"expected column {item['map_key']} not found in shape file"))
+            if f'p{item["map_key"]}' not in self.map.columns:
+                raise (NicheOverlayException(
+                    f"expected column p{item['map_key']} not found in shape file"))
         pass
 
     def overlay(self):
         """Overlays the map and the niche object"""
 
         # add mapping columns to source
         for t in self.mapping_columns:
-            source = self.mapping[[t['join_key'], t['join_value']]].rename(columns=
-                {t['join_value']: t['new_column'],
-                 t['join_key']: t['map_key']})
-            self.map = pd.merge(self.map,
-                                source,
-                                on=t['map_key'],
-                                how="left"
-                                )
-
+            source = self.mapping[[t["join_key"], t["join_value"]]].rename(
+                columns={t["join_value"]: t["new_column"], t["join_key"]: t["map_key"]}
+            )
+            self.map = pd.merge(self.map, source, on=t["map_key"], how="left")
+        self._niche_columns = self.map.columns[self.map.columns.str.startswith("NICH")]
         # get potential presence
         self.potential_presence = self.niche.zonal_stats(self.map, outside=False)
 
-        self.potential_presence = self.potential_presence.pivot(columns=['vegetation'], index=['presence', 'shape_id'])
-        print(self.potential_presence.head())
-        print(self.map.head())
+        self.potential_presence = self.potential_presence.pivot(
+            columns=["vegetation"], index=["presence", "shape_id"]
+        )
+
+        summary = self.potential_presence.loc["no data"]["area_ha"] * np.nan
+        summary_area = self.potential_presence.loc["no data"]["area_ha"] * np.nan
+        # Only if actual present: (pHAB * present) / (present + not present)
+        for i, row in self.map.iterrows():
+            for veg in self._niche_columns:
+                if np.isfinite(row[veg]) and row[veg] != 0:
+                    area_pot = (
+                        self.potential_presence.loc["present"]
+                        .loc[i]
+                        .loc["area_ha"][row[veg]]
+                    )
+                    area_nopot = (
+                        self.potential_presence.loc["not present"]
+                        .loc[i]
+                        .loc["area_ha"][row[veg]]
+                    )
 
-        # actual presence - optimistic
-        # sum (presence * area) / sum (area)
+                    # use correct pHAB for vegetation
+                    # note that veg has this form: NICH_1_2 -> position 5 is the corresponding
+                    # pHAB part.
+                    pHab = row["pHAB" + veg[5]]
+                    summary[row[veg]].loc[i] = pHab * area_pot
+                    summary_area[row[veg]].loc[i] = area_pot + area_nopot
 
-        # Summarize per vegetation type
-        self.summary = self.potential_presence.loc["present"].sum() /(self.potential_presence.loc["not present"].sum() +  no.potential_presence.loc["present"].sum())
+            pass
+        self.summary = summary
+        self.summary_area = summary_area
 
-        # combine
+        self.score = self.summary.sum() / self.summary_area.sum()
 
         # aggregate
 
+class NicheOverlayException(Exception):
+    msg = "Error using Niche Overlay"
diff --git a/tests/test_bwk_overlay.py b/tests/test_bwk_overlay.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 from niche_vlaanderen.niche import Niche
 from niche_vlaanderen.bwk_overlay import NicheOverlay
 
@@ -6,20 +8,24 @@ def test_overlay():
     nv = Niche()
     nv.run_config_file("tests/data/bwk/niche_brasschaat/simple.yaml")
 
+    no = NicheOverlay(niche=nv, map="tests/data/bwk/bkw_brasschaat_part1.shp")
+    no.overlay()
 
     # get the index of a record we know should have a nich_1_1 value
-    test_index = no.map.reset_index().set_index("OBJECTID_1").loc[[2662]]['index'].values[0]
+    test_index = (
+        no.map.reset_index().set_index("OBJECTID_1").loc[[2662]]["index"].values[0]
+    )
     assert test_index == 48
     item = no.map.iloc[test_index]
-    assert item['NICH_1_1'] == 6
-    assert item['NICH_1_2'] == 6
-
-    # this should also have a potential presence
-    no.potential_presence.loc[test_index].loc[6].loc['present']
+    assert item["NICH_1_1"] == 6
+    assert item["NICH_1_2"] == 6
 
-    assert False
+    # calibration score
+    assert np.isclose(no.score[6], 77.872340)
 
 
-def test_overlay2():
+def test_overlay_artificial():
     nv = Niche()
     nv.run_config_file("tests/data/bwk/niche_brasschaat/simple.yaml")
+    no = NicheOverlay(niche=nv, map="tests/data/bwk/bwk_selection.shp")
+    no.overlay()