ai4er-cdt · Croydon-Brixton · Mar 3, 2021 · Feb 19, 2021 · Feb 22, 2021 · Feb 22, 2021
diff --git a/notebooks/exploratory/svm-10-optimise-identify-node.ipynb b/notebooks/exploratory/svm-10-optimise-identify-node.ipynb
diff --git a/notebooks/exploratory/svm-7-getting-to-know-graph.ipynb b/notebooks/exploratory/svm-7-getting-to-know-graph.ipynb
diff --git a/notebooks/exploratory/svm-8-generate-test-cases.ipynb b/notebooks/exploratory/svm-8-generate-test-cases.ipynb
diff --git a/src/data_loading/landcover_plot_utils.py b/src/data_loading/landcover_plot_utils.py
@@ -1,7 +1,12 @@
-"""A collection of utility functions for plotting landcover datasets"""
+"""A collection of utility functions for plotting landcover datasets."""
+from typing import Optional
+
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from matplotlib.patches import Patch
 from numba import njit
+
 from src.constants import ESA_LANDCOVER_DIR
 
 
@@ -71,3 +76,73 @@ def classes_to_rgb(
             rgb_data[i, j, ...] = class_to_rgb[data[i, j]]
 
     return rgb_data
+
+
+def plot_landcover(
+    data: np.ndarray,
+    ax: Optional[plt.Axes] = None,
+    landcover_class_df: pd.DataFrame = ESA_CCI_CLASSES,
+    with_legend: bool = True,
+) -> None:
+    """
+    Plot array with landcover data with colors and legend.
+
+    Convenience function to plot landcover data with labels and RGB values in the
+    dataframe `landcover_class_df`. The format of that dataframe should be:
+        - Rows indexed by landcover class (int)
+        - Column containing the substring `label` must exist for plotting with legend
+        - Columns with names `R`, `G`, `B` must exist with integers corresponding to RGB
+          values for the landcover classes.
+
+    Args:
+        data (np.ndarray): The landcover data to plot. Values should be the landcover
+            classes. Must be 2 dimensional once length-1 axes are squeezed out.
+        ax (Optional[plt.Axes], optional): matplotlib.Axes object to add the plot to an
+            existing canvas. Defaults to None, which creates a new figure.
+        landcover_class_df (pd.DataFrame, optional): A dataframe containing
+            the RGB values to color each class with and a class label for each class.
+            Defaults to ESA_CCI_CLASSES.
+        with_legend (bool, optional): whether to add legend with class labels.
+            Defaults to True.
+
+    Raises:
+        AssertionError: If `data` is not 2 dimensional after squeezing.
+        IndexError: If a legend is requested but no column name contains `label`.
+    """
+    # Drop length-1 axes, then insist on a plain 2D array of classes
+    data = data.squeeze()
+    assert data.ndim == 2, "`data` must be 2 dimensional"
+
+    # Create canvas to plot onto, if needed (explicit None check, not truthiness)
+    if ax is None:
+        _, ax = plt.subplots(figsize=(15, 15))
+
+    # Plot image
+    ax.imshow(
+        classes_to_rgb(data, class_to_rgb=_class_rgb_array_from_df(landcover_class_df))
+    )
+
+    # Remove axes
+    ax.set_yticks([])
+    ax.set_xticks([])
+
+    # Create legend
+    if with_legend:
+        # Automatically identify label column name (first case-insensitive match)
+        label_colname = landcover_class_df.columns[
+            landcover_class_df.columns.str.contains("label", case=False)
+        ][0]
+
+        # Only the landcover classes that appear in the image get a legend entry
+        present_classes = np.unique(data)
+        legend_elements = []
+        for _, row in landcover_class_df.loc[present_classes].iterrows():
+            class_name = row[label_colname]
+            class_rgb = row[["R", "G", "B"]].values / 255.0  # plt wants values in 0-1
+            legend_elements.append(Patch(label=class_name, facecolor=class_rgb))
+        ax.legend(
+            handles=legend_elements,
+            bbox_to_anchor=(1.01, 1),  # places legend to the right of the image
+            loc="upper left",  # anchor for the bbox_to_anchor statement
+            prop={"size": 18},  # fontsize in legend
+        )
diff --git a/src/data_loading/rasterio_utils.py b/src/data_loading/rasterio_utils.py
@@ -115,9 +115,9 @@ def read_from_lat_lon(
 
 def polygonise(
     data_array: np.ndarray,
-    mask: Optional[np.ndarray],
-    transform: affine.Affine,
-    crs: Optional[str],
+    mask: Optional[np.ndarray] = None,
+    transform: affine.Affine = affine.identity,
+    crs: Optional[str] = None,
     connectivity: int = 4,
     apply_buffer: bool = True,
 ):

diff --git a/src/data_loading/test_data.py b/src/data_loading/test_data.py
@@ -43,7 +43,7 @@ def get_square_row(
     square_coords = [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]
     # scaling up by square_side_len
     square_coords = [
-        tuple([coord * square_side_len for coord in coords]) for coords in square_coords
+        tuple(coord * square_side_len for coord in coords) for coords in square_coords
     ]
     # setting to origin coords
     square_coords = [[sum(x) for x in zip(coords, origin)] for coords in square_coords]

diff --git a/src/models/binary_graph_operations.py b/src/models/binary_graph_operations.py
@@ -0,0 +1,118 @@
+"""Contains tools for binary operations between GeoGraph objects"""
+from typing import List
+from numpy import ndarray
+from src.models.polygon_utils import (
+    connect_with_interior,
+    connect_with_interior_or_edge,
+    connect_with_interior_or_edge_or_corner,
+    connect_with_interior_bulk,
+    connect_with_interior_or_edge_bulk,
+    connect_with_interior_or_edge_or_corner_bulk,
+)
+
+# Mode -> predicate tables: pairwise for `identify_node_old`, bulk for `identify_node`
+_SPATIAL_IDENTIFICATION_FUNCTION = {
+    "corner": connect_with_interior_or_edge_or_corner,
+    "edge": connect_with_interior_or_edge,
+    "interior": connect_with_interior,
+}
+_BULK_SPATIAL_IDENTIFICATION_FUNCTION = {
+    "corner": connect_with_interior_or_edge_or_corner_bulk,
+    "edge": connect_with_interior_or_edge_bulk,
+    "interior": connect_with_interior_bulk,
+}
+
+
+def identify_node(node: dict, other_graph: "GeoGraph", mode: str = "corner") -> ndarray:
+    """
+    Find the ids of all nodes in `other_graph` that identify with `node`.
+
+    Candidates come from the rtree of `other_graph`; they are narrowed first to
+    nodes with the same `class_label` as `node`, then to nodes whose geometry
+    passes the bulk spatial predicate selected by `mode`.
+
+    Args:
+        node (dict): The node to identify. Its `geometry` and `class_label`
+            entries are read.
+        other_graph (GeoGraph): The GeoGraph object whose nodes are searched.
+        mode (str, optional): Must be one of `corner`, `edge` or `interior`.
+            Defaults to "corner". It sets which contact between polygons of the
+            same `class_label` counts as an identification:
+            - corner: overlapping, or touching in their edges or corners.
+            - edge: overlapping, or touching in their edges.
+            - interior: overlapping only. Touching corners or edges are not
+                counted.
+
+    Returns:
+        np.ndarray: Node ids in `other_graph` which identify with `node`.
+    """
+
+    # Resolve the bulk predicate for the requested mode
+    assert mode in ["corner", "edge", "interior"]
+    spatial_predicate = _BULK_SPATIAL_IDENTIFICATION_FUNCTION[mode]
+
+    # Pull out the two node entries used by the filters below
+    node_geometry = node["geometry"]
+    node_label = node["class_label"]
+
+    # Stage 1: candidate ids returned by the rtree query for this geometry
+    rtree_hits = other_graph.rtree.query(node_geometry, sort=True)
+
+    # Stage 2: keep the candidates that carry the same class label as `node`
+    hit_labels = other_graph._class_label(  # pylint: disable=protected-access
+        rtree_hits
+    )
+    same_class_hits = rtree_hits[hit_labels == node_label]
+
+    # Stage 3: keep the same-class candidates that pass the spatial predicate
+    hit_geometries = other_graph._geometry(  # pylint: disable=protected-access
+        same_class_hits
+    )
+    overlap_mask = spatial_predicate(node_geometry, hit_geometries)
+
+    return same_class_hits[overlap_mask]
+
+
+### Deprecated but kept for tests and backward compatibility
+def identify_node_old(
+    node: dict, other_graph: "GeoGraph", mode: str = "corner"
+) -> List[int]:
+    """
+    Collect the ids of all nodes in `other_graph` that identify with `node`.
+
+    Deprecated counterpart of `identify_node` that tests one candidate at a time.
+    Candidates are the rtree hits for the bounding box of `node`; each one is
+    checked against the class label and the spatial predicate selected by `mode`.
+
+    Args:
+        node (dict): The node to identify. Its `geometry` and `class_label`
+            entries are read.
+        other_graph (GeoGraph): The GeoGraph object in which to search for
+            identifications.
+        mode (str, optional): Must be one of `corner`, `edge` or `interior`.
+            Defaults to "corner". It sets which contact between polygons of the
+            same `class_label` counts as an identification:
+            - corner: overlapping, or touching in their edges or corners.
+            - edge: overlapping, or touching in their edges.
+            - interior: overlapping only. Touching corners or edges are not
+                counted.
+
+    Returns:
+        List[int]: Node ids in `other_graph` which identify with `node`.
+    """
+
+    # Resolve the pairwise predicate for the requested mode
+    assert mode in ["corner", "edge", "interior"]
+    spatial_predicate = _SPATIAL_IDENTIFICATION_FUNCTION[mode]
+
+    def _identifies(candidate_id):
+        """Tell whether the node at position `candidate_id` identifies with `node`."""
+        candidate = other_graph.df.iloc[candidate_id]
+        labels_match = node["class_label"] == candidate["class_label"]
+        return labels_match and spatial_predicate(
+            node["geometry"], candidate["geometry"]
+        )
+
+    # Query the rtree by bounding box, then keep the hits that pass both checks
+    bbox_hits = other_graph.rtree.intersection(node["geometry"].bounds)
+    return [candidate_id for candidate_id in bbox_hits if _identifies(candidate_id)]
diff --git a/src/models/geograph.py b/src/models/geograph.py
@@ -3,6 +3,7 @@
 
 See https://networkx.org/documentation/stable/index.html for graph operations.
 """
+from __future__ import annotations
 import bz2
 import gzip
 import os
@@ -11,7 +12,7 @@
 from copy import deepcopy
 from dataclasses import dataclass
 from itertools import zip_longest
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union, Sequence
 
 import geopandas as gpd
 import networkx as nx
@@ -21,6 +22,7 @@
 import shapely
 from shapely.prepared import prep
 from src.data_loading.rasterio_utils import polygonise
+from src.models.binary_graph_operations import identify_node
 from tqdm import tqdm
 
 VALID_EXTENSIONS = (
@@ -52,7 +54,7 @@ class GeoGraph:
     def __init__(
         self,
         data,
-        crs: Union[str, pyproj.CRS],
+        crs: Union[str, pyproj.CRS] = None,
         graph_save_path: Optional[Union[str, os.PathLike]] = None,
         raster_save_path: Optional[Union[str, os.PathLike]] = None,
         columns_to_rename: Optional[Dict[str, str]] = None,
@@ -193,6 +195,14 @@ def crs(self):
         """Return crs of dataframe."""
         return self.df.crs
 
+    def _class_label(self, node_ids: Sequence[int]):
+        """Return class labels at positions `node_ids` of the `class_label` array."""
+        return self.df.class_label.values[node_ids]
+
+    def _geometry(self, node_ids: Sequence[int]):
+        """Return geometries at positions `node_ids` of the `geometry` array."""
+        return self.df.geometry.values[node_ids]
+
     def _load_from_vector_path(
         self,
         vector_path: pathlib.Path,
@@ -403,6 +413,8 @@ def _load_from_dataframe(
                 if polygon_id != neighbour_id:
                     self.graph.add_edge(polygon_id, neighbour_id)
 
+        # add index name
+        df.index.name = "node_index"
         return df
 
     def merge_nodes(
@@ -528,7 +540,7 @@ def add_habitat(
             # Query rtree for all polygons within `max_travel_distance` of the original
             for nbr in self.rtree.intersection(buff_poly_bounds):
                 # If a node is not a habitat class node, don't add the edge
-                if nbr in invalid_idx:
+                if nbr != node or nbr in invalid_idx:
                     continue
                 # Otherwise add the edge with distance attribute
                 nbr_polygon = polygons[nbr]
@@ -589,3 +601,8 @@ def get_graph_components(
         geom = [self.df.geometry.iloc[list(comp)].unary_union for comp in components]
         gdf = gpd.GeoDataFrame({"geometry": geom}, crs=self.df.crs)
         return gdf, components
+
+    def identify_node(self, node_id: int, other_graph: GeoGraph, mode: str = "corner"):
+        """Return ids of nodes in `other_graph` that identify with node `node_id`."""
+        # `node_id` is positional (`iloc`); `mode` matches the module-level default.
+        return identify_node(self.df.iloc[node_id], other_graph=other_graph, mode=mode)
diff --git a/src/models/graph_tools.py b/src/models/graph_tools.py