Skip to content

Commit

Permalink
Merge pull request #28 from ai4er-cdt/feature/graph-analysis-identifi…
Browse files Browse the repository at this point in the history
…cation

Feature/graph analysis identification
  • Loading branch information
Croydon-Brixton committed Mar 3, 2021
2 parents de5470b + 29beff4 commit 7d937c4
Show file tree
Hide file tree
Showing 28 changed files with 6,465 additions and 103 deletions.
1,409 changes: 1,409 additions & 0 deletions notebooks/exploratory/svm-10-optimise-identify-node.ipynb

Large diffs are not rendered by default.

1,976 changes: 1,976 additions & 0 deletions notebooks/exploratory/svm-7-getting-to-know-graph.ipynb

Large diffs are not rendered by default.

2,488 changes: 2,488 additions & 0 deletions notebooks/exploratory/svm-8-generate-test-cases.ipynb

Large diffs are not rendered by default.

77 changes: 76 additions & 1 deletion src/data_loading/landcover_plot_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""A collection of utility functions for plotting landcover datasets"""
"""A collection of utility functions for plotting landcover datasets."""
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import Patch
from numba import njit

from src.constants import ESA_LANDCOVER_DIR


Expand Down Expand Up @@ -71,3 +76,73 @@ def classes_to_rgb(
rgb_data[i, j, ...] = class_to_rgb[data[i, j]]

return rgb_data


def plot_landcover(
data: np.ndarray,
ax: Optional[plt.Axes] = None,
landcover_class_df: pd.DataFrame = ESA_CCI_CLASSES,
with_legend: bool = True,
) -> None:
"""
Plot array with landcover data with colors and legend.
Convenience function to plot landcover data with labels and RGB values in the
data`lddatar_class_df`. The format of that dataframe should be:
- Rows indexed by landcover class (int)
- Cdatacontaining the substring `label` must exist for plotting with legend
- Columns with names `R`, `G`, `B` must exist with integers corresponding to RGB
values for the landcover classes.
Args:
data (np.ndarray): The landcover data to plot. Values should be the landcover
classes.
ax (Optional[pldatas], optional): matplotlib.Axes object to add the plot to an
existing canvas. Defaults to None.
landcover_class_df (Optional[pd.DataFrame], optional): A dataframe containing
the RGB values to color each class with and a class label for each class.
Defaults to ESA_CCI_CLASSES.
with_legend (bool, optional): whether to add legend with class labels.
Defaults to True.
"""

# Remove unused dimensions
data = data.squeeze()
# Check right format
assert data.ndim == 2, "`image` must be 2 dimensional"

# Create canvas to plot onto, if needed
if not ax:
_, ax = plt.subplots(figsize=(15, 15))

# Plot image
ax.imshow(
classes_to_rgb(data, class_to_rgb=_class_rgb_array_from_df(landcover_class_df))
)

# Remove axes
ax.set_yticks([])
ax.set_xticks([])

# Create legend
if with_legend:
# Automatically identify label column name
label_colname = landcover_class_df.columns[
landcover_class_df.columns.str.contains("label", case=False)
][0]

# Filter only the landcover classes that appear in the image
present_classes = np.unique(data)

# Generate legend
legend_elements = []
for _, row in landcover_class_df.loc[present_classes].iterrows():
class_name = row[label_colname]
class_rgba = row[["R", "G", "B"]].values / 255.0 # plt wants values in 0-1
legend_elements.append(Patch(label=class_name, facecolor=class_rgba))
ax.legend(
handles=legend_elements,
bbox_to_anchor=(1.01, 1), # places legend to the right of the image
loc="upper left", # anchor for the bbox_to_anchor statement
prop={"size": 18}, # fontsize in legend
)
10 changes: 5 additions & 5 deletions src/data_loading/rasterio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def read_from_lat_lon(

def polygonise(
data_array: np.ndarray,
mask: Optional[np.ndarray],
transform: affine.Affine,
crs: Optional[str],
mask: Optional[np.ndarray] = None,
transform: affine.Affine = affine.identity,
crs: Optional[str] = None,
connectivity: int = 4,
apply_buffer: bool = True,
):
Expand Down Expand Up @@ -149,12 +149,12 @@ def polygonise(
Returns:
gpd.GeoDataFrame: GeoDataFrame containing polygon objects.
"""
poly_generator = shapes(
polygon_generator = shapes(
data_array, mask=mask, connectivity=connectivity, transform=transform
)
results = list(
{"properties": {"class_label": int(val)}, "geometry": shape}
for shape, val in poly_generator
for shape, val in polygon_generator
)
df = gpd.GeoDataFrame.from_features(results, crs=crs)
if apply_buffer:
Expand Down
2 changes: 1 addition & 1 deletion src/data_loading/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_square_row(
square_coords = [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]
# scaling up by square_side_len
square_coords = [
tuple([coord * square_side_len for coord in coords]) for coords in square_coords
tuple(coord * square_side_len for coord in coords) for coords in square_coords
]
# setting to origin coords
square_coords = [[sum(x) for x in zip(coords, origin)] for coords in square_coords]
Expand Down
54 changes: 54 additions & 0 deletions src/models/binary_graph_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Contains tools for binary operations between GeoGraph objects."""
from numpy import ndarray
from src.models.polygon_utils import (
connect_with_interior_bulk,
connect_with_interior_or_edge_bulk,
connect_with_interior_or_edge_or_corner_bulk,
)

# For switching identifiction mode in `identify_node`
_BULK_SPATIAL_IDENTIFICATION_FUNCTION = {
"corner": connect_with_interior_or_edge_or_corner_bulk,
"edge": connect_with_interior_or_edge_bulk,
"interior": connect_with_interior_bulk,
}


def identify_node(node: dict, other_graph: "GeoGraph", mode: str = "corner") -> ndarray:
"""
Return list of all node ids in `other_graph` which identify with the given `node`.
Args:
node (dict): The node for which to find nodes in `other_graphs` that can be
identified with `node`.
other_graph (GeoGraph): The GeoGraph object in which to search for
identifications
mode (str, optional): Must be one of `corner`, `edge` or `interior`. Defaults
to "corner".
The different modes correspond to different rules for identification:
- corner: Polygons of the same `class_label` which overlap, touch in their
edges or corners will be identified with each other. (fastest)
- edge: Polygons of the same `class_label` which overlap or touch in their
edges will be identified with each other.
- interior: Polygons of the same `class_label` which overlap will be
identified with each other. Touching corners or edges are not counted.
Returns:
np.ndarray: List of node ids in `other_graph` which identify with `node`.
"""
# Mode switch
assert mode in ["corner", "edge", "interior"]
have_valid_overlap = _BULK_SPATIAL_IDENTIFICATION_FUNCTION[mode]

# Get potential candidates for overlap
candidate_ids = other_graph.rtree.query(node["geometry"], sort=True)
# Filter candidates according to the same class label
candidate_ids = candidate_ids[
other_graph.class_label[candidate_ids] == node["class_label"]
]
# Filter candidates accroding to correct spatial overlap
candidate_ids = candidate_ids[
have_valid_overlap(node["geometry"], other_graph.geometry[candidate_ids])
]

return candidate_ids
75 changes: 59 additions & 16 deletions src/models/geograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
See https://networkx.org/documentation/stable/index.html for graph operations.
"""
from __future__ import annotations

import bz2
import gzip
import os
Expand All @@ -20,7 +22,8 @@
import rasterio
import shapely
from shapely.prepared import prep
from src.data_loading.rasterio_utils import polygonise
from src.data_loading import rasterio_utils
from src.models import binary_graph_operations
from tqdm import tqdm

VALID_EXTENSIONS = (
Expand Down Expand Up @@ -52,7 +55,7 @@ class GeoGraph:
def __init__(
self,
data,
crs: Union[str, pyproj.CRS],
crs: Optional[Union[str, pyproj.CRS]] = None,
graph_save_path: Optional[Union[str, os.PathLike]] = None,
raster_save_path: Optional[Union[str, os.PathLike]] = None,
columns_to_rename: Optional[Dict[str, str]] = None,
Expand Down Expand Up @@ -116,7 +119,7 @@ def __init__(
super().__init__()
self.graph = nx.Graph()
self._habitats: Dict[str, Habitat] = {}
self._crs: Union[str, pyproj.CRS] = crs
self._crs: Optional[Union[str, pyproj.CRS]] = crs
self._columns_to_rename: Optional[Dict[str, str]] = columns_to_rename
self._tolerance: float = tolerance

Expand Down Expand Up @@ -193,6 +196,22 @@ def crs(self):
"""Return crs of dataframe."""
return self.df.crs

@property
def class_label(self):
"""Return class label of nodes directly from underlying numpy array.
Note: Uses `iloc` type indexing.
"""
return self.df.class_label.values

@property
def geometry(self):
"""Return geometry of nodes from underlying numpy array.
Note: Uses `iloc` type indexing.
"""
return self.df.geometry.values

def _load_from_vector_path(
self,
vector_path: pathlib.Path,
Expand Down Expand Up @@ -269,12 +288,13 @@ def _load_from_raster(
Returns:
gpd.GeoDataFrame: The dataframe containing polygon objects.
"""
vector_df = polygonise(data_array=data_array, **raster_kwargs)
vector_df = rasterio_utils.polygonise(data_array=data_array, **raster_kwargs)
if save_path is not None:
if save_path.suffix == ".gpkg":
vector_df.to_file(save_path, driver="GPKG")
else:
vector_df.to_file(save_path, driver="ESRI Shapefile")
save_path.chmod(0o664)
return self._load_from_dataframe(vector_df, tolerance=self._tolerance)

def _load_from_graph_path(self, graph_path: pathlib.Path) -> gpd.GeoDataFrame:
Expand Down Expand Up @@ -319,6 +339,7 @@ def _save_graph(self, save_path: pathlib.Path) -> None:
else:
with open(save_path, "wb") as file:
pickle.dump(data, file)
save_path.chmod(0o664)

def _load_from_dataframe(
self,
Expand Down Expand Up @@ -403,6 +424,8 @@ def _load_from_dataframe(
if polygon_id != neighbour_id:
self.graph.add_edge(polygon_id, neighbour_id)

# add index name
df.index.name = "node_index"
return df

def merge_nodes(
Expand Down Expand Up @@ -502,17 +525,26 @@ def add_habitat(
# Remove all edges in the graph, then at the end we only have edges
# between nodes less than `max_travel_distance` apart
hgraph.clear_edges()
# Get lists of polygons and buff polygons to avoid repeatedly querying
# the dataframe
polygons: List[shapely.Polygon] = self.df.geometry.tolist()
# Get dict to convert between iloc indexes and loc indexes
# These are different only if nodes have been removed from the df
idx_dict: Dict[int, int] = dict(zip(range(len(self.df)), self.df.index.values))
# Get dicts of polygons and buff polygons to avoid repeatedly querying
# the dataframe. These dicts accept loc indexes
polygons: Dict[int, shapely.Polygon] = self.df.geometry.to_dict()
if max_travel_distance > 0:
# Vectorised buffer on the entire df to calculate the expanded polygons
# used to get intersections.
buff_polygons = self.df.geometry.buffer(max_travel_distance).tolist()
buff_polygons = self.df.geometry.buffer(max_travel_distance).to_dict()
# Remove non-habitat nodes from habitat graph
invalid_idx = set(
self.df.loc[~self.df["class_label"].isin(set(valid_classes))].index
)
# np.where is very fast here and gets the iloc based indexes
# Combining it with the set comprehension reduces time by an order of
# magnitude compared to `set(self.df.loc()`
invalid_idx = {
idx_dict[i]
for i in np.where(~self.df["class_label"].isin(set(valid_classes)).values)[
0
]
}
hgraph.remove_nodes_from(invalid_idx)

for node in tqdm(
Expand All @@ -527,16 +559,20 @@ def add_habitat(
buff_poly = prep(polygon)
# Query rtree for all polygons within `max_travel_distance` of the original
for nbr in self.rtree.intersection(buff_poly_bounds):
# Necessary to correct for the rtree returning iloc indexes
nbr = idx_dict[nbr]
# If a node is not a habitat class node, don't add the edge
if nbr in invalid_idx:
if nbr == node or nbr in invalid_idx:
continue
# Otherwise add the edge with distance attribute
nbr_polygon = polygons[nbr]
if not hgraph.has_edge(node, nbr) and buff_poly.intersects(nbr_polygon):
if add_distance:
hgraph.add_edge(
node, nbr, distance=polygon.distance(nbr_polygon)
)
if max_travel_distance == 0:
dist = 0.0
else:
dist = polygon.distance(nbr_polygon)
hgraph.add_edge(node, nbr, distance=dist)
else:
hgraph.add_edge(node, nbr)
# Add habitat to habitats dict
Expand Down Expand Up @@ -586,6 +622,13 @@ def get_graph_components(
graph components.
"""
components: List[set] = list(nx.connected_components(graph))
geom = [self.df.geometry.iloc[list(comp)].unary_union for comp in components]
geom = [self.df.geometry.loc[comp].unary_union for comp in components]
gdf = gpd.GeoDataFrame({"geometry": geom}, crs=self.df.crs)
return gdf, components

def identify_node(
self, node_id: int, other_graph: GeoGraph, mode: str
) -> List[int]:
return binary_graph_operations.identify_node(
self.df.loc[node_id], other_graph=other_graph, mode=mode
).tolist()

0 comments on commit 7d937c4

Please sign in to comment.