# Walkthrough Notebook

Notes:

data["geometry"][10] is a good counterexample: it shows the issue on the LHS, where the boundary needs to go OUT but the current method does not do this.

data["geometry"][257] (Heyroyd) is weird and shows issues with OSM. There is something that SHOULD be a residential area at the top, but the line goes straight through. Just grey "nothing" - no tags. This is the base layer so has small borders around everything, meaning it will be hard to select 'all that does not have a tag' as a polygon in and of itself. Maybe SAM could work, but this is getting difficult now.

In [None]:
import urllib
import urllib.parse

import geopandas as gpd
import numpy as np
import osmnx as ox
import plotly.graph_objects as go
import polars as pl
import requests
from brdr.aligner import Aligner
from brdr.enums import OpenbaarDomeinStrategy
from brdr.loader import DictLoader
from shapely import LineString, MultiLineString, MultiPolygon, Polygon
from shapely.ops import unary_union
from shapely.wkt import loads

In [None]:
# Datasette endpoint that serves the conservation-area dataset as CSV.
datasette_base_url = "https://datasette.planning.data.gov.uk/conservation-area.csv"

# Pull every row of the `entity` table.
query = """
select * 
from entity
"""
encoded_query = urllib.parse.urlencode({"sql": query})

# NOTE(review): the basic-auth pair looks like a placeholder - confirm whether
# the endpoint needs credentials at all.
r = requests.get(
    f"{datasette_base_url}?{encoded_query}",
    auth=("user", "pass"),
    timeout=60,  # do not hang the notebook forever on a stalled connection
)
# Fail loudly on an HTTP error instead of saving an error page as the "CSV".
r.raise_for_status()

# Cache the raw response on disk, then load it with polars.
filename = "datasette_data.csv"
with open(filename, "wb") as f_out:
    f_out.write(r.content)

data = pl.read_csv(filename)

In [None]:
def download_osm_polygons(
    polygon_for_osm,
    osm_tags,
    osm_query_crs,
    brdr_crs,
    line_buffer,
):
    """Download OSM features inside a polygon and return them as areas in ``brdr_crs``.

    Args:
        polygon_for_osm: Search polygon handed to ``ox.features_from_polygon``.
        osm_tags: Tag filter handed to ``ox.features_from_polygon``.
        osm_query_crs: Not used by this function; kept so existing callers
            that pass it keep working.
        brdr_crs: CRS the features are reprojected to before any buffering.
        line_buffer: Buffer distance applied to LineString features, expressed
            in ``brdr_crs`` units.

    Returns:
        The result of ``GeoDataFrame.buffer(0)`` on the filtered, reprojected
        features (geometries only, other OSM columns dropped).
    """
    osm_features_base = ox.features_from_polygon(polygon_for_osm, osm_tags)

    # Keep only the geometry types the rest of the pipeline handles.
    supported_mask = osm_features_base.geometry.geom_type.isin(
        ["Polygon", "MultiPolygon", "LineString"]
    )
    # Reproject first so that the line buffer below is applied in brdr_crs units.
    osm_features_df = osm_features_base[supported_mask].to_crs(brdr_crs)

    # Turn LineStrings into thin polygons so every feature can be treated as an
    # area by the later steps.
    line_mask = osm_features_df["geometry"].geom_type == "LineString"
    osm_features_df.loc[line_mask, "geometry"] = osm_features_df.loc[
        line_mask, "geometry"
    ].buffer(line_buffer)

    # A zero-width buffer reduces the frame to just its geometries.
    osm_features_proj = osm_features_df.buffer(0)

    print(f"Downloaded {len(osm_features_proj)} polygons.")

    # The data was reprojected to brdr_crs above, so no further CRS check or
    # conversion is needed here.
    return osm_features_proj

In [None]:
def process_osm_polygons(osm_features_proj, used_osm_indices):
    """Merge the selected OSM geometries into a single reference geometry.

    Args:
        osm_features_proj: Downloaded OSM features; must expose ``.geometry``
            and positional ``.iloc`` indexing.
        used_osm_indices: ``None`` to use every feature, or a non-empty list of
            positional indices selecting a subset.

    Returns:
        The union of the selected geometries, or ``None`` when the selection is
        empty / not a list, or when the union itself is empty.
    """
    if used_osm_indices is None:
        # Here we use ALL downloaded features
        print("Using all polygons.")
        geometries_to_combine = osm_features_proj.geometry
    elif isinstance(used_osm_indices, list) and used_osm_indices:
        # Here we only use features at specific indices
        print(f"Selecting polygons at indices {used_osm_indices}.")
        geometries_to_combine = osm_features_proj.iloc[used_osm_indices].geometry
    else:
        # An empty list (or any non-list value) selects nothing; stop here
        # instead of handing None to unary_union.
        print("No usable osm polygon indices supplied; nothing to combine")
        return None

    # Combine all the geometries we need
    combined_reference = unary_union(geometries_to_combine)
    if combined_reference.is_empty:
        print("Empty osm polygons when combining")
        return None
    return combined_reference

In [None]:
def get_snapped_polygon(
    reference_geom,
    original_proj,
    brdr_distance,
    brdr_strategy,
    brdr_threshold,
    brdr_crs,
):
    """Align the original boundary to the reference geometry with brdr.

    Args:
        reference_geom: Geometry loaded into the aligner as reference data.
        original_proj: Frame whose first ``"geometry"`` entry is the boundary
            to align.
        brdr_distance: Passed to ``Aligner.process`` as ``relevant_distance``
            and used as the key when reading the result.
        brdr_strategy: Passed to ``Aligner.process`` as ``od_strategy``.
        brdr_threshold: Passed to ``Aligner.process`` as
            ``threshold_overlap_percentage``.
        brdr_crs: CRS given to the aligner and to the returned frames.

    Returns:
        ``(aligned_df, diff_df, process_result)``: one-row GeoDataFrames holding
        the ``"result"`` and ``"result_diff"`` geometries, plus the raw output
        of ``Aligner.process``.
    """

    def _repaired(geom):
        # Rebuild an invalid geometry with a zero-width buffer; leave valid
        # ones untouched.
        if geom.is_valid:
            return geom
        return geom.buffer(0)

    theme_key = "theme_id_1"
    thematic_geom = _repaired(original_proj["geometry"].iloc[0])

    # Load the boundary (thematic) and the OSM union (reference) into brdr.
    aligner = Aligner(crs=brdr_crs)
    aligner.load_thematic_data(DictLoader({theme_key: thematic_geom}))
    aligner.load_reference_data(DictLoader({"ref_id_1": reference_geom}))

    # Run the alignment.
    process_result = aligner.process(
        relevant_distance=brdr_distance,
        od_strategy=brdr_strategy,
        threshold_overlap_percentage=brdr_threshold,
    )
    outcome = process_result[theme_key][brdr_distance]
    snapped_geom = outcome["result"]
    changed_geom = outcome["result_diff"]

    # Wrap each (repaired) geometry in its own single-row frame.
    aligned_df = gpd.GeoDataFrame(
        [1], geometry=[_repaired(snapped_geom)], crs=brdr_crs
    )
    diff_df = gpd.GeoDataFrame(
        [1], geometry=[_repaired(changed_geom)], crs=brdr_crs
    )

    return aligned_df, diff_df, process_result

In [None]:
def process_areas(
    original_wkt: str,
    initial_crs: str = "EPSG:4326",
    osm_tags: dict | None = None,
    brdr_distance: float = 20,
    brdr_threshold: float = 10,
    brdr_strategy=OpenbaarDomeinStrategy.SNAP_ONLY_VERTICES,
    brdr_crs: str = "EPSG:3857",
    osm_query_crs: str = "EPSG:4326",
    used_osm_indices: list | None = None,
    polygon_detection_buffer: float = 0.00001,
    line_buffer: float = 0.00001,
):
    """Snap one area's boundary to nearby OSM features and summarise the change.

    Args:
        original_wkt: WKT of the area's boundary.
        initial_crs: CRS the WKT coordinates are expressed in.
        osm_tags: Tag filter forwarded to the OSM download.
        brdr_distance: Forwarded to ``get_snapped_polygon``.
        brdr_threshold: Forwarded to ``get_snapped_polygon``.
        brdr_strategy: Forwarded to ``get_snapped_polygon``.
        brdr_crs: CRS used for buffering and for the brdr alignment.
        osm_query_crs: CRS the search polygon is converted to before the OSM
            download.
        used_osm_indices: Forwarded to ``process_osm_polygons``.
        polygon_detection_buffer: Width, in ``brdr_crs`` units, by which the
            boundary line is buffered to build the OSM search polygon.
        line_buffer: Forwarded to ``download_osm_polygons``.
            NOTE(review): both buffer defaults are 0.00001 and are applied in
            ``brdr_crs`` units, whereas the notebook's own calls pass 1 and 10
            - confirm the defaults are intended.

    Returns:
        ``(original_border, new_border, difference_area, base_features)``, all
        converted to EPSG:4326, or ``None`` when the OSM download fails or no
        reference geometry could be built.
    """
    # Load datasette polygon for area
    original_geom = loads(original_wkt)
    original_df = gpd.GeoDataFrame([1], geometry=[original_geom], crs=initial_crs)
    original_proj = original_df.to_crs(brdr_crs)

    # Need to add a small buffer to ensure that all nearby features are captured. May need testing.
    adding_buffer_df = original_proj.boundary.buffer(polygon_detection_buffer)
    df_for_query = adding_buffer_df.to_crs(osm_query_crs)
    polygon_for_osm = df_for_query.geometry.iloc[0]

    # Download Open Street Map polygons (best effort: any failure ends the run
    # for this area with a None result).
    try:
        osm_features_proj = download_osm_polygons(
            polygon_for_osm=polygon_for_osm,
            osm_tags=osm_tags,
            osm_query_crs=osm_query_crs,
            brdr_crs=brdr_crs,
            line_buffer=line_buffer,
        )
    except Exception as e:
        print(f"Error: {e}")
        print("Returning 'None'")
        return None

    # Prep features
    reference_geom = process_osm_polygons(osm_features_proj, used_osm_indices)
    if reference_geom is None:
        # Without a reference there is nothing to snap to; bail out the same
        # way as a failed download rather than passing None into brdr.
        print("No reference geometry available for snapping")
        print("Returning 'None'")
        return None

    # Brdr for snapping polygons
    aligned_df, diff_df, _ = get_snapped_polygon(
        reference_geom=reference_geom,
        original_proj=original_proj,
        brdr_distance=brdr_distance,
        brdr_strategy=brdr_strategy,
        brdr_threshold=brdr_threshold,
        brdr_crs=brdr_crs,
    )

    # Prep all the borders needed for plotting, converted to EPSG:4326.
    # Note, some to_crs functions could definitely be simplified here, just wanted to be 100% sure.
    original_border = original_df["geometry"].to_crs("EPSG:4326")[0]
    new_border = aligned_df["geometry"].to_crs("EPSG:4326")[0]

    # Areas of the changed region that overlap OSM features, measured in
    # brdr_crs units; pieces smaller than 100 are ignored.
    red_area_calcs = [
        geom.area
        for geom in MultiPolygon(list(osm_features_proj.explode()))
        .intersection(diff_df["geometry"])
        .explode()
        if geom.area >= 100
    ]

    # Same overlap again, this time in EPSG:4326 for plotting.
    base_features = MultiPolygon(list(osm_features_proj.explode().to_crs("EPSG:4326")))
    difference_area = base_features.intersection(
        diff_df["geometry"].to_crs("EPSG:4326")
    )
    difference_area = difference_area.explode()
    # Drop any non-areal leftovers of the intersection before rebuilding.
    difference_area = difference_area[
        difference_area.geometry.geom_type.isin(["Polygon", "MultiPolygon"])
    ]
    difference_area = MultiPolygon(list(difference_area))

    # Print relevant areas.
    print(f"Areas of red areas over 100m^2: {red_area_calcs}")
    print(
        f"Ratio of red areas in total area as percentage: {100*difference_area.area/new_border.area}%"
    )

    return original_border, new_border, difference_area, base_features

In [None]:
def polygon_prep(
    polygon,
):
    """Flatten a Polygon / MultiPolygon into lon and lat lists for plotly.

    Only the exterior ring of each polygon is emitted; interior rings (holes)
    are not drawn. A ``None`` entry is appended after every polygon so that
    plotly breaks the line between separate parts.

    Args:
        polygon: A Polygon, or a multi-part geometry whose ``.geoms`` are
            polygons.

    Returns:
        ``(lons, lats)``: two equally long lists of x and y coordinates.
    """
    # Treat a lone Polygon as a one-part collection so one loop serves both.
    if polygon.geom_type == "Polygon":
        parts = [polygon]
    else:
        parts = polygon.geoms

    lons = []
    lats = []
    for poly in parts:
        x_coords, y_coords = poly.exterior.coords.xy
        lons.extend(x_coords)
        lats.extend(y_coords)
        # Need to add separator for multiple polygons
        lons.append(None)
        lats.append(None)

    return lons, lats

In [None]:
def plot_area_with_sliders(
    original_border,
    new_border,
    difference_area,
    diff_rgb,
    base_features,
    base_rgb,
    alpha,
    area_name,
):
    """Plot original vs. new boundary on a map with fill-opacity sliders.

    Four traces are drawn, in this order: the difference area, the base
    features, the original boundary and the new boundary. Two sliders restyle
    the fill opacity of the first two traces, and a pair of buttons switches
    the basemap between OpenStreetMap and satellite imagery.

    Args:
        original_border: Geometry of the original boundary; its centroid is
            used as the map centre.
        new_border: Geometry of the new boundary.
        difference_area: Geometry filled with ``diff_rgb``.
        diff_rgb: ``(r, g, b)`` colour for the difference area.
        base_features: Geometry filled with ``base_rgb``.
        base_rgb: ``(r, g, b)`` colour for the base features.
        alpha: Initial fill opacity; outlines are drawn 0.1 more opaque,
            capped at 1. Both sliders start on the step closest to this value.
        area_name: Name inserted into the figure title.
    """

    def _rgba(rgb, opacity):
        # CSS-style colour string as used by plotly.
        return f"rgba({rgb[0]}, {rgb[1]}, {rgb[2]}, {opacity})"

    diff_lons, diff_lats = polygon_prep(difference_area)
    feature_lons, feature_lats = polygon_prep(base_features)

    original_lons, original_lats = polygon_prep(original_border)
    new_lons, new_lats = polygon_prep(new_border)

    # Find centre for plotting
    boundary_centre = original_border.centroid
    centre_lon, centre_lat = boundary_centre.x, boundary_centre.y

    # Get colours and adjust alphas
    diff_fill_colour = _rgba(diff_rgb, alpha)
    diff_line_colour = _rgba(diff_rgb, min(alpha + 0.1, 1))
    feature_fill_colour = _rgba(base_rgb, alpha)
    feature_line_colour = _rgba(base_rgb, min(alpha + 0.1, 1))

    # Prep dicts for plotly alpha sliders - steps of 0.1
    alphas = np.linspace(0, 1, 11)
    # Start each slider on the step nearest the requested alpha, so the handle
    # position agrees with the opacity the traces are first drawn with.
    active_step = int(np.abs(alphas - alpha).argmin())

    diff_steps = []
    feature_steps = []
    for alpha_step in alphas:
        alpha_step = round(alpha_step, 2)

        # Trace 0 is the difference area, trace 1 the base features.
        diff_steps.append(
            dict(
                method="restyle",
                args=[{"fillcolor": [_rgba(diff_rgb, alpha_step)]}, [0]],
                label=str(alpha_step),
            )
        )
        feature_steps.append(
            dict(
                method="restyle",
                args=[{"fillcolor": [_rgba(base_rgb, alpha_step)]}, [1]],
                label=str(alpha_step),
            )
        )

    diff_sliders = [
        dict(
            active=active_step,
            currentvalue={"prefix": "Added Area alpha: ", "visible": True},
            pad={"t": 20},
            steps=diff_steps,
        )
    ]

    feature_sliders = [
        dict(
            active=active_step,
            currentvalue={"prefix": "Base Features alpha: ", "visible": True},
            pad={"t": 120},
            steps=feature_steps,
        )
    ]

    # Start base plots and figure. The order of the first two traces matters:
    # the slider steps above address them by index.
    fig = go.Figure()
    fig.add_trace(
        go.Scattermap(
            lon=diff_lons,
            lat=diff_lats,
            mode="lines",
            fill="toself",
            fillcolor=diff_fill_colour,
            line=dict(color=diff_line_colour, width=1),
            name="Concerning Areas",
            showlegend=True,
        )
    )

    fig.add_trace(
        go.Scattermap(
            lon=feature_lons,
            lat=feature_lats,
            mode="lines",
            fill="toself",
            fillcolor=feature_fill_colour,
            line=dict(color=feature_line_colour, width=1),
            name="Base Features",
            showlegend=True,
        )
    )

    # Now plot boundary lines
    fig.add_trace(
        go.Scattermap(
            lon=original_lons,
            lat=original_lats,
            mode="lines",
            fill="none",
            line=dict(color="black", width=3),
            name="Original Boundary",
            showlegend=True,
        )
    )

    fig.add_trace(
        go.Scattermap(
            lon=new_lons,
            lat=new_lats,
            mode="lines",
            fill="none",
            line=dict(color="red", width=1),
            name="New Boundary",
            showlegend=True,
        )
    )

    # Zoom in for plot - gets in close to area
    initial_zoom = 15

    # Satellite works differently - plotly in-built in 'style' arg doesn't work when zoomed in
    satellite_layer = [
        dict(
            below="traces",
            sourcetype="raster",
            source=[
                "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"
            ],
        )
    ]

    # Button for changing styles
    map_switch = [
        dict(
            args=[{"map.style": "open-street-map", "map.layers": None}],
            label="OSM",
            method="relayout",
        ),
        dict(
            args=[{"map.style": None, "map.layers": satellite_layer}],
            label="Satellite",
            method="relayout",
        ),
    ]

    # Add formatting changes with sliders and button
    fig.update_layout(
        title_text=f"Conservation area anomalies in {area_name}",
        geo_scope="europe",
        map=dict(
            style="open-street-map",
            center=dict(lon=centre_lon, lat=centre_lat),
            zoom=initial_zoom,
        ),
        showlegend=True,
        margin={"r": 100, "t": 50, "l": 100, "b": 50},
        sliders=diff_sliders + feature_sliders,
        height=800,
        width=1000,
        updatemenus=[
            dict(
                type="buttons",
                direction="left",
                buttons=map_switch,
                pad={"r": 10, "t": 10},
                showactive=True,
                x=0.7,
                xanchor="right",
                y=1.15,
                yanchor="top",
            ),
        ],
    )

    fig.show()

In [None]:
# OSM tags to fetch reference polygons for. The active set holds the most
# prominent / commonly useful tags; going through them one at a time may give
# the best results.
input_tags = {
    "landuse": ["residential", "farmyard", "cemetery", "allotments"],
    "natural": ["wood", "grassland", "meadow"],
    # Alternative single-tag selections kept for experimentation:
    # 'landuse': ['farmyard'],
    # 'landuse': ['farmland'],
    # 'landuse': ['residential'],
    # 'waterway': ['drain'],
    # "highway": ['primary', 'secondary', 'unclassified', 'track']
}

# Distance within which the boundary may snap to polygons.
input_brdr_distance = 20
# Sensitivity tuning parameter - lower is more sensitive.
input_brdr_threshold = 1
# Strategy for how to snap to polygons. Other strategies to try:
#   OpenbaarDomeinStrategy.SNAP_ONLY_VERTICES
#   OpenbaarDomeinStrategy.SNAP_ALL_SIDE
input_brdr_strategy = OpenbaarDomeinStrategy.SNAP_PREFER_VERTICES

# CRS of the geometries coming from datasette.
initial_crs = "EPSG:4326"
# Metre-based CRS used for brdr.
# NOTE(review): EPSG:3857 distances are only approximately metres away from
# the equator - confirm that is acceptable for these thresholds.
brdr_crs = "EPSG:3857"
# CRS of the polygon sent to OSM.
target_osm_crs = "EPSG:4326"

# Row of `data` used for this test case run.
data_index = 4
# Restrict to specific polygon indices (mostly for testing); None uses all.
used_osm_indices = None
# Width given to lines, in meters: the radius of the circle generated at each
# point.
line_buffer = 10
# Detection distance, in meters, for polygons on either side of the actual
# boundary.
polygon_detection_buffer = 1

areas_tuple = process_areas(
    original_wkt=data["geometry"][data_index],
    initial_crs=initial_crs,
    osm_query_crs=target_osm_crs,
    brdr_crs=brdr_crs,
    osm_tags=input_tags,
    used_osm_indices=used_osm_indices,
    brdr_distance=input_brdr_distance,
    brdr_threshold=input_brdr_threshold,
    brdr_strategy=input_brdr_strategy,
    polygon_detection_buffer=polygon_detection_buffer,
    line_buffer=line_buffer,
)

# process_areas returns None on failure; only unpack a real result.
if areas_tuple:
    (original_border, new_border, difference_area, base_features) = areas_tuple

A really good example is Sleapshyde. Note, if the distance is upped, say to 100, then the long area at the bottom is highlighted when it probably shouldn't be. Maybe this is a nice highlight as the line through the wood seems somewhat arbitrary, but setting the dist to, say, 20, gives nice clean highlights for the two main issues.

In [None]:
# Only plot when the run above produced a result; otherwise the border
# variables are undefined or left over from an earlier run.
if areas_tuple:
    plot_area_with_sliders(
        original_border,
        new_border,
        difference_area,
        (255, 0, 0),  # Red
        base_features,
        (0, 0, 255),  # Blue
        0.3,  # Initial alpha value
        data["name"][data_index],
    )
else:
    print("No result to plot for this area.")

In [None]:
# OSM tags to fetch reference polygons for.
input_tags = {
    "landuse": ["residential", "farmyard", "cemetery", "allotments"],
    "natural": ["wood", "grassland", "meadow"],
}

# Distance within which the boundary may snap to polygons.
input_brdr_distance = 20
# Sensitivity tuning parameter - lower is more sensitive.
input_brdr_threshold = 1
# Strategy for how to snap to polygons.
input_brdr_strategy = OpenbaarDomeinStrategy.SNAP_PREFER_VERTICES

# CRS of the geometries coming from datasette.
initial_crs = "EPSG:4326"
# Metre-based CRS used for brdr.
# NOTE(review): EPSG:3857 distances are only approximately metres away from
# the equator - confirm that is acceptable for these thresholds.
brdr_crs = "EPSG:3857"
# CRS of the polygon sent to OSM.
target_osm_crs = "EPSG:4326"

# Row of `data` used for this test case run.
data_index = 257
# Restrict to specific polygon indices (mostly for testing); None uses all.
used_osm_indices = None
# Width given to lines, in meters: the radius of the circle generated at each
# point.
line_buffer = 10
# Detection distance, in meters, for polygons on either side of the actual
# boundary.
polygon_detection_buffer = 1

heyroyd_areas_tuple = process_areas(
    original_wkt=data["geometry"][data_index],
    initial_crs=initial_crs,
    osm_query_crs=target_osm_crs,
    brdr_crs=brdr_crs,
    osm_tags=input_tags,
    used_osm_indices=used_osm_indices,
    brdr_distance=input_brdr_distance,
    brdr_threshold=input_brdr_threshold,
    brdr_strategy=input_brdr_strategy,
    polygon_detection_buffer=polygon_detection_buffer,
    line_buffer=line_buffer,
)

# process_areas returns None on failure; only unpack a real result.
if heyroyd_areas_tuple:
    (
        original_border,
        new_border,
        difference_area,
        base_features,
    ) = heyroyd_areas_tuple

The example below has a clear issue on OSM - the left hand side at the top corner clearly goes through houses when looking at the satellite images, but the area is generic with no tags. Although there is a residential area below which we would ideally want to snap to, because there is not a polygon on <i>both sides</i>, the model struggles to highlight the area as there is no overlap with an area of interest. This also looks very wrong - setting the distance to e.g. 200 snaps to some of the top of the residential area, but again, as the field is not a polygon, it is not highlighted as added area.

This could potentially be solved with SAM, then checking the overlap of SAM generated polygons with existing, then if little to no overlap (as we would expect with the grey 'nothing' area), then create that as a tag: generic polygon or something.

In [None]:
# Only plot when the run above produced a result; otherwise the border
# variables are undefined or left over from an earlier run.
if heyroyd_areas_tuple:
    plot_area_with_sliders(
        original_border,
        new_border,
        difference_area,
        (255, 0, 0),  # Red
        base_features,
        (0, 0, 255),  # Blue
        0.3,  # Initial alpha value
        data["name"][data_index],
    )
else:
    print("No result to plot for this area.")

In [None]:
# OSM tags to fetch reference polygons for.
input_tags = {
    "landuse": ["residential", "farmyard", "cemetery", "allotments"],
    "natural": ["wood", "grassland", "meadow"],
}

# Distance within which the boundary may snap to polygons.
input_brdr_distance = 200
# Sensitivity tuning parameter - lower is more sensitive.
input_brdr_threshold = 10
# Strategy for how to snap to polygons.
input_brdr_strategy = OpenbaarDomeinStrategy.SNAP_PREFER_VERTICES

# CRS of the geometries coming from datasette.
initial_crs = "EPSG:4326"
# Metre-based CRS used for brdr.
# NOTE(review): EPSG:3857 distances are only approximately metres away from
# the equator - confirm that is acceptable for these thresholds.
brdr_crs = "EPSG:3857"
# CRS of the polygon sent to OSM.
target_osm_crs = "EPSG:4326"

# Row of `data` used for this test case run.
data_index = 0
# Restrict to specific polygon indices (mostly for testing); None uses all.
used_osm_indices = None
# Width given to lines, in meters: the radius of the circle generated at each
# point.
line_buffer = 10
# Detection distance, in meters, for polygons on either side of the actual
# boundary.
polygon_detection_buffer = 1

napsbury_areas_tuple = process_areas(
    original_wkt=data["geometry"][data_index],
    initial_crs=initial_crs,
    osm_query_crs=target_osm_crs,
    brdr_crs=brdr_crs,
    osm_tags=input_tags,
    used_osm_indices=used_osm_indices,
    brdr_distance=input_brdr_distance,
    brdr_threshold=input_brdr_threshold,
    brdr_strategy=input_brdr_strategy,
    polygon_detection_buffer=polygon_detection_buffer,
    line_buffer=line_buffer,
)

# process_areas returns None on failure; only unpack a real result.
if napsbury_areas_tuple:
    (
        original_border,
        new_border,
        difference_area,
        base_features,
    ) = napsbury_areas_tuple

Napsbury is a weird one. By setting the dist relatively high (200m) we highlight some real issues. Referring to the document_url, there should not be conservation area lines through the housing areas. It is odd that brdr has chosen to align on the inside in this case, but it still highlights that something is wrong - when brdr is confused, even if it doesn't get it right, it is still useful to indicate things are not cut and dry! Setting the distance lower, say to 20, results in worse detection due to the scale of how wrong this conservation area is.

In [None]:
# Only plot when the run above produced a result; otherwise the border
# variables are undefined or left over from an earlier run.
if napsbury_areas_tuple:
    plot_area_with_sliders(
        original_border,
        new_border,
        difference_area,
        (255, 0, 0),  # Red
        base_features,
        (0, 0, 255),  # Blue
        0.3,  # Initial alpha value
        data["name"][data_index],
    )
else:
    print("No result to plot for this area.")