In [None]:
import sys
import pandas as pd
# Add C:\R-Packages to the module search path. A raw string is used because
# '\R' in a normal string literal is an invalid escape sequence (SyntaxWarning
# on recent Python versions, slated to become an error).
sys.path.insert(1, r'C:\R-Packages')

import rasterio
from rasterio import features
from rasterio.mask import mask
from rasterio.crs import CRS
from shapely.geometry import mapping
from shapely.geometry import shape
import numpy as np
import geopandas as gpd

# Input and output file paths used by the cells below.
# Source canopy height model (CHM) raster; read by the thresholding cell.
chm_raster = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\3_Test_Square\SS79_CHM_2020_22.tif"
# Thresholded CHM; written by the thresholding cell and read by the OSMM masking cell.
chm_raster_25 = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\3_Test_Square\SS79_CHM_2020_22_2.5thresh.tif"
# OSMM shapefile; read by the categorisation cell and then overwritten in place with the added Surf_Obj field.
OSMM_path = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\3_Test_Square\OSMM_Square_SS79\OSMM_Square_SS79.shp"
# CHM after OSMM masking and small-area removal; read again by the NFI masking cell.
chm_masked = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\3_Test_Square\SS79_CHM_2.5_masked.tif"
# Final output: the masked CHM with the NFI woodland polygons masked out as well.
chm_masked_NFI = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\3_Test_Square\SS79_CHM_2.5_masked_NFI.tif"
# NFI woodland polygons (GeoPackage); read by the NFI masking cell.
NFI_woodland = r"\\forestresearch.gov.uk\shares\IFOS\Forest Inventory\0700_NonCore_Funded\0726_TOW_Wales\04_Spatial Analysis\1_Reference_Data\6_Wales_NFI_2023\NFI_Wales_2023_WoodlandOnly.gpkg"

In [None]:
# Apply 2.5m threshold to CHM
#
# Every cell of band 1 whose value is below the threshold is replaced by NaN,
# and the result is written to chm_raster_25 as a float32 raster with NaN as
# its NoData value.

threshold_value = 2.5  # cells with a value below this become NoData

with rasterio.open(chm_raster) as src:
    # Read band 1 as a masked array so cells flagged by the source NoData
    # value/mask are recognised. Cast to float32 before filling: NaN cannot be
    # stored in an integer array, and the output is declared float32 below.
    raster_data = src.read(1, masked=True).astype("float32").filled(np.nan)

    # Copy the metadata so the output inherits the source metadata (CRS,
    # transform, size) without mutating the dictionary obtained from src.
    meta = src.meta.copy()

# Apply threshold. Comparisons with NaN are False, so cells that were already
# NoData simply stay NaN.
raster_data[raster_data < threshold_value] = np.nan

# Update the raster metadata to match the array that is written
meta.update(dtype=rasterio.float32, nodata=np.nan)

# Write 2.5m CHM output raster with CRS
with rasterio.open(chm_raster_25, 'w', **meta) as dst:
    dst.write(raster_data, 1)

print("CHM thresholded to 2.5m")

In [None]:
# Categorise OSMM
#
# Adds a 'Surf_Obj' field to the OSMM features based on their 'descterm':
#   "No"   - descterm listed in tree_terms
#   "Yes"  - descterm listed in surface_terms
#   "Mask" - descterm listed in mask_terms
# Features whose descterm is in none of the lists keep an empty Surf_Obj.

OSMM = gpd.read_file(OSMM_path)

# Update features with blank 'descterm' field: treat empty strings as missing
# and fall back to the feature's 'descgroup'. Assigning the result back to the
# column (instead of calling replace(..., inplace=True) on OSMM["descterm"])
# avoids chained in-place mutation, which pandas warns about and which has no
# effect under Copy-on-Write.
descterm_filled = OSMM["descterm"].replace('', np.nan)
OSMM["descterm"] = descterm_filled.fillna(OSMM["descgroup"])

# Create Surf_Obj field
OSMM["Surf_Obj"] = None  # or np.nan

# Define 'tree', 'surface' and 'mask' terms

tree_terms = [
    "Agricultural Land",
    "Aqueduct,Canal",
    "Aqueduct,Scrub",
    "Boulders",
    "Boulders (Scattered),Coniferous Trees,Nonconiferous Trees,Scrub",
    "Boulders (Scattered),Nonconiferous Trees",
    "Boulders (Scattered),Rough Grassland,Scrub",
    "General Surface",
    "Bridge,Nonconiferous Trees",
    "Bridge,Nonconiferous Trees,Scrub",
    "Bridge,Rough Grassland",
    "Bridge,Step",
    "Canal",
    "Cliff",
    "Collects",
    "Coniferous Trees",
    "Coniferous Trees (Scattered)",
    "Coniferous Trees (Scattered),Nonconiferous Trees (Scattered)",
    "Coniferous Trees (Scattered),Nonconiferous Trees (Scattered),Scrub",
    "Coniferous Trees (Scattered),Rough Grassland",
    "Coniferous Trees (Scattered),Rough Grassland,Scrub",
    "Coniferous Trees (Scattered),Scrub",
    "Coniferous Trees,Mineral Workings (Inactive)",
    "Coniferous Trees,Nonconiferous Trees",
    "Coniferous Trees,Nonconiferous Trees,Scrub",
    "Coniferous Trees,Scrub",
    "Coniferous Trees,Scrub,Nonconiferous Trees",
    "Coniferous Trees,Static Water",
    "Coppice Or Osiers,Nonconiferous Trees",
    "Drain",
    "Ford",
    "Fountain",
    "Gas Governor",
    "Heath",
    "Heath,Nonconiferous Trees (Scattered)",
    "Heath,Nonconiferous Trees (Scattered),Rough Grassland",
    "Heath,Rough Grassland",
    "Heath,Rough Grassland,Scrub",
    "Inland Water",
    "Landform",
    "Marsh",
    "Marsh,Nonconiferous Trees",
    "Marsh,Nonconiferous Trees,Scrub",
    "Marsh,Rough Grassland",
    "Marsh,Rough Grassland,Scrub",
    "Marsh,Scrub",
    "Mineral Workings",
    "Mineral Workings (Inactive)",
    "Mineral Workings (Inactive),Nonconiferous Trees",
    "Mineral Workings (Inactive),Nonconiferous Trees,Scrub",
    "Mineral Workings (Inactive),Slope",
    "Mud",
    "Multi Surface",
    "Nonconiferous Trees",
    "Nonconiferous Trees (Scattered)",
    "Nonconiferous Trees (Scattered),Heath",
    "Nonconiferous Trees (Scattered),Rough Grassland",
    "Nonconiferous Trees (Scattered),Rough Grassland,Scrub",
    "Nonconiferous Trees (Scattered),Scrub",
    "Nonconiferous Trees (Scattered),Scrub,Rough Grassland",
    "Nonconiferous Trees,Boulders",
    "Nonconiferous Trees,Coniferous Trees",
    "Nonconiferous Trees,Coniferous Trees,Scrub",
    "Nonconiferous Trees,Coppice Or Osiers",
    "Nonconiferous Trees,Scrub",
    "Nonconiferous Trees,Scrub,Coniferous Trees",
    "Orchard",
    "Path",
    "Path,Structure",
    "Rail",
    "Reeds,Static Water",
    "Reeds,Watercourse",
    "Reservoir",
    "Road Or Track",
    "Roadside",
    "Rock",
    "Rough Grassland",
    "Rough Grassland,Boulders",
    "Rough Grassland,Boulders,Heath",
    "Rough Grassland,Coniferous Trees (Scattered)",
    "Rough Grassland,Heath",
    "Rough Grassland,Nonconiferous Trees (Scattered)",
    "Rough Grassland,Sand",
    "Rough Grassland,Scrub",
    "Rough Grassland,Scrub,Boulders (Scattered)",
    "Rough Grassland,Scrub,Nonconiferous Trees (Scattered)",
    "Rough Grassland,Scrub,Spoil Heap (Inactive)",
    "Scrub",
    "Scrub,Coniferous Trees",
    "Scrub,Coniferous Trees,Nonconiferous Trees",
    "Scrub,Nonconiferous Trees",
    "Scrub,Nonconiferous Trees (Scattered)",
    "Scrub,Nonconiferous Trees,Coniferous Trees",
    "Scrub,Rough Grassland",
    "Scrub,Rough Grassland,Nonconiferous Trees (Scattered)",
    "Scrub,Spoil Heap (Inactive)",
    "Shingle",
    "Sinks",
    "Slipway",
    "Slope",
    "Sloping Masonry",
    "Spring",
    "Static Water",
    "Step",
    "Tidal Water",
    "Track",
    "Traffic Calming",
    "Unclassified",
    "Watercourse",
    "Waterfall",
    "Weir",
]
OSMM.loc[OSMM["descterm"].isin(tree_terms), "Surf_Obj"] = "No"

surface_terms = [
    "Archway",
    "Bridge",
    "Building",
    "Building,Structure",
    "Chimney",
    "Conveyor",
    "Conveyor,Overhead Construction",
    "Electricity Sub Station",
    "Footbridge",
    "Footbridge,Step",
    "Gantry",
    "General Surface,Structure",
    "General Surface,Rail",
    "General Surface,Roadside,Structure",
    "Glasshouse",
    "Level Crossing",
    "Public Convenience",
    "Pylon",
    "Rail Signal Gantry",
    "Rail,Structure",
    "Roadside,Structure",
    "Structure",
    "Structure,Inland Water",
    "Tank",
]
OSMM.loc[OSMM["descterm"].isin(surface_terms), "Surf_Obj"] = "Yes"

mask_terms = [
    "Boulders,Foreshore",
    "Foreshore",
    "Foreshore,Mud",
    "Foreshore,Mud,Sand",
    "Foreshore,Mud,Shingle",
    "Foreshore,Saltmarsh",
    "Foreshore,Sand",
    "Foreshore,Shingle",
    "Foreshore,Slipway",
    "Foreshore,Sloping Masonry",
    "Mud,Sand",
    "Sand",
    "Swimming Pool",
]
OSMM.loc[OSMM["descterm"].isin(mask_terms), "Surf_Obj"] = "Mask"

# Report descterm values that matched none of the lists, so features left
# without a Surf_Obj value do not go unnoticed.
unmatched_terms = OSMM.loc[OSMM["Surf_Obj"].isna(), "descterm"].dropna().unique()
if len(unmatched_terms) > 0:
    print("Warning: descterm values with no Surf_Obj category:",
          sorted(str(term) for term in unmatched_terms))

# Save updated OSMM file (overwrites the input shapefile)
OSMM.to_file(OSMM_path)

print("OSMM_Square_SS79.shp updated with Surf_Obj field")

In [None]:
# Mask out non-tree OSMM features from CHM
#
# Cells of the thresholded CHM covered by buffered "Yes" (surface object)
# features or by "Mask" features are set to NaN. The remaining valid cells are
# vectorised, patches smaller than min_patch_area are dropped, and the result
# is written to chm_masked.

surface_buffer = 2     # buffer distance around surface objects, in CRS units
min_patch_area = 1.5   # smallest patch area kept, in squared CRS units


def _rasterize_presence(geometries, out_shape, transform):
    """Burn geometries into a uint8 grid: 1 where covered, 0 elsewhere.

    Missing and empty geometries are skipped. features.rasterize raises a
    ValueError when it is given no valid shapes, so an all-zero grid is
    returned directly when nothing is left to burn.
    """
    burn_shapes = [
        (geom, 1)
        for geom in geometries
        if geom is not None and not geom.is_empty
    ]
    if not burn_shapes:
        return np.zeros(out_shape, dtype="uint8")
    return features.rasterize(
        burn_shapes,
        out_shape=out_shape,
        transform=transform,
        fill=0,
        dtype="uint8"
    )


# Get CHM CRS, grid definition and data
with rasterio.open(chm_raster_25) as src:
    raster_crs = src.crs
    transform = src.transform
    out_shape = (src.height, src.width)
    meta = src.meta.copy()

    # Read CHM data
    chm_data = src.read(1).astype("float32")

# Reproject and buffer surface_terms features
surface_gdf = OSMM[OSMM["Surf_Obj"] == "Yes"].copy()
surface_gdf = surface_gdf.to_crs(raster_crs)
surface_gdf["geometry"] = surface_gdf.buffer(surface_buffer)

# Get mask_terms features and reproject
mask_gdf = OSMM[OSMM["Surf_Obj"] == "Mask"].copy()
mask_gdf = mask_gdf.to_crs(raster_crs)

# Rasterise buffered surface_terms and mask_terms onto the CHM grid
surface_mask = _rasterize_presence(surface_gdf.geometry, out_shape, transform)
mask_mask = _rasterize_presence(mask_gdf.geometry, out_shape, transform)

# Apply both masks
combined_mask = (surface_mask == 1) | (mask_mask == 1)
chm_data[combined_mask] = np.nan

# Create binary mask of valid CHM areas
valid_mask = (np.isfinite(chm_data)) & (chm_data > 0)

# Vectorise the valid regions; passing mask= restricts polygon generation to
# the valid cells, so the background is not polygonised only to be discarded.
shapes = features.shapes(
    valid_mask.astype('uint8'), mask=valid_mask, transform=transform
)
polygons = [shape(geom) for geom, val in shapes if val == 1]

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=polygons, crs=raster_crs)

# Filter out small areas
gdf = gdf[gdf.geometry.area >= min_patch_area]

# Rasterise the cleaned-up polygons
cleaned_mask = _rasterize_presence(
    gdf.geometry, chm_data.shape, transform
).astype(bool)

# Remove small areas
chm_data[~cleaned_mask] = np.nan

# Save output
meta.update(dtype="float32", nodata=np.nan)

with rasterio.open(chm_masked, 'w', **meta) as dst:
    dst.write(chm_data, 1)

print("CHM raster masked and single pixels removed")

In [None]:
# Mask out NFI Woodland
#
# Cells of the masked CHM that fall inside NFI woodland polygons are replaced
# by the raster's NoData value (invert=True masks the inside of the shapes),
# and the result is written to chm_masked_NFI.

# Load NFI data
gdf = gpd.read_file(NFI_woodland)

# Open masked CHM
with rasterio.open(chm_masked) as src:
    raster_crs = src.crs

    # Reproject NFI to match CHM
    if gdf.crs != raster_crs:
        gdf = gdf.to_crs(raster_crs)

    # Extract NFI geometries as GeoJSON-like mappings, skipping missing or
    # empty geometries (None has no __geo_interface__).
    geoms = [
        geom.__geo_interface__
        for geom in gdf.geometry.values
        if geom is not None and not geom.is_empty
    ]

    if geoms:
        # Mask the CHM; crop=False keeps the original raster extent
        out_image, out_transform = mask(
            src, geoms, invert=True, crop=False
        )
    else:
        # Nothing to mask out: pass the raster through unchanged
        out_image = src.read()
        out_transform = src.transform

    out_meta = src.meta.copy()

# Update metadata
out_meta.update({
    "driver": "GTiff",
    "height": out_image.shape[1],
    "width": out_image.shape[2],
    "transform": out_transform
})

# Save output
with rasterio.open(chm_masked_NFI, "w", **out_meta) as dest:
    dest.write(out_image)

print("CHM raster masked with NFI Woodland")
