In [None]:
import geopandas as gpd
from shapely.ops import unary_union
from shapely.geometry import Polygon
import numpy as np

# Load the annotation geometries from the GeoPackage (path is relative to the
# current working directory); `gdf` is the input to the merge step below.
gdf = gpd.read_file("./crowns_file.gpkg")

def are_similar(poly1, poly2, threshold=0.9):
    """Return whether two polygons overlap enough to count as duplicates.

    Similarity is the intersection-over-union (IoU) of the two shapes: the
    area they share divided by the area they cover together.

    Args:
        poly1: First geometry; must provide ``intersection`` and ``union``
            methods whose results expose an ``area`` attribute.
        poly2: Second geometry, same requirements as ``poly1``.
        threshold: Minimum IoU for the pair to be considered similar.

    Returns:
        True when the IoU is at least ``threshold``. A pair whose union has
        zero area (for example two empty geometries) is never similar.
    """
    union_area = poly1.union(poly2).area
    if union_area == 0:
        # Nothing to compare; this also avoids a ZeroDivisionError below.
        return False
    shared_area = poly1.intersection(poly2).area
    return (shared_area / union_area) >= threshold

def merge_overlapping_polygons(gdf, similarity_threshold=0.9):
    """Collapse near-duplicate polygons into single merged geometries.

    Rows are visited in order. Each row that has not been absorbed yet becomes
    a "seed"; every later, still-unabsorbed row that intersects the seed and
    is similar to it (see ``are_similar``) is unioned into the seed. Similarity
    is only ever measured against the seed, so grouping is not transitive.

    Args:
        gdf: GeoDataFrame whose active geometry column holds the polygons.
        similarity_threshold: Minimum similarity, forwarded to ``are_similar``,
            for a polygon to be merged into a seed.

    Returns:
        A new GeoDataFrame with one merged geometry per group and the same CRS
        as ``gdf``. Only the geometry column is produced; other columns of
        ``gdf`` are not carried over. Rows with a missing (``None``) geometry
        are skipped.
    """
    geometries = gdf.geometry
    # Materialise once so geometries can be looked up by position cheaply.
    shapes = list(geometries)
    processed = np.zeros(len(shapes), dtype=bool)
    # The spatial index narrows each seed's candidates to the rows that really
    # intersect it, instead of testing every pair of rows.
    spatial_index = geometries.sindex
    merged_polygons = []

    for i, seed in enumerate(shapes):
        if processed[i]:
            continue
        processed[i] = True
        if seed is None:
            # Missing geometry: nothing to merge and nothing to emit.
            continue

        group = [seed]
        # Sorted so candidates are visited in row order, like a plain scan.
        for j in sorted(spatial_index.query(seed, predicate="intersects")):
            if j == i or processed[j]:
                continue
            candidate = shapes[j]
            if are_similar(seed, candidate, similarity_threshold):
                group.append(candidate)
                processed[j] = True
        merged_polygons.append(unary_union(group))

    # Create a new GeoDataFrame with merged polygons
    return gpd.GeoDataFrame(geometry=merged_polygons, crs=gdf.crs)

# Merge near-duplicate polygons using the default similarity threshold (0.9).
merged_gdf = merge_overlapping_polygons(gdf)

# Write the merged polygons to a separate GeoPackage; the input file is not
# overwritten.
merged_gdf.to_file("./crowns_file_clean.gpkg", driver='GPKG')