In [3]:
import geopandas as gpd

def merge_overlapping_by_espece_exploded(input_geojson, output_geojson, by="especes"):
    """
    Dissolve the features of a GeoJSON file per attribute value and save
    the result as single-part polygons.

    All geometries sharing the same value in column ``by`` are unioned
    (which merges overlapping shapes); the resulting multi-part geometries
    are then split into one row per part.

    Parameters:
        input_geojson: path of the GeoJSON file to read.
        output_geojson: path of the GeoJSON file to write.
        by (str): name of the column to dissolve on. Defaults to
            "especes", the column the original implementation hard-coded.

    Raises:
        KeyError: if column ``by`` is not present in the input file.
    """
    # Load GeoJSON
    gdf = gpd.read_file(input_geojson)

    # Fail early with a readable message instead of a bare KeyError
    # raised from inside dissolve().
    if by not in gdf.columns:
        raise KeyError(
            f"Column {by!r} not found in {input_geojson}; "
            f"available columns: {list(gdf.columns)}"
        )

    # Dissolve polygons per group (merges overlaps); other attributes keep
    # the first value found in each group.
    merged = gdf.dissolve(by=by, aggfunc="first").reset_index()

    # Explode multipolygons into separate polygons; the index is flattened
    # right after, so the part index is not kept.
    exploded = merged.explode(index_parts=True).reset_index(drop=True)

    # Save result
    exploded.to_file(output_geojson, driver="GeoJSON")
    print(f"Cleaned GeoJSON saved to {output_geojson}")


# Example usage: dissolve the expanded layer per species and write the result
# next to the input file.
merge_overlapping_by_espece_exploded(
    input_geojson="/home/adelb/Documents/Bpartners/Stanislas/big_VGGs/small_state_expanded_4326.geojson",
    output_geojson="/home/adelb/Documents/Bpartners/Stanislas/big_VGGs/small_state_expanded_merged_4326.geojson",
)


Cleaned GeoJSON saved to /home/adelb/Documents/Bpartners/Stanislas/big_VGGs/small_state_expanded_merged_4326.geojson


In [3]:
import geopandas as gpd
from shapely.ops import unary_union
from shapely.strtree import STRtree
import numpy as np
from shapely.geometry import Polygon, MultiPolygon

def to_outer_polygon(union_geom):
    """
    Reduce a Polygon or MultiPolygon to one hole-free Polygon.

    A Polygon is rebuilt from its exterior ring only. For a MultiPolygon,
    the part with the largest area is selected and rebuilt the same way;
    the remaining parts are discarded.

    Raises:
        TypeError: if the input is neither a Polygon nor a MultiPolygon.
    """
    if isinstance(union_geom, Polygon):
        return Polygon(union_geom.exterior)

    if isinstance(union_geom, MultiPolygon):
        # Only the biggest part survives.
        biggest_part = max(union_geom.geoms, key=lambda part: part.area)
        return Polygon(biggest_part.exterior)

    raise TypeError(f"Unsupported geometry type: {union_geom.geom_type}")


def is_overlap(geom1, geom2, iou_thresh=0.5):
    """
    Tell whether two geometries overlap strongly enough.

    The intersection-over-union of the two geometries is compared with
    ``iou_thresh``; a union with no area counts as an IoU of 0.

    Returns:
        bool: True when IoU >= ``iou_thresh``.
    """
    shared_area = geom1.intersection(geom2).area
    combined_area = geom1.union(geom2).area

    if combined_area > 0:
        score = shared_area / combined_area
    else:
        score = 0

    return score >= iou_thresh
    

def resolve_overlaps(gdf, area_threshold=0.8, iou_thresh = 0.5):
    """
    Merge clusters of strongly overlapping polygons into single polygons.

    Polygons are visited in row order. Each polygon that has not been
    consumed yet seeds a cluster made of itself plus every other
    not-yet-consumed polygon whose IoU with the seed is at least
    ``iou_thresh``. A cluster of one is emitted unchanged. A larger cluster
    is replaced by the outer ring of the union of its members, and takes
    its attributes from one member:

    * the member with the largest share of the union area, if that share
      is at least ``area_threshold``;
    * otherwise the member with the highest score.

    Parameters:
        gdf (GeoDataFrame): input polygons with 'label' and 'scores' columns.
        area_threshold (float): minimum (member area / union area) for the
            largest member to impose its label and score.
        iou_thresh (float): minimum IoU with the seed to join its cluster.

    Returns:
        GeoDataFrame: columns 'geometry', 'label', 'scores', same CRS as
        the input. Empty input yields an empty frame with those columns.
    """
    # STRtree.query returns *positions* in the geometry list, so make the
    # row labels equal to positions instead of trusting the incoming index.
    gdf = gdf.reset_index(drop=True)
    geoms = list(gdf.geometry)
    labels = gdf["label"].tolist()
    scores = gdf["scores"].tolist()

    results = []
    used = set()
    tree = STRtree(geoms) if geoms else None

    for i, geom in enumerate(geoms):
        if i in used:
            continue

        # The spatial index narrows the search to bounding-box hits; the
        # IoU test then decides. Polygons consumed by an earlier cluster
        # are excluded so no polygon is merged twice.
        cluster = {i}
        for j in (int(k) for k in tree.query(geom)):
            if j not in used and is_overlap(geoms[j], geom, iou_thresh):
                cluster.add(j)
        used.update(cluster)

        if len(cluster) == 1:
            results.append({
                "geometry": geom,
                "label": labels[i],
                "scores": scores[i]
            })
            continue

        members = sorted(cluster)  # deterministic tie-breaking
        union_geom = unary_union([geoms[j] for j in members])
        union_area = union_geom.area

        # Compute ratios (area of polygon / union area)
        candidates = [
            {
                "idx": j,
                "label": labels[j],
                "scores": scores[j],
                # A zero-area union cannot be dominated by any member.
                "ratio": geoms[j].area / union_area if union_area > 0 else 0.0,
            }
            for j in members
        ]

        # Largest share of the union first, score as tie-breaker
        dominant = max(candidates, key=lambda c: (c["ratio"], c["scores"]))

        # Apply rules
        if dominant["ratio"] >= area_threshold:
            chosen = dominant
        else:
            chosen = max(candidates, key=lambda c: c["scores"])

        results.append({
            "geometry": to_outer_polygon(union_geom),
            "label": chosen["label"],
            "scores": chosen["scores"]
        })

    # Explicit columns keep the geometry column (and CRS) valid even when
    # there are no results.
    return gpd.GeoDataFrame(
        results,
        columns=["geometry", "label", "scores"],
        geometry="geometry",
        crs=gdf.crs,
    )



def merge_geojson(input_geojson, output_geojson, area_threshold=0.8, iou_thresh=0.5):
    """
    Read a GeoJSON of detections, merge overlapping polygons with
    ``resolve_overlaps`` and write the result as GeoJSON.

    Parameters:
        input_geojson: path of the GeoJSON file to read; it must provide
            'label' and 'scores' columns.
        output_geojson: path of the GeoJSON file to write.
        area_threshold (float): forwarded to ``resolve_overlaps``.
        iou_thresh (float): forwarded to ``resolve_overlaps``.

    The defaults reproduce the values that were previously hard-coded.

    Raises:
        AssertionError: if 'label' or 'scores' is missing from the input.
    """
    gdf = gpd.read_file(input_geojson)

    # Ensure needed columns
    assert "label" in gdf.columns and "scores" in gdf.columns, "GeoJSON must have 'label' and 'scores'"

    cleaned = resolve_overlaps(gdf, area_threshold=area_threshold, iou_thresh=iou_thresh)

    cleaned.to_file(output_geojson, driver="GeoJSON")
    print(f"Saved merged GeoJSON -> {output_geojson}")


# Example usage: merge the overlapping detections of the test tile set.
merge_geojson(
    input_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test.geojson",
    output_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_merged.geojson",
)

Saved merged GeoJSON -> /home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_merged.geojson


In [4]:
import geopandas as gpd
from shapely.ops import unary_union
from shapely.geometry import Polygon, MultiPolygon

def dissolve_overlaps(gdf):
    """
    Merge all overlapping polygons into a non-overlapping set.
    Keeps only the outer rings (removes holes).

    Attributes are not carried over: the result only has a geometry
    column, in the CRS of the input.

    Returns:
        GeoDataFrame: one row per merged polygon; empty when the input has
        no (non-empty) geometry.

    Raises:
        ValueError: if the union is neither a Polygon nor a MultiPolygon
            (e.g. the input contains points or lines).
    """
    def _polygon_parts(geom):
        # Split a (Multi)Polygon into its single polygons.
        if geom.geom_type == "Polygon":
            return [geom]
        if geom.geom_type == "MultiPolygon":
            return list(geom.geoms)
        raise ValueError(f"Unexpected geometry type: {geom.geom_type}")

    # Merge everything into one geometry
    unioned = unary_union(gdf.geometry)

    # Nothing to dissolve: return an empty layer rather than failing on the
    # empty GeometryCollection produced by the union.
    if unioned.is_empty:
        return gpd.GeoDataFrame(geometry=[], crs=gdf.crs)

    # Drop holes, then union once more: a polygon that sat inside a hole is
    # now covered by the filled outer polygon and must be absorbed by it,
    # otherwise the output would contain overlapping shapes.
    filled = unary_union([Polygon(part.exterior) for part in _polygon_parts(unioned)])

    # Split back into individual polygons
    geoms = [Polygon(part.exterior) for part in _polygon_parts(filled)]

    return gpd.GeoDataFrame(geometry=geoms, crs=gdf.crs)


def clean_geojson(input_geojson, output_geojson):
    """
    Read a GeoJSON file, dissolve its overlapping polygons with
    ``dissolve_overlaps`` and write the result to ``output_geojson``.
    """
    source_frame = gpd.read_file(input_geojson)

    # Dissolve, then write straight back out as GeoJSON.
    dissolve_overlaps(source_frame).to_file(output_geojson, driver="GeoJSON")

    print(f"Saved cleaned GeoJSON -> {output_geojson}")


# Example usage: produce a non-overlapping version of the test layer.
if __name__ == "__main__":
    clean_geojson(
        input_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test.geojson",
        output_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_nonoverlap.geojson",
    )


Saved cleaned GeoJSON -> /home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_nonoverlap.geojson


In [1]:
import geopandas as gpd
import numpy as np
from shapely.strtree import STRtree

def polygon_iou(poly1, poly2):
    """Return the intersection-over-union of two polygons (0 if the union has no area)."""
    overlap_area = poly1.intersection(poly2).area
    combined_area = poly1.union(poly2).area
    if combined_area > 0:
        return overlap_area / combined_area
    return 0


def nms_polygons(gdf, iou_thresh=0.5):
    """
    Score-ranked Non-Maximum Suppression (NMS) on the polygons of a
    GeoDataFrame.

    Polygons are visited from highest to lowest 'scores'. Each polygon
    that has not been suppressed is kept, and every other polygon with the
    same 'label' whose IoU with it is at least ``iou_thresh`` is
    suppressed.

    Returns:
        GeoDataFrame: the kept rows, in visiting order, with a fresh index.
    """
    polygons = list(gdf.geometry)
    score_values = gdf["scores"].values
    label_values = gdf["label"].values

    # Visiting order: best score first.
    ranking = score_values.argsort()[::-1]

    spatial_index = STRtree(polygons)
    kept = []
    dropped = set()

    for winner in ranking:
        if winner in dropped:
            continue

        kept.append(winner)
        winner_geom = polygons[winner]

        # Only polygons returned by the spatial index can overlap the winner.
        for rival in spatial_index.query(winner_geom):
            # Skip the winner itself, already-suppressed polygons, and
            # polygons of a different class.
            if (
                rival == winner
                or rival in dropped
                or label_values[winner] != label_values[rival]
            ):
                continue
            if polygon_iou(winner_geom, polygons[rival]) >= iou_thresh:
                dropped.add(rival)

    return gdf.iloc[kept].reset_index(drop=True)

def nms_polygons_area(gdf, iou_thresh=0.5):
    """
    Area-ranked Non-Maximum Suppression (NMS) on polygons in a GeoDataFrame.

    Unlike ``nms_polygons``, polygons are ranked by area, not by score:
    they are visited from largest to smallest, each polygon that has not
    been suppressed is kept, and every other polygon with the same 'label'
    whose IoU with it is at least ``iou_thresh`` is suppressed. The
    'scores' column is not used.

    Returns:
        GeoDataFrame: the kept rows, in visiting order, with a fresh index.
    """
    geoms = list(gdf.geometry)
    labels = gdf["label"].values
    areas = np.array([geom.area for geom in geoms])

    # Sort polygons by area (large → small)
    order = areas.argsort()[::-1]

    keep = []
    suppressed = set()

    tree = STRtree(geoms)

    for i in order:
        if i in suppressed:
            continue

        # Keep the largest remaining polygon
        keep.append(i)

        # Query spatial index for potential overlaps
        candidates = tree.query(geoms[i])

        for j in candidates:
            if j == i or j in suppressed:
                continue
            if labels[i] != labels[j]:
                # Only suppress polygons of the same class
                continue
            iou = polygon_iou(geoms[i], geoms[j])
            if iou >= iou_thresh:
                suppressed.add(j)

    return gdf.iloc[keep].reset_index(drop=True)

def nms_geojson(input_geojson, output_geojson, iou_thresh=0.5):
    """
    Read detections from a GeoJSON file, run ``nms_polygons`` on them and
    write the surviving polygons to ``output_geojson``.

    The input must provide 'label' and 'scores' columns; an AssertionError
    is raised otherwise.
    """
    detections = gpd.read_file(input_geojson)

    # Both attribute columns are required by the NMS step.
    has_required = all(col in detections.columns for col in ("label", "scores"))
    assert has_required, "GeoJSON must have 'label' and 'scores'"

    survivors = nms_polygons(detections, iou_thresh=iou_thresh)

    survivors.to_file(output_geojson, driver="GeoJSON")
    print(f"Saved NMS-cleaned GeoJSON -> {output_geojson}")


# Example usage: second, stricter NMS pass (IoU 0.1) over the detections.
if __name__ == "__main__":
    nms_geojson(
        input_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/new_test_obb_valid_NMS_IOU_0.2.geojson",
        output_geojson="/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/new_test_obb_valid_NMS_IOU_0.2_nms.geojson",
        iou_thresh=0.1,
    )


Saved NMS-cleaned GeoJSON -> /home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/new_test_obb_valid_NMS_IOU_0.2_nms.geojson


In [2]:
import geopandas as gpd

from shapely.strtree import STRtree

def polygon_filter(gdf, inter_thresh=0.8):
    """
    Drop polygons that are mostly covered by a larger polygon.

    Polygons are ranked by area (largest first). Each polygon that is still
    kept is compared with the lower-ranked polygons returned by the spatial
    index; a lower-ranked polygon is dropped when the intersection covers
    more than ``inter_thresh`` of its own area.
    Relies on STRtree.query returning indices (Shapely >= 2.0).

    Returns:
        GeoDataFrame: the remaining rows, sorted by decreasing area, with a
        fresh index and an extra 'area' column.
    """
    with_area = gdf.copy()
    with_area["area"] = with_area.geometry.area

    # Rank polygons from largest to smallest; positions equal row labels.
    gdf_sorted = with_area.sort_values("area", ascending=False).reset_index(drop=True)

    shapes = list(gdf_sorted.geometry)
    spatial_index = STRtree(shapes)
    dropped = set()

    for rank, big_geom in enumerate(shapes):
        if rank in dropped:
            continue

        for cand in spatial_index.query(big_geom):
            # Only lower-ranked polygons that are still alive can be dropped.
            if cand <= rank or cand in dropped:
                continue

            # Row access: `small.area` is the value of the 'area' column.
            small = gdf_sorted.iloc[cand]
            overlap = big_geom.intersection(small.geometry)
            if overlap.is_empty:
                continue

            covered_share = overlap.area / small.area
            if covered_share > inter_thresh:
                dropped.add(cand)

    return gdf_sorted.drop(index=list(dropped)).reset_index(drop=True)





# Example usage: remove detections mostly covered by a larger one, then save.
gdf = gpd.read_file(
    "/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/new_test_obb_valid_NMS_IOU_0.2_nms.geojson"
)
filtered_gdf = polygon_filter(gdf, inter_thresh=0.4)
filtered_gdf.to_file(
    "/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/new_test_obb_valid_NMS_IOU_0.2_filtered.geojson",
    driver="GeoJSON",
)


In [4]:
import geopandas as gpd
from shapely.ops import unary_union

def polygon_nmm(gdf, iou_thresh=0.5):
    """
    Non-Maximum Merging (NMM) for polygons.
    Merge polygons that overlap with IoU > threshold.

    Polygons are visited in row order. Each polygon that has not been
    merged yet seeds a group and absorbs every other unmerged polygon
    whose IoU with the seed is strictly greater than ``iou_thresh``; the
    group is then replaced by the union of its members. Attributes are not
    carried over.

    A spatial index limits the IoU computation to polygons whose bounding
    boxes intersect the seed, instead of testing every pair.

    Parameters:
        gdf (GeoDataFrame): Input polygons
        iou_thresh (float): Intersection-over-Union threshold for merging

    Returns:
        GeoDataFrame: Polygons after merging (geometry column only, same CRS)
    """
    # Imported here because this cell does not import STRtree itself.
    from shapely.strtree import STRtree

    geoms = list(gdf.geometry)
    merged_polys = []
    used = set()

    def iou(poly1, poly2):
        inter = poly1.intersection(poly2).area
        union = poly1.union(poly2).area
        return inter / union if union > 0 else 0

    # With a non-negative threshold, a merge needs a positive intersection
    # area, hence intersecting bounding boxes: the index cannot miss a
    # partner. A negative threshold matches every pair, so fall back to
    # the exhaustive scan in that case.
    use_index = bool(geoms) and iou_thresh >= 0
    tree = STRtree(geoms) if use_index else None

    for i, geom_i in enumerate(geoms):
        if i in used:
            continue
        current_group = [geom_i]
        used.add(i)

        if use_index:
            # query() returns positions; sort them to keep row order.
            neighbours = sorted(int(k) for k in tree.query(geom_i))
        else:
            neighbours = range(len(geoms))

        for j in neighbours:
            if j in used:  # also skips the seed itself
                continue
            if iou(geom_i, geoms[j]) > iou_thresh:
                current_group.append(geoms[j])
                used.add(j)

        # merge all geometries in the group
        merged_polys.append(unary_union(current_group))

    return gpd.GeoDataFrame(geometry=merged_polys, crs=gdf.crs)


# Example usage: merge overlapping oriented boxes (IoU > 0.2) and save them.
gdf = gpd.read_file(
    "/home/adelb/Documents/Bpartners/Stanislas/big_VGGs/typo_obb.geojson"
)
merged_gdf = polygon_nmm(gdf, iou_thresh=0.2)
merged_gdf.to_file(
    "/home/adelb/Documents/Bpartners/Stanislas/big_VGGs/typo_obb_nmm.geojson",
    driver="GeoJSON",
)


KeyboardInterrupt: 

In [19]:
# Split multi-part geometries into single polygons and save them.
# NOTE: `merged_gdf` must already exist in the kernel (it is produced by the
# polygon_nmm example cell).
# index_parts / reset_index keep a plain integer index so that no index
# columns are written, and the driver is explicit like in the other cells
# instead of being left to the geopandas version in use.
merged_gdf.explode(index_parts=False).reset_index(drop=True).to_file(
    '/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_exp.geojson',
    driver="GeoJSON",
)

In [20]:
import geopandas as gpd

def suppress_big_keep_small(gdf, inter_thresh=0.3):
    """
    Delete big polygons if they overlap with smaller ones.

    Polygons are scanned in row order. For every pair of polygons that are
    both still kept when the pair is reached and that intersect with a
    positive area, the larger one is removed when
    overlap_area / small_area > inter_thresh (on equal areas, the polygon
    currently being scanned is the one removed).
    NOTE: a polygon removed in the middle of its own scan still finishes
    that scan, as in the original implementation.

    A spatial index restricts the intersection tests to polygons whose
    bounding boxes intersect; areas are computed once.

    Parameters:
        gdf (GeoDataFrame): Input polygons
        inter_thresh (float): ratio threshold
                              overlap_area / small_area must be > inter_thresh

    Returns:
        GeoDataFrame: Filtered polygons, in their original row order
    """
    # Imported here because this cell does not import STRtree itself.
    from shapely.strtree import STRtree

    gdf = gdf.copy().reset_index(drop=True)
    geoms = list(gdf.geometry)
    areas = [geom.area for geom in geoms]
    keep_indices = set(range(len(geoms)))
    tree = STRtree(geoms) if geoms else None

    for i, poly_i in enumerate(geoms):
        if i not in keep_indices:
            continue
        area_i = areas[i]

        # Pairs with disjoint bounding boxes have no intersection area and
        # were no-ops in the exhaustive scan; visit the rest in row order.
        for j in sorted(int(k) for k in tree.query(poly_i)):
            if i == j or j not in keep_indices:
                continue
            inter = poly_i.intersection(geoms[j]).area
            if inter <= 0:
                continue

            # find smaller and bigger
            if area_i < areas[j]:
                big, small_area = j, area_i
            else:
                big, small_area = i, areas[j]

            # check overlap ratio and remove the big one
            if inter / small_area > inter_thresh:
                keep_indices.discard(big)

    # sorted() makes the output row order explicit instead of relying on
    # set iteration order.
    return gdf.loc[sorted(keep_indices)].reset_index(drop=True)


# Example usage: keep only the small polygons of the exploded layer.
gdf = gpd.read_file(
    '/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/test_exp.geojson'
)
filtered_gdf = suppress_big_keep_small(gdf, inter_thresh=0.3)
filtered_gdf.to_file(
    "/home/adelb/Downloads/Compressed/Fusion_data_Place_Stanislas_256X256_Z25/small_only.geojson",
    driver="GeoJSON",
)
