In [266]:
from sklearn.cluster import DBSCAN, HDBSCAN
import numpy as np
import geopandas as gpd 
import os
import pandas as pd 
from shapely import concave_hull, convex_hull, segmentize, minimum_rotated_rectangle
from shapely.ops import nearest_points
# ignore warnings
import warnings
warnings.filterwarnings('ignore')


In [267]:
# Tunable parameters for the super-parcel workflow.
# Passed to DBSCAN as `eps`: the maximum parcel-to-parcel distance for two parcels
# to be neighbours, in the units of the UTM CRS the parcels are reprojected to.
dbscan_distance = 50
# Second argument to shapely.segmentize when densifying dissolved cluster geometries
# (segmentize treats it as a maximum segment length, not a density).
density_threshold = 10
# `ratio` argument passed to shapely.concave_hull when building each super parcel.
concave_ratio = 0.7

In [268]:
def polygon_distance(polygon1, polygon2):
    """Return the minimum distance between two geometries.

    The closest pair of points (one on each geometry) is obtained from
    ``nearest_points`` and the distance between that pair is returned.
    """
    closest_on_first, closest_on_second = nearest_points(polygon1, polygon2)
    gap = closest_on_first.distance(closest_on_second)
    return gap

def compute_distance_matrix(polygons):
    """Build the symmetric matrix of pairwise distances between geometries.

    Entry ``[i, j]`` is ``polygon_distance(polygons[i], polygons[j])``; the
    diagonal stays at zero. Only the upper triangle is computed, and each value
    is mirrored into the lower triangle.
    """
    size = len(polygons)
    matrix = np.zeros((size, size))

    for row in range(size):
        first = polygons[row]
        for col in range(row + 1, size):
            separation = polygon_distance(first, polygons[col])
            matrix[row, col] = separation
            matrix[col, row] = matrix[row, col]  # mirror to keep the matrix symmetric

    return matrix

In [269]:
# Folder that holds the input shapefile and receives every output written below.
# Set the SUPERPARCELS_DATA_DIR environment variable to run on another machine;
# the original workstation path is kept as the fallback so existing runs are unchanged.
data_dir = os.environ.get('SUPERPARCELS_DATA_DIR', r'D:\Projects\superparcels\data\urban')
parcels = gpd.read_file(os.path.join(data_dir, 'sp_sample_08013_cluster_canidates.shp'))
# EPSG code of the UTM zone estimated from the data; used to reproject the parcels
# before any distance is computed.
utm = parcels.estimate_utm_crs().to_epsg()

In [270]:
# Reproject to the estimated UTM zone so the distances computed below are in that CRS's linear units.
parcels = parcels.to_crs(epsg=utm)  

In [271]:
# Distinct owner names; the clustering loop below runs once per owner.
unique_owners = parcels['OWNER'].unique()
print('Number of unique owners:', len(unique_owners))   

Number of unique owners: 1516


In [272]:
# Number of parcels held by each owner, largest holdings first.
parcels['OWNER'].value_counts()

OWNER
STEPHEN DTEBO          23
DAVID NLARSON          18
HENRY PVELLANDI        13
CHRISTOPHER JWALKER    13
TUCKER CBROCK           9
                       ..
JAMES ABRAY             2
JACOBBECK               2
JACLYNFLEMING           2
JACKMCCARTHY            2
ZHIWEIZHANG             2
Name: count, Length: 1516, dtype: int64

In [273]:
# Cluster every owner's parcels by proximity with DBSCAN, then split the result into
# parcels that belong to a multi-parcel cluster and parcels that stand alone.
# Per-owner pieces are collected in lists and concatenated once after the loop
# instead of re-concatenating the growing frames on every iteration.
clustered_frames = []
single_frames = []
for owner in unique_owners:
    print(f'OWNER: {owner}')
    # Copy so that adding the 'cluster' column never writes into a slice of `parcels`.
    owner_parcels = parcels[parcels['OWNER'] == owner].copy()
    polygons = owner_parcels['geometry'].to_list()
    distance_matrix = compute_distance_matrix(polygons)

    # Distances are precomputed, so DBSCAN only thresholds them: two parcels within
    # `dbscan_distance` of each other are enough (min_samples=2) to form a cluster.
    dbscan = DBSCAN(eps=dbscan_distance, min_samples=2, metric='precomputed')
    clusters = dbscan.fit_predict(distance_matrix)
    owner_parcels['cluster'] = clusters
    counts = owner_parcels['cluster'].value_counts()
    #print(f'Cluster Counts: {counts}')

    # A parcel stands alone when its label has a single member or when DBSCAN
    # labelled it as noise (-1).
    single_parcel_clusters = counts[counts == 1].index
    is_single = owner_parcels['cluster'].isin(single_parcel_clusters) | (owner_parcels['cluster'] == -1)

    single_frames.append(owner_parcels[is_single])
    clustered_frames.append(owner_parcels[~is_single])
    print('______________________________________________________________________________________')

# With no owners at all there is nothing to concatenate; fall back to empty frames.
clustered_parcel_data = pd.concat(clustered_frames, ignore_index=True) if clustered_frames else gpd.GeoDataFrame()
single_parcel_data = pd.concat(single_frames, ignore_index=True) if single_frames else gpd.GeoDataFrame()
    

    

OWNER: 2004 JAMES THOMASCHRISTOL
______________________________________________________________________________________
OWNER: AARON RICHARDBROWN
______________________________________________________________________________________
OWNER: AARONGREENE
______________________________________________________________________________________
OWNER: ABISHEKMANMADHAN
______________________________________________________________________________________
OWNER: ADAM BENJAMINPALMER
______________________________________________________________________________________
OWNER: ADAM GHECHT
______________________________________________________________________________________
OWNER: ADELA FANGHEL
______________________________________________________________________________________
OWNER: ADELE GMAHLE
______________________________________________________________________________________
OWNER: ADELYN MUNCE REVJONES
______________________________________________________________________________________

In [274]:
# create cluster ID
# Build the cluster ID as "<OWNER>_<DBSCAN label>". Labels come from a separate DBSCAN
# run per owner, so the owner prefix is what keeps IDs distinct across owners.
clustered_parcel_data['cluster_ID'] = clustered_parcel_data['OWNER'] + '_' + clustered_parcel_data['cluster'].astype(str)
# NOTE(review): for stand-alone parcels the label is typically -1, so all of an owner's
# stand-alone parcels share the same ID here - confirm that is acceptable if this column is used.
single_parcel_data['cluster_ID'] = single_parcel_data['OWNER'] + '_' + single_parcel_data['cluster'].astype(str)

In [275]:
# Merge the parcels of each cluster into a single row/geometry keyed by cluster_ID;
# reset_index() turns cluster_ID back into a regular column.
parcel_dissolve = clustered_parcel_data.dissolve(by='cluster_ID').reset_index()

In [276]:
# Work on a copy so `parcel_dissolve` keeps the unmodified dissolved geometries.
super_parcels = parcel_dissolve.copy()

In [277]:
# densify the super parcels
# Densify the super parcels: segmentize adds vertices along each boundary, with
# `density_threshold` passed as its second argument (the maximum segment length),
# before the concave hull is computed from those vertices.
super_parcels['geometry'] = super_parcels['geometry'].apply(lambda x: segmentize(x, density_threshold))

In [278]:
# Replace each densified cluster geometry with its concave hull; `concave_ratio`
# is passed as the hull's `ratio` argument.
super_parcels['geometry'] = super_parcels['geometry'].apply(lambda x: concave_hull(x, ratio=concave_ratio))

In [279]:

# Remove overlaps between super parcels.
# Each hull is clipped by the union of all *other* super parcels and the remainder is
# exploded into single-part polygons that replace the original row.
# The loop iterates over the frame as it was when the loop started, while
# `super_parcels` itself is updated on every pass, so later hulls are clipped against
# the already-clipped versions of earlier ones (the result depends on row order).
sp_crs = super_parcels.crs  # keep the temporary frames in the same CRS as the data
for idx, parcel in super_parcels.iterrows():
    parcel_id = parcel['cluster_ID']
    parcel_geom = gpd.GeoDataFrame(geometry=[parcel.geometry], crs=sp_crs)
    other_sp = super_parcels.loc[super_parcels['cluster_ID'] != parcel_id]
    other_union = gpd.GeoDataFrame(geometry=[other_sp.unary_union], crs=sp_crs)
    parcel_clip = (gpd.overlay(parcel_geom, other_union, how='difference')
                    .explode(ignore_index=True)
                    .reset_index(drop=True))
    parcel_clip['cluster_ID'] = parcel_id
    parcel_clip['OWNER'] = parcel['OWNER']
    # drop the corresponding row(s) in super_parcels
    super_parcels = super_parcels[super_parcels['cluster_ID'] != parcel_id]
    # add the clipped pieces back to super_parcels
    super_parcels = pd.concat([super_parcels, parcel_clip], ignore_index=True)
    


In [280]:
# Save the clipped super parcels; the file name records the three tuning parameters used.
super_parcels[['cluster_ID', 'OWNER', 'geometry']].to_file(os.path.join(data_dir, f'sp_dbscan{dbscan_distance}-cr{concave_ratio}-dens{density_threshold}.shp'))

In [101]:
# Reload super parcels from disk, replacing the in-memory frame.
# NOTE(review): this reads 'super_parcels_cleaned_<dist>.shp', which is not the file written by the
# export cell above ('sp_dbscan...-cr...-dens....shp'), and this cell's execution count is out of
# sequence - presumably the file was cleaned outside the notebook; confirm before a Restart & Run All.
super_parcels = gpd.read_file(os.path.join(data_dir, f'super_parcels_cleaned_{dbscan_distance}.shp'))

In [None]:
# Inspect the reloaded super parcels.
super_parcels

In [94]:
# Keep only the identifying columns and the geometry.
super_parcels = super_parcels[['cluster_ID', 'OWNER', 'geometry']]

In [None]:
# Spatially join each super parcel to the original parcels it matches (sjoin defaults are used).
# The cells below rely on the join's output columns 'OWNER_left' (the super parcel's owner)
# and 'index_right' (the index of the matched row in `parcels`).
# NOTE(review): `super_parcels` may have been reloaded from disk while `parcels` was reprojected
# to UTM in memory - confirm both frames share a CRS.
sp_join = gpd.sjoin(super_parcels, parcels)
sp_join

In [104]:
# Save the super-parcel/parcel join (super parcel geometry plus the matched parcel index).
sp_join[['cluster_ID', 'OWNER_left', 'index_right', 'geometry']].to_file(os.path.join(data_dir, f'super_parcels_cleaned_{dbscan_distance}_join.shp'))

In [None]:
# Map every cluster_ID to the list of `parcels` index values joined to it
# (a value can repeat when the cluster has several rows in `sp_join`).
cluster_to_parcel_ids = sp_join.groupby('cluster_ID')['index_right'].apply(list)
cluster_to_parcel_ids

**TODO — pick up here:** remove the multiple small sliver polygons, then rerun the code above. Add a condition so that only parcels with matching ownership are merged in before dissolving `sp_geom`.

In [None]:
# Quick look at the columns available before merging parcels back in.
super_parcels.head(2)

In [None]:
# Grow every super parcel by the original parcels it was joined to, but only those
# owned by the same owner, then dissolve the pieces back into one row per cluster.
# The dissolved clusters are collected and concatenated once after the loop; clusters
# that have no entry in `cluster_to_parcel_ids` are carried over unchanged.
merged_super_parcels = []
for cluster_id, parcel_ids in cluster_to_parcel_ids.items():
    sp_geom = super_parcels[super_parcels['cluster_ID'] == cluster_id]
    # Compare against THIS cluster's owner. Looking the owner up in `sp_join` by parcel id
    # alone can return the row of a different super parcel that the same parcel also touches.
    cluster_owner = sp_geom['OWNER'].iloc[0]

    pieces = [sp_geom]
    for parcel_id in set(parcel_ids):
        print(f'{cluster_id} -> {parcel_id}')
        parcel_geom = parcels.loc[[parcel_id]]
        if parcel_geom['OWNER'].iloc[0] == cluster_owner:
            print('Owner match')
            pieces.append(parcel_geom)
        else:
            print('Owner mismatch')
        print('___')

    # The super parcel rows come first, so the dissolved row keeps their cluster_ID and OWNER.
    sp_geom = pd.concat(pieces, ignore_index=True)
    sp_geom_dissolve = sp_geom.dissolve().reset_index()[['cluster_ID', 'OWNER', 'geometry']]
    merged_super_parcels.append(sp_geom_dissolve)

# Replace the processed clusters with their merged versions (appended after the untouched
# rows, in processing order).
untouched_super_parcels = super_parcels[~super_parcels['cluster_ID'].isin(cluster_to_parcel_ids.index)]
super_parcels = pd.concat([untouched_super_parcels, *merged_super_parcels], ignore_index=True)
    
    

In [114]:
# Save the final super parcels (after same-owner parcels were merged back in).
super_parcels.to_file(os.path.join(data_dir, f'super_parcels_cleaned_{dbscan_distance}_final.shp'))

In [None]:
# Leftover inspection cell: `single_parcel_clusters` is only assigned inside the per-owner
# clustering loop, so this shows the value left over from the last owner processed.
single_parcel_clusters