In [2]:
from sklearn.cluster import DBSCAN, HDBSCAN
import numpy as np
import geopandas as gpd 
import os
import pandas as pd 
from shapely import concave_hull, convex_hull, segmentize, minimum_rotated_rectangle
from shapely.ops import nearest_points
# ignore warnings
import warnings
warnings.filterwarnings('ignore')


In [3]:
# Clustering parameters.
# Used as `eps` for DBSCAN on the precomputed parcel distance matrix and as the
# buffer radius when the super-parcel geometries are buffered out and back in.
# Presumably metres, since parcels are reprojected to an estimated UTM CRS — confirm.
dbscan_distance = 35
# NOTE(review): not referenced by any cell visible in this part of the notebook — confirm it is still needed.
density_threshold = 10
# NOTE(review): not referenced by any visible cell; presumably meant as the `ratio`
# argument of shapely's concave_hull — confirm.
concave_ratio = 0.7

In [4]:
def polygon_distance(polygon1, polygon2):
    """Return the minimum distance between two geometries.

    Parameters
    ----------
    polygon1, polygon2 : shapely geometry
        Geometries to compare. The result is in the units of their
        coordinate system.

    Returns
    -------
    float
        Shortest separation between the two geometries.

    Raises
    ------
    ValueError
        If either geometry is empty. This mirrors the error that
        ``nearest_points`` raised in the previous implementation.
    """
    # An empty geometry has no meaningful distance. Fail loudly rather than
    # letting a NaN end up in the distance matrix handed to DBSCAN.
    if polygon1.is_empty:
        raise ValueError("The first input geometry is empty")
    if polygon2.is_empty:
        raise ValueError("The second input geometry is empty")

    # The geometry's own distance() already is the minimum separation, so the
    # two nearest points do not need to be materialised first.
    return polygon1.distance(polygon2)

def compute_distance_matrix(polygons):
    """Build the symmetric pairwise distance matrix for a sequence of geometries.

    Entry ``[i, j]`` holds ``polygon_distance(polygons[i], polygons[j])``;
    the diagonal is left at zero.
    """
    size = len(polygons)
    matrix = np.zeros((size, size))

    # Visit every unordered pair once (strict upper triangle, row by row)
    # and mirror the value into the lower triangle.
    upper_rows, upper_cols = np.triu_indices(size, k=1)
    for row, col in zip(upper_rows.tolist(), upper_cols.tolist()):
        separation = polygon_distance(polygons[row], polygons[col])
        matrix[row, col] = separation
        matrix[col, row] = separation

    return matrix

In [6]:
# NOTE(review): absolute, machine-specific Windows paths; the notebook only runs
# where this folder layout exists.
data_dir = r'D:\Projects\superparcels\data\Urban\Alameda_CA'
output_dir = r'D:\Projects\superparcels\data\urban\outputs\dmatrix'

# Load the candidate parcels and work out the EPSG code of a matching UTM zone.
parcels_path = os.path.join(data_dir, 'sp_sample_06001_cluster_canidates.shp')
parcels = gpd.read_file(parcels_path)
utm_crs = parcels.estimate_utm_crs()
utm = utm_crs.to_epsg()

In [7]:
# Reproject to the UTM zone estimated when the parcels were loaded, so the
# distance and area calculations later in the notebook run in projected units.
parcels = parcels.to_crs(epsg=utm)  

In [8]:
# Number of parcels recorded for each OWNER value.
parcels['OWNER'].value_counts()

OWNER
DAVID SBOYD        70
RAJNEESHSALWAN     60
FAYEZ EABBOUD      51
JOHN JSULLIVAN     49
DANILOMAYORGA      46
                   ..
DOUGLAS ABROWN      1
LAKSHMIKOSARAJU     1
YUNGLINGCHEN        1
MILANPETRENCIK      1
LILLIANHOWAN        1
Name: count, Length: 38644, dtype: int64

In [9]:
# Distinct OWNER values; the clustering loop iterates over these.
unique_owners = parcels['OWNER'].unique()
owner_total = len(unique_owners)
print('Number of unique owners:', owner_total)

Number of unique owners: 38644


In [24]:
# Cluster each owner's parcels with DBSCAN on a precomputed polygon-to-polygon
# distance matrix, then split the parcels into:
#   clustered_parcel_data - parcels in clusters of at least `min_cluster_parcels`
#   single_parcel_data    - DBSCAN noise (label -1) plus parcels in smaller clusters

# NOTE(review): leftover debugging filter, kept so the cell behaves as before.
# Set to None to process every owner.
debug_owner = 'COLEMANFOLEY'
# Clusters with fewer parcels than this are treated like unclustered parcels.
min_cluster_parcels = 3

# The estimator configuration is the same for every owner, so build it once.
dbscan = DBSCAN(eps=dbscan_distance, min_samples=2, metric='precomputed')

# Collect the per-owner pieces and concatenate once after the loop; growing a
# frame with pd.concat inside the loop re-copies all accumulated rows each time.
clustered_parts = []
single_parts = []
for owner in unique_owners:
    if debug_owner is not None and owner != debug_owner:
        continue

    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of `parcels` (chained assignment).
    owner_parcels = parcels[parcels['OWNER'] == owner].copy()
    if owner_parcels.empty:
        # e.g. a missing OWNER value never compares equal to itself; there is
        # nothing to cluster, and DBSCAN cannot be fitted on zero samples.
        continue
    print(f'OWNER: {owner}')

    polygons = owner_parcels['geometry'].to_list()
    distance_matrix = compute_distance_matrix(polygons)

    # Diagnostic only: smallest non-zero gap between this owner's parcels.
    # The clustering below always uses `dbscan_distance` as eps, not this value.
    positive_distances = distance_matrix[distance_matrix > 0]
    if positive_distances.size == 0:
        min_valid_distance = 3
    else:
        min_valid_distance = np.round(np.min(positive_distances))
        print(f'Minimum valid distance: {min_valid_distance}')

    clusters = dbscan.fit_predict(distance_matrix)
    owner_parcels['cluster'] = clusters
    owner_parcels['area'] = owner_parcels['geometry'].area
    counts = owner_parcels['cluster'].value_counts()

    # DBSCAN labels noise as -1; group it with the clusters that are too small.
    low_parcel_clusters = counts[counts < min_cluster_parcels].index
    single_parcel_filter_ids = {-1, *low_parcel_clusters}

    is_single = owner_parcels['cluster'].isin(single_parcel_filter_ids)
    single_parts.append(owner_parcels[is_single])
    clustered_parts.append(owner_parcels[~is_single])
    print('______________________________________________________________________________________')

# Fall back to an empty GeoDataFrame when no owner was processed, which is what
# the accumulators were initialised to before.
single_parcel_data = (
    pd.concat(single_parts, ignore_index=True) if single_parts else gpd.GeoDataFrame()
)
clustered_parcel_data = (
    pd.concat(clustered_parts, ignore_index=True) if clustered_parts else gpd.GeoDataFrame()
)
    

    

OWNER: COLEMANFOLEY
Minimum valid distance: 59.0


In [22]:
# `distance_matrix` is whatever the clustering loop left behind for the last
# owner it processed; report when none of its entries is non-zero.
has_nonzero_distance = np.any(distance_matrix != 0)
if not has_nonzero_distance:
    print('All distances are zero')

In [None]:
# Smallest strictly positive pairwise distance in `distance_matrix` (as left by
# the last owner processed in the clustering loop). `initial=np.inf` stops
# np.min from raising ValueError when there is no positive distance at all
# (every entry is zero); the result is then inf.
min_valid_distance = np.min(distance_matrix[distance_matrix > 0], initial=np.inf)

Example of an owner with two parcels and their respective distances to each other (meters)

In [10]:
# create cluster ID
# create cluster ID: "<OWNER>_<cluster label>", added to both frames
for labelled_frame in (clustered_parcel_data, single_parcel_data):
    owner_names = labelled_frame['OWNER']
    cluster_labels = labelled_frame['cluster'].astype(str)
    labelled_frame['cluster_ID'] = owner_names + '_' + cluster_labels

In [18]:
# Merge the parcels of each cluster into one geometry, then turn the
# cluster_ID index back into an ordinary column.
dissolved_by_cluster = clustered_parcel_data.dissolve(by='cluster_ID')
parcel_dissolve = dissolved_by_cluster.reset_index()


In [20]:
# Recompute 'area' from the dissolved geometry. This overwrites the per-parcel
# 'area' column that was set in the clustering loop and carried through the
# dissolve (presumably as the first parcel's value per cluster — confirm).
parcel_dissolve['area'] = parcel_dissolve['geometry'].area

In [26]:
# Mean dissolved area per cluster_ID, largest first.
mean_area_by_cluster = parcel_dissolve.groupby('cluster_ID')['area'].mean()
mean_area_by_cluster.sort_values(ascending=False)

cluster_ID
THERAPEUTICSEDGEWISE_0    865913.749402
FRANCISCO JCASTRO_0       159065.579957
JUDIMOOR_0                107411.532067
DELTA BBEMENT_0            53037.217819
JASONVON ESCHEN_0          12225.876271
SOTERIOSPALMOS_0            7596.545356
BARRY LOUISPLOOG_0          6876.254014
ANNE GRACETOLBERT_0         5332.505209
JOHN JKELLY_0               4764.154995
DANIEL JMCCARTHY_0          4268.231304
KEVIN DNESS_0               3626.093358
FRED APICKRELL_0            2772.287780
PETERFREYMUTH_0             2710.725644
WAYNE EBENNETT_0            2557.883984
MARK JWOJCIECHOWSKI_0       2354.993858
DANIEL KGUESMAN_0           2348.315373
CRAIG HRUSSELL_0            2282.800028
WARD WRIGHTKING_0           1806.797998
JOHN GROEMER_0              1653.527003
YU TMORTON_0                1219.367472
GARY RCOOK_0                 820.450929
DOUGLAS EJOHNSON_0           693.257598
JOHN PSTURGEON_0             455.272190
DANIELCONNORS_0                8.167988
Name: area, dtype: float64

In [28]:
# Keep only the dissolved clusters whose mean area exceeds the super-parcel
# threshold. Area is in the units of the parcels' CRS (presumably square metres
# after the UTM reprojection — confirm).
min_super_parcel_area = 100000
mean_area = parcel_dissolve.groupby('cluster_ID')['area'].mean()
super_parcel_ids = mean_area[mean_area > min_super_parcel_area].index
is_super_parcel = parcel_dissolve['cluster_ID'].isin(super_parcel_ids)
# .copy() yields an independent frame, so later column assignments on
# `super_parcels` do not write into a slice of `parcel_dissolve`.
super_parcels = parcel_dissolve.loc[is_super_parcel, ['cluster_ID', 'OWNER', 'geometry']].copy()

In [29]:
# Buffer outwards by the DBSCAN distance and then back inwards by the same amount.
closing_radius = dbscan_distance
expanded_geometry = super_parcels['geometry'].buffer(closing_radius)
super_parcels['geometry'] = expanded_geometry.buffer(-closing_radius)


In [30]:
# Create the output folder if it is missing, so the export does not depend on
# the directory already existing on this machine.
os.makedirs(output_dir, exist_ok=True)
super_parcels.to_file(os.path.join(output_dir, 'super_parcels_rbuff.shp'))