In [55]:
from sklearn.cluster import DBSCAN, HDBSCAN
import numpy as np
import geopandas as gpd 
import os
import pandas as pd 
from shapely import concave_hull, convex_hull, segmentize, minimum_rotated_rectangle
from shapely.ops import nearest_points
# ignore warnings
import warnings
warnings.filterwarnings('ignore')


In [56]:
# Clustering configuration.
# eps for DBSCAN, in CRS units (metres once the parcels are reprojected to UTM).
# NOTE(review): the clustering loop further down reassigns this to 200 before
# clustering, so the value set here is not the one actually used.
dbscan_distance = 35
# NOTE(review): not referenced anywhere in the visible cells - confirm it is still needed.
density_threshold = 10
# NOTE(review): not referenced in the visible cells; presumably the `ratio`
# argument for shapely's concave_hull (imported above) - confirm.
concave_ratio = 0.7

In [57]:
def polygon_distance(polygon1, polygon2):
    """Return the minimum distance between two geometries.

    Parameters
    ----------
    polygon1, polygon2 : shapely geometry
        Any objects exposing shapely's ``distance`` method.

    Returns
    -------
    float
        Shortest separation in the geometries' coordinate units; 0.0 when the
        geometries touch or overlap.
    """
    # Geometry.distance already yields the minimum separation, so there is no
    # need to materialise the pair of nearest points first.
    return polygon1.distance(polygon2)

def compute_distance_matrix(polygons):
    """Build the symmetric pairwise distance matrix for a sequence of polygons.

    Entry ``[i, j]`` holds ``polygon_distance(polygons[i], polygons[j])``; the
    diagonal is zero. Each unordered pair is measured exactly once.
    """
    count = len(polygons)
    upper = np.zeros((count, count))

    # Fill the strict upper triangle only.
    for row in range(count):
        for col in range(row + 1, count):
            upper[row, col] = polygon_distance(polygons[row], polygons[col])

    # Mirror across the (all-zero) diagonal to obtain the full symmetric matrix.
    return upper + upper.T

In [58]:
# Input/output locations and the parcel sample to cluster.
# NOTE(review): absolute, machine-specific Windows paths - the notebook will not
# run on another machine without editing them.
data_dir = r'D:\Projects\superparcels\data\Rural\Crook_OR'
# NOTE(review): output_dir is not used in the visible cells (the final export
# writes to data_dir) and it points at an "urban" tree while data_dir is "Rural" - confirm.
output_dir = r'D:\Projects\superparcels\data\urban\outputs\dmatrix'
parcels = gpd.read_file(os.path.join(data_dir, 'sp_sample_41013_cluster_canidates.shp'))
# EPSG code of the UTM zone estimated for this dataset; used below to reproject the parcels.
utm = parcels.estimate_utm_crs().to_epsg()

In [59]:
# Reproject to the estimated UTM zone so distances and areas computed below are in metres.
parcels = parcels.to_crs(epsg=utm)  

In [60]:
# Number of parcels recorded under each owner name.
parcels['OWNER'].value_counts()

OWNER
BRADLEYSANTUCCI                        24
CONNIE MHATFIELD                       18
R HORTON INCD R HORTON INC-PORTLAND    18
JOANNWEAVER                            14
STEPHEN TGILLEN                        14
                                       ..
IRA GESSOE                              2
MARY DMAYFIELD                          2
SUSAN KCRAWFORD                         2
DUSTINCOLLINS                           2
IMPROVEMENTIDLEWAY                      2
Name: count, Length: 1226, dtype: int64

In [61]:
# Distinct owner names; the clustering loop iterates over these.
unique_owners = parcels['OWNER'].unique()
print('Number of unique owners:', len(unique_owners))   

Number of unique owners: 1226


In [62]:
# Passed as min_samples to DBSCAN in the clustering loop.
sample_size = 3

In [63]:
# Cluster each owner's parcels with DBSCAN on a precomputed polygon-to-polygon
# distance matrix, then split the result into clustered parcels and noise
# ("single") parcels.

# Debug filter: only this owner is processed. Set to None to process every owner.
target_owner = 'L RKOPCINSKI'

# eps used for clustering (overrides the value from the config cell). It is set
# once here, and the per-owner fallback below uses a local variable, so the
# fallback value can no longer leak into later cells that read dbscan_distance.
dbscan_distance = 200

clustered_frames = []
single_frames = []
cluster_counts_dict = {}
for owner in unique_owners:
    if target_owner is not None and owner != target_owner:
        continue

    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of `parcels`.
    owner_parcels = parcels[parcels['OWNER'] == owner].copy()

    # Skip owners with fewer than three parcels before doing any distance work.
    if len(owner_parcels) < 3:
        continue

    polygons = owner_parcels['geometry'].to_list()
    distance_matrix = compute_distance_matrix(polygons)

    # When every pairwise distance is zero (all parcels touch or overlap),
    # cluster with a small eps instead of the default.
    eps = 3 if np.all(distance_matrix == 0) else dbscan_distance

    dbscan = DBSCAN(eps=eps, min_samples=sample_size, metric='precomputed')
    clusters = dbscan.fit_predict(distance_matrix)
    owner_parcels['cluster'] = clusters
    owner_parcels['area'] = owner_parcels['geometry'].area

    # Parcels per cluster, excluding DBSCAN's noise label (-1).
    counts = owner_parcels['cluster'].value_counts()
    counts = counts[counts.index != -1]

    is_noise = owner_parcels['cluster'] == -1
    # Appended even when empty so the combined frame keeps its columns.
    single_frames.append(owner_parcels[is_noise])

    cluster_filter = owner_parcels[~is_noise].copy()
    if len(cluster_filter) > 0:
        cluster_filter['pcount'] = cluster_filter['cluster'].map(counts)
        cluster_filter['buff_dist'] = eps
        clustered_frames.append(cluster_filter)

# Concatenate once at the end rather than growing the frames inside the loop.
single_parcel_data = (
    pd.concat(single_frames, ignore_index=True) if single_frames else gpd.GeoDataFrame()
)
clustered_parcel_data = (
    pd.concat(clustered_frames, ignore_index=True) if clustered_frames else gpd.GeoDataFrame()
)




In [64]:
# Cluster sizes (noise label -1 excluded) for the last owner the loop above clustered.
counts

cluster
0    5
Name: count, dtype: int64

In [65]:
# Parcels assigned to a DBSCAN cluster, with per-cluster parcel count and the eps used.
clustered_parcel_data

Unnamed: 0,FIPS,OWNER,duplicate_,duplicat_1,classifica,classifi_1,geometry,cluster,area,pcount,buff_dist
0,41013,L RKOPCINSKI,1,0,Class2: Duplicate Owner,2,"POLYGON ((706875.541 4928174.174, 706923.916 4...",0,199568.3,5,200
1,41013,L RKOPCINSKI,1,0,Class2: Duplicate Owner,2,"POLYGON ((707775.642 4928936.781, 707786.399 4...",0,1726039.0,5,200
2,41013,L RKOPCINSKI,1,0,Class2: Duplicate Owner,2,"POLYGON ((700237.515 4930263.665, 700250.683 4...",0,25128300.0,5,200
3,41013,L RKOPCINSKI,1,0,Class2: Duplicate Owner,2,"POLYGON ((701995.624 4937563.549, 702006.582 4...",0,34803360.0,5,200
4,41013,L RKOPCINSKI,1,0,Class2: Duplicate Owner,2,"POLYGON ((699513.147 4934531.966, 699539.222 4...",0,22269890.0,5,200


Example of an owner containing two parcels, with their respective distances to each other (meters)

In [66]:
# Build the "<OWNER>_<cluster label>" identifier on both result frames.
for frame in (clustered_parcel_data, single_parcel_data):
    owner_prefix = frame['OWNER'] + '_'
    frame['cluster_ID'] = owner_prefix + frame['cluster'].astype(str)

In [67]:
# Merge the member parcels of each cluster into a single geometry (one row per cluster_ID);
# reset_index turns cluster_ID back into a regular column.
parcel_dissolve = clustered_parcel_data.dissolve(by='cluster_ID').reset_index()


In [68]:
# Overwrite the per-parcel 'area' carried through the dissolve with the area of the merged geometry.
parcel_dissolve['area'] = parcel_dissolve['geometry'].area

In [69]:
# Dissolved area per cluster, largest first (one row per cluster_ID after the dissolve,
# so the mean is simply that row's area).
parcel_dissolve.groupby('cluster_ID')['area'].mean().sort_values(ascending=False)

cluster_ID
L RKOPCINSKI_0    8.412716e+07
Name: area, dtype: float64

In [70]:
# Keep only dissolved clusters whose area exceeds the super-parcel threshold.
# Areas are computed from the UTM-projected geometry, i.e. in square metres.
min_super_parcel_area = 300000

# parcel_dissolve holds one row per cluster_ID, so this mean is that row's area.
mean_area = parcel_dissolve.groupby('cluster_ID')['area'].mean()
super_parcel_ids = mean_area[mean_area > min_super_parcel_area].index
super_parcels = parcel_dissolve[parcel_dissolve['cluster_ID'].isin(super_parcel_ids)]
# .copy() makes this an independent frame: later cells assign new columns to
# super_parcels, which would otherwise be writes to a slice of parcel_dissolve.
super_parcels = super_parcels[['cluster_ID', 'area', 'pcount', 'OWNER', 'geometry']].copy()

In [71]:
# Buffer outward and then back inward by the same distance.
# NOTE(review): presumably meant to close gaps between member parcels - confirm.
# dbscan_distance here is whatever value the clustering cell left in the global,
# not the per-cluster 'buff_dist' (that column was dropped when super_parcels was built).
super_parcels['geometry'] = super_parcels['geometry'].buffer(dbscan_distance)
super_parcels['geometry'] = super_parcels['geometry'].buffer(-dbscan_distance)


In [72]:
# Assign each super parcel an id of the form "<cluster_ID>_<n>", where n numbers
# the rows within a cluster_ID, then order the frame by descending 'area'.
within_cluster_seq = super_parcels.groupby('cluster_ID').cumcount().astype(str)
super_parcels['sp_id'] = super_parcels['cluster_ID'] + "_" + within_cluster_seq

# Rank 1 is the largest 'area' value.
super_parcels['rank'] = super_parcels['area'].rank(ascending=False)
super_parcels = (
    super_parcels
    .sort_values(by='rank', ascending=True)
    .reset_index(drop=True)
)

In [73]:
# Super parcels ordered by rank (largest area first).
super_parcels

Unnamed: 0,cluster_ID,area,pcount,OWNER,geometry,sp_id,rank
0,L RKOPCINSKI_0,84127160.0,5,L RKOPCINSKI,"POLYGON ((698115.618 4937420.146, 698120.275 4...",L RKOPCINSKI_0_0,1.0


In [53]:
# Number of super parcels belonging to each owner, attached to every row of that owner.
sp_per_owner = super_parcels.groupby('OWNER')['sp_id'].count()
super_parcels['sp_count'] = super_parcels['OWNER'].map(sp_per_owner)

In [54]:
# Super parcels with the per-owner count column.
# NOTE(review): this cell's execution count is lower than the cells above it and its
# saved output shows a different owner - the output looks stale; re-run top to bottom.
super_parcels

Unnamed: 0,cluster_ID,area,pcount,OWNER,geometry,sp_id,rank,sp_count
0,I S DGARLAND_3,398583.981299,4,I S DGARLAND,"POLYGON ((719937.094 3649048.670, 719939.860 3...",I S DGARLAND_3_0,1.0,3
1,I S DGARLAND_6,370443.844441,4,I S DGARLAND,"POLYGON ((725191.492 3649578.410, 725191.477 3...",I S DGARLAND_6_0,2.0,3
2,I S DGARLAND_5,353591.262642,5,I S DGARLAND,"POLYGON ((726425.895 3647236.562, 726425.960 3...",I S DGARLAND_5_0,3.0,3


In [49]:
# Write the super parcels to a shapefile in data_dir.
# NOTE(review): output_dir is defined earlier but not used here - confirm the intended destination.
super_parcels.to_file(os.path.join(data_dir, 'super_parcels_temp.shp'))