# In [40]:
import geopandas as gpd
import numpy as np
from sklearn.cluster import KMeans
from shapely.geometry import MultiPoint, Point

def cluster_buildings(gdf, n_clusters=3, *, random_state=None):
    """Assign every building to a spatial cluster inside its neighborhood.

    Rows are grouped by the ``'Neighborhood'`` column. Within each group the
    building centroids are clustered with K-Means and every row receives a
    label of the form ``"<neighborhood>_<k>"``.

    A neighborhood holding ``n_clusters`` buildings or fewer is not passed
    to K-Means; each of its buildings becomes its own cluster instead, so
    such a neighborhood ends up with fewer than ``n_clusters`` clusters.
    Rows that do not belong to any group keep ``None`` as their label.

    Args:
        gdf: GeoDataFrame with a ``'Neighborhood'`` column. It is modified
            in place: the ``'centroid'`` and ``'cluster'`` columns are
            added (or overwritten).
        n_clusters: Number of K-Means clusters requested per neighborhood.
        random_state: Forwarded to ``KMeans``. Pass an int to obtain
            reproducible cluster assignments; ``None`` (the default) leaves
            K-Means unseeded, as before.

    Returns:
        The same ``gdf`` object that was passed in.
    """
    # Compute the centroids once and reuse them for every neighborhood.
    centroids = gdf.geometry.centroid
    gdf['centroid'] = centroids

    # Labels are collected by row position rather than by index label so
    # that a non-unique index cannot mis-assign them or break the assignment.
    labels = np.full(len(gdf), None, dtype=object)

    grouped_positions = gdf.groupby('Neighborhood').indices
    for neighborhood, positions in grouped_positions.items():
        if len(positions) > n_clusters:
            # Cluster the centroid coordinates of this neighborhood.
            points = centroids.iloc[positions]
            coords = np.column_stack([points.x, points.y])
            kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
            members = kmeans.fit_predict(coords)
        else:
            # Not enough buildings for the requested number of clusters:
            # give each building its own cluster.
            members = range(len(positions))

        labels[positions] = [f"{neighborhood}_{m}" for m in members]

    gdf['cluster'] = labels
    return gdf

def compute_cluster_centroids(gdf):
    """Reduce every cluster to a single representative point.

    For each distinct value in the ``'cluster'`` column, the points stored
    in the ``'centroid'`` column of the member rows are combined into a
    ``MultiPoint`` and the centroid of that ``MultiPoint`` is taken.

    Args:
        gdf: GeoDataFrame carrying ``'cluster'`` and ``'centroid'`` columns.

    Returns:
        A GeoDataFrame with one row per cluster, holding the cluster key in
        ``'cluster'`` and the computed point in ``'centroid'`` (the geometry
        column), using the CRS of ``gdf``.
    """
    records = [
        {
            'cluster': label,
            'centroid': MultiPoint(members['centroid'].tolist()).centroid,
        }
        for label, members in gdf.groupby('cluster')
    ]
    return gpd.GeoDataFrame(records, geometry='centroid', crs=gdf.crs)

# Read the building data from the GeoPackage file.
buildings_gdf = gpd.read_file("buildings_district_nbh.gpkg")

# Label every building with a cluster inside its neighborhood.
# cluster_buildings adds the 'centroid' and 'cluster' columns to the frame it
# is given and returns that same frame.
clustered_buildings = cluster_buildings(buildings_gdf)

# Collapse each cluster to a single point (one row per cluster).
cluster_centroids = compute_cluster_centroids(clustered_buildings)

# Write the cluster centroids to a GeoPackage file.
# Uncomment the next line to make the per-building 'centroid' column the
# active geometry of clustered_buildings (it does not affect the file
# written below):
# clustered_buildings.set_geometry('centroid', inplace=True)
cluster_centroids.to_file("cluster_centroids.gpkg", driver='GPKG')


