In [1]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), 'SpectralEqualSizeClustering'))

import pandas as pd
import numpy as np
import logging
from source_code.spectral_equal_size_clustering import SpectralEqualSizeClustering
from source_code.visualisation import visualise_clusters

# Set up root logging and let INFO-level records through, so the messages
# logged further down in this cell are displayed.
logging.basicConfig()
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Restaurant coordinates. They are not passed to the clustering step below;
# they are only needed to draw the resulting clusters on a map.
coords = pd.read_csv(
    "SpectralEqualSizeClustering/datasets/restaurants_in_amsterdam.csv"
)

# Symmetric distance matrix associated with the rows of `coords`.
dist_tr = np.load("SpectralEqualSizeClustering/datasets/symmetric_dist_tr.npy")

# Neighbourhood size is 10% of the number of points (truncated to an int).
n_points = dist_tr.shape[0]
clustering = SpectralEqualSizeClustering(
    nclusters=6,
    nneighbors=int(n_points * 0.1),
    equity_fraction=1,
    seed=1234,
)

# The value returned by `fit` is used as the per-point cluster label.
labels = clustering.fit(dist_tr)

# Store the labels next to the coordinates and report how many points each
# cluster received.
coords["cluster"] = labels
cluster_sizes = coords.cluster.value_counts()
logging.info(f"Points per cluster: \n {cluster_sizes}")

# Draw the labelled points and display the figure.
clusters_figure = visualise_clusters(
    coords,
    longitude_colname="longitude",
    latitude_colname="latitude",
    label_col="cluster",
    zoom=11,
)
clusters_figure.show()


INFO:root:parameters of the cluster: nclusters: (6,) equity_fr: 1 nneighbours: 164
INFO:root:ideal elements per cluster: [275, 275, 275, 275, 275, 274]
INFO:root:min-max range of elements: 274-275
INFO:root:Points per cluster: 
 cluster
3    286
5    275
0    274
1    274
2    274
4    266
Name: count, dtype: int64


In [2]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), 'SpectralEqualSizeClustering'))

import pandas as pd
import numpy as np
import logging
from RecursiveSpectralClustering import RecursiveSpectralClustering
from source_code.visualisation import visualise_clusters

# Set up root logging and let INFO-level records through, so the message
# logged further down in this cell is displayed.
logging.basicConfig()
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Restaurant coordinates. They are not passed to the clustering step below;
# they are only needed to draw the resulting clusters on a map.
coords = pd.read_csv(
    "SpectralEqualSizeClustering/datasets/restaurants_in_amsterdam.csv"
)

# Symmetric distance matrix associated with the rows of `coords`.
dist_tr = np.load("SpectralEqualSizeClustering/datasets/symmetric_dist_tr.npy")

# Neighbourhood size is 10% of the number of points (truncated to an int).
# NOTE(review): no seed / random state is passed to this estimator while
# assign_labels="kmeans" is requested, so labels may differ between runs --
# confirm whether RecursiveSpectralClustering accepts one.
n_points = dist_tr.shape[0]
clustering = RecursiveSpectralClustering(
    n_clusters=6,
    affinity="precomputed_nearest_neighbors",
    n_neighbors=int(n_points * 0.1),
    assign_labels="kmeans",
)

# The value returned by `fit` is used as the per-point cluster label.
labels = clustering.fit(dist_tr)

# Store the labels next to the coordinates and report how many points each
# cluster received.
coords["cluster"] = labels
cluster_sizes = coords.cluster.value_counts()
logging.info(f"Points per cluster: \n {cluster_sizes}")

# Draw the labelled points and display the figure.
clusters_figure = visualise_clusters(
    coords,
    longitude_colname="longitude",
    latitude_colname="latitude",
    label_col="cluster",
    zoom=11,
)
clusters_figure.show()


INFO:root:Points per cluster: 
 cluster
4    275
5    275
1    275
0    275
3    275
2    274
Name: count, dtype: int64
