Skip to content

Commit

Permalink
#667: PAM BUILD algorithm implementation (Python version).
Browse files Browse the repository at this point in the history
  • Loading branch information
annoviko committed Feb 8, 2021
1 parent 1cbb151 commit feb5ce7
Show file tree
Hide file tree
Showing 11 changed files with 383 additions and 67 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ CHANGE NOTES FOR 0.11.0 (STARTED Nov 26, 2020), (RELEASED: -)

GENERAL CHANGES:

- Introduced PAM BUILD algorithm to generate initial medoids (Python: `pyclustering.cluster.kmedoids`, C++: `pyclustering::clst::pam_build`).
See: https://github.com/annoviko/pyclustering/issues/667

- Parallel optimization of K-Medoids (PAM) algorithm (Python: `pyclustering.cluster.kmedoids`, C++: `pyclustering::clst::kmedoids`).
See: https://github.com/annoviko/pyclustering/issues/667

Expand Down
20 changes: 18 additions & 2 deletions ccore/include/pyclustering/cluster/kmedoids_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class kmedoids_data : public cluster_data {
private:
medoid_sequence m_medoids = { };

std::size_t m_iterations = 0;

public:
/**
*
Expand Down Expand Up @@ -69,17 +71,31 @@ class kmedoids_data : public cluster_data {
public:
/**
*
* @brief Returns medoids that corresponds to allocated clusters.
* @brief Returns reference to medoids that correspond to allocated clusters.
*
*/
medoid_sequence & medoids() { return m_medoids; }

/**
*
* @brief Returns medoids that corresponds to allocated clusters.
* @brief Returns constant reference to medoids that correspond to allocated clusters.
*
*/
const medoid_sequence & medoids() const { return m_medoids; }

/**
*
* @brief Provides writable access to the counter of iterations that were performed during the clustering process.
*
* @return Reference to the stored iteration counter; assigning through it updates the value kept by this object.
*
*/
std::size_t & iterations() {
    return m_iterations;
}

/**
*
* @brief Reports the amount of iterations that were performed during the clustering process.
*
* @return Copy of the stored iteration counter.
*
*/
std::size_t iterations() const {
    return m_iterations;
}
};


Expand Down
1 change: 1 addition & 0 deletions ccore/include/pyclustering/interface/kmedoids_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
/* Positions of the individual results inside the package returned by the K-Medoids interface. */
enum kmedoids_package_indexer {
    KMEDOIDS_PACKAGE_INDEX_CLUSTERS   = 0,  /* allocated clusters */
    KMEDOIDS_PACKAGE_INDEX_MEDOIDS    = 1,  /* medoids of the allocated clusters */
    KMEDOIDS_PACKAGE_INDEX_ITERATIONS = 2,  /* amount of performed iterations */
    KMEDOIDS_PACKAGE_SIZE                   /* count of the entries above; keep it last */
};

Expand Down
2 changes: 1 addition & 1 deletion ccore/src/cluster/kmedoids.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void kmedoids::process(const dataset & p_data, const kmedoids_data_t p_type, kme
current_deviation = update_clusters();
}

for (std::size_t iteration = 0; (iteration < m_itermax) && (changes > m_tolerance); iteration++) {
for (p_result.iterations() = 0; (p_result.iterations() < m_itermax) && (changes > m_tolerance); p_result.iterations()++) {
const double swap_cost = swap_medoids();

if (swap_cost != NOTHING_TO_SWAP) {
Expand Down
3 changes: 3 additions & 0 deletions ccore/src/interface/kmedoids_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pyclustering_package * kmedoids_algorithm(const pyclustering_package * const p_s
((pyclustering_package **) package->data)[KMEDOIDS_PACKAGE_INDEX_CLUSTERS] = create_package(&output_result.clusters());
((pyclustering_package **) package->data)[KMEDOIDS_PACKAGE_INDEX_MEDOIDS] = create_package(&output_result.medoids());

std::vector<std::size_t> iteration_storage(1, output_result.iterations());
((pyclustering_package **)package->data)[KMEDOIDS_PACKAGE_INDEX_ITERATIONS] = create_package(&iteration_storage);

return package;
}

2 changes: 1 addition & 1 deletion pyclustering/cluster/center_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def __init__(self, data, amount_centers, amount_candidates=None, **kwargs):
"""!
@brief Creates K-Means++ center initializer instance.
@param[in] data (array_like): List of points where each point is represented by list of coordinates.
@param[in] data (array_like): Points where each point is represented by list of coordinates.
@param[in] amount_centers (uint): Amount of centers that should be initialized.
@param[in] amount_candidates (uint): Amount of candidates that is considered as a center, if the farthest points
(with the highest probability) should be considered as centers then special constant should be used
Expand Down
3 changes: 3 additions & 0 deletions pyclustering/cluster/examples/kmedoids_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@ def template_clustering(start_medoids, path, tolerance=0.25, show=True, **kwargs
sample = calculate_distance_matrix(sample)

metric = distance_metric(type_metric.EUCLIDEAN_SQUARE, data=sample)

kmedoids_instance = kmedoids(sample, start_medoids, tolerance, metric=metric, ccore=ccore, data_type=data_type)
(ticks, result) = timedcall(kmedoids_instance.process)

clusters = kmedoids_instance.get_clusters()
print("Iterations:", kmedoids_instance.get_iterations())
print([len(cluster) for cluster in clusters])
print(clusters)
medoids = kmedoids_instance.get_medoids()
print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")
Expand Down
Loading

0 comments on commit feb5ce7

Please sign in to comment.