diff --git a/ccore/src/cluster/xmeans.cpp b/ccore/src/cluster/xmeans.cpp index 79e8796c..cfeec65a 100644 --- a/ccore/src/cluster/xmeans.cpp +++ b/ccore/src/cluster/xmeans.cpp @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -33,6 +34,9 @@ namespace cluster_analysis { +const std::size_t xmeans::DEFAULT_AMOUNT_THREADS = 10; + + xmeans::xmeans(const dataset & p_centers, const std::size_t p_kmax, const double p_tolerance, const splitting_type p_criterion) : m_centers(p_centers), m_maximum_clusters(p_kmax), @@ -67,6 +71,7 @@ void xmeans::process(const dataset & data, cluster_data & output_result) { } } + void xmeans::improve_parameters(cluster_sequence & improved_clusters, dataset & improved_centers, const index_sequence & available_indexes) { double current_change = std::numeric_limits::max(); @@ -185,40 +190,56 @@ std::size_t xmeans::find_proper_cluster(const dataset & analysed_centers, const return index_optimum; } +double xmeans::foo(cluster & p) { + return 0.0; +} double xmeans::update_centers(const cluster_sequence & analysed_clusters, dataset & analysed_centers) { double maximum_change = 0; /* for each cluster */ - for (unsigned int index_cluster = 0; index_cluster < analysed_clusters.size(); index_cluster++) { - std::vector total(analysed_centers[index_cluster].size(), 0); - - /* for each object in cluster */ - for (cluster::const_iterator object_index_iterator = analysed_clusters[index_cluster].begin(); object_index_iterator < analysed_clusters[index_cluster].end(); object_index_iterator++) { - /* for each dimension */ - for (std::size_t dimension = 0; dimension < total.size(); dimension++) { - total[dimension] += (*m_ptr_data)[*object_index_iterator][dimension]; - } - } + std::vector> pool_update_futures; - /* average for each dimension */ - for (auto & dimension : total) { - dimension = dimension / analysed_clusters[index_cluster].size(); - } - - double distance = euclidean_distance_sqrt( &(analysed_centers[index_cluster]), &total ); + for (std::size_t index_cluster = 0; index_cluster < analysed_clusters.size(); index_cluster++) { + auto update_functor = std::bind(&xmeans::update_center, this, std::cref(analysed_clusters[index_cluster]), std::ref(analysed_centers[index_cluster])); + pool_update_futures.emplace_back(std::async(std::launch::async, update_functor)); + } + for (auto & update_future : pool_update_futures) { + double distance = update_future.get(); if (distance > maximum_change) { maximum_change = distance; } - - std::copy(total.begin(), total.end(), analysed_centers[index_cluster].begin()); } return maximum_change; } +double xmeans::update_center(const cluster & p_cluster, point & p_center) { + std::vector total(p_center.size(), 0); + + /* for each object in cluster */ + for (auto & object_index : p_cluster) { + /* for each dimension */ + for (std::size_t dimension = 0; dimension < total.size(); dimension++) { + total[dimension] += (*m_ptr_data)[object_index][dimension]; + } + } + + /* average for each dimension */ + for (auto & dimension : total) { + dimension = dimension / p_cluster.size(); + } + + double distance = euclidean_distance_sqrt( &p_center, &total ); + + std::copy(total.begin(), total.end(), p_center.begin()); + + return distance; +} + + double xmeans::bayesian_information_criterion(const cluster_sequence & analysed_clusters, const dataset & analysed_centers) const { std::vector scores(analysed_centers.size(), 0.0); diff --git a/ccore/src/cluster/xmeans.hpp b/ccore/src/cluster/xmeans.hpp index 303bb73d..7d9696ea 100644 --- a/ccore/src/cluster/xmeans.hpp +++ b/ccore/src/cluster/xmeans.hpp @@ -22,6 +22,7 @@ #pragma once +#include #include #include "cluster/cluster_algorithm.hpp" @@ -38,6 +39,9 @@ enum class splitting_type { class xmeans : public cluster_algorithm { +private: + const static std::size_t DEFAULT_AMOUNT_THREADS; + private: dataset m_centers; @@ -51,6 +55,8 @@ class xmeans : public cluster_algorithm { splitting_type m_criterion; + std::mutex m_mutex; + public: /** * @@ -89,8 +95,14 @@ class xmeans : public cluster_algorithm { double update_centers(const cluster_sequence & clusters, dataset & centers); + double update_center(const cluster & p_cluster, point & p_center); + + double foo(cluster & p); + void improve_structure(void); + void improve_region_structure(void); + void improve_parameters(cluster_sequence & clusters, dataset & centers, const index_sequence & available_indexes); double splitting_criterion(const cluster_sequence & clusters, const dataset & centers) const; diff --git a/pyclustering/core/x64/win/ccore.dll b/pyclustering/core/x64/win/ccore.dll index 81153110..15f6ed3d 100755 Binary files a/pyclustering/core/x64/win/ccore.dll and b/pyclustering/core/x64/win/ccore.dll differ