diff --git a/ccore/include/pyclustering/cluster/kmedoids_data.hpp b/ccore/include/pyclustering/cluster/kmedoids_data.hpp index a355943c..07c50326 100755 --- a/ccore/include/pyclustering/cluster/kmedoids_data.hpp +++ b/ccore/include/pyclustering/cluster/kmedoids_data.hpp @@ -35,6 +35,8 @@ class kmedoids_data : public cluster_data { std::size_t m_iterations = 0; + double m_total_deviation = 0.0; + public: /** * @@ -96,6 +98,20 @@ class kmedoids_data : public cluster_data { */ std::size_t iterations() const { return m_iterations; } + + /* + + @brief Returns reference to the final loss (total deviation). + + */ + double & total_deviation() { return m_total_deviation; } + + /* + + @brief Returns the final loss (total deviation). + + */ + double total_deviation() const { return m_total_deviation; } }; diff --git a/ccore/include/pyclustering/interface/kmedoids_interface.h b/ccore/include/pyclustering/interface/kmedoids_interface.h index 31e3555e..d464a37d 100755 --- a/ccore/include/pyclustering/interface/kmedoids_interface.h +++ b/ccore/include/pyclustering/interface/kmedoids_interface.h @@ -24,6 +24,7 @@ enum kmedoids_package_indexer { KMEDOIDS_PACKAGE_INDEX_CLUSTERS = 0, KMEDOIDS_PACKAGE_INDEX_MEDOIDS, KMEDOIDS_PACKAGE_INDEX_ITERATIONS, + KMEDOIDS_PACKAGE_INDEX_TOTAL_DEVIATION, KMEDOIDS_PACKAGE_SIZE }; diff --git a/ccore/src/cluster/kmedoids.cpp b/ccore/src/cluster/kmedoids.cpp index e515644a..20947464 100755 --- a/ccore/src/cluster/kmedoids.cpp +++ b/ccore/src/cluster/kmedoids.cpp @@ -76,19 +76,20 @@ void kmedoids::process(const dataset & p_data, const data_t p_type, kmedoids_dat double changes = std::numeric_limits::max(); double previous_deviation = std::numeric_limits::max(); - double current_deviation = std::numeric_limits::max(); + p_result.total_deviation() = 0; if (m_itermax > 0) { - current_deviation = update_clusters(); + p_result.total_deviation() = update_clusters(); } - for (p_result.iterations() = 0; (p_result.iterations() < m_itermax) && (changes > m_tolerance); p_result.iterations()++) { + for (p_result.iterations() = 0; (p_result.iterations() < m_itermax) && (changes > m_tolerance);) { + p_result.iterations()++; const double swap_cost = swap_medoids(); if (swap_cost != NOTHING_TO_SWAP) { - previous_deviation = current_deviation; - current_deviation = update_clusters(); - changes = previous_deviation - current_deviation; + previous_deviation = p_result.total_deviation(); + p_result.total_deviation() = update_clusters(); + changes = previous_deviation - p_result.total_deviation(); } else { break; @@ -143,7 +144,7 @@ kmedoids::distance_calculator kmedoids::create_distance_calculator(const data_t }; } else { - throw std::invalid_argument("Unknown type data is specified"); + throw std::invalid_argument("Unknown type data is specified (data type code: '" + std::to_string(static_cast(p_type)) + "') ."); } } diff --git a/ccore/src/interface/kmedoids_interface.cpp b/ccore/src/interface/kmedoids_interface.cpp index 5b203a39..60fd5105 100755 --- a/ccore/src/interface/kmedoids_interface.cpp +++ b/ccore/src/interface/kmedoids_interface.cpp @@ -22,6 +22,7 @@ pyclustering_package * kmedoids_algorithm(const pyclustering_package * const p_s const std::size_t p_itermax, const void * const p_metric, const std::size_t p_type) +try { pyclustering::clst::medoid_sequence medoids; p_medoids->extract(medoids); @@ -48,6 +49,11 @@ pyclustering_package * kmedoids_algorithm(const pyclustering_package * const p_s std::vector iteration_storage(1, output_result.iterations()); ((pyclustering_package **)package->data)[KMEDOIDS_PACKAGE_INDEX_ITERATIONS] = create_package(&iteration_storage); + std::vector total_deviation_storage(1, output_result.total_deviation()); + ((pyclustering_package **)package->data)[KMEDOIDS_PACKAGE_INDEX_TOTAL_DEVIATION] = create_package(&total_deviation_storage); + return package; } - +catch (std::exception & p_exception) { + return create_package(p_exception.what()); +} diff --git a/ccore/src/pyclustering-shared.vcxproj b/ccore/src/pyclustering-shared.vcxproj index 7512c56b..97c4356e 100755 --- a/ccore/src/pyclustering-shared.vcxproj +++ b/ccore/src/pyclustering-shared.vcxproj @@ -105,6 +105,7 @@ EXPORT_PYCLUSTERING_INTERFACE;%(PreprocessorDefinitions) false false + ProgramDatabase pyclustering-static.lib @@ -123,6 +124,7 @@ EXPORT_PYCLUSTERING_INTERFACE;%(PreprocessorDefinitions) false false + ProgramDatabase pyclustering-static.lib diff --git a/ccore/src/pyclustering-static.vcxproj b/ccore/src/pyclustering-static.vcxproj index aa196ee6..c160c3cd 100755 --- a/ccore/src/pyclustering-static.vcxproj +++ b/ccore/src/pyclustering-static.vcxproj @@ -99,6 +99,7 @@ false false true + ProgramDatabase true @@ -115,6 +116,7 @@ false false true + ProgramDatabase true diff --git a/ccore/tst/ut-shared.vcxproj b/ccore/tst/ut-shared.vcxproj index 10080e0a..ebcc72c7 100755 --- a/ccore/tst/ut-shared.vcxproj +++ b/ccore/tst/ut-shared.vcxproj @@ -89,6 +89,7 @@ true $(SolutionDir)include;$(SolutionDir)external\include true + false $(OutDir) @@ -107,6 +108,7 @@ copy "$(OutDir)pyclustering.dll" "..\tst\pyclustering.dll" /y; true $(SolutionDir)include;$(SolutionDir)external\include true + false $(OutDir) diff --git a/ccore/tst/utest-interface-kmedoids.cpp b/ccore/tst/utest-interface-kmedoids.cpp index 3a407b97..406b7847 100755 --- a/ccore/tst/utest-interface-kmedoids.cpp +++ b/ccore/tst/utest-interface-kmedoids.cpp @@ -32,7 +32,10 @@ TEST(utest_interface_kmedoids, kmedoids_api) { distance_metric metric = distance_metric_factory::euclidean_square(); pyclustering_package * kmedoids_result = kmedoids_algorithm(sample.get(), medoids.get(), 0.001, 100, &metric, 0); + ASSERT_NE(nullptr, kmedoids_result); + ASSERT_GT(((std::size_t *)((pyclustering_package **)kmedoids_result->data)[KMEDOIDS_PACKAGE_INDEX_ITERATIONS])[0], 0); + ASSERT_GT(((double *)((pyclustering_package **)kmedoids_result->data)[KMEDOIDS_PACKAGE_INDEX_TOTAL_DEVIATION])[0], 0.0); delete kmedoids_result; } diff --git a/ccore/tst/utest-kmedoids.cpp b/ccore/tst/utest-kmedoids.cpp index 86d3d305..238fa49e 100755 --- a/ccore/tst/utest-kmedoids.cpp +++ b/ccore/tst/utest-kmedoids.cpp @@ -36,11 +36,29 @@ template_kmedoids_length_process_data(const dataset_ptr p_data, const medoid_sequence & medoids = output_result.medoids(); if (p_itermax == 0) { + ASSERT_EQ(0.0, output_result.total_deviation()); + ASSERT_EQ(0, output_result.iterations()); ASSERT_TRUE(actual_clusters.empty()); ASSERT_EQ(p_start_medoids, medoids); return; } + double expected_total_deviation = 0.0; + for (std::size_t index_cluster = 0; index_cluster < actual_clusters.size(); index_cluster++) { + const auto index_point_medoid = medoids[index_cluster]; + + for (const std::size_t index_point : actual_clusters[index_cluster]) { + if (index_point_medoid == index_point) { + continue; + } + + expected_total_deviation += p_metric(p_data->at(index_point_medoid), p_data->at(index_point)); + } + } + + ASSERT_GT(output_result.iterations(), 0); + ASSERT_NEAR(expected_total_deviation, output_result.total_deviation(), 0.000001); + ASSERT_LE(medoids.size(), p_start_medoids.size()); ASSERT_EQ(medoids.size(), actual_clusters.size()); ASSERT_CLUSTER_SIZES(data, actual_clusters, p_expected_cluster_length); @@ -55,7 +73,7 @@ template_kmedoids_length_process_distance_matrix(const dataset_ptr p_data, const distance_metric & p_metric = distance_metric_factory::euclidean_square()) { dataset matrix; - distance_matrix(*p_data, matrix); + distance_matrix(*p_data, p_metric, matrix); kmedoids_data output_result; kmedoids solver(p_start_medoids, kmedoids::DEFAULT_TOLERANCE, p_itermax, p_metric); @@ -66,11 +84,29 @@ template_kmedoids_length_process_distance_matrix(const dataset_ptr p_data, const medoid_sequence & medoids = output_result.medoids(); if (p_itermax == 0) { + ASSERT_EQ(0.0, output_result.total_deviation()); + ASSERT_EQ(0, output_result.iterations()); ASSERT_TRUE(actual_clusters.empty()); ASSERT_EQ(p_start_medoids, medoids); return; } + double expected_total_deviation = 0.0; + for (std::size_t index_cluster = 0; index_cluster < actual_clusters.size(); index_cluster++) { + const auto index_point_medoid = medoids[index_cluster]; + + for (const std::size_t index_point : actual_clusters[index_cluster]) { + if (index_point_medoid == index_point) { + continue; + } + + expected_total_deviation += p_metric(p_data->at(index_point_medoid), p_data->at(index_point)); + } + } + + ASSERT_GT(output_result.iterations(), 0); + ASSERT_NEAR(expected_total_deviation, output_result.total_deviation(), 0.000001); + ASSERT_EQ(p_start_medoids.size(), actual_clusters.size()); ASSERT_EQ(p_start_medoids.size(), medoids.size()); ASSERT_CLUSTER_SIZES(data, actual_clusters, p_expected_cluster_length); diff --git a/docs/doxygen_conf_pyclustering b/docs/doxygen_conf_pyclustering index a2807bd7..4cf3ca2f 100755 --- a/docs/doxygen_conf_pyclustering +++ b/docs/doxygen_conf_pyclustering @@ -27,7 +27,6 @@ INHERIT_DOCS = YES SEPARATE_MEMBER_PAGES = NO TAB_SIZE = 4 ALIASES = -TCL_SUBST = OPTIMIZE_OUTPUT_FOR_C = NO OPTIMIZE_OUTPUT_JAVA = NO OPTIMIZE_FOR_FORTRAN = NO @@ -186,7 +185,6 @@ VERBATIM_HEADERS = YES # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- ALPHABETICAL_INDEX = YES -COLS_IN_ALPHA_INDEX = 5 IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output @@ -254,7 +252,7 @@ LATEX_OUTPUT = LATEX_CMD_NAME = latex MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO -PAPER_TYPE = a4wide +PAPER_TYPE = a4 EXTRA_PACKAGES = LATEX_HEADER = LATEX_FOOTER = @@ -323,12 +321,10 @@ GENERATE_TAGFILE = ALLEXTERNALS = NO EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- CLASS_DIAGRAMS = NO -MSCGEN_PATH = DIA_PATH = HIDE_UNDOC_RELATIONS = YES HAVE_DOT = YES diff --git a/pyclustering/cluster/center_initializer.py b/pyclustering/cluster/center_initializer.py index a85551f5..1e6885b1 100755 --- a/pyclustering/cluster/center_initializer.py +++ b/pyclustering/cluster/center_initializer.py @@ -355,7 +355,13 @@ def initialize(self, **kwargs): """ - return_index = kwargs.get('return_index', False) + return_index = kwargs.get('return_index', None) + if return_index is None: + return_index = (self.__data_type == 'distance_matrix') + + if (return_index is False) and (self.__data_type == 'distance_matrix'): + raise ValueError("In case of 'distance_matrix' data type, parameter 'return_index' cannot be 'False'. " + "Please, use 'return_index=True' in case of 'distance_matrix' data type.") index_point = self.__get_initial_center(True) centers = [index_point] diff --git a/pyclustering/cluster/kmedoids.py b/pyclustering/cluster/kmedoids.py index 7fb9df55..17cd5d56 100755 --- a/pyclustering/cluster/kmedoids.py +++ b/pyclustering/cluster/kmedoids.py @@ -336,6 +336,7 @@ def __init__(self, data, initial_index_medoids, tolerance=0.0001, ccore=True, ** self.__distance_second_medoid = [float('inf')] * len(data) self.__tolerance = tolerance self.__iterations = 0 + self.__total_deviation = float('inf') self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE)) self.__data_type = kwargs.get('data_type', 'points') @@ -366,28 +367,29 @@ def process(self): if self.__ccore is True: ccore_metric = metric_wrapper.create_instance(self.__metric) - self.__clusters, self.__medoid_indexes = kmedoids_wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance, self.__itermax, ccore_metric.get_pointer(), self.__data_type) + self.__clusters, self.__medoid_indexes, self.__iterations, self.__total_deviation = kmedoids_wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance, self.__itermax, ccore_metric.get_pointer(), self.__data_type) else: changes = float('inf') - previous_deviation, current_deviation = float('inf'), float('inf') + previous_deviation, self.__total_deviation = float('inf'), 0 self.__iterations = 0 if self.__itermax > 0: - current_deviation = self.__update_clusters() + self.__total_deviation = self.__update_clusters() while (changes > self.__tolerance) and (self.__iterations < self.__itermax): + self.__iterations += 1 swap_cost = self.__swap_medoids() if swap_cost != float('inf'): - previous_deviation = current_deviation - current_deviation = self.__update_clusters() - changes = previous_deviation - current_deviation + previous_deviation = self.__total_deviation + self.__total_deviation = self.__update_clusters() + changes = previous_deviation - self.__total_deviation else: break - self.__iterations += 1 + self.__erase_empty_clusters() @@ -491,6 +493,17 @@ def get_iterations(self): return self.__iterations + def get_total_deviation(self): + """! + @brief Returns total deviation - the final loss after optimization. + @return (float) The total deviation - the final loss after optimization. + + @see process() + + """ + return self.__total_deviation + + def get_cluster_encoding(self): """! @brief Returns clustering result representation type that indicate how clusters are encoded. diff --git a/pyclustering/cluster/tests/kmedoids_templates.py b/pyclustering/cluster/tests/kmedoids_templates.py index 60febcc5..795be521 100755 --- a/pyclustering/cluster/tests/kmedoids_templates.py +++ b/pyclustering/cluster/tests/kmedoids_templates.py @@ -44,7 +44,7 @@ def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_clus input_data = sample if data_type == 'distance_matrix': - input_data = calculate_distance_matrix(sample) + input_data = calculate_distance_matrix(sample, metric) if input_type == 'numpy': input_data = numpy.array(input_data) @@ -65,6 +65,8 @@ def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_clus medoids = kmedoids_instance.get_medoids() if itermax == 0: + assertion.eq(0, kmedoids_instance.get_iterations()) + assertion.eq(0.0, kmedoids_instance.get_total_deviation()) assertion.eq([], clusters) assertion.eq(medoids, initial_medoids) return @@ -89,6 +91,19 @@ def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_clus if obtained_cluster_sizes != expected_cluster_length: continue + assertion.gt(kmedoids_instance.get_iterations(), 0) + + expected_total_deviation = 0.0 + for index_cluster in range(len(clusters)): + index_point_medoid = medoids[index_cluster] + for index_point in clusters[index_cluster]: + if index_point == index_point_medoid: + continue + + expected_total_deviation += metric(sample[index_point_medoid], sample[index_point]) + + assertion.eq_float(expected_total_deviation, kmedoids_instance.get_total_deviation(), 0.000001) + testing_result = True assertion.true(testing_result) diff --git a/pyclustering/core/kmedoids_wrapper.py b/pyclustering/core/kmedoids_wrapper.py index de713864..a8d3a39d 100755 --- a/pyclustering/core/kmedoids_wrapper.py +++ b/pyclustering/core/kmedoids_wrapper.py @@ -29,4 +29,4 @@ def kmedoids(sample, medoids, tolerance, itermax, metric_pointer, data_type): result = package_extractor(package).extract() ccore.free_pyclustering_package(package) - return result[0], result[1] + return result[0], result[1], result[2][0], result[3][0]