-
Notifications
You must be signed in to change notification settings - Fork 251
/
xmeans.hpp
119 lines (84 loc) · 3.63 KB
/
xmeans.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/**
*
* Copyright (C) 2014-2017 Andrei Novikov (pyclustering@yandex.ru)
*
* GNU_PUBLIC_LICENSE
* pyclustering is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* pyclustering is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#pragma once
#include <mutex>
#include <vector>
#include "cluster/cluster_algorithm.hpp"
#include "cluster/xmeans_data.hpp"
namespace cluster_analysis {
/**
 *
 * @brief Identifies the splitting criterion that the X-Means algorithm uses when
 *        making a decision about cluster splitting.
 *
 * @details The numeric values are spelled out explicitly; keep them stable in case
 *          they are exchanged with code outside of this header as plain integers.
 *
 */
enum class splitting_type {
    /* Bayesian information criterion (BIC). */
    BAYESIAN_INFORMATION_CRITERION       = 0,

    /* Minimum noiseless description length (MNDL). */
    MINIMUM_NOISELESS_DESCRIPTION_LENGTH = 1
};
class xmeans : public cluster_algorithm {
private:
const static std::size_t DEFAULT_AMOUNT_THREADS;
private:
dataset m_centers;
xmeans_data * m_ptr_result; /* temporary pointer to output result */
const dataset * m_ptr_data; /* used only during processing */
std::size_t m_maximum_clusters;
double m_tolerance;
splitting_type m_criterion;
std::mutex m_mutex;
public:
/**
*
* @brief Constructor of clustering algorithm where algorithm parameters for processing are
* specified.
*
* @param[in] p_centers: initial centers that are used for processing.
* @param[in] p_kmax: maximum number of clusters that can be allocated.
* @param[in] p_tolerance: stop condition in following way: when maximum value of distance change of
* cluster centers is less than tolerance than algorithm will stop processing.
* @param[in] p_criterion: splitting criterion that is used for making descision about cluster splitting.
*
*/
xmeans(const dataset & p_centers, const std::size_t p_kmax, const double p_tolerance, const splitting_type p_criterion);
/**
*
* @brief Default destructor of the algorithm.
*
*/
virtual ~xmeans(void);
public:
/**
*
* @brief Performs cluster analysis of an input data.
*
* @param[in] p_data: input data for cluster analysis.
* @param[out] p_result: clustering result of an input data.
*
*/
virtual void process(const dataset & data, cluster_data & output_result) override;
private:
void update_clusters(cluster_sequence & clusters, const dataset & centers, const index_sequence & available_indexes);
double update_centers(const cluster_sequence & clusters, dataset & centers);
double update_center(const cluster & p_cluster, point & p_center);
double foo(cluster & p);
void improve_structure(void);
void improve_region_structure(void);
void improve_parameters(cluster_sequence & clusters, dataset & centers, const index_sequence & available_indexes);
double splitting_criterion(const cluster_sequence & clusters, const dataset & centers) const;
std::size_t find_proper_cluster(const dataset & analysed_centers, const point & p_point) const;
double bayesian_information_criterion(const cluster_sequence & clusters, const dataset & centers) const;
double minimum_noiseless_description_length(const cluster_sequence & clusters, const dataset & centers) const;
};
}