Skip to content

Commit

Permalink
[pyclustering.cluster.xmeans] Correction for BIC (issue #326) and MNDL (issue #328).
Browse files Browse the repository at this point in the history
  • Loading branch information
annoviko committed Mar 18, 2017
1 parent f9065bc commit eb32263
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 104 deletions.
2 changes: 1 addition & 1 deletion pyclustering/cluster/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def append_clusters(self, clusters, data = None, canvas = 0, marker = '.', marke
self.append_cluster(cluster, data, canvas, marker, markersize);


def set_canvas_title(self, text, canvas):
def set_canvas_title(self, text, canvas = 0):
"""!
@brief Set title for specified canvas.
Expand Down
17 changes: 12 additions & 5 deletions pyclustering/cluster/examples/xmeans_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@

from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;

from pyclustering.cluster import cluster_visualizer;
from pyclustering.cluster.xmeans import xmeans, splitting_type;

from pyclustering.utils import draw_clusters, read_sample, timedcall;
from pyclustering.utils import read_sample, timedcall;


def template_clustering(start_centers, path, tolerance = 0.025, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore = False):
sample = read_sample(path);
Expand All @@ -36,15 +38,20 @@ def template_clustering(start_centers, path, tolerance = 0.025, criterion = spli
(ticks, result) = timedcall(xmeans_instance.process);

clusters = xmeans_instance.get_clusters();
centers = xmeans_instance.get_centers();

criterion_string = "UNKNOWN";
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN_INFORMATION_CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM_NOISELESS_DESCRIPTION_LENGTH";
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN INFORMATION CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM NOISELESS DESCRIPTION_LENGTH";

print("Sample: ", path, "\nInitial centers: '", (start_centers is not None), "', Execution time: '", ticks, "', Number of clusters:", len(clusters), ",", criterion_string, "\n");

draw_clusters(sample, clusters);

visualizer = cluster_visualizer();
visualizer.set_canvas_title(criterion_string);
visualizer.append_clusters(clusters, sample);
visualizer.append_cluster(centers, None, marker = '*');
visualizer.show();


def cluster_sample1():
"Start with wrong number of clusters."
Expand Down
75 changes: 26 additions & 49 deletions pyclustering/cluster/tests/xmeans_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,77 +47,57 @@ def templateLengthProcessData(self, path_to_file, start_centers, expected_cluste

obtained_cluster_sizes.sort();
expected_cluster_length.sort();

assert obtained_cluster_sizes == expected_cluster_length;

def testBicClusterAllocationSampleSimple1(self):
    # BIC splitting on SAMPLE_SIMPLE1, starting from two centers;
    # the template compares the sorted cluster sizes with the expected ones.
    start_centers = [[3.7, 5.5], [6.7, 7.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicSampleSimple1WithoutInitialCenters(self):
    # BIC splitting on SAMPLE_SIMPLE1 with no initial centers supplied (None).
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        None,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleSimple1ByCore(self):
    # Same scenario as the non-core BIC test on SAMPLE_SIMPLE1.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.7, 5.5], [6.7, 7.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicSampleSimple1WithoutInitialCentersByCore(self):
    # BIC splitting on SAMPLE_SIMPLE1 with no initial centers (None).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        None,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleSimple1(self):
    # Starts from a single center although two clusters are expected (BIC).
    start_centers = [[3.7, 5.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleSimple1ByCore(self):
    # Starts from a single center although two clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.7, 5.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testMndlClusterAllocationSampleSimple1(self):
    # MNDL splitting on SAMPLE_SIMPLE1, starting from two centers.
    start_centers = [[3.7, 5.5], [6.7, 7.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlSampleSimple1WithoutInitialCenters(self):
    # MNDL splitting on SAMPLE_SIMPLE1 with no initial centers supplied (None).
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        None,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlClusterAllocationSampleSimple1ByCore(self):
    # MNDL splitting on SAMPLE_SIMPLE1, starting from two centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.7, 5.5], [6.7, 7.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )

def testMndlSampleSimple1WithoutInitialCentersByCore(self):
    # MNDL splitting on SAMPLE_SIMPLE1 with no initial centers (None).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        None,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )

def testMndlWrongStartClusterAllocationSampleSimple1(self):
    # Starts from a single center although two clusters are expected (MNDL).
    start_centers = [[3.7, 5.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlWrongStartClusterAllocationSampleSimple1ByCore(self):
    # Starts from a single center although two clusters are expected (MNDL).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.7, 5.5]]
    expected_sizes = [5, 5]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )

def testBicClusterAllocationSampleSimple2(self):
    # BIC splitting on SAMPLE_SIMPLE2, starting from three centers.
    start_centers = [[3.5, 4.8], [6.9, 7], [7.5, 0.5]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleSimple2ByCore(self):
    # BIC splitting on SAMPLE_SIMPLE2, starting from three centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.5, 4.8], [6.9, 7], [7.5, 0.5]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleSimple2(self):
    # Starts from two centers although three clusters are expected (BIC).
    start_centers = [[3.5, 4.8], [6.9, 7]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleSimple2ByCore(self):
    # Starts from two centers although three clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.5, 4.8], [6.9, 7]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testMndlClusterAllocationSampleSimple2(self):
    # MNDL splitting on SAMPLE_SIMPLE2, starting from three centers.
    start_centers = [[3.5, 4.8], [6.9, 7], [7.5, 0.5]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlClusterAllocationSampleSimple2ByCore(self):
    # MNDL splitting on SAMPLE_SIMPLE2, starting from three centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.5, 4.8], [6.9, 7], [7.5, 0.5]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )

def testMndlWrongStartClusterAllocationSampleSimple2(self):
    # Starts from two centers although three clusters are expected (MNDL).
    start_centers = [[3.5, 4.8], [6.9, 7]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlWrongStartClusterAllocationSampleSimple2ByCore(self):
    # Starts from two centers although three clusters are expected (MNDL).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[3.5, 4.8], [6.9, 7]]
    expected_sizes = [10, 5, 8]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE2,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )


def testBicClusterAllocationSampleSimple3(self):
    # BIC splitting on SAMPLE_SIMPLE3, starting from four centers.
    start_centers = [[0.2, 0.1], [4.0, 1.0], [2.0, 2.0], [2.3, 3.9]]
    expected_sizes = [10, 10, 10, 30]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE3,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleSimple3ByCore(self):
    # BIC splitting on SAMPLE_SIMPLE3, starting from four centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.2, 0.1], [4.0, 1.0], [2.0, 2.0], [2.3, 3.9]]
    expected_sizes = [10, 10, 10, 30]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE3,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleSimple3(self):
    # Starts from three centers although four clusters are expected (BIC).
    start_centers = [[0.2, 0.1], [4.0, 1.0], [5.9, 5.9]]
    expected_sizes = [10, 10, 10, 30]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE3,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleSimple3ByCore(self):
    # Starts from three centers although four clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.2, 0.1], [4.0, 1.0], [5.9, 5.9]]
    expected_sizes = [10, 10, 10, 30]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE3,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

Expand All @@ -135,13 +115,13 @@ def testMndlWrongStartClusterAllocationSampleSimple3ByCore(self):

def testBicClusterAllocationSampleSimple4(self):
    # BIC splitting on SAMPLE_SIMPLE4, starting from five centers.
    start_centers = [[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]]
    expected_sizes = [15, 15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE4,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleSimple4ByCore(self):
    # BIC splitting on SAMPLE_SIMPLE4, starting from five centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]]
    expected_sizes = [15, 15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE4,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleSimple4(self):
    # Starts from four centers although five clusters are expected (BIC).
    start_centers = [[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0]]
    expected_sizes = [15, 15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE4,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleSimple4ByCore(self):
    # Starts from four centers although five clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0]]
    expected_sizes = [15, 15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE4,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

Expand All @@ -159,13 +139,13 @@ def testMndlWrongStartClusterAllocationSampleSimple4ByCore(self):

def testBicClusterAllocationSampleSimple5(self):
    # BIC splitting on SAMPLE_SIMPLE5, starting from four centers.
    start_centers = [[0.0, 1.0], [0.0, 0.0], [1.0, 1.0], [1.0, 0.0]]
    expected_sizes = [15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE5,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleSimple5ByCore(self):
    # BIC splitting on SAMPLE_SIMPLE5, starting from four centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.0, 1.0], [0.0, 0.0], [1.0, 1.0], [1.0, 0.0]]
    expected_sizes = [15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE5,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleSimple5(self):
    # Starts from two centers although four clusters are expected (BIC).
    start_centers = [[0.0, 1.0], [0.0, 0.0]]
    expected_sizes = [15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE5,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleSimple5ByCore(self):
    # Starts from two centers although four clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.0, 1.0], [0.0, 0.0]]
    expected_sizes = [15, 15, 15, 15]
    self.templateLengthProcessData(
        SIMPLE_SAMPLES.SAMPLE_SIMPLE5,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

Expand All @@ -183,13 +163,13 @@ def testMndlWrongStartClusterAllocationSampleSimple5ByCore(self):

def testBicClusterAllocationSampleTwoDiamonds(self):
    # BIC splitting on the FCPS Two Diamonds sample, starting from two centers.
    start_centers = [[0.8, 0.2], [3.0, 0.0]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicClusterAllocationSampleTwoDiamondsByCore(self):
    # BIC splitting on the FCPS Two Diamonds sample, starting from two centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.8, 0.2], [3.0, 0.0]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

def testBicWrongStartClusterAllocationSampleTwoDiamonds(self):
    # Starts from a single center although two clusters are expected (BIC).
    start_centers = [[0.8, 0.2]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
    )

def testBicWrongStartClusterAllocationSampleTwoDiamondsByCore(self):
    # Starts from a single center although two clusters are expected (BIC).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.8, 0.2]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        True,
    )

Expand All @@ -199,27 +179,24 @@ def testMndlClusterAllocationSampleTwoDiamonds(self):
def testMndlClusterAllocationSampleTwoDiamondsByCore(self):
    # MNDL splitting on the FCPS Two Diamonds sample, starting from two centers.
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.8, 0.2], [3.0, 0.0]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )

def testMndlWrongStartClusterAllocationSampleTwoDiamonds(self):
    # Starts from a single center although two clusters are expected (MNDL).
    start_centers = [[0.8, 0.2]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
    )

def testMndlWrongStartClusterAllocationSampleTwoDiamondsByCore(self):
    # Starts from a single center although two clusters are expected (MNDL).
    # NOTE(review): the trailing True is presumably the ccore flag - confirm against the template.
    start_centers = [[0.8, 0.2]]
    expected_sizes = [400, 400]
    self.templateLengthProcessData(
        FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        start_centers,
        expected_sizes,
        splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH,
        True,
    )


def templateClusterAllocationOneDimensionData(self, ccore_flag):
    # Build four groups of ten identical one-dimensional points each,
    # located at 0, 5, 10 and 15 (40 points in total, in that order).
    input_data = []
    for position in (0.0, 5.0, 10.0, 15.0):
        input_data.extend([position] for _ in range(10))

    # One slightly offset initial center per group; ccore_flag is forwarded
    # unchanged as the last constructor argument.
    initial_centers = [[0.5], [5.5], [10.5], [15.5]]
    xmeans_instance = xmeans(
        input_data,
        initial_centers,
        20,
        0.025,
        splitting_type.BAYESIAN_INFORMATION_CRITERION,
        ccore_flag,
    )
    xmeans_instance.process()

    # Exactly four clusters must come back, each holding ten points.
    clusters = xmeans_instance.get_clusters()
    assert len(clusters) == 4
    assert all(len(cluster) == 10 for cluster in clusters)

def testClusterAllocationOneDimensionData(self):
self.templateClusterAllocationOneDimensionData(False);

def testClusterAllocationOneDimensionDataByCore(self):
self.templateClusterAllocationOneDimensionData(True);

Expand Down

0 comments on commit eb32263

Please sign in to comment.