#372: Examples for testing performance are written.
annoviko committed Oct 27, 2017
1 parent 15b8af2 commit ae13c54
Showing 6 changed files with 74 additions and 43 deletions.
4 changes: 2 additions & 2 deletions ccore/src/cluster/xmeans.cpp
@@ -202,8 +202,8 @@ double xmeans::update_centers(const cluster_sequence & analysed_clusters, datase
}

/* average for each dimension */
for (point::iterator dimension_iterator = total.begin(); dimension_iterator != total.end(); dimension_iterator++) {
*dimension_iterator = *dimension_iterator / analysed_clusters[index_cluster].size();
for (auto & dimension : total) {
dimension = dimension / analysed_clusters[index_cluster].size();
}

double distance = euclidean_distance_sqrt( &(analysed_centers[index_cluster]), &total );
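The refactored loop above computes the per-dimension mean of the points assigned to a cluster and then measures how far the updated center moved from the previous one. For reference, a minimal standalone sketch of the same center-update step, written in plain Python for brevity; update_center is a hypothetical helper and is not part of this commit:

# Hypothetical helper illustrating the same computation (not part of the commit).
def update_center(cluster_points, previous_center):
    # Sum the points dimension by dimension.
    total = [0.0] * len(previous_center)
    for point in cluster_points:
        for dim, value in enumerate(point):
            total[dim] += value

    # Average each dimension to obtain the new center.
    new_center = [value / len(cluster_points) for value in total]

    # Squared Euclidean shift of the center, as in euclidean_distance_sqrt().
    shift = sum((a - b) ** 2 for a, b in zip(previous_center, new_center))
    return new_center, shift
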
6 changes: 2 additions & 4 deletions ccore/src/cluster/xmeans.hpp
@@ -18,8 +18,8 @@
*
*/

#ifndef _XMEANS_H_
#define _XMEANS_H_

#pragma once


#include <vector>
@@ -105,5 +105,3 @@ class xmeans : public cluster_algorithm {

}


#endif
2 changes: 1 addition & 1 deletion pyclustering/cluster/dbscan.py
@@ -36,7 +36,7 @@
class dbscan:
"""!
@brief Class represents clustering algorithm DBSCAN.
@details This algorithm is KD-tree optimized.
@details This DBSCAN algorithm is KD-tree optimized.
Example:
@code
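The @code block in the DBSCAN docstring is truncated here. A short sketch of how the class is typically driven; the sample file, connectivity radius and neighbor threshold below are placeholder values and should be tuned per dataset:

from pyclustering.cluster.dbscan import dbscan
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

# Load a sample shipped with the library and run the KD-tree optimized DBSCAN.
sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
dbscan_instance = dbscan(sample, 0.7, 3)   # data, connectivity radius, neighbor threshold
dbscan_instance.process()

clusters = dbscan_instance.get_clusters()  # lists of point indexes per cluster
noise = dbscan_instance.get_noise()        # indexes of outliers
print("Allocated clusters:", len(clusters), ", noise points:", len(noise))
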
76 changes: 60 additions & 16 deletions pyclustering/cluster/examples/xmeans_examples.py
@@ -23,6 +23,10 @@
"""


import ntpath;
import random;

from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;

from pyclustering.cluster import cluster_visualizer;
@@ -44,7 +48,7 @@ def template_clustering(start_centers, path, tolerance = 0.025, criterion = spli
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN INFORMATION CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM NOISELESS DESCRIPTION_LENGTH";

print("Sample: ", path, "\nInitial centers: '", (start_centers is not None), "', Execution time: '", ticks, "', Number of clusters:", len(clusters), ",", criterion_string, "\n");
print("Sample: ", ntpath.basename(path), "\nInitial centers: '", (start_centers is not None), "', Execution time: '", ticks, "', Number of clusters:", len(clusters), ",", criterion_string, "\n");

visualizer = cluster_visualizer();
visualizer.set_canvas_title(criterion_string);
@@ -150,22 +154,62 @@ def cluster_tetra():
start_centers = [[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]];
template_clustering(start_centers, FCPS_SAMPLES.SAMPLE_TETRA, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION);
template_clustering(start_centers, FCPS_SAMPLES.SAMPLE_TETRA, criterion = splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH);


def template_clustering_performance(start_centers, path, tolerance = 0.025, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore = False):
sample = read_sample(path);

xmeans_instance = xmeans(sample, start_centers, 20, tolerance, criterion, ccore);
(ticks, result) = timedcall(xmeans_instance.process);

criterion_string = "UNKNOWN";
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN INFORMATION CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM NOISELESS DESCRIPTION_LENGTH";

print("Sample: ", ntpath.basename(path), "', Execution time: '", ticks, "',", criterion_string, "\n");


def template_clustering_random_points_performance(cluster_length, amount_clusters, ccore_flag):
sample = [ [ random.random(), random.random() ] for _ in range(cluster_length) ];
for index in range(1, amount_clusters):
default_offset = 5;
sample += [ [ random.random() + default_offset * index, random.random() + default_offset * index ] for _ in range(cluster_length) ];

initial_center = [ [ random.random(), random.random() ], [ random.random(), random.random() ] ];
xmeans_instance = xmeans(sample, initial_center, 20, 0.25, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag);
(ticks, _) = timedcall(xmeans_instance.process);

print("Random sample: (size:" + str(len(sample)) + ") ', Execution time: '", ticks,);


def experiment_execution_time(ccore_flag = False):
template_clustering([[3.7, 5.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE1, ccore = ccore_flag);
template_clustering([[3.5, 4.8], [2.6, 2.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE2, ccore = ccore_flag);
template_clustering([[0.2, 0.1], [4.0, 1.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE3, ccore = ccore_flag);
template_clustering([[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE4, ccore = ccore_flag);
template_clustering([[0.0, 1.0], [0.0, 0.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE5, ccore = ccore_flag);
template_clustering([[1.0, 4.5], [3.1, 2.7]], SIMPLE_SAMPLES.SAMPLE_ELONGATE, ccore = ccore_flag);
template_clustering([[1.0, 3.5], [2.0, 0.5], [3.0, 3.0]], FCPS_SAMPLES.SAMPLE_LSUN, ccore = ccore_flag);
template_clustering([[0.2, 0.2], [0.0, -2.0], [3.0, -3.0], [3.0, 3.0], [-3.0, 3.0], [-3.0, -3.0]], FCPS_SAMPLES.SAMPLE_TARGET, ccore = ccore_flag);
template_clustering([[0.8, 0.2]], FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, ccore = ccore_flag);
template_clustering([[-1.5, 1.5], [1.5, 1.5]], FCPS_SAMPLES.SAMPLE_WING_NUT, ccore = ccore_flag);
template_clustering([[1.1, -1.7, 1.1], [-1.4, 2.5, -1.2]], FCPS_SAMPLES.SAMPLE_CHAINLINK, ccore = ccore_flag);
template_clustering([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0], [-2.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, -3.0, 0.0], [0.0, 0.0, 2.5]], FCPS_SAMPLES.SAMPLE_HEPTA, ccore = ccore_flag)
template_clustering([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_TETRA, ccore = ccore_flag);
template_clustering([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_ATOM);
template_clustering_performance([[3.7, 5.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE1, ccore = ccore_flag);
template_clustering_performance([[3.5, 4.8], [2.6, 2.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE2, ccore = ccore_flag);
template_clustering_performance([[0.2, 0.1], [4.0, 1.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE3, ccore = ccore_flag);
template_clustering_performance([[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE4, ccore = ccore_flag);
template_clustering_performance([[0.0, 1.0], [0.0, 0.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE5, ccore = ccore_flag);
template_clustering_performance([[1.0, 4.5], [3.1, 2.7]], SIMPLE_SAMPLES.SAMPLE_ELONGATE, ccore = ccore_flag);
template_clustering_performance([[1.0, 3.5], [2.0, 0.5], [3.0, 3.0]], FCPS_SAMPLES.SAMPLE_LSUN, ccore = ccore_flag);
template_clustering_performance([[0.2, 0.2], [0.0, -2.0], [3.0, -3.0], [3.0, 3.0], [-3.0, 3.0], [-3.0, -3.0]], FCPS_SAMPLES.SAMPLE_TARGET, ccore = ccore_flag);
template_clustering_performance([[0.8, 0.2]], FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, ccore = ccore_flag);
template_clustering_performance([[-1.5, 1.5], [1.5, 1.5]], FCPS_SAMPLES.SAMPLE_WING_NUT, ccore = ccore_flag);
template_clustering_performance([[1.1, -1.7, 1.1], [-1.4, 2.5, -1.2]], FCPS_SAMPLES.SAMPLE_CHAINLINK, ccore = ccore_flag);
template_clustering_performance([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0], [-2.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, -3.0, 0.0], [0.0, 0.0, 2.5]], FCPS_SAMPLES.SAMPLE_HEPTA, ccore = ccore_flag)
template_clustering_performance([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_TETRA, ccore = ccore_flag);
template_clustering_performance([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_ATOM);

template_clustering_random_points_performance(1000, 6, ccore_flag);
template_clustering_random_points_performance(2000, 6, ccore_flag);
template_clustering_random_points_performance(4000, 6, ccore_flag);
template_clustering_random_points_performance(6000, 6, ccore_flag);
template_clustering_random_points_performance(8000, 6, ccore_flag);
template_clustering_random_points_performance(10000, 6, ccore_flag);
template_clustering_random_points_performance(15000, 6, ccore_flag);
template_clustering_random_points_performance(30000, 6, ccore_flag);
template_clustering_random_points_performance(45000, 6, ccore_flag);
template_clustering_random_points_performance(100000, 6, ccore_flag);
template_clustering_random_points_performance(200000, 6, ccore_flag);
template_clustering_random_points_performance(300000, 6, ccore_flag);


cluster_sample1();
@@ -188,6 +232,6 @@ def experiment_execution_time(ccore_flag = False):
cluster_sample4_without_initial_centers();
cluster_sample5_without_initial_centers();
cluster_two_diamonds_without_initial_centers();

experiment_execution_time(False); # Python code
experiment_execution_time(True); # C++ code + Python env.
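The new performance examples print wall-clock times for the pure Python path and, when ccore is enabled, for the C++ core. A small wrapper in the same spirit that reports the speedup directly; compare_ccore_speedup is a hypothetical helper sketched against the constructor and timedcall usage shown above, not part of this commit:

import random
from pyclustering.cluster.xmeans import xmeans, splitting_type
from pyclustering.utils import timedcall

# Hypothetical helper, not part of this commit.
def compare_ccore_speedup(cluster_length, amount_clusters = 6):
    # Generate the same kind of random sample used above: clusters offset along the diagonal.
    default_offset = 5
    sample = []
    for index in range(amount_clusters):
        sample += [[random.random() + default_offset * index,
                    random.random() + default_offset * index] for _ in range(cluster_length)]

    initial_centers = [[random.random(), random.random()], [random.random(), random.random()]]

    # Time the pure Python implementation and the C++ core on identical input.
    (python_ticks, _) = timedcall(xmeans(sample, initial_centers, 20, 0.25,
                                         splitting_type.BAYESIAN_INFORMATION_CRITERION, False).process)
    (ccore_ticks, _) = timedcall(xmeans(sample, initial_centers, 20, 0.25,
                                        splitting_type.BAYESIAN_INFORMATION_CRITERION, True).process)

    print("size:", len(sample), ", python:", python_ticks, ", ccore:", ccore_ticks,
          ", speedup: x%.1f" % (python_ticks / ccore_ticks))
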
16 changes: 2 additions & 14 deletions pyclustering/cluster/optics.py
@@ -32,7 +32,6 @@

from pyclustering.cluster.encoder import type_encoding;

from pyclustering.utils import euclidean_distance;
from pyclustering.utils.color import color as color_list;

import matplotlib.pyplot as plt;
@@ -43,6 +42,7 @@
class ordering_visualizer:
"""!
@brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
@details This OPTICS algorithm is KD-tree optimized.
@see ordering_analyser
@@ -615,16 +615,4 @@ def __neighbor_indexes(self, optic_object):
"""

kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps);
return [ [node_tuple[1].payload, math.sqrt(node_tuple[0]) ] for node_tuple in kdnodes if node_tuple[1].payload != optic_object.index_object];

# neighbor_description = [];
#
# for index in range(0, len(self.__sample_pointer), 1):
# if (index == optic_object.index_object):
# continue;
#
# distance = euclidean_distance(self.__sample_pointer[optic_object.index_object], self.__sample_pointer[index]);
# if (distance <= self.__eps):
# neighbor_description.append( [index, distance] );
#
# return neighbor_description;
return [ [node_tuple[1].payload, math.sqrt(node_tuple[0]) ] for node_tuple in kdnodes if node_tuple[1].payload != optic_object.index_object];
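The docstring addition notes that OPTICS also relies on the KD-tree, and __neighbor_indexes above now keeps only the find_nearest_dist_nodes query after the commented-out brute-force search was removed. A brief usage sketch for reference; the sample file, radius and minimum-neighbor values are placeholders only:

from pyclustering.cluster.optics import optics
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

# Connectivity radius (eps) and minimum neighbor count (minpts) control the ordering.
optics_instance = optics(sample, 0.5, 3)
optics_instance.process()

clusters = optics_instance.get_clusters()   # clusters extracted from the ordering
ordering = optics_instance.get_ordering()   # reachability-distance ordering for visualization
print("Clusters:", len(clusters), ", ordering length:", len(ordering))
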
13 changes: 7 additions & 6 deletions pyclustering/cluster/xmeans.py
@@ -132,7 +132,8 @@ def __init__(self, data, initial_centers = None, kmax = 20, tolerance = 0.025, c
self.__criterion = criterion;

self.__ccore = ccore;



def process(self):
"""!
@brief Performs cluster analysis in line with rules of X-Means algorithm.
@@ -161,8 +162,8 @@ def process(self):
break;
else:
self.__centers = allocated_centers;


def get_clusters(self):
"""!
@brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
@@ -173,10 +174,10 @@
@see get_centers()
"""

return self.__clusters;


def get_centers(self):
"""!
@brief Returns list of centers for allocated clusters.
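For completeness, the public X-Means API around these whitespace-only changes (process, get_clusters, get_centers) is typically used as follows; the starting centers match the SAMPLE_SIMPLE3 example above, and kmax and the criterion keyword are shown with their documented defaults:

from pyclustering.cluster.xmeans import xmeans, splitting_type
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

# Start from two centers and let X-Means split up to kmax = 20 clusters.
xmeans_instance = xmeans(sample, [[0.2, 0.1], [4.0, 1.0]], 20,
                         criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION)
xmeans_instance.process()

clusters = xmeans_instance.get_clusters()  # indexes of points per allocated cluster
centers = xmeans_instance.get_centers()    # coordinates of the allocated centers
print("Allocated clusters:", len(clusters))
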
