#372: Examples for testing performance are written.
annoviko committed Oct 27, 2017
1 parent 15b8af2 commit ae13c54
Showing 6 changed files with 74 additions and 43 deletions.
4 changes: 2 additions & 2 deletions ccore/src/cluster/xmeans.cpp
@@ -202,8 +202,8 @@ double xmeans::update_centers(const cluster_sequence & analysed_clusters, datase
}

/* average for each dimension */
for (point::iterator dimension_iterator = total.begin(); dimension_iterator != total.end(); dimension_iterator++) {
*dimension_iterator = *dimension_iterator / analysed_clusters[index_cluster].size();
for (auto & dimension : total) {
dimension = dimension / analysed_clusters[index_cluster].size();
}

double distance = euclidean_distance_sqrt( &(analysed_centers[index_cluster]), &total );
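The refactored loop above computes the per-dimension mean of the points assigned to a cluster and then measures how far the updated center moved from the previous one. For reference, a minimal standalone sketch of the same center-update step, written in plain Python for brevity; update_center is a hypothetical helper and is not part of this commit:

# Hypothetical helper illustrating the same computation (not part of the commit).
def update_center(cluster_points, previous_center):
    # Sum the points dimension by dimension.
    total = [0.0] * len(previous_center)
    for point in cluster_points:
        for dim, value in enumerate(point):
            total[dim] += value

    # Average each dimension to obtain the new center.
    new_center = [value / len(cluster_points) for value in total]

    # Squared Euclidean shift of the center, as in euclidean_distance_sqrt().
    shift = sum((a - b) ** 2 for a, b in zip(previous_center, new_center))
    return new_center, shift
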
6 changes: 2 additions & 4 deletions ccore/src/cluster/xmeans.hpp
@@ -18,8 +18,8 @@
*
*/

#ifndef _XMEANS_H_
#define _XMEANS_H_

#pragma once


#include <vector>
@@ -105,5 +105,3 @@ class xmeans : public cluster_algorithm {

}


#endif
2 changes: 1 addition & 1 deletion pyclustering/cluster/dbscan.py
@@ -36,7 +36,7 @@
class dbscan:
"""!
@brief Class represents clustering algorithm DBSCAN.
@details This algorithm is KD-tree optimized.
@details This DBSCAN algorithm is KD-tree optimized.
Example:
@code
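The @code block in the DBSCAN docstring is truncated here. A short sketch of how the class is typically driven; the sample file, connectivity radius and neighbor threshold below are placeholder values and should be tuned per dataset:

from pyclustering.cluster.dbscan import dbscan
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

# Load a sample shipped with the library and run the KD-tree optimized DBSCAN.
sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
dbscan_instance = dbscan(sample, 0.7, 3)   # data, connectivity radius, neighbor threshold
dbscan_instance.process()

clusters = dbscan_instance.get_clusters()  # lists of point indexes per cluster
noise = dbscan_instance.get_noise()        # indexes of outliers
print("Allocated clusters:", len(clusters), ", noise points:", len(noise))
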
76 changes: 60 additions & 16 deletions pyclustering/cluster/examples/xmeans_examples.py
@@ -23,6 +23,10 @@
"""


import ntpath;
import random;

from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;

from pyclustering.cluster import cluster_visualizer;
@@ -44,7 +48,7 @@ def template_clustering(start_centers, path, tolerance = 0.025, criterion = spli
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN INFORMATION CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM NOISELESS DESCRIPTION_LENGTH";

print("Sample: ", path, "\nInitial centers: '", (start_centers is not None), "', Execution time: '", ticks, "', Number of clusters:", len(clusters), ",", criterion_string, "\n");
print("Sample: ", ntpath.basename(path), "\nInitial centers: '", (start_centers is not None), "', Execution time: '", ticks, "', Number of clusters:", len(clusters), ",", criterion_string, "\n");

visualizer = cluster_visualizer();
visualizer.set_canvas_title(criterion_string);
@@ -150,22 +154,62 @@ def cluster_tetra():
start_centers = [[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]];
template_clustering(start_centers, FCPS_SAMPLES.SAMPLE_TETRA, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION);
template_clustering(start_centers, FCPS_SAMPLES.SAMPLE_TETRA, criterion = splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH);


def template_clustering_performance(start_centers, path, tolerance = 0.025, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore = False):
sample = read_sample(path);

xmeans_instance = xmeans(sample, start_centers, 20, tolerance, criterion, ccore);
(ticks, result) = timedcall(xmeans_instance.process);

criterion_string = "UNKNOWN";
if (criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION): criterion_string = "BAYESIAN INFORMATION CRITERION";
elif (criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH): criterion_string = "MINIMUM NOISELESS DESCRIPTION_LENGTH";

print("Sample: ", ntpath.basename(path), "', Execution time: '", ticks, "',", criterion_string, "\n");


def template_clustering_random_points_performance(cluster_length, amount_clusters, ccore_flag):
sample = [ [ random.random(), random.random() ] for _ in range(cluster_length) ];
for index in range(1, amount_clusters):
default_offset = 5;
sample += [ [ random.random() + default_offset * index, random.random() + default_offset * index ] for _ in range(cluster_length) ];

initial_center = [ [ random.random(), random.random() ], [ random.random(), random.random() ] ];
xmeans_instance = xmeans(sample, initial_center, 20, 0.25, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag);
(ticks, _) = timedcall(xmeans_instance.process);

print("Random sample: (size:" + str(len(sample)) + ") ', Execution time: '", ticks,);


def experiment_execution_time(ccore_flag = False):
template_clustering([[3.7, 5.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE1, ccore = ccore_flag);
template_clustering([[3.5, 4.8], [2.6, 2.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE2, ccore = ccore_flag);
template_clustering([[0.2, 0.1], [4.0, 1.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE3, ccore = ccore_flag);
template_clustering([[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE4, ccore = ccore_flag);
template_clustering([[0.0, 1.0], [0.0, 0.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE5, ccore = ccore_flag);
template_clustering([[1.0, 4.5], [3.1, 2.7]], SIMPLE_SAMPLES.SAMPLE_ELONGATE, ccore = ccore_flag);
template_clustering([[1.0, 3.5], [2.0, 0.5], [3.0, 3.0]], FCPS_SAMPLES.SAMPLE_LSUN, ccore = ccore_flag);
template_clustering([[0.2, 0.2], [0.0, -2.0], [3.0, -3.0], [3.0, 3.0], [-3.0, 3.0], [-3.0, -3.0]], FCPS_SAMPLES.SAMPLE_TARGET, ccore = ccore_flag);
template_clustering([[0.8, 0.2]], FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, ccore = ccore_flag);
template_clustering([[-1.5, 1.5], [1.5, 1.5]], FCPS_SAMPLES.SAMPLE_WING_NUT, ccore = ccore_flag);
template_clustering([[1.1, -1.7, 1.1], [-1.4, 2.5, -1.2]], FCPS_SAMPLES.SAMPLE_CHAINLINK, ccore = ccore_flag);
template_clustering([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0], [-2.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, -3.0, 0.0], [0.0, 0.0, 2.5]], FCPS_SAMPLES.SAMPLE_HEPTA, ccore = ccore_flag)
template_clustering([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_TETRA, ccore = ccore_flag);
template_clustering([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_ATOM);
template_clustering_performance([[3.7, 5.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE1, ccore = ccore_flag);
template_clustering_performance([[3.5, 4.8], [2.6, 2.5]], SIMPLE_SAMPLES.SAMPLE_SIMPLE2, ccore = ccore_flag);
template_clustering_performance([[0.2, 0.1], [4.0, 1.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE3, ccore = ccore_flag);
template_clustering_performance([[1.5, 0.0], [1.5, 2.0], [1.5, 4.0], [1.5, 6.0], [1.5, 8.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE4, ccore = ccore_flag);
template_clustering_performance([[0.0, 1.0], [0.0, 0.0]], SIMPLE_SAMPLES.SAMPLE_SIMPLE5, ccore = ccore_flag);
template_clustering_performance([[1.0, 4.5], [3.1, 2.7]], SIMPLE_SAMPLES.SAMPLE_ELONGATE, ccore = ccore_flag);
template_clustering_performance([[1.0, 3.5], [2.0, 0.5], [3.0, 3.0]], FCPS_SAMPLES.SAMPLE_LSUN, ccore = ccore_flag);
template_clustering_performance([[0.2, 0.2], [0.0, -2.0], [3.0, -3.0], [3.0, 3.0], [-3.0, 3.0], [-3.0, -3.0]], FCPS_SAMPLES.SAMPLE_TARGET, ccore = ccore_flag);
template_clustering_performance([[0.8, 0.2]], FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, ccore = ccore_flag);
template_clustering_performance([[-1.5, 1.5], [1.5, 1.5]], FCPS_SAMPLES.SAMPLE_WING_NUT, ccore = ccore_flag);
template_clustering_performance([[1.1, -1.7, 1.1], [-1.4, 2.5, -1.2]], FCPS_SAMPLES.SAMPLE_CHAINLINK, ccore = ccore_flag);
template_clustering_performance([[0.0, 0.0, 0.0], [3.0, 0.0, 0.0], [-2.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, -3.0, 0.0], [0.0, 0.0, 2.5]], FCPS_SAMPLES.SAMPLE_HEPTA, ccore = ccore_flag)
template_clustering_performance([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_TETRA, ccore = ccore_flag);
template_clustering_performance([[1, 0, 0], [0, 1, 0], [0, -1, 0], [-1, 0, 0]], FCPS_SAMPLES.SAMPLE_ATOM);

template_clustering_random_points_performance(1000, 6, ccore_flag);
template_clustering_random_points_performance(2000, 6, ccore_flag);
template_clustering_random_points_performance(4000, 6, ccore_flag);
template_clustering_random_points_performance(6000, 6, ccore_flag);
template_clustering_random_points_performance(8000, 6, ccore_flag);
template_clustering_random_points_performance(10000, 6, ccore_flag);
template_clustering_random_points_performance(15000, 6, ccore_flag);
template_clustering_random_points_performance(30000, 6, ccore_flag);
template_clustering_random_points_performance(45000, 6, ccore_flag);
template_clustering_random_points_performance(100000, 6, ccore_flag);
template_clustering_random_points_performance(200000, 6, ccore_flag);
template_clustering_random_points_performance(300000, 6, ccore_flag);


cluster_sample1();
@@ -188,6 +232,6 @@ def experiment_execution_time(ccore_flag = False):
cluster_sample4_without_initial_centers();
cluster_sample5_without_initial_centers();
cluster_two_diamonds_without_initial_centers();

experiment_execution_time(False); # Python code
experiment_execution_time(True); # C++ code + Python env.
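The new performance examples print wall-clock times for the pure Python path and, when ccore is enabled, for the C++ core. A small wrapper in the same spirit that reports the speedup directly; compare_ccore_speedup is a hypothetical helper sketched against the constructor and timedcall usage shown above, not part of this commit:

import random
from pyclustering.cluster.xmeans import xmeans, splitting_type
from pyclustering.utils import timedcall

# Hypothetical helper, not part of this commit.
def compare_ccore_speedup(cluster_length, amount_clusters = 6):
    # Generate the same kind of random sample used above: clusters offset along the diagonal.
    default_offset = 5
    sample = []
    for index in range(amount_clusters):
        sample += [[random.random() + default_offset * index,
                    random.random() + default_offset * index] for _ in range(cluster_length)]

    initial_centers = [[random.random(), random.random()], [random.random(), random.random()]]

    # Time the pure Python implementation and the C++ core on identical input.
    (python_ticks, _) = timedcall(xmeans(sample, initial_centers, 20, 0.25,
                                         splitting_type.BAYESIAN_INFORMATION_CRITERION, False).process)
    (ccore_ticks, _) = timedcall(xmeans(sample, initial_centers, 20, 0.25,
                                        splitting_type.BAYESIAN_INFORMATION_CRITERION, True).process)

    print("size:", len(sample), ", python:", python_ticks, ", ccore:", ccore_ticks,
          ", speedup: x%.1f" % (python_ticks / ccore_ticks))
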
16 changes: 2 additions & 14 deletions pyclustering/cluster/optics.py
@@ -32,7 +32,6 @@

from pyclustering.cluster.encoder import type_encoding;

from pyclustering.utils import euclidean_distance;
from pyclustering.utils.color import color as color_list;

import matplotlib.pyplot as plt;
@@ -43,6 +42,7 @@
class ordering_visualizer:
"""!
@brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
@details This OPTICS algorithm is KD-tree optimized.
@see ordering_analyser
@@ -615,16 +615,4 @@ def __neighbor_indexes(self, optic_object):
"""

kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__sample_pointer[optic_object.index_object], self.__eps);
return [ [node_tuple[1].payload, math.sqrt(node_tuple[0]) ] for node_tuple in kdnodes if node_tuple[1].payload != optic_object.index_object];

# neighbor_description = [];
#
# for index in range(0, len(self.__sample_pointer), 1):
# if (index == optic_object.index_object):
# continue;
#
# distance = euclidean_distance(self.__sample_pointer[optic_object.index_object], self.__sample_pointer[index]);
# if (distance <= self.__eps):
# neighbor_description.append( [index, distance] );
#
# return neighbor_description;
return [ [node_tuple[1].payload, math.sqrt(node_tuple[0]) ] for node_tuple in kdnodes if node_tuple[1].payload != optic_object.index_object];
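The docstring addition notes that OPTICS also relies on the KD-tree, and __neighbor_indexes above now keeps only the find_nearest_dist_nodes query after the commented-out brute-force search was removed. A brief usage sketch for reference; the sample file, radius and minimum-neighbor values are placeholders only:

from pyclustering.cluster.optics import optics
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

# Connectivity radius (eps) and minimum neighbor count (minpts) control the ordering.
optics_instance = optics(sample, 0.5, 3)
optics_instance.process()

clusters = optics_instance.get_clusters()   # clusters extracted from the ordering
ordering = optics_instance.get_ordering()   # reachability-distance ordering for visualization
print("Clusters:", len(clusters), ", ordering length:", len(ordering))
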
13 changes: 7 additions & 6 deletions pyclustering/cluster/xmeans.py
@@ -132,7 +132,8 @@ def __init__(self, data, initial_centers = None, kmax = 20, tolerance = 0.025, c
self.__criterion = criterion;

self.__ccore = ccore;



def process(self):
"""!
@brief Performs cluster analysis in line with rules of X-Means algorithm.
@@ -161,8 +162,8 @@ def process(self):
break;
else:
self.__centers = allocated_centers;


def get_clusters(self):
"""!
@brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
@@ -173,10 +174,10 @@
@see get_centers()
"""

return self.__clusters;


def get_centers(self):
"""!
@brief Returns list of centers for allocated clusters.
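For completeness, the public X-Means API around these whitespace-only changes (process, get_clusters, get_centers) is typically used as follows; the starting centers match the SAMPLE_SIMPLE3 example above, and kmax and the criterion keyword are shown with their documented defaults:

from pyclustering.cluster.xmeans import xmeans, splitting_type
from pyclustering.samples.definitions import SIMPLE_SAMPLES
from pyclustering.utils import read_sample

sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

# Start from two centers and let X-Means split up to kmax = 20 clusters.
xmeans_instance = xmeans(sample, [[0.2, 0.1], [4.0, 1.0]], 20,
                         criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION)
xmeans_instance.process()

clusters = xmeans_instance.get_clusters()  # indexes of points per allocated cluster
centers = xmeans_instance.get_centers()    # coordinates of the allocated centers
print("Allocated clusters:", len(clusters))
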
