#369: Cosmetic changes, KD-tree example, documentation, DBSCAN examples.

annoviko · Oct 25, 2017 · ccd4bac · ccd4bac
1 parent 242f198
commit ccd4bac
Show file tree

Hide file tree

Showing 6 changed files with 138 additions and 7 deletions.
diff --git a/CHANGES b/CHANGES
@@ -4,6 +4,10 @@ CHANGE NOTES FOR 0.7.3 (STARTED Oct 23, 2017)
 
 ------------------------------------------------------------------------
 
+GENERAL CHANGES:
+- Optimization of DBSCAN algorithm using KD-tree for searching nearest neighbors (pyclustering.cluster.dbscan)
+  See: https://github.com/annoviko/pyclustering/issues/369
+
 CORRECTED MAJOR BUGS:
 - Corrected bug with returned nullptr in method 'kdtree_searcher::find_nearest_node()' (ccore.container.kdtree)
   See: no reference.

diff --git a/pyclustering/cluster/dbscan.py b/pyclustering/cluster/dbscan.py
@@ -31,7 +31,7 @@
 from pyclustering.cluster.encoder import type_encoding;
 
 import pyclustering.core.dbscan_wrapper as wrapper;
-
+from pyclustering.utils import euclidean_distance_sqrt;
 
 class dbscan:
     """!
@@ -201,4 +201,4 @@ def __neighbor_indexes(self, index_point):
         kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps);
         return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point];
 
-        #return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode
+        # return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode
diff --git a/pyclustering/cluster/examples/dbscan_examples.py b/pyclustering/cluster/examples/dbscan_examples.py
@@ -23,6 +23,9 @@
 
 """
 
+
+import random;
+
 from pyclustering.cluster import cluster_visualizer;
 from pyclustering.cluster.dbscan import dbscan;
 
@@ -31,6 +34,7 @@
 
 from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;
 
+
 def template_clustering(radius, neighb, path, invisible_axes = False, ccore = True, show = True):
     sample = read_sample(path);
 
@@ -76,14 +80,14 @@ def cluster_elongate():
     template_clustering(0.5, 3, SIMPLE_SAMPLES.SAMPLE_ELONGATE);
 
 def cluster_lsun():
-    template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN);    
+    template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN);
 
 def cluster_target():
-    template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET);    
+    template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET);
 
 def cluster_two_diamonds():
     "It's hard to choose properly parameters, but it's OK"
-    template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS);   
+    template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS);
 
 def cluster_wing_nut():
     "It's hard to choose properly parameters, but it's OK"
@@ -170,6 +174,15 @@ def display_fcps_dependence_clustering_results():
     visualizer.show();
 
 
+def clustering_random_points(amount, ccore):  # Time a single DBSCAN run over 'amount' random 2D points; 'ccore' is forwarded to dbscan.
+    sample = [ [ random.random(), random.random() ] for _ in range(amount) ];  # both coordinates are drawn from [0, 1)
+
+    dbscan_instance = dbscan(sample, 0.1, 20, ccore);  # NOTE(review): 0.1 and 20 are presumably radius and neighbors amount -- confirm against dbscan
+    (ticks, _) = timedcall(dbscan_instance.process);  # only the first item returned by timedcall is reported, the second is discarded
+
+    print("Execution time ("+ str(amount) +" 2D-points):", ticks);
+
+
 cluster_sample1();
 cluster_sample2();
 cluster_sample3();
@@ -193,4 +206,10 @@ def display_fcps_dependence_clustering_results():
 experiment_execution_time(True);    # C++ code + Python env.
 
 display_fcps_clustering_results();
-display_fcps_dependence_clustering_results();
+display_fcps_dependence_clustering_results();
+
+clustering_random_points(1000, False);
+clustering_random_points(2000, False);
+clustering_random_points(3000, False);
+clustering_random_points(4000, False);
+clustering_random_points(5000, False);
diff --git a/pyclustering/container/examples/__init__.py b/pyclustering/container/examples/__init__.py
@@ -0,0 +1,24 @@
+"""!
+
+@brief Collection of examples devoted to containers.
+
+@authors Andrei Novikov (pyclustering@yandex.ru)
+@date 2014-2017
+@copyright GNU Public License
+
+@cond GNU_PUBLIC_LICENSE
+    PyClustering is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    PyClustering is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+@endcond
+
+"""
diff --git a/pyclustering/container/examples/kdtree_examples.py b/pyclustering/container/examples/kdtree_examples.py
@@ -0,0 +1,62 @@
+"""!
+
+@brief Examples devoted to KD-tree.
+
+@authors Andrei Novikov (pyclustering@yandex.ru)
+@date 2014-2017
+@copyright GNU Public License
+
+@cond GNU_PUBLIC_LICENSE
+    PyClustering is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    PyClustering is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+@endcond
+
+"""
+
+
+from pyclustering.container.kdtree import kdtree, kdtree_text_visualizer;
+
+from pyclustering.utils import read_sample;
+
+from pyclustering.samples.definitions import SIMPLE_SAMPLES;
+
+
+def template_build_visualize(sample_path):  # Build a KD-tree from the sample file at 'sample_path' and pass it to kdtree_text_visualizer.
+    print("KD Tree for sample: '" + sample_path + "'");
+    sample = read_sample(sample_path);
+    tree_instance = kdtree(sample);
+
+    kdtree_text_visualizer(tree_instance).visualize(True);  # NOTE(review): meaning of the True flag is not visible here -- confirm in kdtree_text_visualizer
+
+
+def kdtree_sample_simple01():
+    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1);
+
+def kdtree_sample_simple02():
+    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);
+
+def kdtree_sample_simple03():
+    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE3);
+
+def kdtree_sample_simple04():
+    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE4);
+
+def kdtree_sample_simple05():
+    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE5);  # fifth simple sample; SAMPLE_SIMPLE4 is already covered by kdtree_sample_simple04
+
+
+kdtree_sample_simple01();
+kdtree_sample_simple02();
+kdtree_sample_simple03();
+kdtree_sample_simple04();
+kdtree_sample_simple05();
diff --git a/pyclustering/container/kdtree.py b/pyclustering/container/kdtree.py
@@ -154,7 +154,29 @@ def __str__(self):
 
 class kdtree:
     """!
-    @brief Represents KD Tree.
+    @brief Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimensional space.
+    
+    Examples:
+    @code
+        # Import required modules
+        from pyclustering.samples.definitions import SIMPLE_SAMPLES;
+        from pyclustering.container.kdtree import kdtree;
+        from pyclustering.utils import read_sample;
+        
+        # Read data from text file
+        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3);
+        
+        # Create an instance of KD-tree and fill it with the data that was read.
+        tree_instance = kdtree(sample);
+        
+        # Search for nearest point
+        search_distance = 0.3;
+        nearest_node = tree_instance.find_nearest_dist_node([1.12, 4.31], search_distance);
+        
+        # Search for all nodes within the search distance (radius 0.3)
+        nearest_nodes = tree_instance.find_nearest_dist_nodes([1.12, 4.31], search_distance);
+        print("Nearest nodes:", nearest_nodes);
+    @endcode
     
     """