From ccd4bac2f098fa66741e04db479c980025c60dce Mon Sep 17 00:00:00 2001 From: annoviko Date: Wed, 25 Oct 2017 11:57:18 +0300 Subject: [PATCH] #369: Cosmetic changes, KD-tree example, documentation, DBSCAN examples. --- CHANGES | 4 ++ pyclustering/cluster/dbscan.py | 4 +- .../cluster/examples/dbscan_examples.py | 27 ++++++-- pyclustering/container/examples/__init__.py | 24 +++++++ .../container/examples/kdtree_examples.py | 62 +++++++++++++++++++ pyclustering/container/kdtree.py | 24 ++++++- 6 files changed, 138 insertions(+), 7 deletions(-) create mode 100755 pyclustering/container/examples/__init__.py create mode 100755 pyclustering/container/examples/kdtree_examples.py diff --git a/CHANGES b/CHANGES index 906ca824..00638dbd 100755 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ CHANGE NOTES FOR 0.7.3 (STARTED Oct 23, 2017) ------------------------------------------------------------------------ +GENERAL CHANGES: +- Optimization of DBSCAN algorithm using KD-tree for searching nearest neighbors (pyclustering.cluster.dbscan) + See: https://github.com/annoviko/pyclustering/issues/369 + CORRECTED MAJOR BUGS: - Corrected bug with returned nullptr in method 'kdtree_searcher::find_nearest_node()' (ccore.container.kdtree) See: no reference. diff --git a/pyclustering/cluster/dbscan.py b/pyclustering/cluster/dbscan.py index b5e2e945..23cd2e6d 100755 --- a/pyclustering/cluster/dbscan.py +++ b/pyclustering/cluster/dbscan.py @@ -31,7 +31,7 @@ from pyclustering.cluster.encoder import type_encoding; import pyclustering.core.dbscan_wrapper as wrapper; - +from pyclustering.utils import euclidean_distance_sqrt; class dbscan: """! 
@@ -201,4 +201,4 @@ def __neighbor_indexes(self, index_point): kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps); return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point]; - #return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode + # return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode diff --git a/pyclustering/cluster/examples/dbscan_examples.py b/pyclustering/cluster/examples/dbscan_examples.py index 88d53622..0e1e65a8 100644 --- a/pyclustering/cluster/examples/dbscan_examples.py +++ b/pyclustering/cluster/examples/dbscan_examples.py @@ -23,6 +23,9 @@ """ + +import random; + from pyclustering.cluster import cluster_visualizer; from pyclustering.cluster.dbscan import dbscan; @@ -31,6 +34,7 @@ from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES; + def template_clustering(radius, neighb, path, invisible_axes = False, ccore = True, show = True): sample = read_sample(path); @@ -76,14 +80,14 @@ def cluster_elongate(): template_clustering(0.5, 3, SIMPLE_SAMPLES.SAMPLE_ELONGATE); def cluster_lsun(): - template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN); + template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN); def cluster_target(): - template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET); + template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET); def cluster_two_diamonds(): "It's hard to choose properly parameters, but it's OK" - template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS); + template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS); def cluster_wing_nut(): "It's hard to choose properly parameters, but it's OK" @@ -170,6 +174,15 @@ def 
display_fcps_dependence_clustering_results(): visualizer.show(); +def clustering_random_points(amount, ccore): + sample = [ [ random.random(), random.random() ] for _ in range(amount) ]; + + dbscan_instance = dbscan(sample, 0.1, 20, ccore); + (ticks, _) = timedcall(dbscan_instance.process); + + print("Execution time ("+ str(amount) +" 2D-points):", ticks); + + cluster_sample1(); cluster_sample2(); cluster_sample3(); @@ -193,4 +206,10 @@ def display_fcps_dependence_clustering_results(): experiment_execution_time(True); # C++ code + Python env. display_fcps_clustering_results(); -display_fcps_dependence_clustering_results(); \ No newline at end of file +display_fcps_dependence_clustering_results(); + +clustering_random_points(1000, False); +clustering_random_points(2000, False); +clustering_random_points(3000, False); +clustering_random_points(4000, False); +clustering_random_points(5000, False); \ No newline at end of file diff --git a/pyclustering/container/examples/__init__.py b/pyclustering/container/examples/__init__.py new file mode 100755 index 00000000..6061f0a5 --- /dev/null +++ b/pyclustering/container/examples/__init__.py @@ -0,0 +1,24 @@ +"""! + +@brief Collection of examples devoted to containers. + +@authors Andrei Novikov (pyclustering@yandex.ru) +@date 2014-2017 +@copyright GNU Public License + +@cond GNU_PUBLIC_LICENSE + PyClustering is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PyClustering is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+@endcond + +""" \ No newline at end of file diff --git a/pyclustering/container/examples/kdtree_examples.py b/pyclustering/container/examples/kdtree_examples.py new file mode 100755 index 00000000..631ddac1 --- /dev/null +++ b/pyclustering/container/examples/kdtree_examples.py @@ -0,0 +1,62 @@ +"""! + +@brief Examples devoted to KD-tree. + +@authors Andrei Novikov (pyclustering@yandex.ru) +@date 2014-2017 +@copyright GNU Public License + +@cond GNU_PUBLIC_LICENSE + PyClustering is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + PyClustering is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+@endcond + +""" + + +from pyclustering.container.kdtree import kdtree, kdtree_text_visualizer; + +from pyclustering.utils import read_sample; + +from pyclustering.samples.definitions import SIMPLE_SAMPLES; + + +def template_build_visualize(sample_path): + print("KD Tree for sample: '" + sample_path + "'"); + sample = read_sample(sample_path); + tree_instance = kdtree(sample); + + kdtree_text_visualizer(tree_instance).visualize(True); + + +def kdtree_sample_simple01(): + template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE1); + +def kdtree_sample_simple02(): + template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE2); + +def kdtree_sample_simple03(): + template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE3); + +def kdtree_sample_simple04(): + template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE4); + +def kdtree_sample_simple05(): + template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE5); + + +kdtree_sample_simple01(); +kdtree_sample_simple02(); +kdtree_sample_simple03(); +kdtree_sample_simple04(); +kdtree_sample_simple05(); \ No newline at end of file diff --git a/pyclustering/container/kdtree.py b/pyclustering/container/kdtree.py index 9d57af74..a264d649 100755 --- a/pyclustering/container/kdtree.py +++ b/pyclustering/container/kdtree.py @@ -154,7 +154,29 @@ def __str__(self): class kdtree: """! - @brief Represents KD Tree. + @brief Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimensional space. + + Examples: + @code + # Import required modules + from pyclustering.samples.definitions import SIMPLE_SAMPLES; + from pyclustering.container.kdtree import kdtree; + from pyclustering.utils import read_sample; + + # Read data from text file + sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3); + + # Create instance of KD-tree and initialize (fill) it by read data. 
+ tree_instance = kdtree(sample); + + # Search for nearest point + search_distance = 0.3; + nearest_node = tree_instance.find_nearest_dist_node([1.12, 4.31], search_distance); + + # Search for nearest point in radius 0.3 + nearest_nodes = tree_instance.find_nearest_dist_nodes([1.12, 4.31], search_distance); + print("Nearest nodes:", nearest_nodes); + @endcode """