Skip to content

Commit

Permalink
#369: Cosmetic changes, KD-tree example, documentation, DBSCAN examples.
Browse files Browse the repository at this point in the history
  • Loading branch information
annoviko committed Oct 25, 2017
1 parent 242f198 commit ccd4bac
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Expand Up @@ -4,6 +4,10 @@ CHANGE NOTES FOR 0.7.3 (STARTED Oct 23, 2017)

------------------------------------------------------------------------

GENERAL CHANGES:
- Optimization of DBSCAN algorithm using KD-tree for searching nearest neighbors (pyclustering.cluster.dbscan)
See: https://github.com/annoviko/pyclustering/issues/369

CORRECTED MAJOR BUGS:
- Corrected bug with returned nullptr in method 'kdtree_searcher::find_nearest_node()' (ccore.container.kdtree)
See: no reference.
Expand Down
4 changes: 2 additions & 2 deletions pyclustering/cluster/dbscan.py
Expand Up @@ -31,7 +31,7 @@
from pyclustering.cluster.encoder import type_encoding;

import pyclustering.core.dbscan_wrapper as wrapper;

from pyclustering.utils import euclidean_distance_sqrt;

class dbscan:
"""!
Expand Down Expand Up @@ -201,4 +201,4 @@ def __neighbor_indexes(self, index_point):
kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps);
return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point];

#return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode
# return [i for i in range(0, len(self.__pointer_data)) if euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[i]) <= self.__sqrt_eps and (i != index_point) ]; # Fast mode
27 changes: 23 additions & 4 deletions pyclustering/cluster/examples/dbscan_examples.py
Expand Up @@ -23,6 +23,9 @@
"""


import random;

from pyclustering.cluster import cluster_visualizer;
from pyclustering.cluster.dbscan import dbscan;

Expand All @@ -31,6 +34,7 @@

from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;


def template_clustering(radius, neighb, path, invisible_axes = False, ccore = True, show = True):
sample = read_sample(path);

Expand Down Expand Up @@ -76,14 +80,14 @@ def cluster_elongate():
template_clustering(0.5, 3, SIMPLE_SAMPLES.SAMPLE_ELONGATE);

def cluster_lsun():
template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN);
template_clustering(0.5, 3, FCPS_SAMPLES.SAMPLE_LSUN);

def cluster_target():
template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET);
template_clustering(0.5, 2, FCPS_SAMPLES.SAMPLE_TARGET);

def cluster_two_diamonds():
"It's hard to choose properly parameters, but it's OK"
template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS);
template_clustering(0.15, 7, FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS);

def cluster_wing_nut():
"It's hard to choose properly parameters, but it's OK"
Expand Down Expand Up @@ -170,6 +174,15 @@ def display_fcps_dependence_clustering_results():
visualizer.show();


def clustering_random_points(amount, ccore):
    """!
    @brief Runs DBSCAN on randomly generated 2D-points and prints the measured processing time.

    @param[in] amount (uint): Number of random 2D-points to generate.
    @param[in] ccore (bool): Forwarded to dbscan as its fourth argument (presumably selects the C++ core - confirm).

    """
    # Each point is built from two consecutive random.random() calls (x first, then y).
    points = [];
    for _ in range(amount):
        points.append([ random.random(), random.random() ]);

    instance = dbscan(points, 0.1, 20, ccore);

    # Only the first element returned by timedcall is reported; the second is discarded.
    elapsed, _ = timedcall(instance.process);

    print("Execution time ("+ str(amount) +" 2D-points):", elapsed);


cluster_sample1();
cluster_sample2();
cluster_sample3();
Expand All @@ -193,4 +206,10 @@ def display_fcps_dependence_clustering_results():
experiment_execution_time(True); # C++ code + Python env.

display_fcps_clustering_results();
display_fcps_dependence_clustering_results();
display_fcps_dependence_clustering_results();

# Measure pure-Python DBSCAN on growing amounts of random points.
for amount in (1000, 2000, 3000, 4000, 5000):
    clustering_random_points(amount, False);
24 changes: 24 additions & 0 deletions pyclustering/container/examples/__init__.py
@@ -0,0 +1,24 @@
"""!
@brief Collection of examples devoted to containers.
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2017
@copyright GNU Public License
@cond GNU_PUBLIC_LICENSE
PyClustering is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PyClustering is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond
"""
62 changes: 62 additions & 0 deletions pyclustering/container/examples/kdtree_examples.py
@@ -0,0 +1,62 @@
"""!
@brief Examples devoted to KD-tree.
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2017
@copyright GNU Public License
@cond GNU_PUBLIC_LICENSE
PyClustering is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PyClustering is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond
"""


from pyclustering.container.kdtree import kdtree, kdtree_text_visualizer;

from pyclustering.utils import read_sample;

from pyclustering.samples.definitions import SIMPLE_SAMPLES;


def template_build_visualize(sample_path):
    """!
    @brief Reads a sample, builds a KD-tree from it and shows the tree using the text visualizer.

    @param[in] sample_path (string): Path to the sample file; it is also printed in the header line.

    """
    header = "KD Tree for sample: '" + sample_path + "'";
    print(header);

    points = read_sample(sample_path);
    visualizer = kdtree_text_visualizer(kdtree(points));
    visualizer.visualize(True);


def kdtree_sample_simple01():
    """Build and display the KD-tree for SIMPLE_SAMPLES.SAMPLE_SIMPLE1."""
    sample_path = SIMPLE_SAMPLES.SAMPLE_SIMPLE1;
    template_build_visualize(sample_path);

def kdtree_sample_simple02():
    """Build and display the KD-tree for SIMPLE_SAMPLES.SAMPLE_SIMPLE2."""
    sample_path = SIMPLE_SAMPLES.SAMPLE_SIMPLE2;
    template_build_visualize(sample_path);

def kdtree_sample_simple03():
    """Build and display the KD-tree for SIMPLE_SAMPLES.SAMPLE_SIMPLE3."""
    sample_path = SIMPLE_SAMPLES.SAMPLE_SIMPLE3;
    template_build_visualize(sample_path);

def kdtree_sample_simple04():
    """Build and display the KD-tree for SIMPLE_SAMPLES.SAMPLE_SIMPLE4."""
    sample_path = SIMPLE_SAMPLES.SAMPLE_SIMPLE4;
    template_build_visualize(sample_path);

def kdtree_sample_simple05():
    """Build and display the KD-tree for SIMPLE_SAMPLES.SAMPLE_SIMPLE5."""
    # Previously this passed SAMPLE_SIMPLE4 (copy-paste slip), so example 05
    # merely repeated example 04 instead of covering its own sample.
    template_build_visualize(SIMPLE_SAMPLES.SAMPLE_SIMPLE5);


# Run every KD-tree example in the order they are defined above.
for example in (kdtree_sample_simple01,
                kdtree_sample_simple02,
                kdtree_sample_simple03,
                kdtree_sample_simple04,
                kdtree_sample_simple05):
    example();
24 changes: 23 additions & 1 deletion pyclustering/container/kdtree.py
Expand Up @@ -154,7 +154,29 @@ def __str__(self):

class kdtree:
"""!
@brief Represents KD Tree.
@brief Represents KD Tree that is a space-partitioning data structure for organizing points in a k-dimensional space.
Examples:
@code
# Import required modules
from pyclustering.samples.definitions import SIMPLE_SAMPLES;
from pyclustering.container.kdtree import kdtree;
from pyclustering.utils import read_sample;
# Read data from text file
sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3);
# Create an instance of the KD-tree and fill it with the data that was read.
tree_instance = kdtree(sample);
# Search for the nearest point within the search distance
search_distance = 0.3;
nearest_node = tree_instance.find_nearest_dist_node([1.12, 4.31], search_distance);
# Search for all nearest points within radius 0.3
nearest_nodes = tree_instance.find_nearest_dist_nodes([1.12, 4.31], search_distance);
print("Nearest nodes:", nearest_nodes);
@endcode
"""

Expand Down

0 comments on commit ccd4bac

Please sign in to comment.