Skip to content

Commit

Permalink
#589, #588, #587, #379: Optimized balanced KD-tree for C++ pyclusteri…
Browse files Browse the repository at this point in the history
…ng. DBSCAN, OPTICS, CURE optimization.
  • Loading branch information
annoviko committed Feb 18, 2020
1 parent 2d4a03a commit 3539259
Show file tree
Hide file tree
Showing 4 changed files with 655 additions and 0 deletions.
198 changes: 198 additions & 0 deletions ccore/include/pyclustering/container/kdtree_balanced.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright GNU Public License
@cond GNU_PUBLIC_LICENSE
pyclustering is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
pyclustering is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond
*/


#pragma once

#include "kdnode.hpp"

#include <functional>
#include <memory>
#include <vector>

#include <pyclustering/definitions.hpp>


namespace pyclustering {

namespace container {


/*!
@brief Represents a balanced static KD-tree that does not provide services to add or remove nodes after
initialization.
@details In the term KD-tree, k denotes the dimensionality of the space being represented. Each data point is
represented as a node in the KD-tree in the form of a record of type node.
There is an example of how to create a KD-tree:
@code
TODO:
@endcode
There is an illustration of a balanced KD-tree below that was produced by the Python version of the pyclustering library.
@image html kd_tree_balanced_lsun.png "Fig. 1. Balanced KD-tree for sample 'Lsun'."
Implementation based on paper @cite book::the_design_and_analysis.
*/
class kdtree_balanced {
protected:
/* Root node of the tree; `nullptr` for a default-constructed tree. */
kdnode::ptr m_root = nullptr;

/* NOTE(review): presumably the dimensionality of the stored points - confirm in the implementation. */
std::size_t m_dimension = 0;

/* NOTE(review): presumably the amount of nodes reported by `get_size()` - confirm in the implementation. */
std::size_t m_size = 0;

public:
/*!
@brief Default constructor of balanced KD-tree.
@details Leaves the tree with a null root, zero dimension and zero size (see the in-class member initializers).
*/
kdtree_balanced() = default;

/*!
@brief Parameterized constructor of balanced KD-tree.
@param[in] p_data: data that should be stored in the tree.
@param[in] p_payloads: payload for each point in `p_data`; empty by default. The vector is taken by value, so
it is copied on every call.
*/
kdtree_balanced(const dataset & p_data, const std::vector<void *> p_payloads = { });

/*!
@brief Default copy constructor of balanced KD-tree.
@details Performs a member-wise copy. NOTE(review): `m_root` is a `kdnode::ptr`, so the copy presumably refers
to the same nodes as `p_other` rather than duplicating them - confirm against the definition of `kdnode::ptr`.
@param[in] p_other: another tree that is copied to the constructed tree.
*/
kdtree_balanced(const kdtree_balanced & p_other) = default;

/*!
@brief Default move constructor of balanced KD-tree.
@param[in,out] p_other: another tree that is moved to the constructed tree.
*/
kdtree_balanced(kdtree_balanced && p_other) = default;

/*!
@brief Default destructor of balanced KD-tree.
@details Declared virtual so that the class can be safely destroyed through a pointer to the base class.
*/
virtual ~kdtree_balanced() = default;

public:
/*!
@brief Find node in KD-tree using coordinates.
@param[in] p_point: coordinates of searched node.
@return Pointer to found node in tree. NOTE(review): presumably `nullptr` when there is no such node - confirm
in the implementation.
*/
kdnode::ptr find_node(const point & p_point) const;

/*!
@brief Find node in KD-tree using coordinates and payload.
@param[in] p_point: coordinates of searched node.
@param[in] p_payload: payload that is used to identify node.
@return Pointer to found node in tree. NOTE(review): presumably `nullptr` when there is no such node - confirm
in the implementation.
*/
kdnode::ptr find_node(const point & p_point, const void * p_payload) const;

/*!
@brief Return the root of the tree.
@return Returns pointer to the root of the tree.
*/
kdnode::ptr get_root() const;

/*!
@brief Return size of KD-tree.
@return Returns amount of nodes in KD-tree.
*/
std::size_t get_size() const;

protected:
/*!
@brief Creates sub-tree of KD-tree from node `p_parent`.
@details Both iterators are taken by non-const lvalue reference, therefore callers have to pass named iterator
variables - temporaries such as `nodes.begin()` cannot be bound to these parameters.
@param[in] p_begin: iterator to the beginning of the collection that should be used to build KD-tree.
@param[in] p_end: iterator to the end of the collection that should be used to build KD-tree.
@param[in] p_parent: node that is parent for tree that is going to be built.
@param[in] p_depth: depth of the tree where children of the `p_parent` should be placed.
@return Returns a node that is a root for the created sub-tree.
*/
kdnode::ptr create_tree(
std::vector<kdnode::ptr>::iterator & p_begin,
std::vector<kdnode::ptr>::iterator & p_end,
const kdnode::ptr & p_parent, const std::size_t p_depth);

public:
/*!
@brief Copy assignment operator for KD-tree.
@param[in] p_other: another KD-tree that should be copied to the tree.
@return Returns reference to KD-tree to where another tree was copied.
*/
kdtree_balanced & operator=(const kdtree_balanced & p_other);

/*!
@brief Move assignment operator for KD-tree.
@param[in,out] p_other: another KD-tree that should be moved to the tree.
@return Returns reference to KD-tree to where another was moved.
*/
kdtree_balanced & operator=(kdtree_balanced && p_other);
};


}

}
180 changes: 180 additions & 0 deletions ccore/include/pyclustering/container/kdtree_searcher.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright GNU Public License
@cond GNU_PUBLIC_LICENSE
pyclustering is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
pyclustering is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond
*/

#pragma once

#include <functional>
#include <vector>

#include <pyclustering/container/kdnode.hpp>
#include <pyclustering/definitions.hpp>


namespace pyclustering {

namespace container {


/*!
@brief Searcher for a KD-tree: provides services for finding nodes that are near a specified point in a KD-tree.
*/
class kdtree_searcher {
public:
/* User-defined storing rule that is accepted by `find_nearest`: receives a KD-node and a `double` value. */
/* NOTE(review): the `double` argument is presumably the distance to the node - confirm in the implementation. */
using rule_store = std::function<void(const kdnode::ptr, const double)>;

private:
/* Internal callback that receives a single KD-node. */
using proc_store = std::function<void(const kdnode::ptr)>;

private:
/* Temporary search state. These members are `mutable` because the search methods of the class are `const`. */
mutable std::vector<double> m_nodes_distance = { };
mutable std::vector<kdnode::ptr> m_nearest_nodes = { };
mutable dataset m_nearest_points = { };

/* NOTE(review): presumably `m_user_rule` keeps the rule passed to `find_nearest` and `m_proc` is bound to one */
/* of the `store_*` methods below - confirm in the implementation. */
mutable rule_store m_user_rule = nullptr;
mutable proc_store m_proc = nullptr;

/* Search request. Both distances are equal to -1 until a request is initialized. */
/* NOTE(review): how `m_distance` and `m_sqrt_distance` relate to `radius_search` is not visible here. */
double m_distance = -1;
double m_sqrt_distance = -1;
kdnode::ptr m_initial_node = nullptr;
std::vector<double> m_search_point = { };

public:
/**
*
* @brief Default constructor. Search will not be performed until it's initialized.
*
*/
kdtree_searcher() = default;

/**
*
* @brief Constructor of searcher with request for searching.
*
* @param[in] point: point for which nearest nodes should be found.
* @param[in] node: initial node in tree from which searching should start.
* @param[in] radius_search: allowable distance for searching from the point.
*
*/
kdtree_searcher(const std::vector<double> & point, const kdnode::ptr & node, const double radius_search);

/**
*
* @brief Default destructor.
*
*/
~kdtree_searcher() = default;

public:
/**
*
* @brief Search nodes that are located in specified distance from specified point.
*
* @details The method does not return a value: results are delivered via the output arguments.
*
* @param[out] p_distances: distances from the point to nodes in the location (that are radius-reachable).
* @param[out] p_nearest_nodes: nodes in the location (radius-reachable).
*
*/
void find_nearest_nodes(std::vector<double> & p_distances, std::vector<kdnode::ptr> & p_nearest_nodes) const;

/**
*
* @brief Search the nearest node in specified location for specified point in the request.
*
* @return Return pointer to the nearest node in kd tree that satisfies the request.
*
*/
kdnode::ptr find_nearest_node() const;

/**
*
* @brief Search the nearest nodes and store information about found nodes using user-defined way.
*
* @param[in] p_store_rule: defines how to store KD-node.
*
*/
void find_nearest(const rule_store & p_store_rule) const;

private:
/**
*
* @brief Initialization of new request for searching.
*
* @param[in] point: point for which nearest nodes should be found.
* @param[in] node: initial node in tree from which searching should start.
* @param[in] radius_search: allowable distance for searching from the point.
*
*/
void initialize(const std::vector<double> & point, const kdnode::ptr & node, const double radius_search);

/**
*
* @brief Clear internal temporary structures.
*
*/
void clear() const;

/**
*
* @brief Recursive method for searching nodes that satisfy the request.
*
* @param[in] node: initial node in tree from which searching should be performed.
*
*/
void recursive_nearest_nodes(const kdnode::ptr & node) const;

/**
*
* @brief Append to storage reachable node and distance to it.
*
* @param[in] node: node that should be added to best collection if it is reachable.
*
*/
void store_if_reachable(const kdnode::ptr & node) const;

/**
*
* @brief Store only one node in collection if it is the best node.
*
* @param[in] node: node that should replace the stored one if it is reachable and it is the best node.
*
*/
void store_best_if_reachable(const kdnode::ptr & node) const;

/**
*
* @brief Store nodes using user-defined rule.
*
* @param[in] node: node that should be passed to the user-defined rule if it is reachable.
*
*/
void store_user_nodes_if_reachable(const kdnode::ptr & node) const;
};


}

}

0 comments on commit 3539259

Please sign in to comment.