Merge branch 'release/1.1'

gieseke · Dec 1, 2015 · ca329f1 · ca329f1
2 parents 6069918 + 83957b6
commit ca329f1
Show file tree

Hide file tree

Showing 51 changed files with 1,106 additions and 712 deletions.
diff --git a/.gitignore b/.gitignore
@@ -70,3 +70,4 @@ target/
 # Eclipse
 .project
 .pydevproject
+.settings
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -4,11 +4,12 @@ include LICENSE
 include requirements.txt
 
 recursive-include examples *.py
-recursive-include bufferkdtree *.c *.h *.pyx *.pxd *.pxi *.i *.cl
+recursive-include bufferkdtree *.c *.h *.i *.cl
 
 include docs/conf.py
 include docs/Makefile
 recursive-include docs *.rst
-include docs/_static/*
+include docs/_static/bibtex/*
+include docs/_static/images/*
 
 exclude MANIFEST.in
diff --git a/README.rst b/README.rst
@@ -2,9 +2,9 @@
 bufferkdtree
 ============
 
-The bufferkdtree library is a Python library that aims at accelerating nearest neighbor computations using both k-d trees and many-core devices (e.g., GPUs) via the `OpenCL <https://www.khronos.org/opencl/OpenCL>`_ framework. 
+The bufferkdtree package is a Python library that aims at accelerating nearest neighbor computations using both k-d trees and modern many-core devices such as graphics processing units (GPUs). The implementation is based on `OpenCL <https://www.khronos.org/opencl/OpenCL>`_. 
 
-The buffer k-d tree technique can be seen as an intermediate version between a standard parallel k-d tree traversal and massively-parallel brute-force implementations for nearest neigbhor search. The implementation is well-suited for data sets with a large reference set (e.g., 1,000,000 points) and a huge query set (e.g., 10,000,000 points) with a moderate-sized feature space (e.g., from d=5 to d=25).
+The implementation can be seen as an intermediate version between a standard parallel k-d tree traversal (on multi-core systems) and a massively-parallel brute-force implementation for nearest neighbor search. In particular, it makes use of the top of a standard k-d tree (which induces a spatial subdivision of the space) and resorts to a simple yet efficient brute-force implementation for processing chunks of "big" leaves. The implementation is well-suited for data sets with a large reference set (e.g., 1,000,000 points) and a huge query set (e.g., 10,000,000 points) given a moderate dimensionality of the search space (e.g., from d=5 to d=25).
 
 =============
 Documentation
@@ -16,15 +16,15 @@ See the `documentation <http://bufferkdtree.readthedocs.org>`_ for details and e
 Quickstart
 ==========
 
-The package can be installed via pip via::
+The package can easily be installed with pip via::
 
   pip install bufferkdtree
 
-To install the package from the sources, get the current version via::
+To install the package from the sources, first get the current version via::
 
   git clone https://github.com/gieseke/bufferkdtree.git
 
-To install the package locally on a Linux system, use::
+Afterwards, on Linux systems, you can install the package locally for the current user via::
 
   python setup.py install --user
 
@@ -33,8 +33,6 @@ On Debian/Ubuntu systems, the package can be installed globally for all users vi
   python setup.py build
   sudo python setup.py install
 
-To run the tests, type ``nosetests -v bufferkdtree`` from *outside* the source directory.
-
 ============
 Dependencies
 ============
@@ -43,7 +41,7 @@ The bufferkdtree package is tested under Python 2.6 and Python 2.7. The required
 
 - NumPy >= 1.6.1
 
-and a working C/C++ compiler. Further, `Swig <http://www.swig.org>`_ and `OpenCL <https://www.khronos.org/opencl/OpenCL>`_ need to be installed. See the `documentation <http://bufferkdtree.readthedocs.org>`_ for more details.
+Further, `Swig <http://www.swig.org>`_, `OpenCL <https://www.khronos.org/opencl/OpenCL>`_, `setuptools <https://pypi.python.org/pypi/setuptools>`_, and a working C/C++ compiler need to be available. See the `documentation <http://bufferkdtree.readthedocs.org>`_ for more details.
 
 ==========
 Disclaimer

diff --git a/bufferkdtree/__init__.py b/bufferkdtree/__init__.py
@@ -6,4 +6,4 @@
 
 import os
 
-__version__ = '1.0.2'
+__version__ = '1.1'
diff --git a/bufferkdtree/neighbors/base.py b/bufferkdtree/neighbors/base.py
@@ -59,12 +59,12 @@ class NearestNeighbors(object):
         plat_dev_ids={0:[0,1]} makes use of platform 0 and
         the first two devices.
         
-    allowed_train_mem_percent_chunk : float, optional (default=0.2)
+    allowed_train_mem_percent_chunk : float, optional (default=0.15)
         Passed to the 'buffer_kd_tree' implementation.
         The amount of memory (OpenCL) used for the 
         training patterns (in percent).
          
-    allowed_test_mem_percent : float, optional (default=0.8)
+    allowed_test_mem_percent : float, optional (default=0.55)
         Passed to the 'buffer_kd_tree' implementation.
         The amount of memory (OpenCL) used for the 
         test/query patterns (in percent).
@@ -113,8 +113,8 @@ def __init__(self, \
                  splitting_type="cyclic", \
                  n_train_chunks=1, \
                  plat_dev_ids={0:[0]}, \
-                 allowed_train_mem_percent_chunk=0.2, \
-                 allowed_test_mem_percent=0.8, \
+                 allowed_train_mem_percent_chunk=0.15, \
+                 allowed_test_mem_percent=0.55, \
                  n_jobs=1, \
                  verbose=0, \
                  **kwargs):
@@ -271,64 +271,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
             else:
                 neigh_ind = result            
                 neigh_ind = neigh_ind[:, 1:]
-                return neigh_ind        
-
-    def compute_optimal_tree_depth(self, Xtrain, Xtest, target="test", tree_depths=None):
-        """ Computes the optimal tree depth for the 
-        tree-based implementations. The method tests
-        various assignments of the parameters and 
-        simply measures the time needed for the approach
-        tp finish.
-        
-        Parameters
-        ----------
-        Xtrain : array-like, shape (n_samples, n_features)
-            The set of training/reference points, where
-            'n_samples' is the number points and 
-            'n_features' the number of features.
-            
-        Xtest : array-like, shape (n_samples, n_features)
-            The set of testing/querying points, where
-            'n_samples' is the number points and 
-            'n_features' the number of features.
-            
-        target : {'train', 'test', 'both'}, optional (default='test')
-            The runtime target, i.e., which phase shall 
-            be optimized. Three choices:
-            - 'train' : The training phase
-            - 'test' : The testing phase
-            - 'both' : Both phases
-        
-        tree_depths : list or None, optional
-            The range of different tree depths that 
-            shall be tested. If None, then the default
-            ranges are used by the different implementations:
-            
-            - buffer_kd_tree : range(2, max_depth - 1)
-            - kd_tree : range(4, max_depth - 1)
-            
-            where max_depth = int(math.floor(math.log(len(Xtrain), 2)))
-        
-        Returns
-        -------
-        opt_height : int
-            The optimal tree depth
-        """
-
-        ALLOWED_TARGETS = ['train', 'test', 'both']
-
-        if self.algorithm not in ["kd_tree", "buffer_kd_tree"]:
-            raise Exception("Optimal tree depth can only be \
-                    determined for tree-based methods!")
-
-        if target not in ALLOWED_TARGETS:
-            raise Exception("Target is not valid (allowed ones are " + \
-                            unicode(ALLOWED_TARGETS) + ": " + unicode(target))
-
-        return self._get_wrapper().compute_optimal_tree_depth(Xtrain=Xtrain, \
-                                                              Xtest=Xtest, \
-                                                              target=target, \
-                                                              tree_depths=tree_depths)
+                return neigh_ind
 
     def _set_internal_data_types(self):
         """ Set numpy float and int dtypes