In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from rptree.nearestNeighbor import makeForest, euclidean

In [3]:
# Seed NumPy's global RNG so the Gaussian sample and the row permutation drawn
# below are reproducible; the index assertions at the end of the notebook
# compare against positions derived from that permutation.
# NOTE(review): the recorded makeForest profile shows calls to
# RandomState.normal, so tree construction presumably draws from this same
# global state -- confirm in rptree.nearestNeighbor.
np.random.seed(1)

In [4]:
# Three query points placed far from the origin. Consecutive offsets are
# (3, 4) and (6, 8), so the pairwise Euclidean distances are exactly
# 5 (first-second), 10 (second-third) and 15 (first-third).
isolated_points = np.asarray(
    [
        (110, 110),
        (113, 114),
        (119, 122),
    ],
    dtype=np.float64,
)

In [5]:
# Build the test matrix: the isolated points plus standard-normal filler rows,
# n_obs rows in total, then shuffle the rows.
n_obs = 10000
n_col = isolated_points.shape[1]

# Draw the filler first, then the permutation, so the global RNG is consumed
# in a fixed order.
n_background = n_obs - len(isolated_points)
background = np.random.normal(loc=0, scale=1, size=(n_background, n_col))
perm = np.random.permutation(n_obs)

# Row j of `data` is row perm[j] of the stacked (isolated, background) matrix.
data = np.concatenate((isolated_points, background), axis=0)[perm]

In [6]:
# Profile forest construction. The -r flag makes %prun hand back the profiler
# statistics object, which is bound to `pstat` here. The profiled statement
# also binds `forest`, which the kneighbors cell below queries.
# NOTE(review): maxLeafSize / numTrees semantics live in rptree.nearestNeighbor;
# the recorded profile shows 20 primitive makeTree calls, matching numTrees = 20.
pstat = %prun -r \
forest = makeForest(data, maxLeafSize = 10, numTrees = 20, \
    distanceFunction = euclidean)

# Label the report with the dataset size, then print the collected statistics.
print("makeForest step: n_obs = {}".format(n_obs))
pstat.print_stats()
# Bare None as the final expression; presumably here so the notebook does not
# also echo the value returned by print_stats() -- confirm.
None

 makeForest step: n_obs = 10000
         920116 function calls (860758 primitive calls) in 1.416 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
 59378/20    0.436    0.000    1.415    0.071 nearestNeighbor.py:37(makeTree)
    29679    0.130    0.000    0.130    0.000 {method 'normal' of 'mtrand.RandomState' objects}
    29679    0.117    0.000    0.885    0.000 nearestNeighbor.py:70(chooseRule)
    29679    0.101    0.000    0.199    0.000 linalg.py:2022(norm)
    29679    0.085    0.000    0.085    0.000 {method 'partition' of 'numpy.ndarray' objects}
    29679    0.081    0.000    0.410    0.000 nearestNeighbor.py:32(randomUnitVector)
    29679    0.067    0.000    0.067    0.000 nearestNeighbor.py:126(__init__)
    59358    0.059    0.000    0.059    0.000 {numpy.core.multiarray.dot}
    29679    0.039    0.000    0.253    0.000 nearestNeighbor.py:6(selectQuantile)
    29679    0.038    0.000    0.038    0.000 {method '

In [7]:
# Profile the neighbor query for the three isolated points (k = 3). The -r flag
# makes %prun hand back the profiler statistics object, bound to `pstat`; the
# profiled statement binds `distances` and `indices`, which the assertion
# cells below check.
pstat = %prun -r \
distances, indices = forest.kneighbors(isolated_points, k = 3)

# Label the report with the dataset size, then print the collected statistics.
print("kneighbors step: n_obs = {}".format(n_obs))
pstat.print_stats()
# Bare None as the final expression; presumably here so the notebook does not
# also echo the value returned by print_stats() -- confirm.
None

 kneighbors step: n_obs = 10000
         316393 function calls (286734 primitive calls) in 0.525 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
 29679/20    0.244    0.000    0.503    0.025 nearestNeighbor.py:131(getLeaves)
    59540    0.082    0.000    0.082    0.000 {numpy.core.multiarray.copyto}
    29679    0.060    0.000    0.081    0.000 nearestNeighbor.py:80(__call__)
    59702    0.039    0.000    0.039    0.000 {numpy.core.multiarray.empty}
    59378    0.035    0.000    0.156    0.000 numeric.py:258(full)
    29699    0.023    0.000    0.126    0.000 nearestNeighbor.py:90(getLeaves)
    30087    0.021    0.000    0.021    0.000 {numpy.core.multiarray.dot}
     4602    0.002    0.000    0.002    0.000 {getattr}
      282    0.002    0.000    0.006    0.000 core.py:2913(__array_finalize__)
      402    0.002    0.000    0.004    0.000 core.py:2887(_update_from)
      246    0.002    0.000    0.003    0.000 core.py

In [8]:
# Hand-computed neighbor distances for each isolated point, nearest first.
# Column 0 is the point itself (distance 0); the others follow from the
# pairwise distances 5, 10 and 15 between the three isolated points.
expected = np.array(
    [
        (0.0, 5.0, 15.0),
        (0.0, 5.0, 10.0),
        (0.0, 10.0, 15.0),
    ]
)
np.testing.assert_allclose(distances, expected)

In [9]:
# Locate the isolated points in the shuffled matrix.
# After `data = data[perm, :]`, shuffled row j holds original row perm[j], so
# the inverse permutation maps an original row number to its shuffled position.
# Because perm is a permutation of 0..n_obs-1, argsort(perm) is that inverse.
# Computing it once avoids converting perm to a Python list and scanning it
# linearly for every isolated point.
inverse_perm = np.argsort(perm)
# Original rows 0, 1, 2 are the isolated points (they were stacked first).
i1, i2, i3 = (int(position) for position in inverse_perm[:3])

In [10]:
# Expected neighbor ordering for each isolated point, nearest first: every
# point is its own nearest neighbor, followed by the other two isolated points
# in order of increasing distance.
expected = np.array(
    [
        (i1, i2, i3),
        (i2, i1, i3),
        (i3, i2, i1),
    ],
    dtype=int,
)
np.testing.assert_array_equal(indices, expected)