In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from rptree.nearestNeighbor import makeForest, euclidean

In [3]:
# Seed NumPy's global random state so the np.random draws in later cells
# (the noise rows and the row shuffle) repeat on every Restart & Run All.
np.random.seed(1)

## Build dataset

In [4]:
# Three hand-placed 2-D points.  The offsets between them are (3, 4), (6, 8)
# and (9, 12), so their pairwise Euclidean distances are exactly 5, 10 and 15.
isolated_points = np.array(
    [(110, 110), (113, 114), (119, 122)],
    dtype=np.float64,
)

In [5]:
n_obs = 1000
n_col = isolated_points.shape[1]

# Put the isolated points on top of standard-normal noise rows so that the
# combined array has exactly n_obs rows and n_col columns.
data = np.concatenate(
    (
        isolated_points,
        np.random.normal(
            loc=0,
            scale=1,
            size=(n_obs - isolated_points.shape[0], n_col),
        ),
    ),
    axis=0,
)

# Shuffle the rows: after this, data[j] holds the pre-shuffle row perm[j].
perm = np.random.permutation(n_obs)
data = data[perm]

## Construct RPTree (forest)

In [6]:
%%time
# Build the RPTree forest over the shuffled dataset.  The call is already
# parenthesised, so no backslash continuation is needed.
forest = makeForest(
    data,
    maxLeafSize=10,
    numTrees=20,
    distanceFunction=euclidean,
)

CPU times: user 108 ms, sys: 4.5 ms, total: 112 ms
Wall time: 112 ms


## Query the fixed set of isolated points

In [7]:
%%time
# Ask the forest for the 3 nearest neighbours of each isolated point; the
# assertion cells that follow check both returned arrays.
distances, indices = forest.kneighbors(isolated_points, k=3)

CPU times: user 65.6 ms, sys: 2.66 ms, total: 68.3 ms
Wall time: 71.8 ms


### Assertions checking the results of the previous query

In [8]:
# Row r holds the sorted distances from isolated point r to the three isolated
# points (itself first, hence the leading 0).  The values follow from the
# offsets between the points: |(3, 4)| = 5, |(6, 8)| = 10, |(9, 12)| = 15.
expected = np.array(
    [
        (0, 5, 15),
        (0, 5, 10),
        (0, 10, 15),
    ],
    dtype=np.float64,
)
np.testing.assert_allclose(distances, expected)

In [9]:
# Locate the isolated points inside the shuffled dataset.
# The rows were shuffled with data = data[perm], so pre-shuffle row i now sits
# at the position j where perm[j] == i.  The argsort of a permutation is its
# inverse, so one vectorised call yields every such position; this replaces
# rebuilding list(perm) and scanning it linearly once per point.
# .tolist() keeps i1, i2, i3 as plain Python ints, as before.
i1, i2, i3 = np.argsort(perm)[:len(isolated_points)].tolist()

In [10]:
# Row r lists the expected neighbour positions for isolated point r, nearest
# first: the point itself, then the other two in order of increasing distance
# (5 before 15, 5 before 10, 10 before 15 respectively).
expected = np.array(
    [
        (i1, i2, i3),
        (i2, i1, i3),
        (i3, i2, i1),
    ],
    dtype=int,
)
np.testing.assert_array_equal(indices, expected)

## Query all nearest neighbors

In [11]:
%%time
# Query the 3 nearest neighbours of every row in the dataset.
# NOTE: this rebinds `distances` and `indices` from the isolated-point query,
# so the assertion cells above will fail if re-run after this cell.
distances, indices = forest.kneighbors(data, k=3)

CPU times: user 4.13 s, sys: 22.3 ms, total: 4.15 s
Wall time: 4.19 s
