In [1]:
import numpy
import scipy
import unittest
import time

from nearpy import Engine
from nearpy.distances import EuclideanDistance

from nearpy.hashes import RandomDiscretizedProjections
from nearpy.filters import NearestFilter, UniqueFilter


In [2]:
# Dimensionality of the feature space (length of every indexed and query vector)
DIM = 100

# Number of data points to index. Keep this moderate: the exact search used
# for comparison computes the distance to every one of these points.
POINTS = 60000

In [3]:
# Number of closest neighbours each query should return
NUM_NEIGHBOURS = 10

# Vector filter handed to the engine so that a query yields only the
# NUM_NEIGHBOURS closest results
nearest = NearestFilter(NUM_NEIGHBOURS)


In [4]:
# Build one LSH engine backed by four random discretized projection hashes.
# Every hash is constructed with the same projection_dim and bin_width.
bin_width = 1.9
projection_dim = 3

rdp1, rdp2, rdp3, rdp4 = [
    RandomDiscretizedProjections(hash_name, projection_dim, bin_width)
    for hash_name in ('rdp1', 'rdp2', 'rdp3', 'rdp4')
]

# The engine works on DIM-dimensional vectors, measures Euclidean distance
# and passes its results through the `nearest` filter.
engine = Engine(
    DIM,
    lshashes=[rdp1, rdp2, rdp3, rdp4],
    distance=EuclideanDistance(),
    vector_filters=[nearest],
)



*** engine init done ***


In [5]:
print('Creating data structure...')
t0 = time.time()

# Index POINTS random vectors. Each one is also kept as a row of `matrix`
# so the raw data stays available outside the engine.
matrix = numpy.zeros((POINTS, DIM))
for i in range(POINTS):
    v = numpy.random.standard_normal(DIM)
    engine.store_vector(v)
    matrix[i] = v

t1 = time.time()
print('Creating data structure took %f seconds' % (t1 - t0))


Creating data structure...
Creating data structure took 2.651110 seconds


In [6]:
# Draw one random DIM-dimensional vector to use as the query point
query = numpy.random.standard_normal(size=DIM)


In [7]:
# Query the LSH engine for the neighbours of `query`
print('\nNeighbour distances:')
# Number of stored vectors the engine reports as candidates for this query
print('  -> Candidate count is %d' % engine.candidate_count(query))

t0 = time.time()
results = engine.neighbours(query, distance=EuclideanDistance())
t1 = time.time()
print('Query took %f seconds' % (t1-t0))
print('****************')

# `results` is what the engine returns after its vector filters ran, so its
# length is the number of neighbours returned, not the candidate count above.
print('Number of neighbours returned:', len(results))
# Position 2 of each result is read as its distance to the query
# (presumably results are (vector, data, distance) tuples - TODO confirm).
dists = [result[2] for result in results]
print(dists)



Neighbour distances:
  -> Candidate count is 35
Query took 0.001404 seconds
****************
Number of candidates: 10
[12.176297998401697, 12.34926067988176, 12.391517143556687, 12.444762679864459, 12.485970863275078, 12.708099905247941, 12.918480879627497, 12.983274008446328, 13.016104193845251, 13.050341464505085]


In [8]:
# Real neighbours: brute-force baseline that measures the distance from the
# query to every row of `matrix`, for comparison with the engine's results.
print('\n *** Calculate Real neighbour distances ***')

# One distance object is enough; it is reused for every row.
euclidean = EuclideanDistance()

t0 = time.time()
# Iterate over the rows actually stored in `matrix` (rather than a separate
# counter) and convert to plain floats so the sorted list prints cleanly.
dists = sorted(float(euclidean.distance(row, query)) for row in matrix)
t1 = time.time()
print('Query took %f seconds' % (t1-t0))

# Ten smallest distances, matching the ten neighbours requested from the engine
print('Exact distances:', dists[:10])

print('Max distance:', dists[-1])





 *** Calculate Real neighbour distances ***
Query took 0.421855 seconds
Exact distances: [10.65461663254864, 10.73459990254436, 10.848674025026424, 10.885102868102841, 10.911081466032613, 10.93904728016844, 10.958357893218727, 10.961854457549082, 11.015691755106248, 11.032182206087892]
Max distance: 18.182625421626685
