In [1]:
import time

import numpy
import scipy
import unittest
import nearpy.utils.utils
from nearpy import Engine
from nearpy.distances import EuclideanDistance

from nearpy.hashes import RandomBinaryProjections, RandomBinaryProjectionsBias, RandomBinaryProjectionTree, HashPermutations, HashPermutationMapper


In [18]:
def print_results(results):
    """Print a two-column table of neighbour results.

    Args:
        results: iterable of indexable entries; element 1 of each entry is
            printed in the "Data" column and element 2 (a number) in the
            "Distance" column with six decimal places. Element 0 is ignored.
    """
    row_template = '  {}  \t| {:.6f}'
    print('  Data \t| Distance')
    for entry in results:
        # Index instead of unpacking so entries with extra items still work
        print(row_template.format(entry[1], entry[2]))


In [3]:
# Dimension of the feature space (length of every stored vector and of the
# query vector)
DIM = 100

# Number of data points to index (don't set this too high: the exact-search
# baseline computes the distance from the query to every single point)
POINTS = 100000



In [4]:
print('Creating engines')


# Create engine 1
# We want 20 projections, 20 results at least
# NOTE(review): the meaning of the two numeric arguments (projection count,
# minimum result size) is taken from the comment above - confirm against the
# nearpy RandomBinaryProjectionTree signature.
rbpt = RandomBinaryProjectionTree('rbpt', 20, 20)
engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=EuclideanDistance())


Creating engines
*** engine init done ***


In [5]:
# Create engine 2
# Uses the *biased* random binary projections hash, registered as 'rbp1'.
# NOTE(review): the positional arguments 5 and 1 are presumably the projection
# count and the minimum result size - confirm against the nearpy signature.
rbp1 = RandomBinaryProjectionsBias('rbp1', 5,1)
# This is the engine the later cells refer to simply as `engine`
engine = Engine(DIM, lshashes=[rbp1], distance=EuclideanDistance())


*** engine init done ***


In [6]:
# Create engine 3
# Permutations meta-hash; the binary hash below is attached as its child
permutations = HashPermutations('permut')

# Child binary hash and the permutation settings that go with it
rbp_perm = RandomBinaryProjections('rbp_perm', 20)
rbp_conf = dict(num_permutation=50, beam_size=10, num_neighbour=100)

# Register rbp_perm (together with its settings) as child of the meta-hash
permutations.add_child_hash(rbp_perm, rbp_conf)

engine_perm = Engine(
    DIM,
    lshashes=[permutations],
    distance=EuclideanDistance(),
)


*** engine init done ***


In [7]:
# Create engine 4
# Create permutation-mapper meta-hash (HashPermutationMapper, whereas
# engine 3 uses HashPermutations)
permutations2 = HashPermutationMapper('permut2')

# Create binary hash that will act as the child hash
rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12)

# Add rbp_perm2 as child hash of the meta-hash; unlike engine 3, no
# configuration dict is passed here
permutations2.add_child_hash(rbp_perm2)

engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=EuclideanDistance())



*** engine init done ***


In [8]:
print('Indexing %d random vectors of dimension %d' % (POINTS, DIM))

# Dense copy of every indexed vector, kept for the exact search later on
matrix = numpy.zeros((POINTS, DIM))

# All engines receive the same vectors, in the same order as listed here
all_engines = (engine, engine_rbpt, engine_perm, engine_perm2)
for i in range(POINTS):
    v = numpy.random.randn(DIM)
    matrix[i, :] = v
    # The row index doubles as the data payload stored next to the vector
    for target_engine in all_engines:
        target_engine.store_vector(v, i)

# Report how many buckets the 'rbp1' and 'rbpt' hashes ended up with
rbp1_buckets = engine.storage.buckets['rbp1']
print('Buckets 1 = %d' % len(rbp1_buckets.keys()))
rbpt_buckets = engine_rbpt.storage.buckets['rbpt']
print('Buckets 2 = %d' % len(rbpt_buckets.keys()))


Indexing 100000 random vectors of dimension 100
Buckets 1 = 32
Buckets 2 = 91084


In [9]:
print('Building permuted index for HashPermutations')

# Then update the permuted index of the HashPermutations meta-hash used by
# engine_perm.
# NOTE(review): presumably this has to run after the vectors were stored and
# before engine_perm is queried - confirm against the nearpy documentation.
permutations.build_permuted_index()


Building permuted index for HashPermutations


In [10]:
print('Generate random data')

# Draw one random query vector of length DIM; the same vector is reused by
# every engine query below and by the exact-search baseline
query = numpy.random.randn(DIM)


Generate random data


In [19]:
# Do random query on engine 1
print('\nNeighbour distances with RandomBinaryProjectionTree:')
# The candidate count is fetched before the timer starts, so it is not part
# of the measured query time
print('  -> Candidate count is %d' % engine_rbpt.candidate_count(query))

# Time only the neighbour lookup itself
t0=time.time()
results = engine_rbpt.neighbours(query)
t1 = time.time()
print('Query took %f seconds' % (t1-t0))

# print_results reads element 1 (data) and element 2 (distance) of each entry
print_results(results)



Neighbour distances with RandomBinaryProjectionTree:
  -> Candidate count is 41
Query took 0.000986 seconds
  Data 	| Distance
  35620  	| 12.076558
  37472  	| 12.161507
  72778  	| 12.242319
  37137  	| 12.382099
  27450  	| 12.442067
  46613  	| 12.495322
  12361  	| 12.705276
  44906  	| 12.709231
  24767  	| 12.731053
  59497  	| 12.780457


In [20]:
# Do random query on engine 2
# `engine` is built on the RandomBinaryProjectionsBias hash ('rbp1'), so the
# heading names that hash rather than plain RandomBinaryProjections
print('\nNeighbour distances with RandomBinaryProjectionsBias:')
# The candidate count is fetched before the timer starts, so it is not part
# of the measured query time
print('  -> Candidate count is %d' % engine.candidate_count(query))

# Time only the neighbour lookup itself
t0 = time.time()
results = engine.neighbours(query)
t1 = time.time()
print('Query took %f seconds' % (t1 - t0))

# print_results reads element 1 (data) and element 2 (distance) of each entry
print_results(results)



Neighbour distances with RandomBinaryProjections:
  -> Candidate count is 4295
Query took 0.032374 seconds
  Data 	| Distance
  90116  	| 10.731020
  19272  	| 11.023388
  52424  	| 11.072005
  93266  	| 11.145858
  77467  	| 11.147064
  29432  	| 11.190778
  42763  	| 11.214849
  52428  	| 11.228893
  1227  	| 11.236811
  79397  	| 11.351124


In [21]:
# Do random query on engine 3
print('\nNeighbour distances with HashPermutations:')
# The candidate count is fetched before the timer starts, so it is not part
# of the measured query time
print('  -> Candidate count is %d' % engine_perm.candidate_count(query))

# Time only the neighbour lookup itself
t0=time.time()
results = engine_perm.neighbours(query)
t1 = time.time()
print('Query took %f seconds' % (t1-t0))

# print_results reads element 1 (data) and element 2 (distance) of each entry
print_results(results)



Neighbour distances with HashPermutations:
  -> Candidate count is 112
Query took 0.003802 seconds
  Data 	| Distance
  64389  	| 10.738450
  67146  	| 11.435970
  77647  	| 11.557859
  79744  	| 11.608142
  26466  	| 11.641168
  37422  	| 11.681307
  60392  	| 11.778838
  7271  	| 11.821237
  26805  	| 11.855554
  87742  	| 11.855994


In [22]:
# Do random query on engine 4
# NOTE(review): the heading below says "HashPermutations2", but engine_perm2
# is built on a HashPermutationMapper meta-hash.
print('\nNeighbour distances with HashPermutations2:')
# The candidate count is fetched before the timer starts, so it is not part
# of the measured query time
print('  -> Candidate count is %d' % engine_perm2.candidate_count(query))

# Time only the neighbour lookup itself
t0=time.time()
results = engine_perm2.neighbours(query)
t1 = time.time()
print('Query took %f seconds' % (t1-t0))

# print_results reads element 1 (data) and element 2 (distance) of each entry
print_results(results)



Neighbour distances with HashPermutations2:
  -> Candidate count is 1812
Query took 0.019906 seconds
  Data 	| Distance
  49265  	| 11.193011
  19773  	| 11.264696
  67382  	| 11.265784
  21409  	| 11.346261
  3659  	| 11.358474
  66952  	| 11.366375
  58134  	| 11.374732
  84958  	| 11.446421
  61601  	| 11.463584
  55163  	| 11.516852


In [23]:
# Real neighbours
print('\nReal neighbour distances:')

# Exact (brute-force) baseline: measure the distance from the query to every
# indexed vector and keep the ten closest ones.
# A single distance object is created up front instead of once per point,
# so object construction is not part of the timed region.
exact_distance = EuclideanDistance()
dists = numpy.zeros(POINTS)
t0 = time.time()
# The loop variable is deliberately not called `iter`: at notebook top level
# that would overwrite the builtin iter() for every later cell.
for point_idx in range(POINTS):
    dists[point_idx] = exact_distance.distance(matrix[point_idx], query)
# dists is already one-dimensional, so it can be sorted directly
dists_argsort = numpy.argsort(dists)
t1 = time.time()
print('Query took %f seconds' % (t1 - t0))

# Same (vector, data, distance) layout print_results reads; the vector slot is
# unused there, so None is passed
results = [(None, d, dists[d]) for d in dists_argsort[:10]]
print_results(results)


Real neighbour distances:
Query took 0.599546 seconds
  Data 	| Distance
  92762  	| 10.277807
  71336  	| 10.417020
  63023  	| 10.461655
  45738  	| 10.495865
  86333  	| 10.499976
  84299  	| 10.511342
  47597  	| 10.609366
  57483  	| 10.700567
  90116  	| 10.731020
  59274  	| 10.734705
