In [1]:
import numpy
import scipy
import unittest
import time

from nearpy import Engine
from nearpy.distances import CosineDistance

from nearpy.hashes import RandomBinaryProjections, HashPermutations, HashPermutationMapper


In [2]:
# Dimension of feature space
DIM = 100

# Number of data points (dont do too much because of exact search)
POINTS = 20000

In [3]:
print('Performing indexing with HashPermutations...')
t0 = time.time()

# Create permutations meta-hash
permutations = HashPermutations('permut')

# Create binary hash as child hash
rbp_perm = RandomBinaryProjections('rbp_perm', 14)
rbp_conf = {'num_permutation':50,'beam_size':10,'num_neighbour':100}

# Add rbp as child hash of permutations hash
permutations.add_child_hash(rbp_perm, rbp_conf)

# Create engine
engine_perm = Engine(DIM, lshashes=[permutations], distance=CosineDistance())

# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in range(POINTS):
    v = numpy.random.randn(DIM)
    matrix[i] = v
    engine_perm.store_vector(v)

# Then update permuted index
permutations.build_permuted_index()

t1 = time.time()
print('Indexing took %f seconds' % (t1-t0))


Performing indexing with HashPermutations...
Indexing took 5.958319 seconds


In [4]:
# Get random query vector
query = numpy.random.randn(DIM)


In [6]:
# Do random query on engine 3
print('\nNeighbour distances with HashPermutations:')
print('  -> Candidate count is %d' % engine_perm.candidate_count(query))
results = engine_perm.neighbours(query)
dists = [x[2] for x in results]
print(dists)



Neighbour distances with HashPermutations:
  -> Candidate count is 180
[0.9500585913143319]


In [17]:
# Real neighbours
print('\nReal neighbour distances:')
query = query.reshape((DIM))
dists = CosineDistance().distance(matrix, query)
dists = dists.reshape((-1,))
dists = sorted(dists)
print(dists[:10])




Real neighbour distances:
[-37.760793231954096, -35.81055212993542, -33.51840944458068, -33.099759059661324, -32.915466819374444, -31.777265986663686, -31.6199439021197, -31.613155317134144, -30.867763640968043, -30.711106210538084]


In [7]:
print('\nPerforming indexing with HashPermutationMapper...')
t0 = time.time()

# Create permutations meta-hash
permutations2 = HashPermutationMapper('permut2')

# Create binary hash as child hash
rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)

# Add rbp as child hash of permutations hash
permutations2.add_child_hash(rbp_perm2)

# Create engine
engine_perm2 = Engine(DIM, lshashes=[permutations2], distance=CosineDistance())

# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in range(POINTS):
    v = numpy.random.randn(DIM)
    matrix[i] = v
    engine_perm2.store_vector(v)

t1 = time.time()
print('Indexing took %f seconds' % (t1-t0))



Performing indexing with HashPermutationMapper...
Indexing took 3.477103 seconds


In [8]:
# Get random query vector
query = numpy.random.randn(DIM)


In [9]:
# Do random query on engine 4
print('\nNeighbour distances with HashPermutationMapper:')
print('  -> Candidate count is %d' % engine_perm2.candidate_count(query))
results = engine_perm2.neighbours(query)
dists = [x[2] for x in results]
print(dists)



Neighbour distances with HashPermutationMapper:
  -> Candidate count is 142
[0.8792594766780438]


In [10]:
# Real neighbours
print('\nReal neighbour distances:')
query = query.reshape((DIM))
dists = CosineDistance().distance(matrix,query)
dists = dists.reshape((-1,))
dists = sorted(dists)
print(dists[:10])



Real neighbour distances:
[-37.54304656294086, -37.23739270853423, -33.58340648998262, -33.57368828652864, -33.38211030602349, -33.37605248065024, -33.11079989791898, -32.460870111359824, -32.40499228871257, -32.20875773928526]


In [11]:
print('\nPerforming indexing with multiple binary hashes...')
t0 = time.time()

hashes = []
for k in range(20):
    hashes.append(RandomBinaryProjections('rbp_%d' % k, 10))

# Create engine
engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance())

# First index some random vectors
matrix = numpy.zeros((POINTS,DIM))
for i in range(POINTS):
    v = numpy.random.randn(DIM)
    matrix[i] = v
    engine_rbps.store_vector(v)

t1 = time.time()
print('Indexing took %f seconds' % (t1-t0))



Performing indexing with multiple binary hashes...
Indexing took 2.943007 seconds


In [12]:
# Get random query vector
query = numpy.random.randn(DIM)


In [13]:
# Do random query on engine 4
print('\nNeighbour distances with multiple binary hashes:')
print('  -> Candidate count is %d' % engine_rbps.candidate_count(query))
results = engine_rbps.neighbours(query)
dists = [x[2] for x in results]
print(dists)



Neighbour distances with multiple binary hashes:
  -> Candidate count is 501
[1.1285111465504176]


In [14]:
# Real neighbours
print('\nReal neighbour distances:')
query = query.reshape((DIM))
dists = CosineDistance().distance(matrix,query)
dists = dists.reshape((-1,))
dists = sorted(dists)
print(dists[:10])



Real neighbour distances:
[-34.468639258530004, -32.91035631057431, -32.02828864782643, -30.396000044556228, -29.796426389746387, -29.59224284276246, -29.229534269001558, -28.939848503372616, -28.63480931404359, -28.351543692714177]
