In [6]:
import falconn
import pprint as pp

In [8]:
# Ask FALCONN for its default LSH construction parameters under squared
# Euclidean distance.
# NOTE(review): 1600 and 128 are presumably the number of points and the
# dimensionality of the data set -- confirm against the FALCONN docs.
dp = falconn.get_default_parameters(1600, 128, falconn.DistanceFunction.EuclideanSquared)
# The parameters object only has the default repr, so pprint shows just its
# type and memory address rather than the individual settings.
pp.pprint(dp)

<_falconn.LSHConstructionParameters object at 0x7ffa33952bc8>


In [12]:
import nmslib

In [37]:
import os.path
import nmslib 

import shutil

# Directory (relative to the working directory) holding cached nmslib indices.
INDEX_DIR = 'indices'

# Wipe any index files left over from an earlier run so that every execution
# of this cell starts with an empty cache.
if os.path.exists(INDEX_DIR):
    shutil.rmtree(INDEX_DIR)

class NmslibReuseIndex:
    """Thin wrapper around an nmslib index that can reuse an index saved on disk.

    The index file lives under ``INDEX_DIR`` and its name is derived from the
    method name, the metric and the index-time parameters. ``fit`` loads that
    file when it already exists; otherwise it builds the index and, when
    ``save_index`` is true, saves it for later runs.

    Parameters
    ----------
    metric : str
        ``'angular'`` or ``'euclidean'``; mapped to the nmslib space names
        ``'cosinesimil'`` and ``'l2'``. Any other value raises ``KeyError``.
    method_name : str
        nmslib method to build (e.g. ``'hnsw'``, ``'vptree'``).
    index_param : dict, list of str, or None
        Index-time parameters handed to ``createIndex``, either as a dict
        (``{'post': 2}``) or as a list of ``'key=value'`` strings.
    save_index : bool
        Whether a freshly built index is written to disk.
    query_param : object
        Query-time parameters. They are stored and shown in ``name`` but are
        not applied to the index (see the note at the end of ``fit``).
    """

    def __init__(self, metric, method_name, index_param, save_index, query_param):
        self._nmslib_metric = {'angular': 'cosinesimil', 'euclidean': 'l2'}[metric]
        self._method_name = method_name
        self._save_index = save_index
        self._index_param = index_param
        self._query_param = query_param
        self.name = 'Nmslib(method_name=%s, index_param=%s, query_param=%s)' % (
            method_name, index_param, query_param)

        # Build the cache file name from 'key=value' tokens so that two
        # configurations differing only in a parameter *value* do not end up
        # sharing (and wrongly reusing) the same index file.
        self._index_name = os.path.join(INDEX_DIR, "nmslib_%s_%s_%s" % (
            self._method_name, metric, '_'.join(self._param_tokens(index_param))))

        d = os.path.dirname(self._index_name)
        if not os.path.exists(d):
            os.makedirs(d)

        # Pass the method explicitly; otherwise nmslib always builds its
        # default method and ``method_name`` is silently ignored.
        self._index = nmslib.init(space=self._nmslib_metric, method=self._method_name)

    @staticmethod
    def _param_tokens(index_param):
        """Return ``index_param`` as a list of strings usable in a file name.

        A dict becomes sorted ``'key=value'`` strings, ``None`` becomes an
        empty list, and any other iterable has each item converted with
        ``str``.
        """
        if index_param is None:
            return []
        if isinstance(index_param, dict):
            return ['%s=%s' % (key, index_param[key]) for key in sorted(index_param)]
        return [str(item) for item in index_param]

    def add(self, i, x):
        """Add the single data point ``x`` to the index under id ``i``."""
        self._index.addDataPoint(i, x)

    def fit(self, X):
        """Add every row of ``X`` to the index, then load or build the index.

        ``X`` must be iterable row by row and expose ``shape[0]`` (the number
        of rows). Row ``k`` is added under id ``k``.
        """
        index_param = self._index_param
        if self._method_name == 'vptree':
            # To avoid this issue:
            # terminate called after throwing an instance of 'std::runtime_error'
            # what():  The data size is too small or the bucket size is too big. Select the parameters so that <total # of records> is NOT less than <bucket size> * 1000
            # Aborted (core dumped)
            bucket_size = min(int(X.shape[0] * 0.0005), 1000)
            # Work on a copy: the caller's object is left untouched, repeated
            # fit() calls do not pile up duplicate entries, and both the dict
            # and the list-of-strings parameter forms are supported.
            if isinstance(index_param, dict):
                index_param = dict(index_param, bucketSize=bucket_size)
            else:
                index_param = list(index_param or []) + ['bucketSize=%d' % bucket_size]

        for i, x in enumerate(X):
            self._index.addDataPoint(i, x)

        if os.path.exists(self._index_name):
            # Parenthesised so the line is valid on both Python 2 and Python 3.
            print("Loading index from file")
            self._index.loadIndex(self._index_name)
        else:
            self._index.createIndex(index_param)
            if self._save_index:
                self._index.saveIndex(self._index_name)
        # NOTE(review): query-time parameters are intentionally not applied;
        # the call below was already disabled in the original code.
        #self._index.setQueryTimeParams(self._query_param)

    def query(self, v, n):
        """Return the result of a ``n``-nearest-neighbour query for vector ``v``."""
        return self._index.knnQuery(v, k=n)

    def freeIndex(self):
        """Drop the reference to the underlying nmslib index."""
        self._index = None

In [40]:
import numpy as np
import nmslib 

# 1000 random 100-dimensional float32 vectors used as a toy data set.
data = np.random.randn(1000, 100).astype(np.float32)

# Build an HNSW index under the angular metric without saving it to disk.
index = NmslibReuseIndex(
    metric="angular",
    method_name="hnsw",
    index_param={"post": 2},
    save_index=False,
    query_param={"k": 10}
)
index.fit(data)

# The 10 nearest neighbours of the first vector; the vector itself comes first.
index.query(v=data[0], n=10)

(array([  0, 485, 824, 249, 325,  52, 243, 954, 408, 662], dtype=int32),
 array([  3.57627869e-07,   7.25090981e-01,   7.28309512e-01,
          7.31392801e-01,   7.34187186e-01,   7.35698462e-01,
          7.43861318e-01,   7.60318637e-01,   7.62889028e-01,
          7.68079579e-01], dtype=float32))

In [7]:
import numpy as np
# Two independent 3x2 arrays of random values, collected in a plain list.
data = [np.random.rand(3, 2) for i in range(2)]

In [8]:
# Show the list as built: two separate (3, 2) arrays.
data

[array([[ 0.03592976,  0.68651662],
        [ 0.08654572,  0.1576118 ],
        [ 0.75146299,  0.01702054]]), array([[ 0.26032299,  0.01829923],
        [ 0.55238853,  0.26964409],
        [ 0.1930148 ,  0.44923551]])]

In [9]:
# Join the list of arrays along the first axis: two (3, 2) arrays -> one (6, 2) array.
np.concatenate(data)

array([[ 0.03592976,  0.68651662],
       [ 0.08654572,  0.1576118 ],
       [ 0.75146299,  0.01702054],
       [ 0.26032299,  0.01829923],
       [ 0.55238853,  0.26964409],
       [ 0.1930148 ,  0.44923551]])

In [10]:
# 1-D array holding 3 random values.
arr = np.random.rand(3)

In [11]:
# Show the 1-D array, shape (3,).
arr

array([ 0.4189253 ,  0.54643724,  0.53097753])

In [12]:
# Insert a new leading axis: shape (3,) -> (1, 3), i.e. a single row.
np.expand_dims(arr, axis=0)

array([[ 0.4189253 ,  0.54643724,  0.53097753]])

In [13]:
# Insert a new trailing axis: shape (3,) -> (3, 1), i.e. a single column.
np.expand_dims(arr, axis=1)

array([[ 0.4189253 ],
       [ 0.54643724],
       [ 0.53097753]])