In [33]:
import numpy as np
import time 
from sklearn.neighbors import NearestNeighbors

In [34]:
# Build the database: 100000 vectors of length `veclen`, each entry drawn from a
# standard normal. The global seed is fixed so the same vectors come out on
# every run.
np.random.seed(1)
veclen = 10
vecs = [np.random.normal(0, 1, veclen) for _ in range(100000)]

In [35]:
# Query vector, drawn from the same distribution as the database vectors.
# Re-seeding here makes `q` independent of how many draws the previous cell used.
np.random.seed(2)
q = np.random.normal(0, 1, veclen)

In [36]:
# Largest Euclidean norm over the database vectors. `transform` recomputes this
# value internally, so this module-level copy is for inspection only.
maxnorm = max(map(np.linalg.norm, vecs))

In [37]:
# Scratch handle on the first database vector. No later cell shown here reads
# this module-level `v`; the loops inside the functions use their own local `v`.
v = vecs[0]

In [38]:
def transform(vecs):
    """Augment each vector with a leading coordinate that equalises all norms.

    For every input vector ``x`` the result is ``(sqrt(M**2 - ||x||**2), x)``,
    where ``M`` is the largest Euclidean norm among the inputs. Every returned
    vector therefore has norm ``M``.

    Parameters
    ----------
    vecs : iterable of array_like
        Input vectors. Integer and boolean vectors are converted to float
        first; floating and complex dtypes are kept as they are.

    Returns
    -------
    list of numpy.ndarray
        One augmented vector per input, in the same order. An empty input
        yields an empty list.
    """
    arrays = []
    for v in vecs:
        arr = np.asarray(v)
        # np.insert casts the inserted value to the array's dtype, so an
        # integer vector would silently truncate the padding coordinate.
        if not np.issubdtype(arr.dtype, np.inexact):
            arr = arr.astype(float)
        arrays.append(arr)

    if not arrays:
        return []

    # Compute each norm once and reuse it for both the maximum and the padding.
    norms = [np.linalg.norm(arr) for arr in arrays]
    maxnorm = max(norms)
    return [
        np.insert(arr, 0, np.sqrt(maxnorm**2 - norm**2))
        for arr, norm in zip(arrays, norms)
    ]

In [39]:
# Augmented copy of the database: each vector gains one leading coordinate.
vecs_trans = transform(vecs)

In [40]:
# Spot-check one augmented vector: veclen + 1 entries, the first is the added one.
vecs_trans[1000]

array([ 6.32518175, -0.12247391,  0.22816982, -0.35230513, -0.83055344,
       -0.26108982,  0.16935423,  0.6736231 , -0.32720161, -0.30529915,
        0.52486533])

In [41]:
# Lift the query into the augmented space with 0 in the extra leading slot.
# With database vectors x' = (sqrt(M^2 - ||x||^2), x) and q' = (0, q):
#   ||q' - x'||^2 = ||q||^2 + M^2 - 2 * q.x
# so the Euclidean nearest neighbour of q' is the vector maximising q.x.
q_trans = np.insert(q, 0, 0)

In [42]:
# Sanity check: the lifted query has veclen + 1 components.
len(q_trans)

11

In [43]:
def mips_naive(q, vecs):
    """Brute-force maximum inner product search.

    Scans ``vecs`` in order and returns the position of the vector whose
    inner product with ``q`` is largest.

    Parameters
    ----------
    q : array_like
        Query vector.
    vecs : iterable of array_like
        Candidate vectors, each usable as ``np.dot(q, v)``.

    Returns
    -------
    idx : int
        Index of the best candidate; ties go to the earliest one. ``-1`` when
        ``vecs`` is empty or no product compares greater than ``-inf``.
    mip : float
        The inner product at ``idx`` (``-inf`` when ``idx`` is ``-1``).
    """
    # Start from -inf rather than a finite sentinel so that arbitrarily
    # negative inner products are still selected.
    mip = float("-inf")
    idx = -1
    for i, v in enumerate(vecs):
        score = np.dot(q, v)  # evaluate the product once per candidate
        if score > mip:
            mip = score
            idx = i
    return idx, mip

In [44]:
# Time the brute-force scan over the original (untransformed) vectors.
# perf_counter is a monotonic high-resolution clock, and the elapsed time is
# taken before printing so that console output is not part of the measurement.
start = time.perf_counter()
idx, _ = mips_naive(q, vecs)
elapsed = time.perf_counter() - start
# NOTE: the label reads "Min index", but `idx` is the position of the
# *largest* inner product; the text is kept so recorded output still matches.
print("Min index", idx)
print("Elapsed time", elapsed)

Min index 50753
Elapsed time 0.12854599952697754


In [45]:
# Stack the list of augmented vectors into a single 2-D array, one row per vector.
X = np.array(vecs_trans)

In [46]:
# Fit a k-d tree nearest-neighbour index on the augmented vectors.
# n_neighbors=1 because only the single closest point is wanted per query.
nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(X)

In [47]:
# Time only the nearest-neighbour query; the index was fitted in an earlier cell.
# The lifted query is wrapped in a list to form a one-row 2-D query batch.
# perf_counter is a monotonic high-resolution clock, and the elapsed time is
# taken before printing so that console output is not part of the measurement.
start = time.perf_counter()
distances, indices = nbrs.kneighbors(np.array([q_trans]))
elapsed = time.perf_counter() - start
print("Min index", indices[0])
print("Elapsed time", elapsed)

Min index [50753]
Elapsed time 0.006356954574584961


In [50]:
# Raw neighbour indices: one row per query, one column per requested neighbour.
indices

array([[50753]])

In [49]:
# Expect (number of vectors, veclen + 1).
X.shape

(100000, 11)

In [53]:
# The augmented database row picked by the search; its first entry is the
# added padding coordinate, the rest is the original vector.
X[indices[0]]

array([[ 4.28969653, -0.5965094 ,  1.42071035, -3.37551897,  0.73652411,
        -1.97169694, -1.35898454,  0.64774945, -1.61868989, -0.50793098,
        -0.41328024]])

In [54]:
# The lifted query, for side-by-side comparison; its leading entry is the inserted 0.
q_trans

array([ 0.        , -0.41675785, -0.05626683, -2.1361961 ,  1.64027081,
       -1.79343559, -0.84174737,  0.50288142, -1.24528809, -1.05795222,
       -0.90900761])