In [1]:
import numpy as np
from scipy.spatial.distance import cdist
from usearch.index import search, MetricKind, Matches, BatchMatches

array([42], dtype=uint64)

# Example with USearch library

Reference: https://github.com/unum-cloud/usearch

In [49]:
# Generate 10'000 random vectors with 1024 dimensions
vectors = np.random.rand(10000, 1024).astype(np.float32)
vector = np.random.rand(1024).astype(np.float32)

one_in_many: Matches = search(vectors, vector, 50, MetricKind.L2sq, exact=True)
# many_in_many: BatchMatches = search(vectors, vectors, 50, MetricKind.L2sq, exact=True)



In [9]:
one_in_many.distances

array([148.28493, 149.36392, 149.39612, 150.33774, 150.76176, 151.26874,
       151.55542, 151.58168, 151.61359, 151.66888, 151.77277, 151.8819 ,
       152.16907, 152.28232, 152.4141 , 152.41916, 152.45041, 152.47855,
       152.56647, 152.61732, 152.81068, 152.91292, 152.9302 , 152.9842 ,
       153.01051, 153.0383 , 153.09265, 153.14703, 153.23404, 153.30553,
       153.60391, 153.68681, 153.98639, 154.01343, 154.26999, 154.3609 ,
       154.38693, 154.4319 , 154.45644, 154.46512, 154.51248, 154.59296,
       154.63261, 154.65158, 154.65529, 154.65904, 154.66281, 154.69044,
       154.72095, 154.80173], dtype=float32)

In [11]:
one_in_many.to_list()

[(6398, 148.28492736816406),
 (143, 149.36392211914062),
 (5364, 149.3961181640625),
 (2408, 150.33773803710938),
 (4788, 150.7617645263672),
 (4569, 151.26873779296875),
 (8272, 151.555419921875),
 (3095, 151.58168029785156),
 (7043, 151.61358642578125),
 (5500, 151.66888427734375),
 (5613, 151.77276611328125),
 (6301, 151.88189697265625),
 (5890, 152.1690673828125),
 (540, 152.28231811523438),
 (9951, 152.41409301757812),
 (171, 152.41915893554688),
 (7865, 152.45040893554688),
 (9505, 152.47854614257812),
 (2065, 152.56646728515625),
 (762, 152.61732482910156),
 (2186, 152.81068420410156),
 (8400, 152.9129180908203),
 (649, 152.93020629882812),
 (4983, 152.9842071533203),
 (7042, 153.01051330566406),
 (5245, 153.03829956054688),
 (2278, 153.0926513671875),
 (609, 153.14703369140625),
 (3126, 153.23403930664062),
 (6393, 153.30552673339844),
 (950, 153.60391235351562),
 (1871, 153.6868133544922),
 (8731, 153.98638916015625),
 (1245, 154.013427734375),
 (8291, 154.26998901367188),
 (7

# Comparison with Scipy Spatial Distance

In [12]:
# Another example to compare with scipy spatial.distance
# Example vectors
vector1 = np.random.uniform(2,3,size=[10,5])
vector2 =np.array([np.random.uniform(2,3,size=5)])

In [13]:
result: BatchMatches = search(vector1, vector2, 10, MetricKind.L2sq, exact=True)

In [35]:
# get the indeces for ordering the pairwise distances
ind = np.array(result.to_list())[:,0].astype('int64')

In [44]:
np.sort(ind)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [15]:
d = result.distances

In [47]:
np.sqrt(d)[np.argsort(ind)]

array([1.2775285 , 0.6731909 , 0.61250603, 1.0589464 , 0.5932575 ,
       0.94723815, 0.7952991 , 0.21066497, 0.96343386, 0.587761  ],
      dtype=float32)

In [37]:
# Compute pairwise distances using Euclidean distance metric
pairwise_distances = cdist(vector1, vector2, metric='euclidean')

print("Pairwise distances:")
print(pairwise_distances)

Pairwise distances:
[[1.27752856]
 [0.67319091]
 [0.61250603]
 [1.0589464 ]
 [0.59325747]
 [0.94723814]
 [0.79529909]
 [0.21066497]
 [0.96343388]
 [0.58776098]]


In [16]:
pairwise_distances.shape

(10, 1)

# FAISS is dangerous

...and slower than USearch.

In [51]:
import faiss

In [54]:
# Concatenate the vectors into a single array
all_vectors = np.concatenate((vectors, vector.reshape(1,-1)), axis=0)

# Initialize Faiss index
d = all_vectors.shape[1]  # Dimension of vectors
index = faiss.IndexFlatL2(d)  # L2 distance (Euclidean distance)

# Add vectors to the index
index.add(all_vectors)

# Query for the nearest neighbors (in this case, excluding the vector itself)
k = all_vectors.shape[0] - 1  # Return distances to all other vectors
D, I = index.search(all_vectors, k)

# The first row of D corresponds to distances from vector1 to all vectors,
# and the second row corresponds to distances from vector2 to all vectors
pairwise_distances = D[1]

print("Pairwise distances:")
print(pairwise_distances)

Pairwise distances:
[  0.      147.81537 149.88068 ... 188.24158 188.37915 188.89383]


In [52]:
# Concatenate the vectors into a single array
all_vectors = np.concatenate((vector1, vector2), axis=0)

# Initialize Faiss index
d = all_vectors.shape[1]  # Dimension of vectors
index = faiss.IndexFlatL2(d)  # L2 distance (Euclidean distance)

# Add vectors to the index
index.add(all_vectors)

# Query for the nearest neighbors (in this case, excluding the vector itself)
k = all_vectors.shape[0] - 1  # Return distances to all other vectors
D, I = index.search(all_vectors, k)

# The first row of D corresponds to distances from vector1 to all vectors,
# and the second row corresponds to distances from vector2 to all vectors
pairwise_distances = D[1]

print("Pairwise distances:")
print(pairwise_distances)


Pairwise distances:
[0.         0.29514208 0.30921572 0.32049134 0.32110634 0.45318595
 0.69156986 0.7355278  0.94303757 1.1888701 ]


# Excuse for FAISS performance

https://medium.com/mlearning-ai/why-you-should-be-careful-using-faiss-c44996eda9ee