In [53]:
# libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.spatial.distance import cdist
from usearch.index import Index, search, MetricKind, Matches, BatchMatches

In [146]:
# Reproducible synthetic data: N_VECTORS database rows and a single query row,
# each with N_DIMS features, drawn from a standard normal distribution.
# NOTE: outputs saved in this notebook may predate the fixed seed; re-run to refresh them.
SEED = 42
N_VECTORS = 100_000
N_DIMS = 500
TOP_K = 50  # number of nearest neighbours to retrieve

rng = np.random.default_rng(SEED)
vectors = rng.normal(size=(N_VECTORS, N_DIMS))
vector = rng.normal(size=(1, N_DIMS))  # kept 2-D: one query row

# Exact search (exact=True) for the TOP_K rows of `vectors` nearest to `vector`.
# MetricKind.L2sq yields *squared* Euclidean distances, so take np.sqrt of the
# result before comparing it with plain Euclidean distances.
one_in_many: Matches = search(vectors, vector, TOP_K, MetricKind.L2sq, exact=True)

In [148]:
# The search used MetricKind.L2sq, so the reported distances are squared;
# the square root converts them to plain Euclidean distances.
euclidean_distances = np.sqrt(one_in_many.distances)
euclidean_distances

array([27.50897 , 27.542465, 27.67461 , 27.762684, 27.831715, 27.873785,
       27.882494, 27.915936, 27.926235, 27.926502, 27.935497, 28.073334,
       28.073772, 28.09972 , 28.118332, 28.121338, 28.14302 , 28.153204,
       28.177824, 28.193396, 28.196007, 28.208061, 28.219082, 28.220667,
       28.224743, 28.249775, 28.257025, 28.26767 , 28.274187, 28.27597 ,
       28.28245 , 28.307507, 28.308802, 28.31129 , 28.313234, 28.320265,
       28.324936, 28.343542, 28.34546 , 28.349876, 28.350168, 28.356617,
       28.362871, 28.36302 , 28.372284, 28.37434 , 28.387037, 28.387089,
       28.39362 , 28.398333], dtype=float32)

In [149]:
# List the matches as (row index in `vectors`, squared L2 distance) pairs;
# the first pair is the nearest neighbour of the query.
neighbor_pairs = one_in_many.to_list()
neighbor_pairs

[(62471, 756.743408203125),
 (88511, 758.5873413085938),
 (9091, 765.8840942382812),
 (78912, 770.7666015625),
 (46735, 774.6043090820312),
 (4291, 776.9479370117188),
 (97507, 777.4334716796875),
 (55547, 779.2994995117188),
 (7021, 779.8745727539062),
 (97230, 779.8895263671875),
 (16934, 780.3919677734375),
 (42857, 788.112060546875),
 (67145, 788.13671875),
 (60196, 789.5942993164062),
 (99628, 790.640625),
 (46152, 790.8096923828125),
 (98318, 792.0296630859375),
 (31877, 792.6028442382812),
 (90875, 793.9898071289062),
 (91399, 794.8675537109375),
 (66344, 795.0148315429688),
 (56346, 795.6947631835938),
 (32807, 796.3165283203125),
 (86459, 796.4060668945312),
 (58338, 796.6361083984375),
 (23148, 798.0498046875),
 (65232, 798.4594116210938),
 (34587, 799.0611572265625),
 (16259, 799.4296875),
 (53502, 799.5304565429688),
 (63853, 799.8969116210938),
 (23955, 801.3148803710938),
 (80420, 801.3882446289062),
 (690, 801.5292358398438),
 (89421, 801.6392211914062),
 (71853, 802.037

The first entry of the list above is the nearest neighbour: among the rows of `vectors`, the one closest to `vector` has index 62471 (squared L2 distance ≈ 756.74).

In [150]:
# Reference computation with SciPy: Euclidean distance from every row of
# `vectors` to the query. cdist expects two 2-D arrays, so the query is
# forced into a single-row matrix first.
query_row = vector.reshape(1, -1)
pairwise_distances = cdist(vectors, query_row, metric="euclidean")


### SciPy's `cdist` function computes the same distances as the usearch exact search

In [151]:
# Flatten and sort the reference distances in ascending order, then keep the
# 50 smallest for a side-by-side comparison with the usearch result.
closest_reference_distances = np.sort(pairwise_distances, axis=None)[:50]
closest_reference_distances

array([27.50896934, 27.5424647 , 27.67461108, 27.76268412, 27.83171455,
       27.87378531, 27.88249395, 27.91593608, 27.92623458, 27.92650213,
       27.93549644, 28.07333346, 28.07377298, 28.09972017, 28.11833298,
       28.12133849, 28.14302201, 28.15320344, 28.1778244 , 28.19339526,
       28.19600717, 28.20806207, 28.21908104, 28.22066716, 28.2247423 ,
       28.24977517, 28.25702424, 28.26767002, 28.27418778, 28.27596947,
       28.28244932, 28.30750543, 28.30880194, 28.31129113, 28.31323456,
       28.32026408, 28.32493508, 28.34354224, 28.34546068, 28.34987672,
       28.35016789, 28.35661834, 28.36287239, 28.36301906, 28.37228275,
       28.37434085, 28.38703636, 28.38708856, 28.39362008, 28.39833346])