In [2]:
import numpy as np
import faiss

# Generate random data for demonstration
np.random.seed(0)
dataset = np.random.random((1000, 128)).astype('float32')  # Sample dataset with 1000 vectors of dimension 128

# Initialize Faiss IndexLSH for binary hashing
num_bits = 8  # Number of bits for the hash codes
num_hashes = 4  # Number of hashes
lsh_index = faiss.IndexLSH(128, num_bits)
lsh_index.train(dataset)
lsh_index.add(dataset)

# Initialize Faiss IndexFlatL2 for exact search with Euclidean distance
euclidean_index = faiss.IndexFlatL2(128)
euclidean_index.add(dataset)

# Query for nearest neighbors using LSH first, then refine using Euclidean distance
query_vector = np.random.random((1, 128)).astype('float32')  # Example query vector
k = 5  # Number of nearest neighbors to retrieve

# LSH search
lsh_distances, lsh_indices = lsh_index.search(query_vector, k)

# Refine using Euclidean distance
refined_distances, refined_indices = euclidean_index.search(query_vector, k)

print("Indices of nearest neighbors (LSH):", lsh_indices)
print("Distances to nearest neighbors (LSH):", lsh_distances)

print("Indices of nearest neighbors (Refined Euclidean):", refined_indices)
print("Distances to nearest neighbors (Refined Euclidean):", refined_distances)

Indices of nearest neighbors (LSH): [[ 17  30  75 118 135]]
Distances to nearest neighbors (LSH): [[0. 0. 0. 0. 0.]]
Indices of nearest neighbors (Refined Euclidean): [[668  46  28 481 912]]
Distances to nearest neighbors (Refined Euclidean): [[14.776387 14.972264 15.160928 15.395977 15.492122]]


In [3]:
x_data, y_data = np.mgrid[0:6, 2:7]
kd_tree = spatial.KDTree(np.c_[x_data.ravel(), y_data.ravel()])

In [4]:
d, i = kd_tree.query([[0, 0], [1.1, 1.9]], k=1)
print(d, i, sep='\n')

[2.         0.14142136]
[0 5]
