Skip to content

Commit

Permalink
fix omp parallelism in fast scan range search
Browse files Browse the repository at this point in the history
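Summary: Fix OpenMP n^2 parallelism in fast scan range search by replacing the `#pragma omp parallel for` on the slice loop with `#pragma omp for`.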

Reviewed By: mdouze

Differential Revision: D53705601

fbshipit-source-id: 3fcc2368c436185119f6e988ee2867dfd7d8eb07
  • Loading branch information
algoriddle authored and facebook-github-bot committed Feb 13, 2024
1 parent 8898eab commit f262011
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 1 deletion.
2 changes: 1 addition & 1 deletion faiss/IndexIVFFastScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ void IndexIVFFastScan::range_search_dispatch_implem(
{
RangeSearchPartialResult pres(&rres);

#pragma omp parallel for reduction(+ : ndis, nlist_visited)
#pragma omp for reduction(+ : ndis, nlist_visited)
for (int slice = 0; slice < nslice; slice++) {
idx_t i0 = n * slice / nslice;
idx_t i1 = n * (slice + 1) / nslice;
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ set(FAISS_TEST_SRC
test_code_distance.cpp
test_hnsw.cpp
test_partitioning.cpp
test_fastscan_perf.cpp
)

add_executable(faiss_test ${FAISS_TEST_SRC})
Expand Down
70 changes: 70 additions & 0 deletions tests/test_fastscan_perf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <gtest/gtest.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <random>
#include <vector>

#include <omp.h>

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQFastScan.h>
#include <faiss/impl/AuxIndexStructures.h>

/**
 * Performance sanity check: on the same small IndexIVFPQFastScan index, a
 * 1-nearest-neighbour search and a range search (radius 1.0) over the same
 * queries must take comparable wall-clock time (each strictly less than
 * twice the other) when OpenMP is configured with 8 threads.
 */
TEST(TestFastScan, knnVSrange) {
    // small vectors and database
    const int d = 64;
    const size_t nb = 1000;

    // ivf centroids
    const size_t nlist = 4;

    // more than 2 threads to surface problems related to multi-threading.
    // The thread count is process-wide OpenMP state, so it is restored when
    // the test exits (including the early return taken by a failing ASSERT_*)
    // to avoid leaking the setting into other tests of the same executable.
    struct OmpThreadsGuard {
        explicit OmpThreadsGuard(int num_threads)
                : saved(omp_get_max_threads()) {
            omp_set_num_threads(num_threads);
        }
        ~OmpThreadsGuard() {
            omp_set_num_threads(saved);
        }
        OmpThreadsGuard(const OmpThreadsGuard&) = delete;
        OmpThreadsGuard& operator=(const OmpThreadsGuard&) = delete;

        int saved;
    };
    const OmpThreadsGuard omp_threads_guard(8);

    // random database, also used as queries
    // (default-seeded engine, so the data is identical on every run)
    std::vector<float> database(nb * d);
    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;
    for (size_t i = 0; i < nb * d; i++) {
        database[i] = static_cast<float>(distrib(rng));
    }

    // build index
    faiss::IndexFlatL2 coarse_quantizer(d);
    faiss::IndexIVFPQFastScan index(
            &coarse_quantizer, d, nlist, d / 2, 4, faiss::METRIC_L2, 32);
    index.pq.cp.niter = 10; // speed up train
    index.nprobe = nlist;   // probe as many lists as the index has
    index.train(nb, database.data());
    index.add(nb, database.data());

    // steady_clock is monotonic, which is what interval timing needs.
    // Durations are taken in microseconds: with millisecond truncation both
    // timings of such a small workload can round down to 0, and the
    // comparisons below would then fail on `0 < 0`.
    using Clock = std::chrono::steady_clock;
    const auto elapsed_us = [](Clock::time_point start) {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                       Clock::now() - start)
                .count();
    };

    // k-NN search with k = 1, one result slot per query
    std::vector<float> distances(nb);
    std::vector<faiss::idx_t> labels(nb);
    auto t = Clock::now();
    index.search(nb, database.data(), 1, distances.data(), labels.data());
    const auto knn_time = elapsed_us(t);

    // range search with radius 1.0 over the same queries
    faiss::RangeSearchResult rsr(nb);
    t = Clock::now();
    index.range_search(nb, database.data(), 1.0, &rsr);
    const auto range_time = elapsed_us(t);

    // we expect the perf of knn and range search
    // to be similar, at least within a factor of 2
    ASSERT_LT(range_time, knn_time * 2);
    ASSERT_LT(knn_time, range_time * 2);
}

0 comments on commit f262011

Please sign in to comment.