diff --git a/CHANGELOG.md b/CHANGELOG.md index e61bd997ca..8d289ec2f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ We try to indicate most contributions here with the contributor names who are no the Facebook Faiss team. Feel free to add entries here if you submit a PR. ## [Unreleased] +### Changed +- Previously, when moving indices to GPU with coarse quantizers that were not implemented on GPU, the cloner would silently fall back to CPU. This version now throws an exception instead, and the calling code must explicitly allow the CPU fallback by setting the `allowCpuCoarseQuantizer` flag in the cloner config. ## [1.8.0] - 2024-02-27 ### Added diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index 06ad082272..8f895ac9c7 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -153,6 +153,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; config.use_raft = use_raft; + config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFFlat* res = new GpuIndexIVFFlat( provider, ifl->d, ifl->nlist, ifl->metric_type, config); @@ -205,6 +206,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.usePrecomputedTables = usePrecomputed; config.use_raft = use_raft; config.interleavedLayout = use_raft; + config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config); @@ -214,8 +216,13 @@ Index* ToGpuCloner::clone_Index(const Index* index) { return res; } else { - // default: use CPU cloner - return Cloner::clone_Index(index); + // use CPU cloner for IDMap and PreTransform + auto index_idmap = dynamic_cast(index); + auto index_pt = dynamic_cast(index); + if (index_idmap || index_pt) { + return Cloner::clone_Index(index); + } + FAISS_THROW_MSG("This index type is not implemented on GPU."); } } @@ -224,8 +231,6 @@ faiss::Index* index_cpu_to_gpu( int device, const faiss::Index* index, 
const GpuClonerOptions* options) { - auto index_pq = dynamic_cast(index); - FAISS_THROW_IF_MSG(index_pq, "This index type is not implemented on GPU."); GpuClonerOptions defaults; ToGpuCloner cl(provider, device, options ? *options : defaults); return cl.clone_Index(index); diff --git a/faiss/gpu/GpuClonerOptions.h b/faiss/gpu/GpuClonerOptions.h index 197e09dc88..e643e848fb 100644 --- a/faiss/gpu/GpuClonerOptions.h +++ b/faiss/gpu/GpuClonerOptions.h @@ -43,6 +43,12 @@ struct GpuClonerOptions { #else bool use_raft = false; #endif + + /// This flag controls the CPU fallback logic for the coarse quantizer + /// component of the index. When set to false (default), the cloner will + /// throw an exception for indices not implemented on GPU. When set to + /// true, it will fall back to a CPU implementation. + bool allowCpuCoarseQuantizer = false; }; struct GpuMultipleClonerOptions : public GpuClonerOptions { diff --git a/faiss/gpu/GpuIndexIVF.cu b/faiss/gpu/GpuIndexIVF.cu index 0c5b8db686..40129a54c5 100644 --- a/faiss/gpu/GpuIndexIVF.cu +++ b/faiss/gpu/GpuIndexIVF.cu @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -172,10 +173,29 @@ void GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) { // over to the GPU, on the same device that we are on. GpuResourcesProviderFromInstance pfi(getResources()); - GpuClonerOptions options; - auto cloner = ToGpuCloner(&pfi, getDevice(), options); - - quantizer = cloner.clone_Index(index->quantizer); + // Try to clone the coarse quantizer to GPU. If that fails because it is + // not implemented on GPU, fall back to the CPU cloner when the flag + // allows it, otherwise throw; unrelated errors are re-thrown as-is. 
+ try { + GpuClonerOptions options; + auto cloner = ToGpuCloner(&pfi, getDevice(), options); + quantizer = cloner.clone_Index(index->quantizer); + } catch (const std::exception& e) { + if (strstr(e.what(), "not implemented on GPU")) { + if (ivfConfig_.allowCpuCoarseQuantizer) { + Cloner cpuCloner; + quantizer = cpuCloner.clone_Index(index->quantizer); + } else { + FAISS_THROW_MSG( + "This index type is not implemented on " + "GPU and allowCpuCoarseQuantizer is set to false. " + "Please set the flag to true to allow the CPU " + "fallback in cloning."); + } + } else { + throw; + } + } own_fields = true; } else { // Otherwise, this is a GPU coarse quantizer index instance found in a diff --git a/faiss/gpu/GpuIndexIVF.h b/faiss/gpu/GpuIndexIVF.h index a9f092d35b..65a27aa94e 100644 --- a/faiss/gpu/GpuIndexIVF.h +++ b/faiss/gpu/GpuIndexIVF.h @@ -26,6 +26,12 @@ struct GpuIndexIVFConfig : public GpuIndexConfig { /// Configuration for the coarse quantizer object GpuIndexFlatConfig flatConfig; + + /// This flag controls the CPU fallback logic for the coarse quantizer + /// component of the index. When set to false (default), the cloner will + /// throw an exception for indices not implemented on GPU. When set to + /// true, it will fall back to a CPU implementation. + bool allowCpuCoarseQuantizer = false; }; /// Base class of all GPU IVF index types. 
This (for now) deliberately does not diff --git a/faiss/gpu/test/test_gpu_index.py b/faiss/gpu/test/test_gpu_index.py index 620bfea198..28572ebcb4 100755 --- a/faiss/gpu/test/test_gpu_index.py +++ b/faiss/gpu/test/test_gpu_index.py @@ -589,7 +589,10 @@ class TestGpuAutoTune(unittest.TestCase): def test_params(self): index = faiss.index_factory(32, "IVF65536_HNSW,PQ16") - index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index) + res = faiss.StandardGpuResources() + options = faiss.GpuClonerOptions() + options.allowCpuCoarseQuantizer = True + index = faiss.index_cpu_to_gpu(res, 0, index, options) ps = faiss.GpuParameterSpace() ps.initialize(index) for i in range(ps.parameter_ranges.size()): diff --git a/faiss/gpu/test/test_index_cpu_to_gpu.py b/faiss/gpu/test/test_index_cpu_to_gpu.py index 84c35e2af7..088ea2bf74 100644 --- a/faiss/gpu/test/test_index_cpu_to_gpu.py +++ b/faiss/gpu/test/test_index_cpu_to_gpu.py @@ -4,26 +4,86 @@ class TestMoveToGpu(unittest.TestCase): - def test_index_cpu_to_gpu(self): + + @classmethod + def setUpClass(cls): + cls.res = faiss.StandardGpuResources() + + def create_index(self, factory_string): dimension = 128 n = 2500 db_vectors = np.random.random((n, dimension)).astype('float32') - code_size = 16 - res = faiss.StandardGpuResources() - index_pq = faiss.IndexPQ(dimension, code_size, 6) - index_pq.train(db_vectors) - index_pq.add(db_vectors) - self.assertRaisesRegex(Exception, ".*not implemented.*", - faiss.index_cpu_to_gpu, res, 0, index_pq) - - def test_index_cpu_to_gpu_does_not_throw_with_index_flat(self): - dimension = 128 - n = 100 - db_vectors = np.random.random((n, dimension)).astype('float32') - res = faiss.StandardGpuResources() - index_flat = faiss.IndexFlatL2(dimension) - index_flat.add(db_vectors) + index = faiss.index_factory(dimension, factory_string) + index.train(db_vectors) + if factory_string.startswith("IDMap"): + index.add_with_ids(db_vectors, np.arange(n)) + else: + index.add(db_vectors) + return index + 
+ def create_and_clone(self, factory_string, + allowCpuCoarseQuantizer=None, + use_raft=None): + idx = self.create_index(factory_string) + config = faiss.GpuClonerOptions() + if allowCpuCoarseQuantizer is not None: + config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer + if use_raft is not None: + config.use_raft = use_raft + faiss.index_cpu_to_gpu(self.res, 0, idx, config) + + def verify_throws_not_implemented_exception(self, factory_string): + try: + self.create_and_clone(factory_string) + except Exception as e: + if "not implemented" not in str(e): + self.fail("Expected an exception but no exception was " + "thrown for factory_string: %s." % factory_string) + + def verify_clones_successfully(self, factory_string, + allowCpuCoarseQuantizer=None, + use_raft=None): + try: + self.create_and_clone( + factory_string, + allowCpuCoarseQuantizer=allowCpuCoarseQuantizer, + use_raft=use_raft) + except Exception as e: + self.fail("Unexpected exception thrown factory_string: " + "%s; error message: %s." 
% (factory_string, str(e))) + + def test_not_implemented_indices(self): + self.verify_throws_not_implemented_exception("PQ16") + self.verify_throws_not_implemented_exception("LSHrt") + self.verify_throws_not_implemented_exception("HNSW") + self.verify_throws_not_implemented_exception("HNSW,PQ16") + self.verify_throws_not_implemented_exception("IDMap,PQ16") + self.verify_throws_not_implemented_exception("IVF256,ITQ64,SH1.2") + + def test_implemented_indices(self): + self.verify_clones_successfully("Flat") + self.verify_clones_successfully("IVF1,Flat") + self.verify_clones_successfully("IVF32,PQ8") + self.verify_clones_successfully("IDMap,Flat") + self.verify_clones_successfully("PCA12,IVF32,Flat") + self.verify_clones_successfully("PCA32,IVF32,PQ8") + self.verify_clones_successfully("PCA32,IVF32,PQ8np") + + # set use_raft to false, these index types are not supported on RAFT + self.verify_clones_successfully("IVF32,SQ8", use_raft=False) + self.verify_clones_successfully( + "PCA32,IVF32,SQ8", use_raft=False) + + def test_with_flag(self): + self.verify_clones_successfully("IVF32_HNSW,Flat", + allowCpuCoarseQuantizer=True) + self.verify_clones_successfully("IVF256(PQ2x4fs),Flat", + allowCpuCoarseQuantizer=True) + + def test_with_flag_set_to_false(self): try: - faiss.index_cpu_to_gpu(res, 0, index_flat) - except Exception: - self.fail("index_cpu_to_gpu() threw an unexpected exception.") + self.verify_clones_successfully("IVF32_HNSW,Flat", + allowCpuCoarseQuantizer=False) + except Exception as e: + if "set the flag to true to allow the CPU fallback" not in str(e): + self.fail("Unexepected error message thrown: %s." 
% str(e)) diff --git a/faiss/impl/FaissAssert.h b/faiss/impl/FaissAssert.h index 2aea23e6a8..6f666f684c 100644 --- a/faiss/impl/FaissAssert.h +++ b/faiss/impl/FaissAssert.h @@ -94,13 +94,6 @@ } \ } while (false) -#define FAISS_THROW_IF_MSG(X, MSG) \ - do { \ - if (X) { \ - FAISS_THROW_FMT("Error: '%s' failed: " MSG, #X); \ - } \ - } while (false) - #define FAISS_THROW_IF_NOT_MSG(X, MSG) \ do { \ if (!(X)) { \