Skip to content

Commit

Permalink
Support RAFT from python
Browse files Browse the repository at this point in the history
Summary:
Adds use_raft to the cloner options.
Adds tests for the python interface.

Also continues the cleanup of data structures to use default member initializers.
Adds the GPU and NVIDIA_RAFT flags to get_compile_options().

Differential Revision: D45943372

fbshipit-source-id: cafe8b53e43cb8ea65a0f02d0a53e03ca4877723
  • Loading branch information
mdouze authored and facebook-github-bot committed May 17, 2023
1 parent 615e3fc commit fac0b96
Show file tree
Hide file tree
Showing 15 changed files with 148 additions and 80 deletions.
9 changes: 9 additions & 0 deletions faiss/gpu/GpuCloner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.use_raft = use_raft;
return new GpuIndexFlat(provider, ifl, config);
} else if (
dynamic_cast<const IndexScalarQuantizer*>(index) &&
Expand All @@ -129,6 +130,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = true;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");
GpuIndexFlat* gif = new GpuIndexFlat(
provider, index->d, index->metric_type, config);
// transfer data by blocks
Expand All @@ -146,6 +149,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
Expand All @@ -162,6 +167,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
provider,
Expand Down Expand Up @@ -194,6 +201,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
config.useFloat16LookupTables = useFloat16;
config.usePrecomputedTables = usePrecomputed;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);

Expand Down
4 changes: 4 additions & 0 deletions faiss/gpu/GpuClonerOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ struct GpuClonerOptions {

/// Set verbose options on the index
bool verbose = false;

/// use the RAFT implementation
bool use_raft = false;

};

struct GpuMultipleClonerOptions : public GpuClonerOptions {
Expand Down
56 changes: 18 additions & 38 deletions faiss/gpu/GpuDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,24 @@ enum class IndicesDataType {

/// Arguments to brute-force GPU k-nearest neighbor searching
struct GpuDistanceParams {
GpuDistanceParams()
: metric(faiss::MetricType::METRIC_L2),
metricArg(0),
k(0),
dims(0),
vectors(nullptr),
vectorType(DistanceDataType::F32),
vectorsRowMajor(true),
numVectors(0),
vectorNorms(nullptr),
queries(nullptr),
queryType(DistanceDataType::F32),
queriesRowMajor(true),
numQueries(0),
outDistances(nullptr),
ignoreOutDistances(false),
outIndicesType(IndicesDataType::I64),
outIndices(nullptr),
device(-1) {}

//
// Search parameters
//

/// Search parameter: distance metric
faiss::MetricType metric;
faiss::MetricType metric = METRIC_L2;

/// Search parameter: distance metric argument (if applicable)
/// For metric == METRIC_Lp, this is the p-value
float metricArg;
float metricArg = 0;

/// Search parameter: return k nearest neighbors
/// If the value provided is -1, then we report all pairwise distances
/// without top-k filtering
int k;
int k = 0;

/// Vector dimensionality
int dims;
int dims = 0;

//
// Vectors being queried
Expand All @@ -74,14 +54,14 @@ struct GpuDistanceParams {
/// If vectorsRowMajor is true, this is
/// numVectors x dims, with dims innermost; otherwise,
/// dims x numVectors, with numVectors innermost
const void* vectors;
DistanceDataType vectorType;
bool vectorsRowMajor;
idx_t numVectors;
const void* vectors = nullptr;
DistanceDataType vectorType = DistanceDataType::F32;
bool vectorsRowMajor = true;
idx_t numVectors = 0;

/// Precomputed L2 norms for each vector in `vectors`, which can be
/// optionally provided in advance to speed computation for METRIC_L2
const float* vectorNorms;
const float* vectorNorms = nullptr;

//
// The query vectors (i.e., find k-nearest neighbors in `vectors` for each
Expand All @@ -91,10 +71,10 @@ struct GpuDistanceParams {
/// If queriesRowMajor is true, this is
/// numQueries x dims, with dims innermost; otherwise,
/// dims x numQueries, with numQueries innermost
const void* queries;
DistanceDataType queryType;
bool queriesRowMajor;
idx_t numQueries;
const void* queries = nullptr;
DistanceDataType queryType = DistanceDataType::F32;
bool queriesRowMajor = true;
idx_t numQueries = 0;

//
// Output results
Expand All @@ -103,16 +83,16 @@ struct GpuDistanceParams {
/// A region of memory size numQueries x k, with k
/// innermost (row major) if k > 0, or if k == -1, a region of memory of
/// size numQueries x numVectors
float* outDistances;
float* outDistances = nullptr;

/// Do we only care about the indices reported, rather than the output
/// distances? Not used if k == -1 (all pairwise distances)
bool ignoreOutDistances;
bool ignoreOutDistances = false;

/// A region of memory size numQueries x k, with k
/// innermost (row major). Not used if k == -1 (all pairwise distances)
IndicesDataType outIndicesType;
void* outIndices;
IndicesDataType outIndicesType = IndicesDataType::I64;
void* outIndices = nullptr;

//
// Execution information
Expand All @@ -123,7 +103,7 @@ struct GpuDistanceParams {
/// (via cudaGetDevice/cudaSetDevice) is used
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
/// execution
int device;
int device = -1;

/// Should the index dispatch down to RAFT?
bool use_raft = false;
Expand Down
17 changes: 17 additions & 0 deletions faiss/gpu/GpuIndex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -514,4 +514,21 @@ bool isGpuIndexImplemented(faiss::Index* index) {
}

} // namespace gpu

// `gpu_options` is the global defined in faiss/utils.cpp; it is appended to
// by optional-feature modules so get_compile_options() can report which
// features this build supports.
// NOTE(review): correctness relies on InitGpuOptions_instance being
// dynamically initialized AFTER the std::string in utils.cpp; cross-TU
// dynamic initialization order is unspecified in C++ (the classic "static
// initialization order fiasco"), which the original comment acknowledges
// ("crossing fingers"). TODO: confirm this is safe, or switch utils.cpp to
// a function-local static accessor.
extern std::string gpu_options;

// Helper whose constructor records, at program start-up, that this build
// includes GPU support (and NVIDIA RAFT support when compiled with
// USE_NVIDIA_RAFT) into `gpu_options`.
struct InitGpuOptions {
    InitGpuOptions() {
        // Overwrites (rather than appends) so the GPU flags are registered
        // exactly once, regardless of any prior contents.
        gpu_options = "GPU ";
#ifdef USE_NVIDIA_RAFT
        gpu_options += "NVIDIA_RAFT ";
#endif
    }
};

// Global instance: its constructor runs during start-up and registers the
// GPU feature flags reported by get_compile_options().
InitGpuOptions InitGpuOptions_instance;

} // namespace faiss
6 changes: 2 additions & 4 deletions faiss/gpu/GpuIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,13 @@ namespace faiss {
namespace gpu {

struct GpuIndexConfig {
inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}

/// GPU device on which the index is resident
int device;
int device = 0;

/// What memory space to use for primary storage.
/// On Pascal and above (CC 6+) architectures, allows GPUs to use
/// more memory than is available on the GPU.
MemorySpace memorySpace;
MemorySpace memorySpace = MemorySpace::Device;

/// Should the index dispatch down to RAFT?
bool use_raft = false;
Expand Down
6 changes: 2 additions & 4 deletions faiss/gpu/GpuIndexFlat.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,13 @@ namespace gpu {
class FlatIndex;

struct GpuIndexFlatConfig : public GpuIndexConfig {
inline GpuIndexFlatConfig() : useFloat16(false) {}

/// Whether or not data is stored as float16
bool useFloat16;
bool useFloat16 = false;

/// Deprecated: no longer used
/// Previously used to indicate whether internal storage of vectors is
/// transposed
bool storeTransposed;
bool storeTransposed = false;
};

/// Wrapper around the GPU implementation that looks like
Expand Down
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVF.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@ class GpuIndexFlat;
class IVFBase;

struct GpuIndexIVFConfig : public GpuIndexConfig {
inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}

/// Index storage options for the GPU
IndicesOptions indicesOptions;
IndicesOptions indicesOptions = INDICES_64_BIT;

/// Configuration for the coarse quantizer object
GpuIndexFlatConfig flatConfig;
Expand Down
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVFFlat.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ class IVFFlat;
class GpuIndexFlat;

struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}

/// Use the alternative memory layout for the IVF lists
/// (currently the default)
bool interleavedLayout;
bool interleavedLayout = true;
};

/// Wrapper around the GPU implementation that looks like
Expand Down
14 changes: 4 additions & 10 deletions faiss/gpu/GpuIndexIVFPQ.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,18 @@ class GpuIndexFlat;
class IVFPQ;

struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFPQConfig()
: useFloat16LookupTables(false),
usePrecomputedTables(false),
interleavedLayout(false),
useMMCodeDistance(false) {}

/// Whether or not float16 residual distance tables are used in the
/// list scanning kernels. When subQuantizers * 2^bitsPerCode >
/// 16384, this is required.
bool useFloat16LookupTables;
bool useFloat16LookupTables = false;

/// Whether or not we enable the precomputed table option for
/// search, which can substantially increase the memory requirement.
bool usePrecomputedTables;
bool usePrecomputedTables = false;

/// Use the alternative memory layout for the IVF lists
/// WARNING: this is a feature under development, do not use!
bool interleavedLayout;
bool interleavedLayout = false;

/// Use GEMM-backed computation of PQ code distances for the no precomputed
/// table version of IVFPQ.
Expand All @@ -50,7 +44,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
/// Note that MM code distance is enabled automatically if one uses a number
/// of dimensions per sub-quantizer that is not natively specialized (an odd
/// number like 7 or so).
bool useMMCodeDistance;
bool useMMCodeDistance = false;
};

/// IVFPQ index for the GPU
Expand Down
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVFScalarQuantizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ class IVFFlat;
class GpuIndexFlat;

struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}

/// Use the alternative memory layout for the IVF lists
/// (currently the default)
bool interleavedLayout;
bool interleavedLayout = true;
};

/// Wrapper around the GPU implementation that looks like
Expand Down
18 changes: 7 additions & 11 deletions faiss/gpu/GpuResources.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,7 @@ std::string memorySpaceToString(MemorySpace s);

/// Information on what/where an allocation is
struct AllocInfo {
inline AllocInfo()
: type(AllocType::Other),
device(0),
space(MemorySpace::Device),
stream(nullptr) {}
inline AllocInfo() {}

inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
: type(at), device(dev), space(sp), stream(st) {}
Expand All @@ -115,13 +111,13 @@ struct AllocInfo {
std::string toString() const;

/// The internal category of the allocation
AllocType type;
AllocType type = AllocType::Other;

/// The device on which the allocation is happening
int device;
int device = 0;

/// The memory space of the allocation
MemorySpace space;
MemorySpace space = MemorySpace::Device;

/// The stream on which new work on the memory will be ordered (e.g., if a
/// piece of memory cached and to be returned for this call was last used on
Expand All @@ -131,7 +127,7 @@ struct AllocInfo {
///
/// The memory manager guarantees that the returned memory is free to use
/// without data races on this stream specified.
cudaStream_t stream;
cudaStream_t stream = nullptr;
};

/// Create an AllocInfo for the current device with MemorySpace::Device
Expand All @@ -145,7 +141,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);

/// Information on what/where an allocation is, along with how big it should be
struct AllocRequest : public AllocInfo {
inline AllocRequest() : AllocInfo(), size(0) {}
inline AllocRequest() {}

inline AllocRequest(const AllocInfo& info, size_t sz)
: AllocInfo(info), size(sz) {}
Expand All @@ -162,7 +158,7 @@ struct AllocRequest : public AllocInfo {
std::string toString() const;

/// The size in bytes of the allocation
size_t size;
size_t size = 0;
};

/// A RAII object that manages a temporary memory request
Expand Down
6 changes: 6 additions & 0 deletions faiss/gpu/test/test_gpu_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,3 +426,9 @@ def test_with_gpu(self):
self.assertTrue(0.9 * err_rq0 < err_rq1 < 1.1 * err_rq0)

# np.testing.assert_array_equal(codes0, codes1)


class TestGpuFlags(unittest.TestCase):
    """Check that the GPU build advertises its feature flags."""

    def test_gpu_flag(self):
        # get_compile_options() returns a space-separated list of feature
        # flags; a GPU-enabled build must advertise "GPU".
        # Use assertIn rather than a bare assert: bare asserts are stripped
        # under `python -O` and give no diagnostic on failure.
        self.assertIn("GPU", faiss.get_compile_options().split())
Loading

0 comments on commit fac0b96

Please sign in to comment.