Support RAFT from python #2864

Closed · wants to merge 1 commit
9 changes: 9 additions & 0 deletions faiss/gpu/GpuCloner.cpp
@@ -121,6 +121,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = useFloat16;
config.use_raft = use_raft;
return new GpuIndexFlat(provider, ifl, config);
} else if (
dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +130,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
GpuIndexFlatConfig config;
config.device = device;
config.useFloat16 = true;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");
GpuIndexFlat* gif = new GpuIndexFlat(
provider, index->d, index->metric_type, config);
// transfer data by blocks
@@ -146,6 +149,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +167,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.device = device;
config.indicesOptions = indicesOptions;
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
provider,
@@ -194,6 +201,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
config.useFloat16LookupTables = useFloat16;
config.usePrecomputedTables = usePrecomputed;
FAISS_THROW_IF_NOT_MSG(
!use_raft, "this type of index is not implemented for RAFT");

GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);

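The FAISS_THROW_IF_NOT_MSG guards above make unsupported use_raft combinations fail loudly instead of being silently ignored. A hedged Python sketch of the expected behavior (the index choice and error handling are illustrative, not part of this diff):

import faiss

# IVFPQ has no RAFT path in this change, so cloning it with use_raft=True
# should hit the guard in clone_Index and raise.
index_cpu = faiss.index_factory(64, "IVF128,PQ8")
res = faiss.StandardGpuResources()
co = faiss.GpuClonerOptions()
co.use_raft = True  # field added in GpuClonerOptions.h below
try:
    faiss.index_cpu_to_gpu(res, 0, index_cpu, co)
except RuntimeError as e:
    print(e)  # expected: "... this type of index is not implemented for RAFT"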
3 changes: 3 additions & 0 deletions faiss/gpu/GpuClonerOptions.h
@@ -36,6 +36,9 @@ struct GpuClonerOptions {

/// Set verbose options on the index
bool verbose = false;

/// use the RAFT implementation
bool use_raft = false;
};

struct GpuMultipleClonerOptions : public GpuClonerOptions {
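With use_raft exposed on GpuClonerOptions, the RAFT path can be selected when cloning a flat index from Python. A minimal sketch, assuming the SWIG wrappers pick up the new field (data and sizes are illustrative):

import faiss
import numpy as np

d = 64
xb = np.random.rand(10000, d).astype('float32')

index_cpu = faiss.IndexFlatL2(d)
index_cpu.add(xb)

res = faiss.StandardGpuResources()
co = faiss.GpuClonerOptions()
co.use_raft = True  # dispatch the cloned index to the RAFT implementation
index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu, co)
D, I = index_gpu.search(xb[:5], 10)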
56 changes: 18 additions & 38 deletions faiss/gpu/GpuDistance.h
@@ -28,44 +28,24 @@ enum class IndicesDataType {

/// Arguments to brute-force GPU k-nearest neighbor searching
struct GpuDistanceParams {
GpuDistanceParams()
: metric(faiss::MetricType::METRIC_L2),
metricArg(0),
k(0),
dims(0),
vectors(nullptr),
vectorType(DistanceDataType::F32),
vectorsRowMajor(true),
numVectors(0),
vectorNorms(nullptr),
queries(nullptr),
queryType(DistanceDataType::F32),
queriesRowMajor(true),
numQueries(0),
outDistances(nullptr),
ignoreOutDistances(false),
outIndicesType(IndicesDataType::I64),
outIndices(nullptr),
device(-1) {}

//
// Search parameters
//

/// Search parameter: distance metric
faiss::MetricType metric;
faiss::MetricType metric = METRIC_L2;

/// Search parameter: distance metric argument (if applicable)
/// For metric == METRIC_Lp, this is the p-value
float metricArg;
float metricArg = 0;

/// Search parameter: return k nearest neighbors
/// If the value provided is -1, then we report all pairwise distances
/// without top-k filtering
int k;
int k = 0;

/// Vector dimensionality
int dims;
int dims = 0;

//
// Vectors being queried
@@ -74,14 +54,14 @@ struct GpuDistanceParams {
/// If vectorsRowMajor is true, this is
/// numVectors x dims, with dims innermost; otherwise,
/// dims x numVectors, with numVectors innermost
const void* vectors;
DistanceDataType vectorType;
bool vectorsRowMajor;
idx_t numVectors;
const void* vectors = nullptr;
DistanceDataType vectorType = DistanceDataType::F32;
bool vectorsRowMajor = true;
idx_t numVectors = 0;

/// Precomputed L2 norms for each vector in `vectors`, which can be
/// optionally provided in advance to speed computation for METRIC_L2
const float* vectorNorms;
const float* vectorNorms = nullptr;

//
// The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +71,10 @@
/// If queriesRowMajor is true, this is
/// numQueries x dims, with dims innermost; otherwise,
/// dims x numQueries, with numQueries innermost
const void* queries;
DistanceDataType queryType;
bool queriesRowMajor;
idx_t numQueries;
const void* queries = nullptr;
DistanceDataType queryType = DistanceDataType::F32;
bool queriesRowMajor = true;
idx_t numQueries = 0;

//
// Output results
@@ -103,16 +83,16 @@
/// A region of memory size numQueries x k, with k
/// innermost (row major) if k > 0, or if k == -1, a region of memory of
/// size numQueries x numVectors
float* outDistances;
float* outDistances = nullptr;

/// Do we only care about the indices reported, rather than the output
/// distances? Not used if k == -1 (all pairwise distances)
bool ignoreOutDistances;
bool ignoreOutDistances = false;

/// A region of memory size numQueries x k, with k
/// innermost (row major). Not used if k == -1 (all pairwise distances)
IndicesDataType outIndicesType;
void* outIndices;
IndicesDataType outIndicesType = IndicesDataType::I64;
void* outIndices = nullptr;

//
// Execution information
@@ -123,7 +103,7 @@
/// (via cudaGetDevice/cudaSetDevice) is used
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
/// execution
int device;
int device = -1;

/// Should the index dispatch down to RAFT?
bool use_raft = false;
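Replacing the constructor with default member initializers means a GpuDistanceParams can be filled in field by field, which is how the Python brute-force path drives it. A hedged sketch that mirrors the pattern used by faiss.knn_gpu (array sizes are illustrative):

import faiss
import numpy as np

d, nb, nq, k = 32, 5000, 100, 10
xb = np.random.rand(nb, d).astype('float32')
xq = np.random.rand(nq, d).astype('float32')
D = np.empty((nq, k), dtype='float32')
I = np.empty((nq, k), dtype='int64')

res = faiss.StandardGpuResources()
args = faiss.GpuDistanceParams()  # defaults: METRIC_L2, F32, row-major, device -1
args.k = k
args.dims = d
args.numVectors = nb
args.vectors = faiss.swig_ptr(xb)
args.numQueries = nq
args.queries = faiss.swig_ptr(xq)
args.outDistances = faiss.swig_ptr(D)
args.outIndices = faiss.swig_ptr(I)
# args.use_raft = True  # opt into the RAFT backend where available
faiss.bfKnn(res, args)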
17 changes: 17 additions & 0 deletions faiss/gpu/GpuIndex.cu
@@ -514,4 +514,21 @@ bool isGpuIndexImplemented(faiss::Index* index) {
}

} // namespace gpu

// This is the one defined in utils.cpp
// Crossing fingers that the InitGpuOptions_instance will
// be instantiated after this global variable
extern std::string gpu_options;

struct InitGpuOptions {
InitGpuOptions() {
gpu_options = "GPU ";
#ifdef USE_NVIDIA_RAFT
gpu_options += "NVIDIA_RAFT ";
#endif
}
};

InitGpuOptions InitGpuOptions_instance;

} // namespace faiss
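The InitGpuOptions static appends "GPU" (and "NVIDIA_RAFT" when compiled with RAFT) to the compile-options string, so Python code can feature-detect the build at runtime. A short sketch of that check:

import faiss

opts = faiss.get_compile_options().split()
print("GPU build:", "GPU" in opts)
print("RAFT build:", "NVIDIA_RAFT" in opts)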
6 changes: 2 additions & 4 deletions faiss/gpu/GpuIndex.h
@@ -29,15 +29,13 @@ namespace faiss {
namespace gpu {

struct GpuIndexConfig {
inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}

/// GPU device on which the index is resident
int device;
int device = 0;

/// What memory space to use for primary storage.
/// On Pascal and above (CC 6+) architectures, allows GPUs to use
/// more memory than is available on the GPU.
MemorySpace memorySpace;
MemorySpace memorySpace = MemorySpace::Device;

/// Should the index dispatch down to RAFT?
bool use_raft = false;
6 changes: 2 additions & 4 deletions faiss/gpu/GpuIndexFlat.h
@@ -24,15 +24,13 @@ namespace gpu {
class FlatIndex;

struct GpuIndexFlatConfig : public GpuIndexConfig {
inline GpuIndexFlatConfig() : useFloat16(false) {}

/// Whether or not data is stored as float16
bool useFloat16;
bool useFloat16 = false;

/// Deprecated: no longer used
/// Previously used to indicate whether internal storage of vectors is
/// transposed
bool storeTransposed;
bool storeTransposed = false;
};

/// Wrapper around the GPU implementation that looks like
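Since GpuIndexFlatConfig inherits the use_raft flag from GpuIndexConfig, a GPU flat index can also be built directly with a RAFT-enabled config. A hedged sketch, assuming the bindings expose the flag (dimensions and data are illustrative):

import faiss
import numpy as np

d = 64
res = faiss.StandardGpuResources()
cfg = faiss.GpuIndexFlatConfig()  # device=0, MemorySpace::Device, useFloat16=False by default
cfg.use_raft = True
index = faiss.GpuIndexFlat(res, d, faiss.METRIC_L2, cfg)
index.add(np.random.rand(1000, d).astype('float32'))
D, I = index.search(np.random.rand(5, d).astype('float32'), 4)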
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVF.h
@@ -21,10 +21,8 @@ class GpuIndexFlat;
class IVFBase;

struct GpuIndexIVFConfig : public GpuIndexConfig {
inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}

/// Index storage options for the GPU
IndicesOptions indicesOptions;
IndicesOptions indicesOptions = INDICES_64_BIT;

/// Configuration for the coarse quantizer object
GpuIndexFlatConfig flatConfig;
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVFFlat.h
@@ -21,11 +21,9 @@ class IVFFlat;
class GpuIndexFlat;

struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}

/// Use the alternative memory layout for the IVF lists
/// (currently the default)
bool interleavedLayout;
bool interleavedLayout = true;
};

/// Wrapper around the GPU implementation that looks like
14 changes: 4 additions & 10 deletions faiss/gpu/GpuIndexIVFPQ.h
@@ -23,24 +23,18 @@ class GpuIndexFlat;
class IVFPQ;

struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFPQConfig()
: useFloat16LookupTables(false),
usePrecomputedTables(false),
interleavedLayout(false),
useMMCodeDistance(false) {}

/// Whether or not float16 residual distance tables are used in the
/// list scanning kernels. When subQuantizers * 2^bitsPerCode >
/// 16384, this is required.
bool useFloat16LookupTables;
bool useFloat16LookupTables = false;

/// Whether or not we enable the precomputed table option for
/// search, which can substantially increase the memory requirement.
bool usePrecomputedTables;
bool usePrecomputedTables = false;

/// Use the alternative memory layout for the IVF lists
/// WARNING: this is a feature under development, do not use!
bool interleavedLayout;
bool interleavedLayout = false;

/// Use GEMM-backed computation of PQ code distances for the no precomputed
/// table version of IVFPQ.
@@ -50,7 +44,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
/// Note that MM code distance is enabled automatically if one uses a number
/// of dimensions per sub-quantizer that is not natively specialized (an odd
/// number like 7 or so).
bool useMMCodeDistance;
bool useMMCodeDistance = false;
};

/// IVFPQ index for the GPU
4 changes: 1 addition & 3 deletions faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -18,11 +18,9 @@ class IVFFlat;
class GpuIndexFlat;

struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}

/// Use the alternative memory layout for the IVF lists
/// (currently the default)
bool interleavedLayout;
bool interleavedLayout = true;
};

/// Wrapper around the GPU implementation that looks like
18 changes: 7 additions & 11 deletions faiss/gpu/GpuResources.h
@@ -102,11 +102,7 @@ std::string memorySpaceToString(MemorySpace s);

/// Information on what/where an allocation is
struct AllocInfo {
inline AllocInfo()
: type(AllocType::Other),
device(0),
space(MemorySpace::Device),
stream(nullptr) {}
inline AllocInfo() {}

inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
: type(at), device(dev), space(sp), stream(st) {}
@@ -115,13 +111,13 @@ struct AllocInfo {
std::string toString() const;

/// The internal category of the allocation
AllocType type;
AllocType type = AllocType::Other;

/// The device on which the allocation is happening
int device;
int device = 0;

/// The memory space of the allocation
MemorySpace space;
MemorySpace space = MemorySpace::Device;

/// The stream on which new work on the memory will be ordered (e.g., if a
/// piece of memory cached and to be returned for this call was last used on
@@ -131,7 +127,7 @@ ///
///
/// The memory manager guarantees that the returned memory is free to use
/// without data races on this stream specified.
cudaStream_t stream;
cudaStream_t stream = nullptr;
};

/// Create an AllocInfo for the current device with MemorySpace::Device
@@ -145,7 +141,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);

/// Information on what/where an allocation is, along with how big it should be
struct AllocRequest : public AllocInfo {
inline AllocRequest() : AllocInfo(), size(0) {}
inline AllocRequest() {}

inline AllocRequest(const AllocInfo& info, size_t sz)
: AllocInfo(info), size(sz) {}
@@ -162,7 +158,7 @@ struct AllocRequest : public AllocInfo {
std::string toString() const;

/// The size in bytes of the allocation
size_t size;
size_t size = 0;
};

/// A RAII object that manages a temporary memory request
6 changes: 6 additions & 0 deletions faiss/gpu/test/test_gpu_basics.py
@@ -426,3 +426,9 @@ def test_with_gpu(self):
self.assertTrue(0.9 * err_rq0 < err_rq1 < 1.1 * err_rq0)

# np.testing.assert_array_equal(codes0, codes1)


class TestGpuFlags(unittest.TestCase):

def test_gpu_flag(self):
assert "GPU" in faiss.get_compile_options().split()