From 31bddeb0cfa204791156522383c9470dcf8efbc1 Mon Sep 17 00:00:00 2001 From: Gergely Szilvasy Date: Tue, 30 Jan 2024 01:44:50 -0800 Subject: [PATCH] add faiss_gpu lib (#3222) Summary: 1. Split the GPU code into a separate static library to reduce build times. 2. faiss-gpu now depends on pytorch-cuda to stop clobbering of CUDA libraries when Faiss and Pytorch installed in the same environment 3. Add CUDA 12 (we support both 11.8 and 12.1) 4. Add Python 3.12 Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3222 Reviewed By: mlomeli1 Differential Revision: D53215033 Pulled By: algoriddle fbshipit-source-id: d48c5707af20b7e5ff72c3aa27b0e677259f22f8 --- .circleci/config.yml | 77 +++++++++++++++++++++------------- conda/conda_build_config.yaml | 1 + conda/faiss-gpu-raft/meta.yaml | 7 ++-- conda/faiss-gpu/meta.yaml | 7 ++-- faiss/gpu/CMakeLists.txt | 29 +++++++------ 5 files changed, 72 insertions(+), 49 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 406817e09d..2901894577 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,13 +9,13 @@ executors: environment: CONDA_ARCH: Linux-x86_64 machine: - image: linux-cuda-11:default + image: linux-cuda-12:default resource_class: gpu.nvidia.medium linux-arm64-cpu: environment: CONDA_ARCH: Linux-aarch64 machine: - image: ubuntu-2004:current + image: ubuntu-2204:current resource_class: arm.medium macosx-arm64-cpu: environment: @@ -138,10 +138,9 @@ jobs: name: Conda build (GPU) no_output_timeout: 60m command: | - sudo update-alternatives --set cuda /usr/local/cuda-<> cd conda conda build faiss-gpu --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - -c pytorch -c nvidia + -c pytorch -c nvidia/label/cuda-<> - when: condition: and: @@ -153,10 +152,9 @@ jobs: name: Conda build (GPU) w/ anaconda upload no_output_timeout: 60m command: | - sudo update-alternatives --set cuda /usr/local/cuda-<> cd conda conda build faiss-gpu --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - --user pytorch --label <> -c pytorch -c nvidia + --user pytorch --label <> -c pytorch -c nvidia/label/cuda-<> - when: condition: and: @@ -168,7 +166,6 @@ jobs: name: Conda build (GPU w/ RAFT) no_output_timeout: 60m command: | - sudo update-alternatives --set cuda /usr/local/cuda-<> cd conda conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge @@ -183,10 +180,9 @@ jobs: name: Conda build (GPU w/ RAFT) w/ anaconda upload no_output_timeout: 60m command: | - sudo update-alternatives --set cuda /usr/local/cuda-<> cd conda conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - --user pytorch --label <> -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge + --user pytorch --label <> -c pytorch -c nvidia/label/cuda-<> -c rapidsai-nightly -c conda-forge build_cmake: parameters: @@ -216,27 +212,35 @@ jobs: bash miniconda.sh -b -p $HOME/miniconda ~/miniconda/bin/conda init fi - - when: - condition: - equal: [ "ON", << parameters.gpu >> ] - steps: - - run: - name: Configure CUDA - command: sudo update-alternatives --set cuda /usr/local/cuda-11.4 - run: name: Set up environment command: | conda config --set solver libmamba conda update -y -q conda - conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64 + - when: + condition: + equal: [ "OFF", << parameters.raft >> ] + steps: + - run: + name: Install env using main channel + command: | + conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64 - when: condition: equal: [ "ON", << parameters.raft >> ] steps: - run: - name: Install libraft + name: Install env using conda-forge channel command: | - conda install -y -q libraft cuda-version=11.4 -c rapidsai-nightly -c nvidia -c pkgs/main -c conda-forge + conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64=2.28 libraft cuda-version=11.8 -c rapidsai-nightly -c "nvidia/label/cuda-11.8.0" -c conda-forge + - when: + condition: + equal: [ "ON", << parameters.gpu >> ] + steps: + - run: + name: Install CUDA + command: | + conda install -y -q cuda-nvcc cuda-cudart-dev libcublas libcublas-dev cuda-toolkit -c "nvidia/label/cuda-11.8.0" - run: name: Build all targets no_output_timeout: 30m @@ -245,7 +249,7 @@ jobs: conda activate cmake -B build \ -DBUILD_TESTING=ON \ - -DBUILD_SHARED_LIBS=OFF \ + -DBUILD_SHARED_LIBS=ON \ -DFAISS_ENABLE_GPU=<< parameters.gpu >> \ -DFAISS_ENABLE_RAFT=<< parameters.raft >> \ -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \ @@ -283,7 +287,7 @@ jobs: - run: name: Python tests (CPU + GPU) command: | - conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia + conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia/label/cuda-11.8.0 pytest --junitxml=test-results/pytest/results.xml tests/test_*.py pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py cp tests/common_faiss_tests.py faiss/gpu/test @@ -350,10 +354,10 @@ workflows: branches: ignore: /.*/ - build_conda: - name: Linux x86_64 GPU packages (CUDA 11.4) + name: Linux x86_64 GPU packages (CUDA 11.8) exec: linux-x86_64-gpu label: main - cuda: "11.4" + cuda: "11.8" cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" compiler_version: "11.2" filters: @@ -362,11 +366,11 @@ workflows: branches: ignore: /.*/ - build_conda: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4) + name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8) exec: linux-x86_64-gpu label: main raft: "ON" - cuda: "11.4" + cuda: "11.8" cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" compiler_version: "11.2" filters: @@ -416,17 +420,32 @@ workflows: exec: linux-x86_64-cpu label: nightly - build_conda: - name: Linux x86_64 GPU nightlies (CUDA 11.4) + name: Linux x86_64 GPU nightlies (CUDA 11.8) + exec: linux-x86_64-gpu + cuda: "11.8.0" + cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + compiler_version: "11.2" + label: nightly + - build_conda: + name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0) + exec: linux-x86_64-gpu + raft: "ON" + cuda: "11.8.0" + cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + compiler_version: "11.2" + label: nightly + - build_conda: + name: Linux x86_64 GPU nightlies (CUDA 12.1.0) exec: linux-x86_64-gpu - cuda: "11.4" + cuda: "12.1.0" cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" compiler_version: "11.2" label: nightly - build_conda: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.4) + name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.0) exec: linux-x86_64-gpu raft: "ON" - cuda: "11.4" + cuda: "12.1.0" cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" compiler_version: "11.2" label: nightly diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml index 77f0eec0a2..cbfd2b0e81 100644 --- a/conda/conda_build_config.yaml +++ b/conda/conda_build_config.yaml @@ -2,3 +2,4 @@ python: - 3.9 - 3.10 - 3.11 + - 3.12 diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-raft/meta.yaml index b59c2d19b3..d5c893bd3d 100644 --- a/conda/faiss-gpu-raft/meta.yaml +++ b/conda/faiss-gpu-raft/meta.yaml @@ -44,15 +44,16 @@ outputs: - cmake >=3.23.1 - make # [not win] - mkl-devel =2023 # [x86_64] + - cuda-toolkit {{ cudatoolkit }} + - pytorch-cuda host: - mkl =2023 # [x86_64] - openblas # [not x86_64] - - cuda-version {{ cudatoolkit }} - libraft =24.02 run: - mkl =2023 # [x86_64] - openblas # [not x86_64] - - {{ pin_compatible('cuda-version', max_pin='x') }} + - {{ pin_compatible('pytorch-cuda', max_pin='x') }} - libraft =24.02 test: requires: @@ -91,8 +92,6 @@ outputs: - numpy - scipy - pytorch - - pytorch-cuda =11.8 - - cuda-version =11.8 commands: - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*" diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml index 7cc6e4ff3d..c3e60c52ec 100644 --- a/conda/faiss-gpu/meta.yaml +++ b/conda/faiss-gpu/meta.yaml @@ -44,14 +44,15 @@ outputs: - cmake >=3.23.1 - make # [not win] - mkl-devel =2023 # [x86_64] + - cuda-toolkit {{ cudatoolkit }} + - pytorch-cuda host: - mkl =2023 # [x86_64] - openblas # [not x86_64] - - cudatoolkit {{ cudatoolkit }} run: - mkl =2023 # [x86_64] - openblas # [not x86_64] - - {{ pin_compatible('cudatoolkit', max_pin='x') }} + - {{ pin_compatible('pytorch-cuda', max_pin='x') }} test: requires: - conda-build @@ -89,8 +90,6 @@ outputs: - numpy - scipy - pytorch - - pytorch-cuda =11.8 - - cudatoolkit =11.8 commands: - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*" - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*" diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt index 486546693f..713d3a287c 100644 --- a/faiss/gpu/CMakeLists.txt +++ b/faiss/gpu/CMakeLists.txt @@ -244,18 +244,29 @@ if(FAISS_ENABLE_RAFT) list(APPEND FAISS_GPU_SRC impl/RaftFlatIndex.cu impl/RaftIVFFlat.cu) +endif() + +add_library(faiss_gpu STATIC ${FAISS_GPU_SRC}) +set_target_properties(faiss_gpu PROPERTIES + POSITION_INDEPENDENT_CODE ON + WINDOWS_EXPORT_ALL_SYMBOLS ON +) +target_include_directories(faiss_gpu PUBLIC + $) +if(FAISS_ENABLE_RAFT) target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1) target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1) target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_RAFT=1) + target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1) endif() # Export FAISS_GPU_HEADERS variable to parent scope. set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE) -target_sources(faiss PRIVATE ${FAISS_GPU_SRC}) -target_sources(faiss_avx2 PRIVATE ${FAISS_GPU_SRC}) -target_sources(faiss_avx512 PRIVATE ${FAISS_GPU_SRC}) +target_link_libraries(faiss PRIVATE "$") +target_link_libraries(faiss_avx2 PRIVATE "$") +target_link_libraries(faiss_avx512 PRIVATE "$") foreach(header ${FAISS_GPU_HEADERS}) get_filename_component(dir ${header} DIRECTORY ) @@ -277,14 +288,8 @@ __nv_relfatbin : { *(__nv_relfatbin) } } ]=] ) -target_link_options(faiss PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") -target_link_options(faiss_avx2 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") -target_link_options(faiss_avx512 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") +target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") find_package(CUDAToolkit REQUIRED) -target_link_libraries(faiss PRIVATE CUDA::cudart CUDA::cublas $<$:raft::raft> $<$:raft::compiled> $<$:nvidia::cutlass::cutlass>) -target_link_libraries(faiss_avx2 PRIVATE CUDA::cudart CUDA::cublas $<$:raft::raft> $<$:raft::compiled> $<$:nvidia::cutlass::cutlass>) -target_link_libraries(faiss_avx512 PRIVATE CUDA::cudart CUDA::cublas $<$:raft::raft> $<$:raft::compiled> $<$:nvidia::cutlass::cutlass>) -target_compile_options(faiss PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>) -target_compile_options(faiss_avx2 PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>) -target_compile_options(faiss_avx512 PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>) +target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$:raft::raft> $<$:raft::compiled> $<$:nvidia::cutlass::cutlass>) +target_compile_options(faiss_gpu PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)