add faiss_gpu lib (#3222)

Summary:
1. Split the GPU code into a separate static library to reduce build times.
2. faiss-gpu now depends on pytorch-cuda to stop clobbering of CUDA libraries when Faiss and PyTorch are installed in the same environment.
3. Add CUDA 12 (we support both 11.8 and 12.1).
4. Add Python 3.12.

Pull Request resolved: #3222
Reviewed By: mlomeli1
Differential Revision: D53215033
Pulled By: algoriddle
fbshipit-source-id: d48c5707af20b7e5ff72c3aa27b0e677259f22f8
facebookresearch · Jan 30, 2024 · 31bddeb · 31bddeb
1 parent 2817344
commit 31bddeb
Show file tree

Hide file tree

Showing 5 changed files with 72 additions and 49 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -9,13 +9,13 @@ executors:
     environment:
       CONDA_ARCH: Linux-x86_64
     machine:
-      image: linux-cuda-11:default
+      image: linux-cuda-12:default
     resource_class: gpu.nvidia.medium
   linux-arm64-cpu:
     environment:
       CONDA_ARCH: Linux-aarch64
     machine:
-      image: ubuntu-2004:current
+      image: ubuntu-2204:current
     resource_class: arm.medium
   macosx-arm64-cpu:
     environment:
@@ -138,10 +138,9 @@ jobs:
                 name: Conda build (GPU)
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      -c pytorch -c nvidia
+                      -c pytorch -c nvidia/label/cuda-<<parameters.cuda>>
       - when:
           condition:
             and:
@@ -153,10 +152,9 @@ jobs:
                 name: Conda build (GPU) w/ anaconda upload
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia/label/cuda-<<parameters.cuda>>
       - when:
           condition:
             and:
@@ -168,7 +166,6 @@ jobs:
                 name: Conda build (GPU w/ RAFT)
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
                       -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge
@@ -183,10 +180,9 @@ jobs:
                 name: Conda build (GPU w/ RAFT) w/ anaconda upload
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia/label/cuda-<<parameters.cuda>> -c rapidsai-nightly -c conda-forge
 
   build_cmake:
     parameters:
@@ -216,27 +212,35 @@ jobs:
               bash miniconda.sh -b -p $HOME/miniconda
               ~/miniconda/bin/conda init
             fi
-      - when:
-          condition:
-            equal: [ "ON", << parameters.gpu >> ]
-          steps:
-            - run:
-                name: Configure CUDA
-                command: sudo update-alternatives --set cuda /usr/local/cuda-11.4
       - run:
           name: Set up environment
           command: |
             conda config --set solver libmamba
             conda update -y -q conda
-            conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
+      - when:
+          condition:
+            equal: [ "OFF", << parameters.raft >> ]
+          steps:
+            - run:
+                name: Install env using main channel
+                command: |
+                  conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
       - when:
           condition:
             equal: [ "ON", << parameters.raft >> ]
           steps:
             - run:
-                name: Install libraft
+                name: Install env using conda-forge channel
                 command: |
-                  conda install -y -q libraft cuda-version=11.4 -c rapidsai-nightly -c nvidia -c pkgs/main -c conda-forge
+                  conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64=2.28 libraft cuda-version=11.8 -c rapidsai-nightly -c "nvidia/label/cuda-11.8.0" -c conda-forge
+      - when:
+          condition:
+            equal: [ "ON", << parameters.gpu >> ]
+          steps:
+            - run:
+                name: Install CUDA
+                command: |
+                  conda install -y -q cuda-nvcc cuda-cudart-dev libcublas libcublas-dev cuda-toolkit -c "nvidia/label/cuda-11.8.0"
       - run:
           name: Build all targets
           no_output_timeout: 30m
@@ -245,7 +249,7 @@ jobs:
             conda activate
             cmake -B build \
                   -DBUILD_TESTING=ON \
-                  -DBUILD_SHARED_LIBS=OFF \
+                  -DBUILD_SHARED_LIBS=ON \
                   -DFAISS_ENABLE_GPU=<< parameters.gpu >> \
                   -DFAISS_ENABLE_RAFT=<< parameters.raft >> \
                   -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \
@@ -283,7 +287,7 @@ jobs:
             - run:
                 name: Python tests (CPU + GPU)
                 command: |
-                  conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia
+                  conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia/label/cuda-11.8.0
                   pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
                   pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
                   cp tests/common_faiss_tests.py faiss/gpu/test
@@ -350,10 +354,10 @@ workflows:
             branches:
               ignore: /.*/
       - build_conda:
-          name: Linux x86_64 GPU packages (CUDA 11.4)
+          name: Linux x86_64 GPU packages (CUDA 11.8)
           exec: linux-x86_64-gpu
           label: main
-          cuda: "11.4"
+          cuda: "11.8"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           filters:
@@ -362,11 +366,11 @@ workflows:
             branches:
               ignore: /.*/
       - build_conda:
-          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4)
+          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8)
           exec: linux-x86_64-gpu
           label: main
           raft: "ON"
-          cuda: "11.4"
+          cuda: "11.8"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           filters:
@@ -416,17 +420,32 @@ workflows:
           exec: linux-x86_64-cpu
           label: nightly
       - build_conda:
-          name: Linux x86_64 GPU nightlies (CUDA 11.4)
+          name: Linux x86_64 GPU nightlies (CUDA 11.8)
+          exec: linux-x86_64-gpu
+          cuda: "11.8.0"
+          cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
+          compiler_version: "11.2"
+          label: nightly
+      - build_conda:
+          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0)
+          exec: linux-x86_64-gpu
+          raft: "ON"
+          cuda: "11.8.0"
+          cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
+          compiler_version: "11.2"
+          label: nightly
+      - build_conda:
+          name: Linux x86_64 GPU nightlies (CUDA 12.1.0)
           exec: linux-x86_64-gpu
-          cuda: "11.4"
+          cuda: "12.1.0"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           label: nightly
       - build_conda:
-          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.4)
+          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.0)
           exec: linux-x86_64-gpu
           raft: "ON"
-          cuda: "11.4"
+          cuda: "12.1.0"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           label: nightly

diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml
@@ -2,3 +2,4 @@ python:
   - 3.9
   - 3.10
   - 3.11
+  - 3.12
diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-raft/meta.yaml
@@ -44,15 +44,16 @@ outputs:
         - cmake >=3.23.1
         - make  # [not win]
         - mkl-devel =2023  # [x86_64]
+        - cuda-toolkit {{ cudatoolkit }}
+        - pytorch-cuda
       host:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cuda-version {{ cudatoolkit }}
         - libraft =24.02
       run:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - {{ pin_compatible('cuda-version', max_pin='x') }}
+        - {{ pin_compatible('pytorch-cuda', max_pin='x') }}
         - libraft =24.02
     test:
       requires:
@@ -91,8 +92,6 @@ outputs:
         - numpy
         - scipy
         - pytorch
-        - pytorch-cuda =11.8
-        - cuda-version =11.8
       commands:
         - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
         - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"

diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml
@@ -44,14 +44,15 @@ outputs:
         - cmake >=3.23.1
         - make  # [not win]
         - mkl-devel =2023  # [x86_64]
+        - cuda-toolkit {{ cudatoolkit }}
+        - pytorch-cuda
       host:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cudatoolkit {{ cudatoolkit }}
       run:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - {{ pin_compatible('cudatoolkit', max_pin='x') }}
+        - {{ pin_compatible('pytorch-cuda', max_pin='x') }}
     test:
       requires:
         - conda-build
@@ -89,8 +90,6 @@ outputs:
         - numpy
         - scipy
         - pytorch
-        - pytorch-cuda =11.8
-        - cudatoolkit =11.8
       commands:
         - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
         - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"

diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt
@@ -244,18 +244,29 @@ if(FAISS_ENABLE_RAFT)
   list(APPEND FAISS_GPU_SRC
           impl/RaftFlatIndex.cu
           impl/RaftIVFFlat.cu)
+endif()
+
+add_library(faiss_gpu STATIC ${FAISS_GPU_SRC})
+set_target_properties(faiss_gpu PROPERTIES
+  POSITION_INDEPENDENT_CODE ON
+  WINDOWS_EXPORT_ALL_SYMBOLS ON
+)
+target_include_directories(faiss_gpu PUBLIC
+  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
 
+if(FAISS_ENABLE_RAFT)
   target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1)
   target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1)
   target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_RAFT=1)
+  target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1)
 endif()
 
 # Export FAISS_GPU_HEADERS variable to parent scope.
 set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE)
 
-target_sources(faiss PRIVATE ${FAISS_GPU_SRC})
-target_sources(faiss_avx2 PRIVATE ${FAISS_GPU_SRC})
-target_sources(faiss_avx512 PRIVATE ${FAISS_GPU_SRC})
+target_link_libraries(faiss PRIVATE  "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
+target_link_libraries(faiss_avx2 PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
+target_link_libraries(faiss_avx512 PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
 
 foreach(header ${FAISS_GPU_HEADERS})
   get_filename_component(dir ${header} DIRECTORY )
@@ -277,14 +288,8 @@ __nv_relfatbin : { *(__nv_relfatbin) }
 }
 ]=]
 )
-target_link_options(faiss PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-target_link_options(faiss_avx2 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-target_link_options(faiss_avx512 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
+target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
 
 find_package(CUDAToolkit REQUIRED)
-target_link_libraries(faiss PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_link_libraries(faiss_avx2 PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>  $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_link_libraries(faiss_avx512 PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>  $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_compile_options(faiss PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
-target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
-target_compile_options(faiss_avx512 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
+target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
+target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)