From 31bddeb0cfa204791156522383c9470dcf8efbc1 Mon Sep 17 00:00:00 2001
From: Gergely Szilvasy <gsz@meta.com>
Date: Tue, 30 Jan 2024 01:44:50 -0800
Subject: [PATCH] add faiss_gpu lib (#3222)

Summary:
1. Split the GPU code into a separate static library to reduce build times.
2. faiss-gpu now depends on pytorch-cuda to stop the CUDA libraries from being clobbered when Faiss and PyTorch are installed in the same environment.
3. Add CUDA 12 builds (both CUDA 11.8 and CUDA 12.1 are supported).
4. Add Python 3.12

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3222

Reviewed By: mlomeli1

Differential Revision: D53215033

Pulled By: algoriddle

fbshipit-source-id: d48c5707af20b7e5ff72c3aa27b0e677259f22f8
---
 .circleci/config.yml           | 77 +++++++++++++++++++++-------------
 conda/conda_build_config.yaml  |  1 +
 conda/faiss-gpu-raft/meta.yaml |  7 ++--
 conda/faiss-gpu/meta.yaml      |  7 ++--
 faiss/gpu/CMakeLists.txt       | 29 +++++++------
 5 files changed, 72 insertions(+), 49 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 406817e09d..2901894577 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,13 +9,13 @@ executors:
     environment:
       CONDA_ARCH: Linux-x86_64
     machine:
-      image: linux-cuda-11:default
+      image: linux-cuda-12:default
     resource_class: gpu.nvidia.medium
   linux-arm64-cpu:
     environment:
       CONDA_ARCH: Linux-aarch64
     machine:
-      image: ubuntu-2004:current
+      image: ubuntu-2204:current
     resource_class: arm.medium
   macosx-arm64-cpu:
     environment:
@@ -138,10 +138,9 @@ jobs:
                 name: Conda build (GPU)
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      -c pytorch -c nvidia
+                      -c pytorch -c nvidia/label/cuda-<<parameters.cuda>>
       - when:
           condition:
             and:
@@ -153,10 +152,9 @@ jobs:
                 name: Conda build (GPU) w/ anaconda upload
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia/label/cuda-<<parameters.cuda>>
       - when:
           condition:
             and:
@@ -168,7 +166,6 @@ jobs:
                 name: Conda build (GPU w/ RAFT)
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
                       -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge
@@ -183,10 +180,9 @@ jobs:
                 name: Conda build (GPU w/ RAFT) w/ anaconda upload
                 no_output_timeout: 60m
                 command: |
-                  sudo update-alternatives --set cuda /usr/local/cuda-<<parameters.cuda>>
                   cd conda
                   conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<<parameters.cuda>>", "c_compiler_version": "<<parameters.compiler_version>>", "cxx_compiler_version": "<<parameters.compiler_version>>" }' \
-                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia -c rapidsai-nightly -c conda-forge
+                      --user pytorch --label <<parameters.label>> -c pytorch -c nvidia/label/cuda-<<parameters.cuda>> -c rapidsai-nightly -c conda-forge
 
   build_cmake:
     parameters:
@@ -216,27 +212,35 @@ jobs:
               bash miniconda.sh -b -p $HOME/miniconda
               ~/miniconda/bin/conda init
             fi
-      - when:
-          condition:
-            equal: [ "ON", << parameters.gpu >> ]
-          steps:
-            - run:
-                name: Configure CUDA
-                command: sudo update-alternatives --set cuda /usr/local/cuda-11.4
       - run:
           name: Set up environment
           command: |
             conda config --set solver libmamba
             conda update -y -q conda
-            conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
+      - when:
+          condition:
+            equal: [ "OFF", << parameters.raft >> ]
+          steps:
+            - run:
+                name: Install env using main channel
+                command: |
+                  conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64
       - when:
           condition:
             equal: [ "ON", << parameters.raft >> ]
           steps:
             - run:
-                name: Install libraft
+                name: Install env using conda-forge channel
                 command: |
-                  conda install -y -q libraft cuda-version=11.4 -c rapidsai-nightly -c nvidia -c pkgs/main -c conda-forge
+                  conda install -y -q python=3.11 cmake make swig=4.0.2 mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64 sysroot_linux-64=2.28 libraft cuda-version=11.8 -c rapidsai-nightly -c "nvidia/label/cuda-11.8.0" -c conda-forge
+      - when:
+          condition:
+            equal: [ "ON", << parameters.gpu >> ]
+          steps:
+            - run:
+                name: Install CUDA
+                command: |
+                  conda install -y -q cuda-nvcc cuda-cudart-dev libcublas libcublas-dev cuda-toolkit -c "nvidia/label/cuda-11.8.0"
       - run:
           name: Build all targets
           no_output_timeout: 30m
@@ -245,7 +249,7 @@ jobs:
             conda activate
             cmake -B build \
                   -DBUILD_TESTING=ON \
-                  -DBUILD_SHARED_LIBS=OFF \
+                  -DBUILD_SHARED_LIBS=ON \
                   -DFAISS_ENABLE_GPU=<< parameters.gpu >> \
                   -DFAISS_ENABLE_RAFT=<< parameters.raft >> \
                   -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \
@@ -283,7 +287,7 @@ jobs:
             - run:
                 name: Python tests (CPU + GPU)
                 command: |
-                  conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia
+                  conda install -y -q pytorch pytorch-cuda=11 -c pytorch -c nvidia/label/cuda-11.8.0
                   pytest --junitxml=test-results/pytest/results.xml tests/test_*.py
                   pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py
                   cp tests/common_faiss_tests.py faiss/gpu/test
@@ -350,10 +354,10 @@ workflows:
             branches:
               ignore: /.*/
       - build_conda:
-          name: Linux x86_64 GPU packages (CUDA 11.4)
+          name: Linux x86_64 GPU packages (CUDA 11.8)
           exec: linux-x86_64-gpu
           label: main
-          cuda: "11.4"
+          cuda: "11.8"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           filters:
@@ -362,11 +366,11 @@ workflows:
             branches:
               ignore: /.*/
       - build_conda:
-          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.4)
+          name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8)
           exec: linux-x86_64-gpu
           label: main
           raft: "ON"
-          cuda: "11.4"
+          cuda: "11.8"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           filters:
@@ -416,17 +420,32 @@ workflows:
           exec: linux-x86_64-cpu
           label: nightly
       - build_conda:
-          name: Linux x86_64 GPU nightlies (CUDA 11.4)
+          name: Linux x86_64 GPU nightlies (CUDA 11.8)
+          exec: linux-x86_64-gpu
+          cuda: "11.8.0"
+          cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
+          compiler_version: "11.2"
+          label: nightly
+      - build_conda:
+          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0)
+          exec: linux-x86_64-gpu
+          raft: "ON"
+          cuda: "11.8.0"
+          cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
+          compiler_version: "11.2"
+          label: nightly
+      - build_conda:
+          name: Linux x86_64 GPU nightlies (CUDA 12.1.0)
           exec: linux-x86_64-gpu
-          cuda: "11.4"
+          cuda: "12.1.0"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           label: nightly
       - build_conda:
-          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.4)
+          name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.0)
           exec: linux-x86_64-gpu
           raft: "ON"
-          cuda: "11.4"
+          cuda: "12.1.0"
           cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real"
           compiler_version: "11.2"
           label: nightly
diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml
index 77f0eec0a2..cbfd2b0e81 100644
--- a/conda/conda_build_config.yaml
+++ b/conda/conda_build_config.yaml
@@ -2,3 +2,4 @@ python:
   - 3.9
   - 3.10
   - 3.11
+  - 3.12
diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-raft/meta.yaml
index b59c2d19b3..d5c893bd3d 100644
--- a/conda/faiss-gpu-raft/meta.yaml
+++ b/conda/faiss-gpu-raft/meta.yaml
@@ -44,15 +44,16 @@ outputs:
         - cmake >=3.23.1
         - make  # [not win]
         - mkl-devel =2023  # [x86_64]
+        - cuda-toolkit {{ cudatoolkit }}
+        - pytorch-cuda
       host:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cuda-version {{ cudatoolkit }}
         - libraft =24.02
       run:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - {{ pin_compatible('cuda-version', max_pin='x') }}
+        - {{ pin_compatible('pytorch-cuda', max_pin='x') }}
         - libraft =24.02
     test:
       requires:
@@ -91,8 +92,6 @@ outputs:
         - numpy
         - scipy
         - pytorch
-        - pytorch-cuda =11.8
-        - cuda-version =11.8
       commands:
         - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
         - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml
index 7cc6e4ff3d..c3e60c52ec 100644
--- a/conda/faiss-gpu/meta.yaml
+++ b/conda/faiss-gpu/meta.yaml
@@ -44,14 +44,15 @@ outputs:
         - cmake >=3.23.1
         - make  # [not win]
         - mkl-devel =2023  # [x86_64]
+        - cuda-toolkit {{ cudatoolkit }}
+        - pytorch-cuda
       host:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cudatoolkit {{ cudatoolkit }}
       run:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - {{ pin_compatible('cudatoolkit', max_pin='x') }}
+        - {{ pin_compatible('pytorch-cuda', max_pin='x') }}
     test:
       requires:
         - conda-build
@@ -89,8 +90,6 @@ outputs:
         - numpy
         - scipy
         - pytorch
-        - pytorch-cuda =11.8
-        - cudatoolkit =11.8
       commands:
         - python -X faulthandler -m unittest discover -v -s tests/ -p "test_*"
         - python -X faulthandler -m unittest discover -v -s tests/ -p "torch_*"
diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt
index 486546693f..713d3a287c 100644
--- a/faiss/gpu/CMakeLists.txt
+++ b/faiss/gpu/CMakeLists.txt
@@ -244,18 +244,29 @@ if(FAISS_ENABLE_RAFT)
   list(APPEND FAISS_GPU_SRC
           impl/RaftFlatIndex.cu
           impl/RaftIVFFlat.cu)
+endif()
+
+add_library(faiss_gpu STATIC ${FAISS_GPU_SRC})
+set_target_properties(faiss_gpu PROPERTIES
+  POSITION_INDEPENDENT_CODE ON
+  WINDOWS_EXPORT_ALL_SYMBOLS ON
+)
+target_include_directories(faiss_gpu PUBLIC
+  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
 
+if(FAISS_ENABLE_RAFT)
   target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1)
   target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1)
   target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_RAFT=1)
+  target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1)
 endif()
 
 # Export FAISS_GPU_HEADERS variable to parent scope.
 set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE)
 
-target_sources(faiss PRIVATE ${FAISS_GPU_SRC})
-target_sources(faiss_avx2 PRIVATE ${FAISS_GPU_SRC})
-target_sources(faiss_avx512 PRIVATE ${FAISS_GPU_SRC})
+target_link_libraries(faiss PRIVATE  "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
+target_link_libraries(faiss_avx2 PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
+target_link_libraries(faiss_avx512 PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,faiss_gpu>")
 
 foreach(header ${FAISS_GPU_HEADERS})
   get_filename_component(dir ${header} DIRECTORY )
@@ -277,14 +288,8 @@ __nv_relfatbin : { *(__nv_relfatbin) }
 }
 ]=]
 )
-target_link_options(faiss PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-target_link_options(faiss_avx2 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-target_link_options(faiss_avx512 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
+target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
 
 find_package(CUDAToolkit REQUIRED)
-target_link_libraries(faiss PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_link_libraries(faiss_avx2 PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>  $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_link_libraries(faiss_avx512 PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled>  $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-target_compile_options(faiss PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
-target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
-target_compile_options(faiss_avx512 PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
+target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
+target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)