Build for hip gpu backends (#392)
* Update cmake to handle gpu backend

Previously, cmake would always build targeting cuda.

We now dispatch on FF_GPU_BACKEND to target either cuda, hip_cuda,
or hip_rocm. The hip backends are expected to use the hip cmake config
file that comes with a hip install. The discrepancies for targeting
hip_cuda vs hip_rocm are documented inline.

Configure legion for hip backend
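
Condensed, the new dispatch looks roughly like this (a sketch only; the full logic, including the hip_device_nvidia interface target, is in the CMakeLists.txt diff below):

```
# Sketch of the FF_GPU_BACKEND dispatch, not the full CMakeLists.txt.
set(FF_GPU_BACKEND "cuda" CACHE STRING "cuda, hip_cuda, or hip_rocm")

if(FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
  include(cuda)   # hip_cuda still compiles against the CUDA toolkit
  include(cudnn)
endif()

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
  set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
  list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
  find_package(hip REQUIRED)  # hip cmake config shipped with the hip install
endif()
```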

* Build configuration scripts

* docker build updates

* Small source modifications for build

add legion max return size check

fix std::stringstream var decl parsed as function (illustrated below)

explicit template instantiations
include string
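
For context on the std::stringstream fix: `std::stringstream ss();` declares a function named `ss` returning a `std::stringstream` (C++'s "most vexing parse"), not a default-constructed variable, so later stream writes fail to compile. A minimal illustration (hypothetical names, not the actual FlexFlow source):

```
#include <sstream>

int main() {
  // std::stringstream ss();  // vexing parse: declares a function `ss`
  //                          // returning std::stringstream
  std::stringstream ss;       // fix: declare the variable without parens
  ss << "hello";
  return 0;
}
```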

* Move tools to top level directory

We glob for files under src to get the source files for the flexflow
target. Moving tools to the top level directory prevents the tools'
source files from accidentally being added to the flexflow target's
source files.

Change substitution_to_dot's cuda_add_executable to add_executable.
When building with hip_rocm, we don't have cuda available and shouldn't
need it for substitution_to_dot, as the target does not directly build
any cuda kernels.
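
The change itself is mechanical; roughly (the source-list variable is illustrative):

```
# Before: required the CUDA toolchain even for this host-only tool.
# cuda_add_executable(substitution_to_dot ${SUBSTITUTION_SRC})

# After: a plain host executable, which also builds under hip_rocm
# where cuda is absent.
add_executable(substitution_to_dot ${SUBSTITUTION_SRC})
```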

* fix miopen.h headers

* Fix hip kernels

* system dependencies install instructions

* Ensure docker build script is called from FF_HOME
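
A minimal invocation, assuming `FF_HOME` points at the FlexFlow checkout (the `base` image name matches the CI job below):

```
cd "$FF_HOME"
./docker/build.sh base
```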

* Add .dockerignore file to ignore build directories

* Add new lines

* Fix CI for changes in PR

do not call sed to manually change config script

clone submodules in docker job

* Change the python setup script to shell out to the config script instead of parsing it ad hoc (see the sketch after the config.linux diff below)

* Update docs to note FF_GPU_BACKEND=hip_cuda is not supported

* Fix path to mt5 dockerfile
williamberman committed Nov 8, 2022
1 parent 68a048a commit 81304c8
Showing 30 changed files with 670 additions and 458 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
# Ignore all folders which start with "build"
/build*/
5 changes: 1 addition & 4 deletions .github/workflows/build.yml
@@ -51,12 +51,9 @@ jobs:
cores_available=$(nproc --all)
n_build_cores=$(( cores_available -1 ))
if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
sed -i "/FF_CUDA_ARCH/c\FF_CUDA_ARCH=70" ./config/config.linux
sed -i "/FF_BUILD_ALL_EXAMPLES/c\FF_BUILD_ALL_EXAMPLES=ON" ./config/config.linux
sed -i "/FF_BUILD_UNIT_TESTS/c\FF_BUILD_UNIT_TESTS=ON" ./config/config.linux
mkdir build
cd build
../config/config.linux
FF_CUDA_ARCH=70 FF_BUILD_ALL_EXAMPLES=ON FF_BUILD_UNIT_TESTS=ON ../config/config.linux
make -j $n_build_cores
- name: Install FlexFlow
2 changes: 2 additions & 0 deletions .github/workflows/docker-build.yml
@@ -24,6 +24,8 @@ jobs:
steps:
- name: Checkout Git Repository
uses: actions/checkout@v3
with:
submodules: recursive

- name: Build Docker container
run: ./docker/build.sh base
2 changes: 1 addition & 1 deletion .github/workflows/pip-install.yml
@@ -44,7 +44,7 @@ jobs:
run: |
export PATH=/opt/conda/bin:$PATH
export FF_HOME=$(pwd)
sed -i "/FF_CUDA_ARCH/c\FF_CUDA_ARCH=70" ./config/config.linux
export FF_CUDA_ARCH=70
pip install . --verbose
- name: Check availability of Python flexflow.core module
91 changes: 80 additions & 11 deletions CMakeLists.txt
@@ -50,6 +50,10 @@ set(FF_CUDA_ARCH "" CACHE STRING "Target CUDA Arch")
# option for nccl
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)

if (FF_GPU_BACKEND STREQUAL "hip_rocm" AND FF_USE_NCCL STREQUAL "ON")
message(FATAL_ERROR "NCCL: ON for FF_GPU_BACKEND: hip_rocm. hip_rocm backend must have NCCL disabled.")
endif()

# option for avx2
option(FF_USE_AVX2 "Run FlexFlow with AVX2" OFF)

@@ -78,14 +82,22 @@ add_compile_options(${CC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS})
link_libraries(${LD_FLAGS})

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()

# ZLIB
include(zlib)

# CUDA
include(cuda)
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cuda)
endif()

# CUDNN
include(cudnn)
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cudnn)
endif()

# NCCL
if(FF_USE_NCCL)
@@ -179,10 +191,6 @@ file(GLOB_RECURSE FLEXFLOW_SRC
${FLEXFLOW_ROOT}/src/*.cc)
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc")

file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

set(FLEXFLOW_CPP_DRV_SRC
${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)

@@ -195,10 +203,71 @@ target_link_libraries(substitution_loader nlohmann_json::nlohmann_json)
#message("FLEXFLOW_INCLUDE_DIRS: ${FLEXFLOW_INCLUDE_DIRS}")

# compile flexflow lib
if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
if (FF_GPU_BACKEND STREQUAL "cuda")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
endif()
elseif(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cpp)

if(BUILD_SHARED_LIBS)
add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
else()
add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
endif()

list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})

find_package(hip REQUIRED)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
# The targets defined by the hip cmake config only target amd devices.
# For targeting nvidia devices, we'll make our own interface target,
# hip_device_nvidia, that includes the rocm and hip headers.
add_library(hip_device_nvidia INTERFACE)

if (NOT FF_CUDA_ARCH STREQUAL "")
target_compile_options(hip_device_nvidia INTERFACE -arch=compute_${FF_CUDA_ARCH})
endif()

target_include_directories(hip_device_nvidia SYSTEM INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)
target_include_directories(hip_device_nvidia INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)

# Linking cuda:
# We do not explicitly link cuda. hipcc when targeting nvidia will
# use nvcc under the hood. nvcc when used for linking will handle
# linking cuda dependencies
target_link_libraries(flexflow hip_device_nvidia)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
find_package(hipblas REQUIRED)
find_package(miopen REQUIRED)
# find_package(rocrand REQUIRED)
find_library(HIP_RAND_LIBRARY hiprand REQUIRED)

# The hip cmake config module defines three targets,
# hip::amdhip64, hip::host, and hip::device.
#
# hip::host and hip::device are interface targets. hip::amdhip64 is an
# imported target for libamdhip.
#
# You do not directly link to hip::amdhip64. hip::host links to hip::amdhip64
# and hip::device links to hip::host. Link to hip::host to just use hip without
# compiling any GPU code. Link to hip::device to compile the GPU device code.
#
# Docs (outdated):
# https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
target_link_libraries(flexflow hip::device roc::hipblas MIOpen ${HIP_RAND_LIBRARY})
endif()
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
message(FATAL_ERROR "Unsupported FF_GPU_BACKEND for cmake: ${FF_GPU_BACKEND}")
endif()

target_include_directories(flexflow PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
@@ -233,11 +302,11 @@ if(FF_BUILD_UNIT_TESTS)
endif()

if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(src/tools/protobuf_to_json)
add_subdirectory(tools/protobuf_to_json)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(src/tools/substitutions_to_dot)
add_subdirectory(tools/substitutions_to_dot)
endif()

# Python
24 changes: 19 additions & 5 deletions INSTALL.md
@@ -7,12 +7,26 @@ Clone the FlexFlow source code, and the third-party dependencies from GitHub.
git clone --recursive https://github.com/flexflow/FlexFlow.git
```

## 2. Install the Python dependencies
## 2. Install system dependencies
FlexFlow has system dependencies on cuda and/or rocm depending on which gpu backend you target. The gpu backend is configured by the cmake variable FF_GPU_BACKEND. By default, FlexFlow targets CUDA. `docker/base/Dockerfile` installs the system dependencies on a standard Ubuntu system.

### Targeting CUDA - `FF_GPU_BACKEND=cuda`
If you are targeting CUDA, FlexFlow requires CUDA and CUDNN to be installed. You can follow the standard nvidia installation instructions [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) and [CUDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html).

### Targeting ROCM - `FF_GPU_BACKEND=hip_rocm`
If you are targeting ROCM, FlexFlow requires a ROCM and HIP installation with a few additional packages. Note that this can be done on a system with or without an AMD GPU. You can follow the standard installation instructions for [ROCM](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.3/page/Introduction_to_ROCm_Installation_Guide_for_Linux.html) and [HIP](https://docs.amd.com/bundle/HIP-Installation-Guide-v5.3/page/Introduction_to_HIP_Installation_Guide.html). When running `amdgpu-install`, install the hip and rocm use cases. You can skip installing the kernel drivers (not necessary on systems without an AMD graphics card) with `--no-dkms`, i.e. `amdgpu-install --usecase=hip,rocm --no-dkms`. Additionally, install the packages `hip-dev`, `hipblas`, `miopen-hip`, and `rocm-hip-sdk`.
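
On Ubuntu, the steps above might look like the following (a sketch, assuming the `amdgpu-install` script is already set up and your distro uses apt; package names follow the list above):

```
# Install the hip and rocm use cases; skip kernel drivers on machines
# without an AMD GPU.
sudo amdgpu-install --usecase=hip,rocm --no-dkms

# Extra packages FlexFlow needs on top of the base install.
sudo apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk
```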

See `./docker/base/Dockerfile` for an example ROCM install.

### Targeting CUDA through HIP - `FF_GPU_BACKEND=hip_cuda`
This is not currently supported.

## 3. Install the Python dependencies
If you are planning to build the Python interface, you will need to install several additional Python libraries; please check [this](https://github.com/flexflow/FlexFlow/blob/master/requirements.txt) for details. If you are only looking to use the C++ interface, you can skip to the next section.

**We recommend that you create your own `conda` environment and then install the Python dependencies, to avoid any version mismatching with your system pre-installed libraries.**

## 3. Configuring the FlexFlow build
## 4. Configuring the FlexFlow build
Before building FlexFlow, you should configure the build by editing the `config/config.linux` file. Leave it unchanged if you want to build with the default options. We recommend that you spend some time familiarizing yourself with the available options. In particular, the main parameters are:
* `CUDA_DIR` is used to specify the directory of CUDA. It is only required when CMake can not automatically detect the installation directory of CUDA.
* `CUDNN_DIR` is used to specify the directory of CUDNN. It is only required when CUDNN is not installed in the CUDA directory.
@@ -25,7 +39,7 @@ Before building FlexFlow, you should configure the build by editing the `config/

More options are available in cmake; run `ccmake` and search for options starting with FF.
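
Since every option in `config/config.linux` now falls back to an environment variable of the same name, you can also override options per invocation instead of editing the file, as the CI build does (values shown are illustrative):

```
mkdir -p build && cd build
FF_CUDA_ARCH=70 FF_BUILD_ALL_EXAMPLES=ON ../config/config.linux
```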

## 4. Build FlexFlow
## 5. Build FlexFlow
You can build FlexFlow in three ways: with CMake, with Make, and with `pip`. We recommend that you use the CMake build system as it will automatically build all C++ dependencies, including NCCL and Legion.

### Building FlexFlow with CMake
@@ -45,7 +59,7 @@ To build Flexflow with `pip`, run `pip install .` from the FlexFlow home directo
The Makefile we provide is mainly for development purposes, and may not be fully up to date.


## 5. Test FlexFlow
## 6. Test FlexFlow
After building FlexFlow, you can test it to ensure that the build completed without issue, and that your system is ready to run FlexFlow.

### Set the `FF_HOME` environment variable before running FlexFlow. To make it permanent, you can add the following line to ~/.bashrc.
@@ -76,7 +90,7 @@ For example, the AlexNet can be run as:

Buffer sizes are given in MB; e.g., for an 8GB gpu, use `-ll:fsize 8000`.

## 6. Install FlexFlow
## 7. Install FlexFlow
If you built/installed FlexFlow using `pip`, this step is not required. If you built using Make or CMake, install FlexFlow with:
```
cd build
14 changes: 12 additions & 2 deletions cmake/legion.cmake
@@ -27,8 +27,18 @@ else()
endif()
message(STATUS "GASNET ROOT: $ENV{GASNet_ROOT_DIR}")
set(Legion_MAX_DIM ${FF_MAX_DIM} CACHE STRING "Maximum number of dimensions")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA")
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH")
if (FF_GPU_BACKEND STREQUAL "cuda")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA" FORCE)
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH" FORCE)
elseif (FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_USE_HIP ON CACHE BOOL "enable Legion_USE_HIP" FORCE)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
set(Legion_HIP_TARGET "CUDA" CACHE STRING "Legion_HIP_TARGET CUDA" FORCE)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_HIP_TARGET "ROCM" CACHE STRING "Legion HIP_TARGET ROCM" FORCE)
endif()
endif()
add_subdirectory(deps/legion)
set(LEGION_LIBRARY Legion)
endif()
52 changes: 49 additions & 3 deletions config/config.inc
@@ -1,6 +1,5 @@
#!/bin/bash

function run_cmake() {
# set CC and CXX
if [ -n "$CC" ]; then
SET_CC="-DCMAKE_C_COMPILER=${CC}"
@@ -41,7 +40,8 @@ fi
# set CUDA dir
if [ -n "$CUDA_DIR" ]; then
SET_CUDA="-DCUDA_PATH=${CUDA_DIR}"
SET_CUDA_LIB_PATH="CUDA_PATH=${CUDA_DIR}/lib64/stubs"
CUDA_PATH="${CUDA_DIR}/lib64/stubs"
SET_CUDA_LIB_PATH="CUDA_PATH=${CUDA_PATH}"
fi

# set cudnn dir
@@ -115,8 +115,54 @@ if [ -n "$FF_MAX_DIM" ]; then
SET_MAX_DIM="-DFF_MAX_DIM=${FF_MAX_DIM}"
fi

# set ROCM path
if [ -n "$ROCM_PATH" ]; then
SET_ROCM_PATH="-DROCM_PATH=${ROCM_PATH}"
fi

# set GPU backend
if [ -n "$FF_GPU_BACKEND" ]; then
SET_FF_GPU_BACKEND="-DFF_GPU_BACKEND=${FF_GPU_BACKEND}"

# cmake does not play nicely with overrides via `set()` of CMAKE_CXX_COMPILER and friends
# because it uses their values to setup the toolchain.
# see: https://gitlab.kitware.com/cmake/community/-/wikis/FAQ#how-do-i-use-a-different-compiler
#
# Ideally we would use the values internally to the cmake script, e.g. HIP_HIPCC_EXECUTABLE,
# to set these values but this is a sufficient compromise.
if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then
if [ -n "$SET_CXX" ]; then
echo "FF_GPU_BACKEND is set to ${FF_GPU_BACKEND}. Normally we would set the compiler and linker" 1>&2
echo "to hipcc, but the compiler is already set to ${SET_CXX}". 1>&2
else
if [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# Configuring hipcc for nvidia:
#
# The platform hipcc targets is configured by the HIP_PLATFORM env var.
# Ideally, as we could in the shell, we would call `HIP_PLATFORM=nvidia hipcc <...>`.
# However, CMAKE_CXX_COMPILER doesn't allow configuration as such. Additionally,
# cmake doesn't allow setting environment variables for target builds like make does
# with exported variables.
#
# Instead, this file configures hipcc with HIP_PLATFORM and CUDA_PATH
#
# CMAKE requires CMAKE_CXX_COMPILER exists before cmake is called, so we can't
# write out this file during build configuration.
echo "HIP_PLATFORM=nvidia CUDA_PATH=${CUDA_DIR} ${ROCM_PATH}/bin/hipcc \$@" > "$(pwd)/nvidia_hipcc"
chmod +x "$(pwd)/nvidia_hipcc"
SET_CXX="-DCMAKE_CXX_COMPILER=$(pwd)/nvidia_hipcc -DCMAKE_CXX_LINKER=$(pwd)/nvidia_hipcc"
else
SET_CXX="-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_CXX_LINKER=/opt/rocm/bin/hipcc"
fi
fi
fi
fi

CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"

function run_cmake() {
SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} $* ${SRC_LOCATION}"
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake ${CMAKE_FLAGS} $* ${SRC_LOCATION}"
echo $CMAKE_COMMAND
eval $CMAKE_COMMAND
}
49 changes: 32 additions & 17 deletions config/config.linux
@@ -1,7 +1,5 @@
#!/bin/bash

echo " Defaults for Linux machine"

# set the CC and CXX, usually it is not needed as cmake can detect it
# set CC and CXX to mpicc and mpic++ when enable gasnet
# CC=mpicc
@@ -18,38 +16,55 @@ echo " Defaults for Linux machine"
# set build type
BUILD_TYPE=Release

# set CUDA Arch, replace xx with your GPU architecture
#FF_CUDA_ARCH=xx
# set CUDA Arch with your GPU architecture
FF_CUDA_ARCH=${FF_CUDA_ARCH:-""}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=/usr/local/cuda
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# set CUDA dir in case cmake cannot autodetect a path
CUDA_DIR=/usr/local/cuda
CUDA_DIR=${CUDA_DIR:-"/usr/local/cuda"}

# enable Python
FF_USE_PYTHON=ON

# enable NCCL
FF_USE_NCCL=ON
FF_USE_PYTHON=${FF_USE_PYTHON:-ON}

# enable GASNet
FF_USE_GASNET=OFF
FF_USE_GASNET=${FF_USE_GASNET:-OFF}

# select GASNET conduit
FF_GASNET_CONDUIT=ibv
FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=OFF
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}

# build C++ unit tests
FF_BUILD_UNIT_TESTS=OFF
FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}

# enable avx2
FF_USE_AVX2=OFF
FF_USE_AVX2=${FF_USE_AVX2:-OFF}

# set MAX_DIM
FF_MAX_DIM=5
FF_MAX_DIM=${FF_MAX_DIM:-5}

# set ROCM path
ROCM_PATH=${ROCM_PATH:-"/opt/rocm"}

# set GPU backend
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}

if [ "$FF_GPU_BACKEND" = "cuda" ] || [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# enable NCCL
FF_USE_NCCL=${FF_USE_NCCL:-ON}
else
FF_USE_NCCL=OFF
fi

. $(dirname $0)/config.inc
run_cmake $*

if [ -n "$1" ]; then
# You can pass the name of the variable you want to print out as $1. This
# is used in the python setup script to get the cmake config
echo "${!1}"
else
run_cmake $*
fi
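
This is the hook the python setup script uses: `config/config.linux VAR_NAME` prints the resolved value and exits. A minimal sketch of the consuming side (hypothetical helper, run from the repository root):

```
import subprocess

def get_config_value(name: str) -> str:
    # config.linux echoes the resolved value of the variable named in $1
    out = subprocess.check_output(["./config/config.linux", name], text=True)
    return out.strip()

print(get_config_value("FF_GPU_BACKEND"))  # e.g. "cuda"
```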
