Build for hip gpu backends #392

Merged
merged 15 commits on Nov 8, 2022
Changes from 11 commits
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
# Ignore all folders which start with "build"
/build*/
91 changes: 80 additions & 11 deletions CMakeLists.txt
@@ -50,6 +50,10 @@ set(FF_CUDA_ARCH "" CACHE STRING "Target CUDA Arch")
# option for nccl
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)

if (FF_GPU_BACKEND STREQUAL "hip_rocm" AND FF_USE_NCCL STREQUAL "ON")
message(FATAL_ERROR "NCCL: ON for FF_GPU_BACKEND: hip_rocm. hip_rocm backend must have NCCL disabled.")
endif()
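
# For example (a sketch of a configure line that satisfies this check):
#   cmake -DFF_GPU_BACKEND=hip_rocm -DFF_USE_NCCL=OFF ..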

# option for avx2
option(FF_USE_AVX2 "Run FlexFlow with AVX2" OFF)

@@ -78,14 +82,22 @@ add_compile_options(${CC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS})
link_libraries(${LD_FLAGS})

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()
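
# A non-default ROCm location (the path shown is hypothetical) can be supplied
# at configure time, e.g.:
#   cmake -DFF_GPU_BACKEND=hip_rocm -DROCM_PATH=/opt/rocm-5.3.0 ..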

# ZLIB
include(zlib)

# CUDA
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cuda)
endif()

# CUDNN
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cudnn)
endif()

# NCCL
if(FF_USE_NCCL)
@@ -179,10 +191,6 @@ file(GLOB_RECURSE FLEXFLOW_SRC
${FLEXFLOW_ROOT}/src/*.cc)
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc")

set(FLEXFLOW_CPP_DRV_SRC
${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)

@@ -195,10 +203,71 @@ target_link_libraries(substitution_loader nlohmann_json::nlohmann_json)
#message("FLEXFLOW_INCLUDE_DIRS: ${FLEXFLOW_INCLUDE_DIRS}")

# compile flexflow lib
if (FF_GPU_BACKEND STREQUAL "cuda")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
endif()
elseif(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cpp)

if(BUILD_SHARED_LIBS)
add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
else()
add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
endif()

list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})

find_package(hip REQUIRED)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
# The targets defined by the hip cmake config only target amd devices.
# For targeting nvidia devices, we'll make our own interface target,
# hip_device_nvidia, that includes the rocm and hip headers.
add_library(hip_device_nvidia INTERFACE)

if (NOT FF_CUDA_ARCH STREQUAL "")
target_compile_options(hip_device_nvidia INTERFACE -arch=compute_${FF_CUDA_ARCH})
endif()

target_include_directories(hip_device_nvidia SYSTEM INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)
target_include_directories(hip_device_nvidia INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)

# Linking cuda:
# We do not explicitly link cuda. hipcc when targeting nvidia will
# use nvcc under the hood. nvcc when used for linking will handle
# linking cuda dependencies.
target_link_libraries(flexflow hip_device_nvidia)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
find_package(hipblas REQUIRED)
find_package(miopen REQUIRED)
# find_package(rocrand REQUIRED)
find_library(HIP_RAND_LIBRARY hiprand REQUIRED)

# The hip cmake config module defines three targets,
# hip::amdhip64, hip::host, and hip::device.
#
# hip::host and hip::device are interface targets. hip::amdhip64 is an
# imported target for libamdhip64.
#
# You do not directly link to hip::amdhip64. hip::host links to hip::amdhip64
# and hip::device links to hip::host. Link to hip::host to just use hip without
# compiling any GPU code. Link to hip::device to compile the GPU device code.
#
# Docs (outdated):
# https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
target_link_libraries(flexflow hip::device roc::hipblas MIOpen ${HIP_RAND_LIBRARY})
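
# (A sketch) By contrast, a hypothetical host-only helper would link just the
# runtime, without compiling any device code:
#   target_link_libraries(my_host_tool hip::host)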
endif()
else()
message(FATAL_ERROR "Unsupported FF_GPU_BACKEND for cmake: ${FF_GPU_BACKEND}")
endif()

target_include_directories(flexflow PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
@@ -233,11 +302,11 @@ if(FF_BUILD_UNIT_TESTS)
endif()

if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(tools/protobuf_to_json)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(tools/substitutions_to_dot)
endif()

# Python
24 changes: 19 additions & 5 deletions INSTALL.md
@@ -7,12 +7,26 @@ Clone the FlexFlow source code, and the third-party dependencies from GitHub.
git clone --recursive https://github.com/flexflow/FlexFlow.git
```

## 2. Install system dependencies
FlexFlow has system dependencies on CUDA and/or ROCm, depending on which GPU backend you target. The GPU backend is configured by the CMake variable `FF_GPU_BACKEND`. By default, FlexFlow targets CUDA. `docker/base/Dockerfile` shows how to install the system dependencies on a standard Ubuntu system.
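
For example, a hypothetical configure invocation that selects the ROCM backend (a sketch; `FF_GPU_BACKEND` is the CMake variable described above):

```
cmake -DFF_GPU_BACKEND=hip_rocm ..
```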

### Targeting CUDA - `FF_GPU_BACKEND=cuda`
If you are targeting CUDA, FlexFlow requires CUDA and CUDNN to be installed. You can follow the standard NVIDIA installation instructions for [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) and [CUDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html).

### Targeting ROCM - `FF_GPU_BACKEND=hip_rocm`
If you are targeting ROCM, FlexFlow requires a ROCM and HIP installation plus a few additional packages. Note that this can be done on a system with or without an AMD GPU. You can follow the standard installation instructions for [ROCM](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.3/page/Introduction_to_ROCm_Installation_Guide_for_Linux.html) and [HIP](https://docs.amd.com/bundle/HIP-Installation-Guide-v5.3/page/Introduction_to_HIP_Installation_Guide.html). When running `amdgpu-install`, install the `hip` and `rocm` use cases. You can skip installing the kernel drivers (unnecessary on systems without an AMD graphics card) by passing `--no-dkms`, i.e. `amdgpu-install --usecase=hip,rocm --no-dkms`. Additionally, install the packages `hip-dev`, `hipblas`, `miopen-hip`, and `rocm-hip-sdk`.
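
On a standard Ubuntu system, the steps above might look as follows (a sketch; the repository setup that makes `amdgpu-install` available is environment-specific):

```
sudo amdgpu-install --usecase=hip,rocm --no-dkms
sudo apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk
```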

See `./docker/base/Dockerfile` for an example ROCM install.

### Targeting CUDA through HIP - `FF_GPU_BACKEND=hip_cuda`
Follow both the CUDA and ROCM instructions.

## 3. Install the Python dependencies
If you are planning to build the Python interface, you will need to install several additional Python libraries; please check [this list](https://github.com/flexflow/FlexFlow/blob/master/requirements.txt) for details. If you are only looking to use the C++ interface, you can skip to the next section.

**We recommend that you create your own `conda` environment and then install the Python dependencies, to avoid any version mismatching with your system pre-installed libraries.**
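
For example (a sketch; the environment name and Python version are arbitrary choices):

```
conda create -n flexflow python=3.10
conda activate flexflow
pip install -r requirements.txt
```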

## 4. Configuring the FlexFlow build
Before building FlexFlow, you should configure the build by editing the `config/config.linux` file. Leave it unchanged if you want to build with the default options. We recommend that you spend some time familiarizing yourself with the available options. In particular, the main parameters are:
* `CUDA_DIR` is used to specify the directory of CUDA. It is only required when CMake can not automatically detect the installation directory of CUDA.
* `CUDNN_DIR` is used to specify the directory of CUDNN. It is only required when CUDNN is not installed in the CUDA directory.
@@ -25,7 +39,7 @@ Before building FlexFlow, you should configure the build by editing the `config/

More options are available in CMake; run `ccmake` and search for options starting with `FF`.

## 5. Build FlexFlow
You can build FlexFlow in three ways: with CMake, with Make, and with `pip`. We recommend the CMake build system, as it will automatically build all C++ dependencies, including NCCL and Legion.

### Building FlexFlow with CMake
@@ -45,7 +59,7 @@ To build FlexFlow with `pip`, run `pip install .` from the FlexFlow home directory.
The Makefile we provide is mainly for development purposes and may not be fully up to date.


## 6. Test FlexFlow
After building FlexFlow, you can test it to ensure that the build completed without issue, and that your system is ready to run FlexFlow.

### Set the `FF_HOME` environment variable before running FlexFlow. To make it permanent, you can add the following line to your `~/.bashrc`.
@@ -76,7 +90,7 @@ For example, the AlexNet example can be run as:

Buffer sizes are given in MB; e.g., for an 8 GB GPU, use `-ll:fsize 8000`.
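
A hypothetical invocation combining these flags (the binary name and the flags other than `-ll:fsize` are illustrative assumptions, not taken from this diff):

```
./alexnet -ll:gpu 1 -ll:fsize 8000
```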

## 7. Install FlexFlow
If you built/installed FlexFlow using `pip`, this step is not required. If you built using Make or CMake, install FlexFlow with:
```
cd build
make install
```
14 changes: 12 additions & 2 deletions cmake/legion.cmake
@@ -27,8 +27,18 @@ else()
endif()
message(STATUS "GASNET ROOT: $ENV{GASNet_ROOT_DIR}")
set(Legion_MAX_DIM ${FF_MAX_DIM} CACHE STRING "Maximum number of dimensions")
if (FF_GPU_BACKEND STREQUAL "cuda")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA" FORCE)
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH" FORCE)
elseif (FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_USE_HIP ON CACHE BOOL "enable Legion_USE_HIP" FORCE)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
set(Legion_HIP_TARGET "CUDA" CACHE STRING "Legion_HIP_TARGET CUDA" FORCE)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_HIP_TARGET "ROCM" CACHE STRING "Legion HIP_TARGET ROCM" FORCE)
endif()
endif()
add_subdirectory(deps/legion)
set(LEGION_LIBRARY Legion)
endif()
45 changes: 44 additions & 1 deletion config/config.inc
@@ -115,8 +115,51 @@ if [ -n "$FF_MAX_DIM" ]; then
SET_MAX_DIM="-DFF_MAX_DIM=${FF_MAX_DIM}"
fi

# set ROCM path
if [ -n "$ROCM_PATH" ]; then
SET_ROCM_PATH="-DROCM_PATH=${ROCM_PATH}"
fi

# set GPU backend
if [ -n "$FF_GPU_BACKEND" ]; then
SET_FF_GPU_BACKEND="-DFF_GPU_BACKEND=${FF_GPU_BACKEND}"

# cmake does not play nicely with overrides via `set()` of CMAKE_CXX_COMPILER and friends
# because it uses their values to set up the toolchain.
# see: https://gitlab.kitware.com/cmake/community/-/wikis/FAQ#how-do-i-use-a-different-compiler
#
# Ideally we would use values internal to the cmake script, e.g. HIP_HIPCC_EXECUTABLE,
# to set these, but this is a sufficient compromise.
if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then
if [ -n "$SET_CXX" ]; then
echo "FF_GPU_BACKEND is set to ${FF_GPU_BACKEND}. Normally we would set the compiler and linker"
echo "to hipcc, but the compiler is already set to ${SET_CXX}".
else
if [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# Configuring hipcc for nvidia:
#
# The platform hipcc targets is configured by the HIP_PLATFORM env var.
# Ideally we would call `HIP_PLATFORM=nvidia hipcc <...>`, as we could in a shell.
# However, CMAKE_CXX_COMPILER doesn't allow configuration like that. Additionally,
# cmake doesn't allow setting environment variables for target builds the way make
# does with exported variables.
#
# Instead, this file writes out a small wrapper script that configures hipcc
# with HIP_PLATFORM and CUDA_PATH.
#
# CMake requires CMAKE_CXX_COMPILER to exist before cmake is invoked, so the
# wrapper is written out here rather than during cmake configuration.
echo "HIP_PLATFORM=nvidia CUDA_PATH=${CUDA_DIR} ${ROCM_PATH}/bin/hipcc \$@" > "$(pwd)/nvidia_hipcc"
chmod +x "$(pwd)/nvidia_hipcc"
SET_CXX="-DCMAKE_CXX_COMPILER=$(pwd)/nvidia_hipcc -DCMAKE_CXX_LINKER=$(pwd)/nvidia_hipcc"
else
SET_CXX="-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_CXX_LINKER=/opt/rocm/bin/hipcc"
fi
fi
fi
fi

SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} $* ${SRC_LOCATION}"
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND} $* ${SRC_LOCATION}"
echo $CMAKE_COMMAND
eval $CMAKE_COMMAND
}
38 changes: 24 additions & 14 deletions config/config.linux
@@ -18,38 +18,48 @@ echo " Defaults for Linux machine"
# set build type
BUILD_TYPE=Release

# set CUDA Arch with your GPU architecture
FF_CUDA_ARCH=${FF_CUDA_ARCH:-""}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# set CUDA dir in case cmake cannot autodetect a path
CUDA_DIR=${CUDA_DIR:-"/usr/local/cuda"}

# enable Python
FF_USE_PYTHON=${FF_USE_PYTHON:-ON}

# enable GASNet
FF_USE_GASNET=${FF_USE_GASNET:-OFF}

# select GASNET conduit
FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}

# build C++ unit tests
FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}

# enable avx2
FF_USE_AVX2=${FF_USE_AVX2:-OFF}

# set MAX_DIM
FF_MAX_DIM=${FF_MAX_DIM:-5}

# set ROCM path
ROCM_PATH=${ROCM_PATH:-"/opt/rocm"}

# set GPU backend
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}

if [ "$FF_GPU_BACKEND" = "cuda" ] || [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# enable NCCL
FF_USE_NCCL=${FF_USE_NCCL:-ON}
else
FF_USE_NCCL=OFF
fi
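
# Example (a sketch): a ROCm build configured entirely from the environment:
#   FF_GPU_BACKEND=hip_rocm ROCM_PATH=/opt/rocm ./config/config.linux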

. $(dirname $0)/config.inc
run_cmake $*
3 changes: 3 additions & 0 deletions docker/README.md
@@ -14,3 +14,6 @@ You will need a machine with a NVIDIA GPU, with drivers installed. You will also
```
./run.sh base
```

## Configuring the docker build for different GPU backends
The container can be built for different GPU backends by setting the `FF_GPU_BACKEND` environment variable. By default, the container targets CUDA. See the corresponding CMake variable `FF_GPU_BACKEND` for the available values.
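
For example (a sketch, assuming the helper script forwards the variable to the docker build):

```
FF_GPU_BACKEND=hip_rocm ./run.sh base
```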