Build for hip gpu backends (#392)
* Update cmake to handle gpu backend

Previously, cmake would always build targeting cuda.

We now dispatch on FF_GPU_BACKEND to target either cuda, hip_cuda,
or hip_rocm. The hip backends are expected to use the hip cmake config
file that comes with a hip install. The discrepancies for targeting
hip_cuda vs hip_rocm are documented inline.

Configure legion for hip backend
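
Condensed, the new dispatch looks roughly like this (a sketch only; the full logic, including the hip_device_nvidia interface target, is in the CMakeLists.txt diff below):

```
# Sketch of the FF_GPU_BACKEND dispatch, not the full CMakeLists.txt.
set(FF_GPU_BACKEND "cuda" CACHE STRING "cuda, hip_cuda, or hip_rocm")

if(FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
  include(cuda)   # hip_cuda still compiles against the CUDA toolkit
  include(cudnn)
endif()

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
  set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
  list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
  find_package(hip REQUIRED)  # hip cmake config shipped with the hip install
endif()
```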

* Build configuration scripts

* docker build updates

* Small source modifications for build

add legion max return size check

fix std::stringstream var decl parsed as function (illustrated below)

explicit template instantiations
include string
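
For context on the std::stringstream fix: `std::stringstream ss();` declares a function named `ss` returning a `std::stringstream` (C++'s "most vexing parse"), not a default-constructed variable, so later stream writes fail to compile. A minimal illustration (hypothetical names, not the actual FlexFlow source):

```
#include <sstream>

int main() {
  // std::stringstream ss();  // vexing parse: declares a function `ss`
  //                          // returning std::stringstream
  std::stringstream ss;       // fix: declare the variable without parens
  ss << "hello";
  return 0;
}
```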

* Move tools to top level directory

We glob for files under src to get the source files for the flexflow
target. Moving tools to the top level directory prevents the tools'
source files from accidentally being added to the flexflow target's
source files.

Change substitution_to_dot's cuda_add_executable to add_executable.
When building with hip_rocm, we don't have cuda available and shouldn't
need it for substitution_to_dot, as the target does not directly build
any cuda kernels.
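
The change itself is mechanical; roughly (the source-list variable is illustrative):

```
# Before: required the CUDA toolchain even for this host-only tool.
# cuda_add_executable(substitution_to_dot ${SUBSTITUTION_SRC})

# After: a plain host executable, which also builds under hip_rocm
# where cuda is absent.
add_executable(substitution_to_dot ${SUBSTITUTION_SRC})
```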

* fix miopen.h headers

* Fix hip kernels

* system dependencies install instructions

* Ensure docker build script is called from FF_HOME
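
A minimal invocation, assuming `FF_HOME` points at the FlexFlow checkout (the `base` image name matches the CI job below):

```
cd "$FF_HOME"
./docker/build.sh base
```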

* Add .dockerignore file to ignore build directories

* Add new lines

* Fix CI for changes in PR

do not call sed to manually change config script

clone submodules in docker job

* Change the python setup script to shell out to the config script instead of parsing it ad hoc (see the sketch after the config.linux diff below)

* Update docs to note FF_GPU_BACKEND=hip_cuda is not supported

* Fix path to mt5 dockerfile
williamberman committed Nov 8, 2022
1 parent 68a048a commit 81304c8
Showing 30 changed files with 670 additions and 458 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
# Ignore all folders which start with "build"
/build*/
5 changes: 1 addition & 4 deletions .github/workflows/build.yml
@@ -51,12 +51,9 @@ jobs:
cores_available=$(nproc --all)
n_build_cores=$(( cores_available -1 ))
if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
sed -i "/FF_CUDA_ARCH/c\FF_CUDA_ARCH=70" ./config/config.linux
sed -i "/FF_BUILD_ALL_EXAMPLES/c\FF_BUILD_ALL_EXAMPLES=ON" ./config/config.linux
sed -i "/FF_BUILD_UNIT_TESTS/c\FF_BUILD_UNIT_TESTS=ON" ./config/config.linux
mkdir build
cd build
../config/config.linux
FF_CUDA_ARCH=70 FF_BUILD_ALL_EXAMPLES=ON FF_BUILD_UNIT_TESTS=ON ../config/config.linux
make -j $n_build_cores
- name: Install FlexFlow
2 changes: 2 additions & 0 deletions .github/workflows/docker-build.yml
@@ -24,6 +24,8 @@ jobs:
steps:
- name: Checkout Git Repository
uses: actions/checkout@v3
with:
submodules: recursive

- name: Build Docker container
run: ./docker/build.sh base
2 changes: 1 addition & 1 deletion .github/workflows/pip-install.yml
@@ -44,7 +44,7 @@ jobs:
run: |
export PATH=/opt/conda/bin:$PATH
export FF_HOME=$(pwd)
sed -i "/FF_CUDA_ARCH/c\FF_CUDA_ARCH=70" ./config/config.linux
export FF_CUDA_ARCH=70
pip install . --verbose
- name: Check availability of Python flexflow.core module
91 changes: 80 additions & 11 deletions CMakeLists.txt
@@ -50,6 +50,10 @@ set(FF_CUDA_ARCH "" CACHE STRING "Target CUDA Arch")
# option for nccl
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)

if (FF_GPU_BACKEND STREQUAL "hip_rocm" AND FF_USE_NCCL STREQUAL "ON")
message(FATAL_ERROR "NCCL: ON for FF_GPU_BACKEND: hip_rocm. hip_rocm backend must have NCCL disabled.")
endif()

# option for avx2
option(FF_USE_AVX2 "Run FlexFlow with AVX2" OFF)

@@ -78,14 +82,22 @@ add_compile_options(${CC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS})
link_libraries(${LD_FLAGS})

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()

# ZLIB
include(zlib)

# CUDA
include(cuda)
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cuda)
endif()

# CUDNN
include(cudnn)
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cudnn)
endif()

# NCCL
if(FF_USE_NCCL)
@@ -179,10 +191,6 @@ file(GLOB_RECURSE FLEXFLOW_SRC
${FLEXFLOW_ROOT}/src/*.cc)
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc")

file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

set(FLEXFLOW_CPP_DRV_SRC
${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)

@@ -195,10 +203,71 @@ target_link_libraries(substitution_loader nlohmann_json::nlohmann_json)
#message("FLEXFLOW_INCLUDE_DIRS: ${FLEXFLOW_INCLUDE_DIRS}")

# compile flexflow lib
if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
if (FF_GPU_BACKEND STREQUAL "cuda")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
endif()
elseif(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cpp)

if(BUILD_SHARED_LIBS)
add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
else()
add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
endif()

list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})

find_package(hip REQUIRED)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
# The targets defined by the hip cmake config only target amd devices.
# For targeting nvidia devices, we'll make our own interface target,
# hip_device_nvidia, that includes the rocm and hip headers.
add_library(hip_device_nvidia INTERFACE)

if (NOT FF_CUDA_ARCH STREQUAL "")
target_compile_options(hip_device_nvidia INTERFACE -arch=compute_${FF_CUDA_ARCH})
endif()

target_include_directories(hip_device_nvidia SYSTEM INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)
target_include_directories(hip_device_nvidia INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)

# Linking cuda:
# We do not explicitly link cuda. hipcc when targeting nvidia will
# use nvcc under the hood. nvcc when used for linking will handle
# linking cuda dependencies
target_link_libraries(flexflow hip_device_nvidia)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
find_package(hipblas REQUIRED)
find_package(miopen REQUIRED)
# find_package(rocrand REQUIRED)
find_library(HIP_RAND_LIBRARY hiprand REQUIRED)

# The hip cmake config module defines three targets,
# hip::amdhip64, hip::host, and hip::device.
#
# hip::host and hip::device are interface targets. hip::amdhip64 is an
# imported target for libamdhip.
#
# You do not directly link to hip::amdhip64. hip::host links to hip::amdhip64
# and hip::device links to hip::host. Link to hip::host to just use hip without
# compiling any GPU code. Link to hip::device to compile the GPU device code.
#
# Docs (outdated):
# https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
target_link_libraries(flexflow hip::device roc::hipblas MIOpen ${HIP_RAND_LIBRARY})
endif()
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
message(FATAL_ERROR "Unsupported FF_GPU_BACKEND for cmake: ${FF_GPU_BACKEND}")
endif()

target_include_directories(flexflow PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
@@ -233,11 +302,11 @@ if(FF_BUILD_UNIT_TESTS)
endif()

if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(src/tools/protobuf_to_json)
add_subdirectory(tools/protobuf_to_json)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(src/tools/substitutions_to_dot)
add_subdirectory(tools/substitutions_to_dot)
endif()

# Python
24 changes: 19 additions & 5 deletions INSTALL.md
@@ -7,12 +7,26 @@ Clone the FlexFlow source code, and the third-party dependencies from GitHub.
git clone --recursive https://github.com/flexflow/FlexFlow.git
```

## 2. Install the Python dependencies
## 2. Install system dependencies
FlexFlow has system dependencies on cuda and/or rocm depending on which gpu backend you target. The gpu backend is configured by the cmake variable FF_GPU_BACKEND. By default, FlexFlow targets CUDA. `docker/base/Dockerfile` installs the system dependencies on a standard Ubuntu system.

### Targeting CUDA - `FF_GPU_BACKEND=cuda`
If you are targeting CUDA, FlexFlow requires CUDA and CUDNN to be installed. You can follow the standard nvidia installation instructions [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) and [CUDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html).

### Targeting ROCM - `FF_GPU_BACKEND=hip_rocm`
If you are targeting ROCM, FlexFlow requires a ROCM and HIP installation with a few additional packages. Note that this can be done on a system with or without an AMD GPU. You can follow the standard installation instructions for [ROCM](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.3/page/Introduction_to_ROCm_Installation_Guide_for_Linux.html) and [HIP](https://docs.amd.com/bundle/HIP-Installation-Guide-v5.3/page/Introduction_to_HIP_Installation_Guide.html). When running `amdgpu-install`, install the hip and rocm use cases. You can skip installing the kernel drivers (not necessary on systems without an AMD graphics card) with `--no-dkms`, i.e. `amdgpu-install --usecase=hip,rocm --no-dkms`. Additionally, install the packages `hip-dev`, `hipblas`, `miopen-hip`, and `rocm-hip-sdk`.
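
On Ubuntu, the steps above might look like the following (a sketch, assuming the `amdgpu-install` script is already set up and your distro uses apt; package names follow the list above):

```
# Install the hip and rocm use cases; skip kernel drivers on machines
# without an AMD GPU.
sudo amdgpu-install --usecase=hip,rocm --no-dkms

# Extra packages FlexFlow needs on top of the base install.
sudo apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk
```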

See `./docker/base/Dockerfile` for an example ROCM install.

### Targeting CUDA through HIP - `FF_GPU_BACKEND=hip_cuda`
This is not currently supported.

## 3. Install the Python dependencies
If you are planning to build the Python interface, you will need to install several additional Python libraries; please check [this](https://github.com/flexflow/FlexFlow/blob/master/requirements.txt) for details. If you are only looking to use the C++ interface, you can skip to the next section.

**We recommend that you create your own `conda` environment and then install the Python dependencies, to avoid any version mismatching with your system pre-installed libraries.**

## 3. Configuring the FlexFlow build
## 4. Configuring the FlexFlow build
Before building FlexFlow, you should configure the build by editing the `config/config.linux` file. Leave it unchanged if you want to build with the default options. We recommend that you spend some time familiarizing yourself with the available options. In particular, the main parameters are:
* `CUDA_DIR` is used to specify the directory of CUDA. It is only required when CMake can not automatically detect the installation directory of CUDA.
* `CUDNN_DIR` is used to specify the directory of CUDNN. It is only required when CUDNN is not installed in the CUDA directory.
@@ -25,7 +39,7 @@ Before building FlexFlow, you should configure the build by editing the `config/

More options are available in cmake; run `ccmake` and search for options starting with FF.
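
Since every option in `config/config.linux` now falls back to an environment variable of the same name, you can also override options per invocation instead of editing the file, as the CI build does (values shown are illustrative):

```
mkdir -p build && cd build
FF_CUDA_ARCH=70 FF_BUILD_ALL_EXAMPLES=ON ../config/config.linux
```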

## 4. Build FlexFlow
## 5. Build FlexFlow
You can build FlexFlow in three ways: with CMake, with Make, and with `pip`. We recommend that you use the CMake build system as it will automatically build all C++ dependencies, including NCCL and Legion.

### Building FlexFlow with CMake
@@ -45,7 +59,7 @@ To build Flexflow with `pip`, run `pip install .` from the FlexFlow home directo
The Makefile we provide is mainly for development purposes, and may not be fully up to date.


## 5. Test FlexFlow
## 6. Test FlexFlow
After building FlexFlow, you can test it to ensure that the build completed without issue, and that your system is ready to run FlexFlow.

### Set the `FF_HOME` environment variable before running FlexFlow. To make it permanent, you can add the following line to ~/.bashrc.
@@ -76,7 +90,7 @@ For example, the AlexNet can be run as:

Buffer sizes are given in MB; e.g., for an 8GB gpu, use `-ll:fsize 8000`.

## 6. Install FlexFlow
## 7. Install FlexFlow
If you built/installed FlexFlow using `pip`, this step is not required. If you built using Make or CMake, install FlexFlow with:
```
cd build
14 changes: 12 additions & 2 deletions cmake/legion.cmake
@@ -27,8 +27,18 @@ else()
endif()
message(STATUS "GASNET ROOT: $ENV{GASNet_ROOT_DIR}")
set(Legion_MAX_DIM ${FF_MAX_DIM} CACHE STRING "Maximum number of dimensions")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA")
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH")
if (FF_GPU_BACKEND STREQUAL "cuda")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA" FORCE)
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH" FORCE)
elseif (FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_USE_HIP ON CACHE BOOL "enable Legion_USE_HIP" FORCE)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
set(Legion_HIP_TARGET "CUDA" CACHE STRING "Legion_HIP_TARGET CUDA" FORCE)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_HIP_TARGET "ROCM" CACHE STRING "Legion HIP_TARGET ROCM" FORCE)
endif()
endif()
add_subdirectory(deps/legion)
set(LEGION_LIBRARY Legion)
endif()
52 changes: 49 additions & 3 deletions config/config.inc
@@ -1,6 +1,5 @@
#!/bin/bash

function run_cmake() {
# set CC and CXX
if [ -n "$CC" ]; then
SET_CC="-DCMAKE_C_COMPILER=${CC}"
@@ -41,7 +40,8 @@ fi
# set CUDA dir
if [ -n "$CUDA_DIR" ]; then
SET_CUDA="-DCUDA_PATH=${CUDA_DIR}"
SET_CUDA_LIB_PATH="CUDA_PATH=${CUDA_DIR}/lib64/stubs"
CUDA_PATH="${CUDA_DIR}/lib64/stubs"
SET_CUDA_LIB_PATH="CUDA_PATH=${CUDA_PATH}"
fi

# set cudnn dir
@@ -115,8 +115,54 @@ if [ -n "$FF_MAX_DIM" ]; then
SET_MAX_DIM="-DFF_MAX_DIM=${FF_MAX_DIM}"
fi

# set ROCM path
if [ -n "$ROCM_PATH" ]; then
SET_ROCM_PATH="-DROCM_PATH=${ROCM_PATH}"
fi

# set GPU backend
if [ -n "$FF_GPU_BACKEND" ]; then
SET_FF_GPU_BACKEND="-DFF_GPU_BACKEND=${FF_GPU_BACKEND}"

# cmake does not play nicely with overrides via `set()` of CMAKE_CXX_COMPILER and friends
# because it uses their values to setup the toolchain.
# see: https://gitlab.kitware.com/cmake/community/-/wikis/FAQ#how-do-i-use-a-different-compiler
#
# Ideally we would use the values internally to the cmake script, e.g. HIP_HIPCC_EXECUTABLE,
# to set these values but this is a sufficient compromise.
if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then
if [ -n "$SET_CXX" ]; then
echo "FF_GPU_BACKEND is set to ${FF_GPU_BACKEND}. Normally we would set the compiler and linker" 1>&2
echo "to hipcc, but the compiler is already set to ${SET_CXX}". 1>&2
else
if [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# Configuring hipcc for nvidia:
#
# The platform hipcc targets is configured by the HIP_PLATFORM env var.
# Ideally, as we could in the shell, we would call `HIP_PLATFORM=nvidia hipcc <...>`.
# However, CMAKE_CXX_COMPILER doesn't allow configuration as such. Additionally,
# cmake doesn't allow setting environment variables for target builds like make does
# with exported variables.
#
# Instead, this file configures hipcc with HIP_PLATFORM and CUDA_PATH
#
# CMAKE requires CMAKE_CXX_COMPILER exists before cmake is called, so we can't
# write out this file during build configuration.
echo "HIP_PLATFORM=nvidia CUDA_PATH=${CUDA_DIR} ${ROCM_PATH}/bin/hipcc \$@" > "$(pwd)/nvidia_hipcc"
chmod +x "$(pwd)/nvidia_hipcc"
SET_CXX="-DCMAKE_CXX_COMPILER=$(pwd)/nvidia_hipcc -DCMAKE_CXX_LINKER=$(pwd)/nvidia_hipcc"
else
SET_CXX="-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_CXX_LINKER=/opt/rocm/bin/hipcc"
fi
fi
fi
fi

CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"

function run_cmake() {
SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} $* ${SRC_LOCATION}"
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake ${CMAKE_FLAGS} $* ${SRC_LOCATION}"
echo $CMAKE_COMMAND
eval $CMAKE_COMMAND
}
49 changes: 32 additions & 17 deletions config/config.linux
@@ -1,7 +1,5 @@
#!/bin/bash

echo " Defaults for Linux machine"

# set the CC and CXX, usually it is not needed as cmake can detect it
# set CC and CXX to mpicc and mpic++ when enable gasnet
# CC=mpicc
@@ -18,38 +16,55 @@ echo " Defaults for Linux machine"
# set build type
BUILD_TYPE=Release

# set CUDA Arch, replace xx with your GPU architecture
#FF_CUDA_ARCH=xx
# set CUDA Arch with your GPU architecture
FF_CUDA_ARCH=${FF_CUDA_ARCH:-""}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=/usr/local/cuda
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# set CUDA dir in case cmake cannot autodetect a path
CUDA_DIR=/usr/local/cuda
CUDA_DIR=${CUDA_DIR:-"/usr/local/cuda"}

# enable Python
FF_USE_PYTHON=ON

# enable NCCL
FF_USE_NCCL=ON
FF_USE_PYTHON=${FF_USE_PYTHON:-ON}

# enable GASNet
FF_USE_GASNET=OFF
FF_USE_GASNET=${FF_USE_GASNET:-OFF}

# select GASNET conduit
FF_GASNET_CONDUIT=ibv
FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=OFF
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}

# build C++ unit tests
FF_BUILD_UNIT_TESTS=OFF
FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}

# enable avx2
FF_USE_AVX2=OFF
FF_USE_AVX2=${FF_USE_AVX2:-OFF}

# set MAX_DIM
FF_MAX_DIM=5
FF_MAX_DIM=${FF_MAX_DIM:-5}

# set ROCM path
ROCM_PATH=${ROCM_PATH:-"/opt/rocm"}

# set GPU backend
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}

if [ "$FF_GPU_BACKEND" = "cuda" ] || [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# enable NCCL
FF_USE_NCCL=${FF_USE_NCCL:-ON}
else
FF_USE_NCCL=OFF
fi

. $(dirname $0)/config.inc
run_cmake $*

if [ -n "$1" ]; then
# You can pass the name of the variable you want to print out as $1. This
# is used in the python setup script to get the cmake config
echo "${!1}"
else
run_cmake $*
fi
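
This is the hook the python setup script uses: `config/config.linux VAR_NAME` prints the resolved value and exits. A minimal sketch of the consuming side (hypothetical helper, run from the repository root):

```
import subprocess

def get_config_value(name: str) -> str:
    # config.linux echoes the resolved value of the variable named in $1
    out = subprocess.check_output(["./config/config.linux", name], text=True)
    return out.strip()

print(get_config_value("FF_GPU_BACKEND"))  # e.g. "cuda"
```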
