Build for hip gpu backends #392

Merged
merged 15 commits on Nov 8, 2022
Changes from 11 commits
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
# Ignore all folders which start with "build"
/build*/
91 changes: 80 additions & 11 deletions CMakeLists.txt
@@ -50,6 +50,10 @@ set(FF_CUDA_ARCH "" CACHE STRING "Target CUDA Arch")
# option for nccl
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)

if (FF_GPU_BACKEND STREQUAL "hip_rocm" AND FF_USE_NCCL STREQUAL "ON")
message(FATAL_ERROR "NCCL: ON for FF_GPU_BACKEND: hip_rocm. hip_rocm backend must have NCCL disabled.")
endif()
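
# For example (a sketch of a configure line that satisfies this check):
#   cmake -DFF_GPU_BACKEND=hip_rocm -DFF_USE_NCCL=OFF ..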

# option for avx2
option(FF_USE_AVX2 "Run FlexFlow with AVX2" OFF)

@@ -78,14 +82,22 @@ add_compile_options(${CC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS})
link_libraries(${LD_FLAGS})

if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory.")
endif()
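
# A non-default ROCm location (the path shown is hypothetical) can be supplied
# at configure time, e.g.:
#   cmake -DFF_GPU_BACKEND=hip_rocm -DROCM_PATH=/opt/rocm-5.3.0 ..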

# ZLIB
include(zlib)

# CUDA
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cuda)
endif()

# CUDNN
if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
include(cudnn)
endif()

# NCCL
if(FF_USE_NCCL)
@@ -179,10 +191,6 @@ file(GLOB_RECURSE FLEXFLOW_SRC
${FLEXFLOW_ROOT}/src/*.cc)
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc")

set(FLEXFLOW_CPP_DRV_SRC
${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)

@@ -195,10 +203,71 @@ target_link_libraries(substitution_loader nlohmann_json::nlohmann_json)
#message("FLEXFLOW_INCLUDE_DIRS: ${FLEXFLOW_INCLUDE_DIRS}")

# compile flexflow lib
if (FF_GPU_BACKEND STREQUAL "cuda")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

if(BUILD_SHARED_LIBS)
cuda_add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
else()
cuda_add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC} OPTIONS ${CUDA_GENCODE})
endif()
elseif(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cpp)

if(BUILD_SHARED_LIBS)
add_library(flexflow SHARED ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
else()
add_library(flexflow STATIC ${FLEXFLOW_GPU_SRC} ${FLEXFLOW_SRC})
endif()

list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})

find_package(hip REQUIRED)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
# The targets defined by the hip cmake config only target amd devices.
# For targeting nvidia devices, we'll make our own interface target,
# hip_device_nvidia, that includes the rocm and hip headers.
add_library(hip_device_nvidia INTERFACE)

if (NOT FF_CUDA_ARCH STREQUAL "")
target_compile_options(hip_device_nvidia INTERFACE -arch=compute_${FF_CUDA_ARCH})
endif()

target_include_directories(hip_device_nvidia SYSTEM INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)
target_include_directories(hip_device_nvidia INTERFACE ${HIP_INCLUDE_DIRS} ${ROCM_PATH}/include)

# Linking cuda:
# We do not explicitly link cuda. hipcc when targeting nvidia will
# use nvcc under the hood. nvcc when used for linking will handle
# linking cuda dependencies.
target_link_libraries(flexflow hip_device_nvidia)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
find_package(hipblas REQUIRED)
find_package(miopen REQUIRED)
# find_package(rocrand REQUIRED)
find_library(HIP_RAND_LIBRARY hiprand REQUIRED)

# The hip cmake config module defines three targets,
# hip::amdhip64, hip::host, and hip::device.
#
# hip::host and hip::device are interface targets. hip::amdhip64 is an
# imported target for libamdhip64.
#
# You do not directly link to hip::amdhip64. hip::host links to hip::amdhip64
# and hip::device links to hip::host. Link to hip::host to just use hip without
# compiling any GPU code. Link to hip::device to compile the GPU device code.
#
# Docs (outdated):
# https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
target_link_libraries(flexflow hip::device roc::hipblas MIOpen ${HIP_RAND_LIBRARY})
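
# (A sketch) By contrast, a hypothetical host-only helper would link just the
# runtime, without compiling any device code:
#   target_link_libraries(my_host_tool hip::host)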
endif()
else()
message(FATAL_ERROR "Unsupported FF_GPU_BACKEND for cmake: ${FF_GPU_BACKEND}")
endif()

target_include_directories(flexflow PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
@@ -233,11 +302,11 @@ if(FF_BUILD_UNIT_TESTS)
endif()

if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(tools/protobuf_to_json)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(tools/substitutions_to_dot)
endif()

# Python
24 changes: 19 additions & 5 deletions INSTALL.md
@@ -7,12 +7,26 @@ Clone the FlexFlow source code, and the third-party dependencies from GitHub.
git clone --recursive https://github.com/flexflow/FlexFlow.git
```

## 2. Install system dependencies
FlexFlow has system dependencies on CUDA and/or ROCm, depending on which GPU backend you target. The GPU backend is configured by the CMake variable `FF_GPU_BACKEND`. By default, FlexFlow targets CUDA. `docker/base/Dockerfile` shows how to install the system dependencies on a standard Ubuntu system.
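
For example, a hypothetical configure invocation that selects the ROCM backend (a sketch; `FF_GPU_BACKEND` is the CMake variable described above):

```
cmake -DFF_GPU_BACKEND=hip_rocm ..
```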

### Targeting CUDA - `FF_GPU_BACKEND=cuda`
If you are targeting CUDA, FlexFlow requires CUDA and CUDNN to be installed. You can follow the standard NVIDIA installation instructions for [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) and [CUDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html).

### Targeting ROCM - `FF_GPU_BACKEND=hip_rocm`
If you are targeting ROCM, FlexFlow requires a ROCM and HIP installation plus a few additional packages. Note that this can be done on a system with or without an AMD GPU. You can follow the standard installation instructions for [ROCM](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.3/page/Introduction_to_ROCm_Installation_Guide_for_Linux.html) and [HIP](https://docs.amd.com/bundle/HIP-Installation-Guide-v5.3/page/Introduction_to_HIP_Installation_Guide.html). When running `amdgpu-install`, install the `hip` and `rocm` use cases. You can skip installing the kernel drivers (unnecessary on systems without an AMD graphics card) by passing `--no-dkms`, i.e. `amdgpu-install --usecase=hip,rocm --no-dkms`. Additionally, install the packages `hip-dev`, `hipblas`, `miopen-hip`, and `rocm-hip-sdk`.
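
On a standard Ubuntu system, the steps above might look as follows (a sketch; the repository setup that makes `amdgpu-install` available is environment-specific):

```
sudo amdgpu-install --usecase=hip,rocm --no-dkms
sudo apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk
```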

See `./docker/base/Dockerfile` for an example ROCM install.

### Targeting CUDA through HIP - `FF_GPU_BACKEND=hip_cuda`
Follow both the CUDA and ROCM instructions.

## 3. Install the Python dependencies
If you are planning to build the Python interface, you will need to install several additional Python libraries; please check [this list](https://github.com/flexflow/FlexFlow/blob/master/requirements.txt) for details. If you are only looking to use the C++ interface, you can skip to the next section.

**We recommend that you create your own `conda` environment and then install the Python dependencies, to avoid any version mismatching with your system pre-installed libraries.**
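
For example (a sketch; the environment name and Python version are arbitrary choices):

```
conda create -n flexflow python=3.10
conda activate flexflow
pip install -r requirements.txt
```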

## 4. Configuring the FlexFlow build
Before building FlexFlow, you should configure the build by editing the `config/config.linux` file. Leave it unchanged if you want to build with the default options. We recommend that you spend some time familiarizing yourself with the available options. In particular, the main parameters are:
* `CUDA_DIR` is used to specify the directory of CUDA. It is only required when CMake can not automatically detect the installation directory of CUDA.
* `CUDNN_DIR` is used to specify the directory of CUDNN. It is only required when CUDNN is not installed in the CUDA directory.
@@ -25,7 +39,7 @@ Before building FlexFlow, you should configure the build by editing the `config/

More options are available in CMake; run `ccmake` and search for options starting with `FF`.

## 5. Build FlexFlow
You can build FlexFlow in three ways: with CMake, with Make, and with `pip`. We recommend the CMake build system, as it will automatically build all C++ dependencies, including NCCL and Legion.

### Building FlexFlow with CMake
@@ -45,7 +59,7 @@ To build FlexFlow with `pip`, run `pip install .` from the FlexFlow home directory.
The Makefile we provide is mainly for development purposes and may not be fully up to date.


## 6. Test FlexFlow
After building FlexFlow, you can test it to ensure that the build completed without issue, and that your system is ready to run FlexFlow.

### Set the `FF_HOME` environment variable before running FlexFlow. To make it permanent, you can add the following line to your `~/.bashrc`.
@@ -76,7 +90,7 @@ For example, the AlexNet example can be run as:

Buffer sizes are given in MB; e.g., for an 8 GB GPU, use `-ll:fsize 8000`.
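
A hypothetical invocation combining these flags (the binary name and the flags other than `-ll:fsize` are illustrative assumptions, not taken from this diff):

```
./alexnet -ll:gpu 1 -ll:fsize 8000
```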

## 7. Install FlexFlow
If you built/installed FlexFlow using `pip`, this step is not required. If you built using Make or CMake, install FlexFlow with:
```
cd build
make install
```
14 changes: 12 additions & 2 deletions cmake/legion.cmake
@@ -27,8 +27,18 @@ else()
endif()
message(STATUS "GASNET ROOT: $ENV{GASNet_ROOT_DIR}")
set(Legion_MAX_DIM ${FF_MAX_DIM} CACHE STRING "Maximum number of dimensions")
if (FF_GPU_BACKEND STREQUAL "cuda")
set(Legion_USE_CUDA ON CACHE BOOL "enable Legion_USE_CUDA" FORCE)
set(Legion_CUDA_ARCH ${FF_CUDA_ARCH} CACHE STRING "Legion CUDA ARCH" FORCE)
elseif (FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_USE_HIP ON CACHE BOOL "enable Legion_USE_HIP" FORCE)

if (FF_GPU_BACKEND STREQUAL "hip_cuda")
set(Legion_HIP_TARGET "CUDA" CACHE STRING "Legion_HIP_TARGET CUDA" FORCE)
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
set(Legion_HIP_TARGET "ROCM" CACHE STRING "Legion HIP_TARGET ROCM" FORCE)
endif()
endif()
add_subdirectory(deps/legion)
set(LEGION_LIBRARY Legion)
endif()
45 changes: 44 additions & 1 deletion config/config.inc
@@ -115,8 +115,51 @@ if [ -n "$FF_MAX_DIM" ]; then
SET_MAX_DIM="-DFF_MAX_DIM=${FF_MAX_DIM}"
fi

# set ROCM path
if [ -n "$ROCM_PATH" ]; then
SET_ROCM_PATH="-DROCM_PATH=${ROCM_PATH}"
fi

# set GPU backend
if [ -n "$FF_GPU_BACKEND" ]; then
SET_FF_GPU_BACKEND="-DFF_GPU_BACKEND=${FF_GPU_BACKEND}"

# cmake does not play nicely with overrides via `set()` of CMAKE_CXX_COMPILER and friends
# because it uses their values to set up the toolchain.
# see: https://gitlab.kitware.com/cmake/community/-/wikis/FAQ#how-do-i-use-a-different-compiler
#
# Ideally we would use values internal to the cmake script, e.g. HIP_HIPCC_EXECUTABLE,
# to set these, but this is a sufficient compromise.
if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then
if [ -n "$SET_CXX" ]; then
echo "FF_GPU_BACKEND is set to ${FF_GPU_BACKEND}. Normally we would set the compiler and linker"
echo "to hipcc, but the compiler is already set to ${SET_CXX}".
else
if [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# Configuring hipcc for nvidia:
#
# The platform hipcc targets is configured by the HIP_PLATFORM env var.
# Ideally we would call `HIP_PLATFORM=nvidia hipcc <...>`, as we could in a shell.
# However, CMAKE_CXX_COMPILER doesn't allow configuration like that. Additionally,
# cmake doesn't allow setting environment variables for target builds the way make
# does with exported variables.
#
# Instead, this file writes out a small wrapper script that configures hipcc
# with HIP_PLATFORM and CUDA_PATH.
#
# CMake requires CMAKE_CXX_COMPILER to exist before cmake is invoked, so the
# wrapper is written out here rather than during cmake configuration.
echo "HIP_PLATFORM=nvidia CUDA_PATH=${CUDA_DIR} ${ROCM_PATH}/bin/hipcc \$@" > "$(pwd)/nvidia_hipcc"
chmod +x "$(pwd)/nvidia_hipcc"
SET_CXX="-DCMAKE_CXX_COMPILER=$(pwd)/nvidia_hipcc -DCMAKE_CXX_LINKER=$(pwd)/nvidia_hipcc"
else
SET_CXX="-DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -DCMAKE_CXX_LINKER=/opt/rocm/bin/hipcc"
fi
fi
fi
fi

SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} $* ${SRC_LOCATION}"
CMAKE_COMMAND="${SET_CC_FLAGS} ${SET_NVCC_FLAGS} ${SET_LD_FLAGS} ${SET_CUDA_LIB_PATH} cmake -DCUDA_USE_STATIC_CUDA_RUNTIME=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_GASNET} ${SET_EXAMPLES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND} $* ${SRC_LOCATION}"
echo $CMAKE_COMMAND
eval $CMAKE_COMMAND
}
38 changes: 24 additions & 14 deletions config/config.linux
@@ -18,38 +18,48 @@ echo " Defaults for Linux machine"
# set build type
BUILD_TYPE=Release

# set CUDA Arch with your GPU architecture
FF_CUDA_ARCH=${FF_CUDA_ARCH:-""}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# set CUDA dir in case cmake cannot autodetect a path
CUDA_DIR=${CUDA_DIR:-"/usr/local/cuda"}

# enable Python
FF_USE_PYTHON=${FF_USE_PYTHON:-ON}

# enable GASNet
FF_USE_GASNET=${FF_USE_GASNET:-OFF}

# select GASNET conduit
FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}

# build C++ unit tests
FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}

# enable avx2
FF_USE_AVX2=${FF_USE_AVX2:-OFF}

# set MAX_DIM
FF_MAX_DIM=${FF_MAX_DIM:-5}

# set ROCM path
ROCM_PATH=${ROCM_PATH:-"/opt/rocm"}

# set GPU backend
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}

if [ "$FF_GPU_BACKEND" = "cuda" ] || [ "$FF_GPU_BACKEND" = "hip_cuda" ]; then
# enable NCCL
FF_USE_NCCL=${FF_USE_NCCL:-ON}
else
FF_USE_NCCL=OFF
fi
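
# Example (a sketch): a ROCm build configured entirely from the environment:
#   FF_GPU_BACKEND=hip_rocm ROCM_PATH=/opt/rocm ./config/config.linux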

. $(dirname $0)/config.inc
run_cmake $*
3 changes: 3 additions & 0 deletions docker/README.md
@@ -14,3 +14,6 @@ You will need a machine with a NVIDIA GPU, with drivers installed. You will also
```
./run.sh base
```

## Configuring the docker build for different GPU backends
The container can be built for different GPU backends by setting the `FF_GPU_BACKEND` environment variable. By default, the container targets CUDA. See the corresponding CMake variable `FF_GPU_BACKEND` for the available values.
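
For example (a sketch, assuming the helper script forwards the variable to the docker build):

```
FF_GPU_BACKEND=hip_rocm ./run.sh base
```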