Adds infra to use nvidia dependencies from pypi and cleans up patches (pytorch#1196)

* Installs NCCL from redist, uses system NCCL, and adds pypi RPATH

* Cleans up nvrtc patches and adds it using main script

* Fixes typo

* Adds more dependencies and builds torch with dynamic linking

* NCCL dirs have to be specified. Otherwise picks up different version

* Handles 11.8

* Adds echo message for nccl 2.15
syed-ahmed committed Dec 19, 2022
1 parent ef93e89 commit ee59264
Showing 3 changed files with 44 additions and 80 deletions.
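
The net effect of the change: for the pypi flavor of the wheels, cuBLAS, cuDNN, nvRTC, NCCL and the other CUDA runtime libraries are resolved from the nvidia-* pip packages through $ORIGIN-relative RPATHs instead of being bundled into torch/lib, while the default flavor keeps bundling them. One way to spot-check an installed pypi-flavored wheel is sketched below; this is illustrative only, not part of the commit, and it assumes a Linux machine with patchelf on PATH and libtorch_cuda.so as the library of interest.

    SITE=$(python -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
    # RPATH entries should point at $ORIGIN/../../nvidia/<package>/lib inside site-packages
    patchelf --print-rpath "$SITE/torch/lib/libtorch_cuda.so"
    # the dynamic loader should then resolve CUDA libraries from the nvidia-* pip packages
    ldd "$SITE/torch/lib/libtorch_cuda.so" | grep -E 'cublas|cudnn|nccl'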
common/install_cuda.sh (22 additions & 2 deletions)
@@ -24,7 +24,7 @@ function install_116 {
 }
 
 function install_117 {
-    echo "Installing CUDA 11.7 and CuDNN 8.5"
+    echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14"
     rm -rf /usr/local/cuda-11.7 /usr/local/cuda
     # install CUDA 11.7.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
@@ -42,10 +42,20 @@ function install_117 {
     cd ..
     rm -rf tmp_cudnn
     ldconfig
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz
+    tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz
+    cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
 }
 
 function install_118 {
-    echo "Installing CUDA 11.8 and cuDNN 8.5"
+    echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -63,6 +73,16 @@ function install_118 {
     cd ..
     rm -rf tmp_cudnn
     ldconfig
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz
+    tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz
+    cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
 }
 
 function prune_116 {
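
Both install functions now drop the NCCL redist headers and libraries into /usr/local/cuda, the same location build_cuda.sh later passes via NCCL_INCLUDE_DIR and NCCL_LIB_DIR. A minimal sanity check after running install_117 or install_118 could look like this (illustrative only, not part of the commit):

    # version macros come from the redist tarball's nccl.h
    grep -E '#define NCCL_(MAJOR|MINOR|PATCH)' /usr/local/cuda/include/nccl.h
    # shared libraries the torch build will link against dynamically
    ls -l /usr/local/cuda/lib64/libnccl.so*
    # ldconfig already ran above, so the loader cache should list them as well
    ldconfig -p | grep libnccl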
manywheel/build_cuda.sh (22 additions & 64 deletions)
@@ -142,22 +142,14 @@ DEPS_SONAME=(
         "libcublasLt.so.11"
         "libgomp.so.1"
     )
-elif [[ $CUDA_VERSION == "11.7" ]]; then
+elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
     export USE_STATIC_CUDNN=0
     # Try parallelizing nvcc as well
     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
     DEPS_LIST=(
-        "/usr/local/cuda/lib64/libcudart.so.11.0"
-        "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-        "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50
-        "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
         "$LIBGOMP_PATH"
     )
     DEPS_SONAME=(
-        "libcudart.so.11.0"
-        "libnvToolsExt.so.1"
-        "libnvrtc.so.11.2"
-        "libnvrtc-builtins.so.11.7"
         "libgomp.so.1"
     )
 
@@ -173,6 +165,10 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then
             "/usr/local/cuda/lib64/libcudnn.so.8"
             "/usr/local/cuda/lib64/libcublas.so.11"
             "/usr/local/cuda/lib64/libcublasLt.so.11"
+            "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50
+            "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
+            "/usr/local/cuda/lib64/libcudart.so.11.0"
+            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
         )
         DEPS_SONAME+=(
             "libcudnn_adv_infer.so.8"
@@ -186,69 +182,31 @@ elif [[ $CUDA_VERSION == "11.7" ]]; then
             "libcublasLt.so.11"
         )
     else
-        echo "Using cudnn and cublas from pypi."
-        CUDA_RPATHS=(
-            '$ORIGIN/../../nvidia/cublas/lib'
-            '$ORIGIN/../../nvidia/cudnn/lib'
-        )
-        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
-        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
-        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
-        export FORCE_RPATH="--force-rpath"
-    fi
-elif [[ $CUDA_VERSION == "11.8" ]]; then
-    export USE_STATIC_CUDNN=0
-    # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
-    DEPS_LIST=(
-        "/usr/local/cuda/lib64/libcudart.so.11.0"
-        "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-        "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89
-        "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
-        "$LIBGOMP_PATH"
-    )
-    DEPS_SONAME=(
-        "libcudart.so.11.0"
-        "libnvToolsExt.so.1"
-        "libnvrtc.so.11.2"
-        "libnvrtc-builtins.so.11.8"
-        "libgomp.so.1"
-    )
-
-    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
-        echo "Bundling with cudnn and cublas."
-        DEPS_LIST+=(
-            "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
-            "/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
-            "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
-            "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
-            "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
-            "/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
-            "/usr/local/cuda/lib64/libcudnn.so.8"
-            "/usr/local/cuda/lib64/libcublas.so.11"
-            "/usr/local/cuda/lib64/libcublasLt.so.11"
-        )
-        DEPS_SONAME+=(
-            "libcudnn_adv_infer.so.8"
-            "libcudnn_adv_train.so.8"
-            "libcudnn_cnn_infer.so.8"
-            "libcudnn_cnn_train.so.8"
-            "libcudnn_ops_infer.so.8"
-            "libcudnn_ops_train.so.8"
-            "libcudnn.so.8"
-            "libcublas.so.11"
-            "libcublasLt.so.11"
-        )
-    else
-        echo "Using cudnn and cublas from pypi."
+        echo "Using nvidia libs from pypi."
         CUDA_RPATHS=(
             '$ORIGIN/../../nvidia/cublas/lib'
+            '$ORIGIN/../../nvidia/cuda_cupti/lib'
+            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
+            '$ORIGIN/../../nvidia/cuda_runtime/lib'
             '$ORIGIN/../../nvidia/cudnn/lib'
+            '$ORIGIN/../../nvidia/cufft/lib'
+            '$ORIGIN/../../nvidia/curand/lib'
+            '$ORIGIN/../../nvidia/cusolver/lib'
+            '$ORIGIN/../../nvidia/cusparse/lib'
+            '$ORIGIN/../../nvidia/nccl/lib'
+            '$ORIGIN/../../nvidia/nvtx/lib'
         )
         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
         export FORCE_RPATH="--force-rpath"
+        export USE_STATIC_NCCL=0
+        export USE_SYSTEM_NCCL=1
+        export ATEN_STATIC_CUDA=0
+        export USE_CUDA_STATIC_LINK=0
+        export USE_CUPTI_SO=1
+        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
+        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
     fi
 else
     echo "Unknown cuda version $CUDA_VERSION"
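
Two things are worth spelling out about the new else branch. First, the RPATH entries are $ORIGIN-relative: a shared object in site-packages/torch/lib looks two directories up and lands in site-packages/nvidia/<package>/lib, which is where the nvidia-* pip wheels install their libraries, and the IFS=: subshell joins the array into a single search path. A small illustration of what the exported variables end up holding, using a shortened list (a sketch, not part of the commit):

    CUDA_RPATHS=('$ORIGIN/../../nvidia/cublas/lib' '$ORIGIN/../../nvidia/cudnn/lib' '$ORIGIN/../../nvidia/nccl/lib')
    CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")   # join the array elements with ':'
    C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
    echo "$C_SO_RPATH"
    # $ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/nccl/lib:$ORIGIN:$ORIGIN/lib

Second, the USE_SYSTEM_NCCL=1 / USE_STATIC_NCCL=0 pair together with the explicit NCCL_INCLUDE_DIR and NCCL_LIB_DIR makes the torch build link dynamically against the NCCL that install_cuda.sh placed under /usr/local/cuda; per the commit message, leaving the dirs unset can pick up a different NCCL version.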
release/pypi/prep_binary_for_pypi.sh (0 additions & 14 deletions)
@@ -56,22 +56,8 @@ for whl_file in "$@"; do
     if [[ $whl_file == *"with.pypi.cudnn"* ]]; then
         rm -rf "${whl_dir}/caffe2"
         rm -rf "${whl_dir}"/torch/lib/libnvrtc*
-        sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA
-        sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA
 
         sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py"
-        find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-            patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \
-                --force-rpath $sofile
-            patchelf --print-rpath $sofile
-        done
-
-        find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-            patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \
-                --force-rpath $sofile
-            patchelf --print-rpath $sofile
-        done
-        patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so"
     fi
 
     find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;
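
With the METADATA edits and post-hoc patchelf fixups removed here, the pypi-flavor RPATHs are now handled during the manywheel build via the C_SO_RPATH and LIB_SO_RPATH exports added in manywheel/build_cuda.sh above. To confirm a prepped wheel still carries them, a read-only check along these lines mirrors what the deleted loop used to print (a sketch; the unpack location and wheel name pattern are assumptions):

    unzip -q torch-*with.pypi.cudnn*.whl -d whl_dir
    find whl_dir/torch/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "== $sofile"
        patchelf --print-rpath "$sofile"
    done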
