/
Dockerfile.nvidia
173 lines (146 loc) · 7.8 KB
/
Dockerfile.nvidia
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
##############################################################
# This Dockerfile contains the NVidia HPC SDK (nvc, cuda, OpenMPI) for Devito
##############################################################
ARG ver
ARG arch="nvc"
########################################################################
# Build base image with apt setup and common env
########################################################################
FROM ubuntu:22.04 as sdk-base
ENV DEBIAN_FRONTEND noninteractive
# Install python
RUN apt-get update && \
apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl \
dh-autoreconf python3-venv python3-dev python3-pip
# nodesource: nvdashboard requires nodejs>=10
RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
RUN echo 'deb [trusted=yes, signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list
RUN apt-key update *&& apt-get update -y
# Install tmpi
RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi
# Install nvhpc. `nvhpc` is the alias for the latest avaialble version
ARG ver=nvhpc
# We use the standard apt-get for the default latest nvhpc. For earlier version, apt has a bug that it will always
# install the latest nvhpc-x-y no matter which version nvhpc-x-z is requested which would double (extra 10Gb) the size of the image.
# So for specific version we directly download the specific deb and install it.
RUN if [ "$ver" = "nvhpc" ]; then \
apt-get install -y -q --allow-unauthenticated ${ver}; \
else \
export year=$(echo $ver | cut -d "-" -f 2) && export minor=$(echo $ver | cut -d "-" -f 3) && \
wget https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64/nvhpc_${year}.${minor}_amd64.deb && \
apt-get install --allow-unauthenticated -y -q ./nvhpc_${year}.${minor}_amd64.deb; \
fi;
# Nodejs https://github.com/nodesource/distributions
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update && apt-get install -y -q \
liblapack-dev libblas-dev \
libibverbs-dev libmlx4-1 libmlx5-1 ibutils \
# Devito Jupyter Notebooks and Ux experience
nodejs ffmpeg gcc-offload-nvptx \
texlive-latex-extra texlive-fonts-recommended dvipng cm-super
# MPI_VER options 3,4,HPCX
ARG MPI_VER=HPCX
ENV MPIVER=${MPI_VER}
# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
# MPI ROOT USER DEFAULTS
ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
ENV OMPI_MCA_rmaps_base_oversubscribe=1
ENV OMPI_MCA_btl_base_warn_component_unused=0
ENV OMPI_MCA_hwloc_base_binding_policy=""
ENV UCX_MEMTYPE_CACHE=no
ENV UCX_NET_DEVICES=all
ENV UCX_SHM_DEVICES=all
ENV UCX_ACC_DEVICES=all
ENV UCX_RNDV_THRESH=0
ENV UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_UCX_RNDV_THRESH=0
ENV NCCL_UCX_RNDV_SCHEME=get_zcopy
ENV NCCL_PLUGIN_P2P=ucx
ENV MELLANOX_MOUNT_DRIVER=1
ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self
# For Baremetal, these flags are also available
#ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self,rc_x,gdr_copy
# Make simlink for path setup since ENV doesn't accept shell commands.
RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -n 1) && \
export CUDA_V=$(ls /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/ | grep '\.') && \
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH} /opt/nvhpc && \
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/${CUDA_V}/extras/CUPTI /opt/CUPTI && \
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nvshmem /opt/nvhpc/comm_libs/nvshmem && \
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl
# Starting nvhpc 23.5 and cuda 12.1, hpcx and openmpi are inside the cuda version folder, only the bin is in the comm_libs path
RUN export CUDA_V=$(/opt/nvhpc/cuda/bin/nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
ls /opt/nvhpc/comm_libs/${CUDA_V}/hpcx/ &&\
if [ -d /opt/nvhpc/comm_libs/${CUDA_V}/hpcx ]; then \
rm -rf /opt/nvhpc/comm_libs/hpcx && rm -rf /opt/nvhpc/comm_libs/openmpi4 && \
ln -sf /opt/nvhpc/comm_libs/${CUDA_V}/hpcx /opt/nvhpc/comm_libs/hpcx && \
ln -sf /opt/nvhpc/comm_libs/${CUDA_V}/openmpi4 /opt/nvhpc/comm_libs/openmpi4;\
fi;
# Set base path based on version
ENV HPCSDK_HOME=/opt/nvhpc
ENV HPCSDK_CUPTI=/opt/CUPTI
# required for nvidia-docker v1
RUN echo "$HPCSDK_HOME/cuda/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/cuda/lib64" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/compilers/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/comm_libs/mpi/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_CUPTI/lib64" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "$HPCSDK_HOME/math_libs/lib64" >> /etc/ld.so.conf.d/nvidia.conf
# Compiler, CUDA, and Library paths
# CUDA_HOME has been deprecated but keep for now because of other dependencies (@mloubout).
ENV CUDA_HOME $HPCSDK_HOME/cuda
ENV NVHPC_CUDA_HOME $HPCSDK_HOME/cuda
ENV CUDA_ROOT $HPCSDK_HOME/cuda/bin
ENV PATH $HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
ENV LD_LIBRARY_PATH $HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR:${LD_LIBRARY_PATH}
ENV CPATH $HPCSDK_HOME/comm_libs/mpi/include:$HPCSDK_HOME/comm_libs/nvshmem/include:$HPCSDK_HOME/comm_libs/nccl/include:${CPATH}
# MPI
RUN if [ "x$MPI_VER" = "x4" ]; then \
rm -f $HPCSDK_HOME/comm_libs/mpi && \
ln -sf $HPCSDK_HOME/comm_libs/openmpi4/latest \
$HPCSDK_HOME/comm_libs/mpi ; \
elif [ "$MPI_VER" = "HPCX" ]; then \
rm -f $HPCSDK_HOME/comm_libs/mpi && \
ln -sf $HPCSDK_HOME/comm_libs/hpcx/latest/ompi \
$HPCSDK_HOME/comm_libs/mpi ; \
fi;
# Install python nvidia dependencies
RUN python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir --upgrade pip && \
/venv/bin/pip install --no-cache-dir -r https://raw.githubusercontent.com/devitocodes/devito/master/requirements-nvidia.txt && \
# Install jupyter and setup nvidia configs.
/venv/bin/pip install --no-cache-dir jupyter && \
/venv/bin/jupyter serverextension enable dask_labextension && \
rm -rf ~/.cache/pip
RUN apt-get clean && apt-get autoclean && apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/*
EXPOSE 8888
CMD ["/bin/bash"]
########################################################################
# NVC for GPUs via OpenACC config
########################################################################
FROM sdk-base as nvc
# Make devito env vars file and extras
ADD docker/nvdashboard.json /app/nvdashboard.json
# mpi4py
ENV MPI4PY_FLAGS='CC=nvc CFLAGS="-noswitcherror -tp=px"'
ENV DEVITO_ARCH="nvc"
ENV DEVITO_PLATFORM="nvidiaX"
ENV DEVITO_LANGUAGE="openacc"
########################################################################
# NVC for GPUs via CUDA config
########################################################################
FROM nvc as nvcc
ENV DEVITO_ARCH="cuda"
ENV DEVITO_PLATFORM="nvidiaX"
ENV DEVITO_LANGUAGE="cuda"
########################################################################
# NVC for CPUs config
########################################################################
FROM nvc as nvc-host
ENV DEVITO_ARCH="nvc"
ENV DEVITO_PLATFORM="cpu64"
ENV DEVITO_LANGUAGE="openmp"