From d6b01d20f7e75b710d413686573784c1c0c6e853 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 21:39:18 -0700 Subject: [PATCH 01/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index d83877d4f..4a558852b 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -80,21 +80,34 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin gcc \ libgl1-mesa-glx \ libglib2.0-0 \ + openssh-client \ + openssh-server \ virtualenv && \ + rm /etc/ssh/ssh_host_*_key \ + /etc/ssh/ssh_host_*_key.pub && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +RUN mkdir -p /var/run/sshd && \ + cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + + ENV SIGOPT_PROJECT=. WORKDIR / COPY multinode-requirements.txt . +COPY generate_ssh_keys.sh . 
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt +RUN cat /workspace/generate_ssh_keys.sh >> ${HOME}/.bashrc && \ + /generate_ssh_keys.sh + ARG PYTHON_VERSION RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc - ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV FI_PROVIDER_PATH="${FI_PROVIDER_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov" @@ -109,6 +122,8 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE +ENTRYPOINT service ssh start && bash + FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ From 372676510caabe5c0e989175061f7ee05b0b0575 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 21:39:43 -0700 Subject: [PATCH 02/23] Create generate_ssh_keys.sh Signed-off-by: Tyler Titsworth --- pytorch/generate_ssh_keys.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 pytorch/generate_ssh_keys.sh diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh new file mode 100644 index 000000000..4250035fe --- /dev/null +++ b/pytorch/generate_ssh_keys.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + + +function gen_single_key() +{ + ALG_NAME=$1 + if [[ ! -f /etc/ssh/ssh_host_${ALG_NAME}_key ]] + then + ssh-keygen -q -N "" -t ${ALG_NAME} -f /etc/ssh/ssh_host_${ALG_NAME}_key + fi +} + + +gen_single_key dsa +gen_single_key rsa +gen_single_key ecdsa +gen_single_key ed25519 From 3d774ed0bdb1e81fdacbab96e3a391a805783efe Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 21:45:08 -0700 Subject: [PATCH 03/23] Update generate_ssh_keys.sh Signed-off-by: Tyler Titsworth --- pytorch/generate_ssh_keys.sh | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh index 4250035fe..0ee61398e 100644 --- a/pytorch/generate_ssh_keys.sh +++ b/pytorch/generate_ssh_keys.sh @@ -15,17 +15,13 @@ # # SPDX-License-Identifier: Apache-2.0 - -function gen_single_key() -{ - ALG_NAME=$1 - if [[ ! -f /etc/ssh/ssh_host_${ALG_NAME}_key ]] - then - ssh-keygen -q -N "" -t ${ALG_NAME} -f /etc/ssh/ssh_host_${ALG_NAME}_key - fi +function gen_single_key() { + ALG_NAME=$1 + if [[ ! 
-f /etc/ssh/ssh_host_${ALG_NAME}_key ]]; then + ssh-keygen -q -N "" -t "${ALG_NAME}" -f "/etc/ssh/ssh_host_${ALG_NAME}_key" + fi } - gen_single_key dsa gen_single_key rsa gen_single_key ecdsa From 79fec6252dd583819a269e91f4ce7c705d321663 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 21:46:36 -0700 Subject: [PATCH 04/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 4a558852b..b1a0bef5a 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -88,12 +88,13 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Allow OpenSSH to talk to containers without asking for confirmation +# hadolint global ignore=SC2002 RUN mkdir -p /var/run/sshd && \ cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config - ENV SIGOPT_PROJECT=. WORKDIR / @@ -102,8 +103,8 @@ COPY generate_ssh_keys.sh . 
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt -RUN cat /workspace/generate_ssh_keys.sh >> ${HOME}/.bashrc && \ - /generate_ssh_keys.sh +RUN cat /workspace/generate_ssh_keys.sh >> ~/.bashrc && \ + rm -rf /generate_ssh_keys.sh ARG PYTHON_VERSION RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc From 0c8e2d1b809c4bf0d11b1fa27fc23dd7b6ca4616 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 21:56:39 -0700 Subject: [PATCH 05/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index b1a0bef5a..3ac34fdfe 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -80,11 +80,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin gcc \ libgl1-mesa-glx \ libglib2.0-0 \ - openssh-client \ - openssh-server \ virtualenv && \ - rm /etc/ssh/ssh_host_*_key \ - /etc/ssh/ssh_host_*_key.pub && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -103,17 +99,26 @@ COPY generate_ssh_keys.sh . 
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt -RUN cat /workspace/generate_ssh_keys.sh >> ~/.bashrc && \ - rm -rf /generate_ssh_keys.sh +ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" + +RUN apt-get install -y --no-install-recommends --fix-missing \ + openssh-client \ + openssh-server \ + rm /etc/ssh/ssh_host_*_key \ + /etc/ssh/ssh_host_*_key.pub && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* ARG PYTHON_VERSION -RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc + +RUN cat /generate_ssh_keys.sh >> ~/.bashrc && \ + rm -rf /generate_ssh_keys.sh && \ + echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV FI_PROVIDER_PATH="${FI_PROVIDER_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov" ENV LIBRARY_PATH="${LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" -ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" ENV PATH="${PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/bin" ENV CPATH="${CPATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/include" @@ -123,7 +128,7 @@ RUN wget -q --no-check-certificate 
https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE -ENTRYPOINT service ssh start && bash +ENTRYPOINT ["service", "ssh", "start", "&&", "bash"] FROM ${PYTHON_BASE} AS ipex-xpu-base From 60d2b4d56e4ad4454b020f7d26383d9d5a1f8beb Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 22:01:39 -0700 Subject: [PATCH 06/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 3ac34fdfe..26b0af2de 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -84,13 +84,6 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Allow OpenSSH to talk to containers without asking for confirmation -# hadolint global ignore=SC2002 -RUN mkdir -p /var/run/sshd && \ - cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ - echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config - ENV SIGOPT_PROJECT=. 
WORKDIR / @@ -109,6 +102,13 @@ RUN apt-get install -y --no-install-recommends --fix-missing \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Allow OpenSSH to talk to containers without asking for confirmation +# hadolint global ignore=SC2002 +RUN mkdir -p /var/run/sshd && \ + cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + ARG PYTHON_VERSION RUN cat /generate_ssh_keys.sh >> ~/.bashrc && \ @@ -128,6 +128,8 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE +EXPOSE 22:22 + ENTRYPOINT ["service", "ssh", "start", "&&", "bash"] FROM ${PYTHON_BASE} AS ipex-xpu-base From 9515a315cff4653b6a44ecfcffdabf5d93500713 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 22:07:54 -0700 Subject: [PATCH 07/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 26b0af2de..e348e9f6f 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -96,7 +96,7 @@ ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/d RUN apt-get install -y --no-install-recommends --fix-missing \ openssh-client \ - openssh-server \ + openssh-server && \ rm /etc/ssh/ssh_host_*_key \ /etc/ssh/ssh_host_*_key.pub && \ apt-get clean && \ @@ -128,7 +128,7 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate 
https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE -EXPOSE 22:22 +EXPOSE 22 ENTRYPOINT ["service", "ssh", "start", "&&", "bash"] From 6d0c6b8032ea240552c09889c53b0e65cada28dc Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 22:12:22 -0700 Subject: [PATCH 08/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index e348e9f6f..8e1415fc4 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -80,9 +80,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin gcc \ libgl1-mesa-glx \ libglib2.0-0 \ - virtualenv && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + virtualenv ENV SIGOPT_PROJECT=. From d704bacf8fe28387feb94bc9ba1e0ef4ed5191e7 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 22:25:02 -0700 Subject: [PATCH 09/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 8e1415fc4..16193dcf5 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -90,7 +90,7 @@ COPY generate_ssh_keys.sh . 
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt -ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" +ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" RUN apt-get install -y --no-install-recommends --fix-missing \ openssh-client \ From 43be2a942a118e744841c0de8b61e17c13cb74fb Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Tue, 11 Jun 2024 22:33:43 -0700 Subject: [PATCH 10/23] Update README.md Signed-off-by: Tyler Titsworth --- pytorch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/README.md b/pytorch/README.md index aaf70d67b..97b895a08 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -97,7 +97,7 @@ docker run -it --rm \ --net=host \ -v $PWD/workspace:/workspace \ -w /workspace \ - intel/intel-extension-for-tensorflow:xpu-jupyter + intel/intel-extension-for-pytorch:xpu-jupyter ``` After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server. From 94b3ee7525f1879b32d7be242c32d7b6f426791d Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 12 Jun 2024 08:58:39 -0700 Subject: [PATCH 11/23] config sshd --- pytorch/Dockerfile | 12 +++++++----- pytorch/generate_ssh_keys.sh | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 16193dcf5..01a39e022 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -86,7 +86,6 @@ ENV SIGOPT_PROJECT=. WORKDIR / COPY multinode-requirements.txt . -COPY generate_ssh_keys.sh . 
RUN python -m pip install --no-cache-dir -r multinode-requirements.txt @@ -105,13 +104,18 @@ RUN apt-get install -y --no-install-recommends --fix-missing \ RUN mkdir -p /var/run/sshd && \ cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + sed -i'' -e's/^#PermitRootLogin prohibit-password$/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sed -i'' -e's/^#PasswordAuthentication yes$/PasswordAuthentication no/' /etc/ssh/sshd_config && \ + sed -i'' -e's/^#PermitEmptyPasswords no$/PermitEmptyPasswords yes/' /etc/ssh/sshd_config && \ mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config ARG PYTHON_VERSION +COPY generate_ssh_keys.sh . + RUN cat /generate_ssh_keys.sh >> ~/.bashrc && \ - rm -rf /generate_ssh_keys.sh && \ - echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc + echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc && \ + rm -rf /generate_ssh_keys.sh ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" @@ -128,8 +132,6 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src EXPOSE 22 -ENTRYPOINT ["service", "ssh", "start", "&&", "bash"] - FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh index 0ee61398e..b41c15a4d 100644 --- a/pytorch/generate_ssh_keys.sh +++ b/pytorch/generate_ssh_keys.sh @@ -26,3 +26,4 @@ gen_single_key dsa gen_single_key rsa gen_single_key ecdsa gen_single_key ed25519 +service ssh start >/dev/null From adb9591b85dba75ccb6e17ff865d2c11b87ed5b6 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Thu, 13 Jun 2024 15:39:28 -0700 Subject: 
[PATCH 12/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 01a39e022..be4674589 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -130,8 +130,6 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE -EXPOSE 22 - FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ From 71cc98f67c5db8739ec937d1343cee1bfabd2b86 Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Fri, 14 Jun 2024 10:18:48 -0700 Subject: [PATCH 13/23] review with sharvil --- pytorch/Dockerfile | 7 +++---- pytorch/generate_ssh_keys.sh | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index be4674589..21251251c 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -106,16 +106,15 @@ RUN mkdir -p /var/run/sshd && \ echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ sed -i'' -e's/^#PermitRootLogin prohibit-password$/PermitRootLogin yes/' /etc/ssh/sshd_config && \ sed -i'' -e's/^#PasswordAuthentication yes$/PasswordAuthentication no/' /etc/ssh/sshd_config && \ - sed -i'' -e's/^#PermitEmptyPasswords no$/PermitEmptyPasswords yes/' /etc/ssh/sshd_config && \ mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config ARG PYTHON_VERSION COPY generate_ssh_keys.sh . 
-RUN cat /generate_ssh_keys.sh >> ~/.bashrc && \ - echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc && \ - rm -rf /generate_ssh_keys.sh +# modify generate_ssh_keys to be a helper script +# print how to use helper script on bash startup +RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh index b41c15a4d..0ee61398e 100644 --- a/pytorch/generate_ssh_keys.sh +++ b/pytorch/generate_ssh_keys.sh @@ -26,4 +26,3 @@ gen_single_key dsa gen_single_key rsa gen_single_key ecdsa gen_single_key ed25519 -service ssh start >/dev/null From a8149842ea3293959c5db0006f3bc809c5d9dc7f Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Fri, 14 Jun 2024 10:43:50 -0700 Subject: [PATCH 14/23] remove sshd config --- pytorch/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 21251251c..70918f3e9 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -104,8 +104,6 @@ RUN apt-get install -y --no-install-recommends --fix-missing \ RUN mkdir -p /var/run/sshd && \ cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - sed -i'' -e's/^#PermitRootLogin prohibit-password$/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - sed -i'' -e's/^#PasswordAuthentication yes$/PasswordAuthentication no/' /etc/ssh/sshd_config && \ mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config ARG PYTHON_VERSION From 8e1776e142389ef42380b54d3873bf9ae1f6bbeb Mon Sep 17 00:00:00 2001 From: "sharvil.shah" Date: Mon, 17 Jun 2024 11:59:12 -0700 
Subject: [PATCH 15/23] Updated Dockerfile with SSH BKM --- pytorch/Dockerfile | 20 ++++++++++++++++++-- pytorch/generate_ssh_keys.sh | 0 2 files changed, 18 insertions(+), 2 deletions(-) mode change 100644 => 100755 pytorch/generate_ssh_keys.sh diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 70918f3e9..28dd23467 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,11 +108,27 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh . +COPY generate_ssh_keys.sh /root/generate_ssh_keys.sh # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup -RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc +# Avoids loop for further execution of the startup file +RUN echo 'unset BASH_ENV' >> ~/.bash_startup && \ + echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bash_startup && \ + echo '~/generate_ssh_keys.sh' >> ~/.bash_startup + +ENV BASH_ENV='~/.bash_startup' + +RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_ecdsa_key' > /var/run/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_ed25519_key' > /var/run/sshd_config && \ + echo 'AuthorizedKeysFile /etc/ssh/authorized_keys' > /var/run/sshd_config && \ + echo '## Enable DEBUG log. 
You can ignore this but this may help you debug any issue while enabling SSHD for the first time' > /var/run/sshd_config && \ + echo 'LogLevel DEBUG3' > /var/run/sshd_config && \ + echo 'UsePAM yes' > /var/run/sshd_config && \ + echo 'Subsystem sftp /usr/lib/openssh/sftp-server' > /var/run/sshd_config + ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh old mode 100644 new mode 100755 From 4adaac9ac822051bd4651dc1e82eed9af5539a6b Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Mon, 17 Jun 2024 12:07:43 -0700 Subject: [PATCH 16/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 28dd23467..957a3eb14 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,16 +108,17 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh /root/generate_ssh_keys.sh +COPY generate_ssh_keys.sh # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup # Avoids loop for further execution of the startup file RUN echo 'unset BASH_ENV' >> ~/.bash_startup && \ echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bash_startup && \ - echo '~/generate_ssh_keys.sh' >> ~/.bash_startup + echo '/generate_ssh_keys.sh' >> ~/.bash_startup && \ + rm -rf /generate_ssh_keys.sh -ENV BASH_ENV='~/.bash_startup' +ENV BASH_ENV='$HOME/.bash_startup' RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ From f473ff50d704c494fa85884320b90a8cda15f084 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Mon, 17 
Jun 2024 12:16:26 -0700 Subject: [PATCH 17/23] Update Dockerfile Signed-off-by: Tyler Titsworth --- pytorch/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 957a3eb14..8bb82e211 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,7 +108,7 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh +COPY generate_ssh_keys.sh . # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup From 20276f66e2c6ca6b59489032751cb08b47e36827 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Mon, 17 Jun 2024 15:07:13 -0700 Subject: [PATCH 18/23] Enabled generate SSH keys for interactive shell --- pytorch/Dockerfile | 11 +++---- pytorch/README.md | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 957a3eb14..3231588fe 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,17 +108,16 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh +COPY generate_ssh_keys.sh . 
# modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup # Avoids loop for further execution of the startup file RUN echo 'unset BASH_ENV' >> ~/.bash_startup && \ echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bash_startup && \ - echo '/generate_ssh_keys.sh' >> ~/.bash_startup && \ - rm -rf /generate_ssh_keys.sh - -ENV BASH_ENV='$HOME/.bash_startup' + cat '/generate_ssh_keys.sh' >> ~/.bash_startup && \ + rm -rf /generate_ssh_keys.sh && \ + echo 'source ~/.bash_startup' >> ~/.bashrc RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ @@ -144,6 +143,8 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE +ENV BASH_ENV='$HOME/.bash_startup' + FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ diff --git a/pytorch/README.md b/pytorch/README.md index 97b895a08..c81ae9776 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -113,6 +113,79 @@ The images below additionally include [Intel® oneAPI Collective Communications | `2.1.0-pip-mulitnode` | [v2.1.0] | [v2.1.0+cpu] | [v2.1.0][ccl-v2.1.0] | [v2.3.1] | [v0.2.3] | | `2.0.0-pip-multinode` | [v2.0.0] | [v2.0.0+cpu] | [v2.0.0][ccl-v2.0.0] | [v2.1.1] | [v0.1.0] | +> **Note:** Passwordless SSH connection is also enabled in the image. +> The container does not contain the SSH ID keys. The user needs to mount those keys at `/root/.ssh/id_rsa` and `/root/.ssh/id_rsa.pub`. +> Users also need to append the content of id_rsa.pub to `/etc/ssh/authorized_keys` in the SSH server container. 
+> Since the SSH key is not owned by the default user account in Docker, please also do "chmod 644 id_rsa.pub; chmod 644 id_rsa" to grant read access for the default user account. +> Users could also use "/usr/bin/ssh-keygen -t rsa -b 4096 -N '' -f ~/mnt/ssh_key/id_rsa" to generate a new SSH Key inside the container. +> Users need to mount a config file to list all hostnames at location `/root/.ssh/config` on the SSH client container. +> Once all files are added + +#### Setup and Run IPEX Multi-Node Container + +To run the IPEX multi-node container with OpenSSH the user needs to set up the SSH container correctly. + +There will be an SSH server and an SSH client connecting to the SSH server. We will set up some files to be used by each of them. + - SSH Server + 1. *Authorized Keys* : `/etc/ssh/authorized_keys` + - SSH Client + 1. *Config File with Host IPs* : `/root/.ssh/config` + 2. *Private User Key* : `/root/.ssh/id_rsa` + +To add these files correctly please follow the steps described below. + +* **Step-1** : Setup ID Keys + **Note: Please skip this step if you already have a public/private key pair available.**<br> + You can use the commands provided below to [generate the Identity keys](https://www.ssh.com/academy/ssh/keygen#creating-an-ssh-key-pair-for-user-authentication) for OpenSSH. After this, you should have a public and private key which will be used for passwordless authentication. If you already have identity keys beforehand you can skip the ssh-keygen command and replace the public and private keys paths with your own in the copy commands. + + ```bash + ssh-keygen -q -N "" -t rsa -b 4096 -f ./id_rsa + cp id_keys/id_rsa.pub ssh_worker/authorized_keys #Adding public key to authorized key + ``` + +* **Step-2** : Add hosts to config + The client container needs to have a config file with all hostnames specified. An example of a hostfile is provided below. + + ```txt + Host host1 + HostName + IdentitiesOnly yes + Host host2 + HostName + IdentitiesOnly yes + ... 
+ ``` + +* **Step-3** : You also need to make sure the file permission are setup correctly. All files need to be owned by root and should have permission as `600`. Please use the chown and chmod commands to set the permissions correctly. + +* **Step-4** : Example Run commands. Below is an example command to run SSH server and client respectively + - Step 3.1: Example SSH server command + + ```bash + export SSH_PORT= + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' + ``` + + - Step 3.2: Example SSH client command + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/config:/root/.ssh/config \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + amr-registry.caas.intel.com/aiops/mlops-ci:b-0-ubuntu-22.04-pip-py3.10-ipex-2.3.0-oneccl-inc-2.5.1 \ + bash -c 'ssh -p ${SSH_PORT}' + ``` + + --- The images below are [TorchServe*] with CPU Optimizations: From 4c740c12d1e3591159594fabda2b2b94fcbe1de0 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Mon, 17 Jun 2024 15:15:11 -0700 Subject: [PATCH 19/23] Updated markdown for passing linting --- pytorch/README.md | 49 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/pytorch/README.md b/pytorch/README.md index c81ae9776..a58496b19 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -159,32 +159,31 @@ To add these files correctly please follow the steps described below. * **Step-3** : You also need to make sure the file permission are setup correctly. All files need to be owned by root and should have permission as `600`. Please use the chown and chmod commands to set the permissions correctly. * **Step-4** : Example Run commands. 
Below is an example command to run SSH server and client respectively - - Step 3.1: Example SSH server command - - ```bash - export SSH_PORT= - docker run -it --rm \ - --net=host \ - -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ - -w /workspace \ - -e SSH_PORT=${SSH_PORT} \ - intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ - bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' - ``` - - - Step 3.2: Example SSH client command - - ```bash - docker run -it --rm \ - --net=host \ - -v $PWD/id_rsa:/root/.ssh/id_rsa \ - -v $PWD/config:/root/.ssh/config \ - -w /workspace \ - -e SSH_PORT=${SSH_PORT} \ - amr-registry.caas.intel.com/aiops/mlops-ci:b-0-ubuntu-22.04-pip-py3.10-ipex-2.3.0-oneccl-inc-2.5.1 \ - bash -c 'ssh -p ${SSH_PORT}' - ``` + * Step 4.1: Example SSH server command + ```bash + export SSH_PORT= + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' + ``` + + * Step 4.2: Example SSH client command + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/config:/root/.ssh/config \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + amr-registry.caas.intel.com/aiops/mlops-ci:b-0-ubuntu-22.04-pip-py3.10-ipex-2.3.0-oneccl-inc-2.5.1 \ + bash -c 'ssh -p ${SSH_PORT}' + ``` --- From 00d81479e652e5671afae9a89a7fa5593a6b5e3a Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Mon, 17 Jun 2024 16:05:28 -0700 Subject: [PATCH 20/23] update README.md --- pytorch/Dockerfile | 11 ++++--- pytorch/README.md | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 957a3eb14..3231588fe 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,17 +108,16 @@ RUN mkdir -p /var/run/sshd && \ ARG 
PYTHON_VERSION -COPY generate_ssh_keys.sh +COPY generate_ssh_keys.sh . # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup # Avoids loop for further execution of the startup file RUN echo 'unset BASH_ENV' >> ~/.bash_startup && \ echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bash_startup && \ - echo '/generate_ssh_keys.sh' >> ~/.bash_startup && \ - rm -rf /generate_ssh_keys.sh - -ENV BASH_ENV='$HOME/.bash_startup' + cat '/generate_ssh_keys.sh' >> ~/.bash_startup && \ + rm -rf /generate_ssh_keys.sh && \ + echo 'source ~/.bash_startup' >> ~/.bashrc RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ @@ -144,6 +143,8 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE +ENV BASH_ENV='$HOME/.bash_startup' + FROM ${PYTHON_BASE} AS ipex-xpu-base RUN apt-get update && \ diff --git a/pytorch/README.md b/pytorch/README.md index 97b895a08..0feca83fb 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -113,6 +113,88 @@ The images below additionally include [Intel® oneAPI Collective Communications | `2.1.0-pip-mulitnode` | [v2.1.0] | [v2.1.0+cpu] | [v2.1.0][ccl-v2.1.0] | [v2.3.1] | [v0.2.3] | | `2.0.0-pip-multinode` | [v2.0.0] | [v2.0.0+cpu] | [v2.0.0][ccl-v2.0.0] | [v2.1.1] | [v0.1.0] | +#### Setup and Run Multinode Container + +Some additional assembly is required to utilize this container with OpenSSH. 
To perform any kind of DDP (Distributed Data Parallel) execution, containers are assigned the roles of launcher and worker respectively: + +SSH Server (Worker) + +1. *Authorized Keys* : `/etc/ssh/authorized_keys` + +SSH Client (Launcher) + +1. *Config File with Host IPs* : `/root/.ssh/config` +2. *Private User Key* : `/root/.ssh/id_rsa` + +To add these files correctly please follow the steps described below. + +1. Setup ID Keys + + You can use the commands provided below to [generate the Identity keys](https://www.ssh.com/academy/ssh/keygen#creating-an-ssh-key-pair-for-user-authentication) for OpenSSH. + + ```bash + ssh-keygen -q -N "" -t rsa -b 4096 -f ./id_rsa + touch authorized_keys + cat id_rsa.pub >> authorized_keys + ``` + +2. Add hosts to config + + The launcher container needs to have a config file with all hostnames and ports specified. An example of a hostfile is provided below. + + ```bash + touch config + ``` + + ```txt + Host host1 + HostName + IdentitiesOnly yes + Port + Host host2 + HostName + IdentitiesOnly yes + Port + ... + ``` + +3. Configure the permissions and ownership for all of the files you have created so far. + + ```bash + chmod 600 id_rsa.pub id_rsa config authorized_keys + chown root:root id_rsa.pub id_rsa config authorized_keys + ``` + +4. Now start the workers and execute DDP on the launcher. + + 1. Worker run command: + + ```bash + export SSH_PORT= + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ + -v $PWD/tests:/workspace/tests \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' + ``` + + 2. 
Launcher run command: + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/config:/root/.ssh/config \ + -v $PWD/tests:/workspace/tests \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c 'ipexrun cpu /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' + ``` + --- The images below are [TorchServe*] with CPU Optimizations: From 63159df7284c2166af918eb90fe4074661f45719 Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Mon, 17 Jun 2024 16:07:51 -0700 Subject: [PATCH 21/23] add a note to cover our butts --- pytorch/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch/README.md b/pytorch/README.md index 0feca83fb..3f1354b62 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -195,6 +195,9 @@ To add these files correctly please follow the steps described below. bash -c 'ipexrun cpu /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' ``` +> [!NOTE] +> [Intel MPI](https://www.intel.com/content/www/us/en/developer/tools/oneapi/mpi-library.html) can be configured based on your machine settings. If the above commands do not work for you, see the documentation for how to configure based on your network. + --- The images below are [TorchServe*] with CPU Optimizations: From 65fe3f10bbced21a839b8739877eff8ed1707fea Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Thu, 27 Jun 2024 14:00:24 -0700 Subject: [PATCH 22/23] Added an entrypint to set startup commands like SSH Host keys generation. --- pytorch/Dockerfile | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 3231588fe..3b020f489 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -113,11 +113,18 @@ COPY generate_ssh_keys.sh . 
# modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup # Avoids loop for further execution of the startup file -RUN echo 'unset BASH_ENV' >> ~/.bash_startup && \ - echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bash_startup && \ - cat '/generate_ssh_keys.sh' >> ~/.bash_startup && \ - rm -rf /generate_ssh_keys.sh && \ - echo 'source ~/.bash_startup' >> ~/.bashrc +RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \ + cat '/generate_ssh_keys.sh' >> ~/.startup && \ + rm -rf /generate_ssh_keys.sh + +RUN echo -e "#!/bin/bash \n\ + set -e \n\ + set -a \n\ + source ~/.startup \n\ + set +a \n\ + eval \"\$@\" \n\ + tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh && \ + chmod +x /usr/local/bin/dockerd-entrypoint.sh RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ @@ -129,21 +136,14 @@ RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ echo 'UsePAM yes' > /var/run/sshd_config && \ echo 'Subsystem sftp /usr/lib/openssh/sftp-server' > /var/run/sshd_config - -ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" -ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch" -ENV FI_PROVIDER_PATH="${FI_PROVIDER_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov" -ENV LIBRARY_PATH="${LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" -ENV PATH="${PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/bin" -ENV CPATH="${CPATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/include" - RUN mkdir 
-p /licensing RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src/oneCCL/b7d66de16e17f88caffd7c6df4cd5e12b266af84/third-party-programs.txt -O /licensing/oneccl_third_party_programs.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \ wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE -ENV BASH_ENV='$HOME/.bash_startup' +ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] +CMD ["bash"] FROM ${PYTHON_BASE} AS ipex-xpu-base From 37f8cca1db7f000fbbeac1d560cde0d278d418d3 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Thu, 27 Jun 2024 15:29:42 -0700 Subject: [PATCH 23/23] Extra spaces removed --- pytorch/Dockerfile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 3b020f489..c02ca9b26 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -117,13 +117,14 @@ RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bin cat '/generate_ssh_keys.sh' >> ~/.startup && \ rm -rf /generate_ssh_keys.sh +# hadolint global ignore=SC3037 RUN echo -e "#!/bin/bash \n\ - set -e \n\ - set -a \n\ - source ~/.startup \n\ - set +a \n\ - eval \"\$@\" \n\ - tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh && \ +set -e \n\ +set -a \n\ +source ~/.startup \n\ +set +a \n\ +eval \"\$@\" \n\ +tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh && \ chmod +x /usr/local/bin/dockerd-entrypoint.sh RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \