From 3d40f1bef12cdbf5f8fe861fc7ed2251c32fa0a5 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Wed, 3 Jul 2024 10:35:51 -0700 Subject: [PATCH 01/11] Dockerfile updated to support a fixed SSH port --- pytorch/Dockerfile | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 395916ca2..5e33a955e 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -126,15 +126,19 @@ set +a \n\ eval \"\$@\"" >> /usr/local/bin/dockerd-entrypoint.sh && \ chmod +x /usr/local/bin/dockerd-entrypoint.sh -RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_rsa_key' > /var/run/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_ecdsa_key' > /var/run/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_ed25519_key' > /var/run/sshd_config && \ - echo 'AuthorizedKeysFile /etc/ssh/authorized_keys' > /var/run/sshd_config && \ - echo '## Enable DEBUG log. You can ignore this but this may help you debug any issue while enabling SSHD for the first time' > /var/run/sshd_config && \ - echo 'LogLevel DEBUG3' > /var/run/sshd_config && \ - echo 'UsePAM yes' > /var/run/sshd_config && \ - echo 'Subsystem sftp /usr/lib/openssh/sftp-server' > /var/run/sshd_config +RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /etc/ssh/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_rsa_key' >> /etc/ssh/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_ecdsa_key' >> /etc/ssh/sshd_config && \ + echo 'HostKey /etc/ssh/ssh_host_ed25519_key' >> /etc/ssh/sshd_config && \ + echo 'AuthorizedKeysFile /etc/ssh/authorized_keys' >> /etc/ssh/sshd_config && \ + echo '## Enable DEBUG log. 
You can ignore this but this may help you debug any issue while enabling SSHD for the first time' >> /etc/ssh/sshd_config && \ + echo 'LogLevel DEBUG3' >> /etc/ssh/sshd_config && \ + echo 'Port 3022' >> /etc/ssh/sshd_config && \ + echo 'UsePAM yes' >> /etc/ssh/sshd_config && \ + echo 'Subsystem sftp /usr/lib/openssh/sftp-server' >> /etc/ssh/sshd_config && \ + echo 'Host *' > /etc/ssh/ssh_config && \ + echo ' Port 3022' >> /etc/ssh/ssh_config && \ + echo ' IdentityFile ~/.ssh/id_rsa' >> /etc/ssh/ssh_config RUN mkdir -p /licensing From 8da1913c926baa5a410a3b0e37c849a3a4a6f6d5 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Wed, 3 Jul 2024 13:47:18 -0700 Subject: [PATCH 02/11] README updated --- pytorch/README.md | 102 ++++++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/pytorch/README.md b/pytorch/README.md index 198c8c05a..9aaca3637 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -114,12 +114,8 @@ The images below additionally include [Intel® oneAPI Collective Communications | `2.0.0-pip-multinode` | [v2.0.0] | [v2.0.0+cpu] | [v2.0.0][ccl-v2.0.0] | [v2.1.1] | [v0.1.0] | > **Note:** Passwordless SSH connection is also enabled in the image. -> The container does not contain the SSH ID keys. The user needs to mount those keys at `/root/.ssh/id_rsa` and `/root/.ssh/id_rsa.pub`. -> User also need to append content of id_rsa.pub in `/etc/ssh/authorized_keys` in the SSH server container. -> Since the SSH key is not owned by default user account in docker, please also do "chmod 644 id_rsa.pub; chmod 644 id_rsa" to grant read access for default user account. -> Users could also use "/usr/bin/ssh-keygen -t rsa -b 4096 -N '' -f ~/mnt/ssh_key/id_rsa" to generate a new SSH Key inside the container. -> Users need to mount a config file to list all hostnames at location `/root/.ssh/config` on the SSH client container. -> Once all files are added +> The container does not contain the SSH ID keys. 
The user needs to mount those keys at `/root/.ssh/id_rsa` and `/etc/ssh/authorized_keys`. +> Since the SSH key is not owned by default user account in docker, please also do "chmod 600 authorized_keys; chmod 600 id_rsa" to grant read access for default user account. #### Setup and Run IPEX Multi-Node Container @@ -131,8 +127,7 @@ SSH Server (Worker) SSH Client (Launcher) -1. *Config File with Host IPs* : `/root/.ssh/config` -2. *Private User Key* : `/root/.ssh/id_rsa` +1. *Private User Key* : `/root/.ssh/id_rsa` To add these files correctly please follow the steps described below. @@ -146,61 +141,98 @@ To add these files correctly please follow the steps described below. cat id_rsa.pub >> authorized_keys ``` -2. Add hosts to config - - The launcher container needs to have the a config file with all hostnames and ports specified. An example of a hostfile is provided below. +2. Configure the permissions and ownership for all of the files you have created so far. ```bash - touch config + chmod 600 id_rsa config authorized_keys + chown root:root id_rsa.pub id_rsa config authorized_keys ``` +3. Setup hostfile. The hostfile is needed for running torch distributed using `ipexrun` utility. If you're not using `ipexrun` you can skip this step. ```txt - Host host1 - HostName - IdentitiesOnly yes - Port - Host host2 - HostName - IdentitiesOnly yes - Port + + ... ``` - -3. Configure the permissions and ownership for all of the files you have created so far. - - ```bash - chmod 600 id_rsa.pub id_rsa config authorized_keys - chown root:root id_rsa.pub id_rsa config authorized_keys - ``` - 4. Now start the workers and execute DDP on the launcher. 1. 
Worker run command: ```bash - export SSH_PORT= docker run -it --rm \ --net=host \ - -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ + -v $PWD/authorized_keys:/etc/ssh/authorized_keys \ -v $PWD/tests:/workspace/tests \ -w /workspace \ - -e SSH_PORT=${SSH_PORT} \ intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ - bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' + bash -c '/usr/sbin/sshd -D' ``` - 2. Launcher run command: + 3. Launcher run command: ```bash docker run -it --rm \ --net=host \ -v $PWD/id_rsa:/root/.ssh/id_rsa \ - -v $PWD/config:/root/.ssh/config \ -v $PWD/tests:/workspace/tests \ + -v $PWD/hostfile:/workspace/hostfile \ + -w /workspace \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c 'ipexrun cpu --nnodes 2 --nprocs-per-node 1 --master-addr 127.0.0.1 --master-port 3022 /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' + ``` + +4. Start SSH server with a custom port. + If the user wants to define their own port to start the SSH server, it can be done so using the commands described below. + + 1. Worker command: + + ```bash + export SSH_PORT= + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/etc/ssh/authorized_keys \ + -v $PWD/tests:/workspace/tests \ + -e SSH_PORT=${SSH_PORT} \ -w /workspace \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c '/usr/sbin/sshd -D -p ${SSH_PORT}' + ``` + + 2. Add hosts to config. (**Note:** This is an optional step) + + User can optionally mount their own custom client config file to define a list of hosts and ports where the SSH server is running inside the container. An example of a hostfile is provided below. This file is supposed to be mounted in the launcher container at `/etc/ssh/ssh_config`. + + ```bash + touch config + ``` + + ```txt + Host host1 + HostName + IdentitiesOnly yes + IdentityFile ~/.root/id_rsa + Port + Host host2 + HostName + IdentitiesOnly yes + IdentityFile ~/.root/id_rsa + Port + ... + ``` + + 3. 
Launcher run command: + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/config:/etc/ssh/ssh_config \ + -v $PWD/hostfile:/workspace/hostfile \ + -v $PWD/tests:/workspace/tests \ -e SSH_PORT=${SSH_PORT} \ + -w /workspace \ intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ - bash -c 'ipexrun cpu /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' + bash -c 'ipexrun cpu --nnodes 2 --nprocs-per-node 1 --master-addr 127.0.0.1 --master-port ${SSH_PORT} /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' ``` > [!NOTE] From 3ac24f98081100c007ec684b7fa327e6d5093aab Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Wed, 3 Jul 2024 13:47:36 -0700 Subject: [PATCH 03/11] eval removed from entrypoint --- pytorch/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 5e33a955e..0ce48a72c 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -123,7 +123,7 @@ set -e \n\ set -a \n\ source ~/.startup \n\ set +a \n\ -eval \"\$@\"" >> /usr/local/bin/dockerd-entrypoint.sh && \ +\"\$@\"" >> /usr/local/bin/dockerd-entrypoint.sh && \ chmod +x /usr/local/bin/dockerd-entrypoint.sh RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /etc/ssh/sshd_config && \ From 242ed98839cf9dfbc50f4c78f14c3a295037ba39 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 20:49:22 +0000 Subject: [PATCH 04/11] [pre-commit.ci] auto fixes from pre-commit.com hooks --- pytorch/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch/README.md b/pytorch/README.md index 9aaca3637..48191e218 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -198,14 +198,14 @@ To add these files correctly please follow the steps described below. bash -c '/usr/sbin/sshd -D -p ${SSH_PORT}' ``` - 2. Add hosts to config. (**Note:** This is an optional step) + 2. 
Add hosts to config. (**Note:** This is an optional step) User can optionally mount their own custom client config file to define a list of hosts and ports where the SSH server is running inside the container. An example of a hostfile is provided below. This file is supposed to be mounted in the launcher container at `/etc/ssh/ssh_config`. ```bash touch config ``` - + ```txt Host host1 HostName From 7c35a282cbfad4d7ca3e77337b33b3b7814bb446 Mon Sep 17 00:00:00 2001 From: sharvil10 Date: Wed, 3 Jul 2024 15:28:46 -0700 Subject: [PATCH 05/11] converted echo commands to files --- pytorch/Dockerfile | 25 +++---------------------- pytorch/dockerd-entrypoint.sh | 6 ++++++ pytorch/ssh_config | 3 +++ pytorch/sshd_config | 10 ++++++++++ 4 files changed, 22 insertions(+), 22 deletions(-) create mode 100755 pytorch/dockerd-entrypoint.sh create mode 100644 pytorch/ssh_config create mode 100644 pytorch/sshd_config diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 0ce48a72c..becf09d68 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -117,28 +117,9 @@ RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bin cat '/generate_ssh_keys.sh' >> ~/.startup && \ rm -rf /generate_ssh_keys.sh -# hadolint global ignore=SC3037 -RUN echo -e "#!/bin/bash \n\ -set -e \n\ -set -a \n\ -source ~/.startup \n\ -set +a \n\ -\"\$@\"" >> /usr/local/bin/dockerd-entrypoint.sh && \ - chmod +x /usr/local/bin/dockerd-entrypoint.sh - -RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /etc/ssh/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_rsa_key' >> /etc/ssh/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_ecdsa_key' >> /etc/ssh/sshd_config && \ - echo 'HostKey /etc/ssh/ssh_host_ed25519_key' >> /etc/ssh/sshd_config && \ - echo 'AuthorizedKeysFile /etc/ssh/authorized_keys' >> /etc/ssh/sshd_config && \ - echo '## Enable DEBUG log. 
You can ignore this but this may help you debug any issue while enabling SSHD for the first time' >> /etc/ssh/sshd_config && \ - echo 'LogLevel DEBUG3' >> /etc/ssh/sshd_config && \ - echo 'Port 3022' >> /etc/ssh/sshd_config && \ - echo 'UsePAM yes' >> /etc/ssh/sshd_config && \ - echo 'Subsystem sftp /usr/lib/openssh/sftp-server' >> /etc/ssh/sshd_config && \ - echo 'Host *' > /etc/ssh/ssh_config && \ - echo ' Port 3022' >> /etc/ssh/ssh_config && \ - echo ' IdentityFile ~/.ssh/id_rsa' >> /etc/ssh/ssh_config +COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY sshd_config /etc/ssh/sshd_config +COPY ssh_config /etc/ssh/ssh_config RUN mkdir -p /licensing diff --git a/pytorch/dockerd-entrypoint.sh b/pytorch/dockerd-entrypoint.sh new file mode 100755 index 000000000..241b6d1ab --- /dev/null +++ b/pytorch/dockerd-entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +set -a +source ~/.startup +set +a +"$@" diff --git a/pytorch/ssh_config b/pytorch/ssh_config new file mode 100644 index 000000000..3d823709e --- /dev/null +++ b/pytorch/ssh_config @@ -0,0 +1,3 @@ +Host * + Port 3022 + IdentityFile ~/.ssh/id_rsa diff --git a/pytorch/sshd_config b/pytorch/sshd_config new file mode 100644 index 000000000..119073cdd --- /dev/null +++ b/pytorch/sshd_config @@ -0,0 +1,10 @@ +HostKey /etc/ssh/ssh_host_dsa_key +HostKey /etc/ssh/ssh_host_rsa_key +HostKey /etc/ssh/ssh_host_ecdsa_key +HostKey /etc/ssh/ssh_host_ed25519_key +AuthorizedKeysFile /etc/ssh/authorized_keys +## Enable DEBUG log. 
You can ignore this but this may help you debug any issue while enabling SSHD for the first time +LogLevel DEBUG3 +Port 3022 +UsePAM yes +Subsystem sftp /usr/lib/openssh/sftp-server From 191f8919567107c5b4880cafa034265ba79c54c0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 22:29:27 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks --- pytorch/dockerd-entrypoint.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pytorch/dockerd-entrypoint.sh b/pytorch/dockerd-entrypoint.sh index 241b6d1ab..6b7de790f 100755 --- a/pytorch/dockerd-entrypoint.sh +++ b/pytorch/dockerd-entrypoint.sh @@ -1,4 +1,18 @@ #!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ set -e set -a source ~/.startup From 83774ad37860cc1554eb332d56206fb8d90a837b Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 3 Jul 2024 15:53:25 -0700 Subject: [PATCH 07/11] fix lint issues Signed-off-by: tylertitsworth --- pytorch/Dockerfile | 8 ++++---- pytorch/README.md | 6 ++++-- pytorch/{ => multinode}/dockerd-entrypoint.sh | 3 ++- pytorch/{ => multinode}/generate_ssh_keys.sh | 0 pytorch/{ => multinode}/ssh_config | 0 pytorch/{ => multinode}/sshd_config | 0 6 files changed, 10 insertions(+), 7 deletions(-) rename pytorch/{ => multinode}/dockerd-entrypoint.sh (92%) rename pytorch/{ => multinode}/generate_ssh_keys.sh (100%) rename pytorch/{ => multinode}/ssh_config (100%) rename pytorch/{ => multinode}/sshd_config (100%) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index becf09d68..46bee0192 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,7 +108,7 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh . +COPY multinode/generate_ssh_keys.sh . # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup @@ -117,9 +117,9 @@ RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bin cat '/generate_ssh_keys.sh' >> ~/.startup && \ rm -rf /generate_ssh_keys.sh -COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh -COPY sshd_config /etc/ssh/sshd_config -COPY ssh_config /etc/ssh/ssh_config +COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY multinode/sshd_config /etc/ssh/sshd_config +COPY multinode/ssh_config /etc/ssh/ssh_config RUN mkdir -p /licensing diff --git a/pytorch/README.md b/pytorch/README.md index 48191e218..b1778cf56 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -147,6 +147,7 @@ To add these files correctly please follow the steps described below. chmod 600 id_rsa config authorized_keys chown root:root id_rsa.pub id_rsa config authorized_keys ``` + 3. Setup hostfile. 
The hostfile is needed for running torch distributed using `ipexrun` utility. If you're not using `ipexrun` you can skip this step. ```txt @@ -154,6 +155,7 @@ To add these files correctly please follow the steps described below. ... ``` + 4. Now start the workers and execute DDP on the launcher. 1. Worker run command: @@ -168,7 +170,7 @@ To add these files correctly please follow the steps described below. bash -c '/usr/sbin/sshd -D' ``` - 3. Launcher run command: + 2. Launcher run command: ```bash docker run -it --rm \ @@ -181,7 +183,7 @@ To add these files correctly please follow the steps described below. bash -c 'ipexrun cpu --nnodes 2 --nprocs-per-node 1 --master-addr 127.0.0.1 --master-port 3022 /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' ``` -4. Start SSH server with a custom port. +5. Start SSH server with a custom port. If the user wants to define their own port to start the SSH server, it can be done so using the commands described below. 1. Worker command: diff --git a/pytorch/dockerd-entrypoint.sh b/pytorch/multinode/dockerd-entrypoint.sh similarity index 92% rename from pytorch/dockerd-entrypoint.sh rename to pytorch/multinode/dockerd-entrypoint.sh index 6b7de790f..ba13c0f94 100755 --- a/pytorch/dockerd-entrypoint.sh +++ b/pytorch/multinode/dockerd-entrypoint.sh @@ -15,6 +15,7 @@ set -e set -a -source ~/.startup +# shellcheck disable=SC1091 +source "$HOME/.startup" set +a "$@" diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/multinode/generate_ssh_keys.sh similarity index 100% rename from pytorch/generate_ssh_keys.sh rename to pytorch/multinode/generate_ssh_keys.sh diff --git a/pytorch/ssh_config b/pytorch/multinode/ssh_config similarity index 100% rename from pytorch/ssh_config rename to pytorch/multinode/ssh_config diff --git a/pytorch/sshd_config b/pytorch/multinode/sshd_config similarity index 100% rename from pytorch/sshd_config rename to pytorch/multinode/sshd_config From 25cb55c249568658a91cb7f95f766082a065e936 Mon 
Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 3 Jul 2024 15:55:12 -0700 Subject: [PATCH 08/11] fix path Signed-off-by: tylertitsworth --- pytorch/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 46bee0192..54d7d40c7 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,7 +108,7 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY multinode/generate_ssh_keys.sh . +COPY multinode/generate_ssh_keys.sh /generate_ssh_keys.sh # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup From 26cc88a429237bc0984062aa72dea76dab5a72b5 Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 3 Jul 2024 16:00:55 -0700 Subject: [PATCH 09/11] update requirements.txt Signed-off-by: tylertitsworth --- pytorch/Dockerfile | 5 +++-- pytorch/docker-compose.yaml | 2 +- .../requirements.txt} | 0 3 files changed, 4 insertions(+), 3 deletions(-) rename pytorch/{multinode-requirements.txt => multinode/requirements.txt} (100%) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 54d7d40c7..4b7843641 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -85,9 +85,10 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin ENV SIGOPT_PROJECT=. WORKDIR / -COPY multinode-requirements.txt . 
+COPY multinode/requirements.txt requirements.txt -RUN python -m pip install --no-cache-dir -r multinode-requirements.txt +RUN python -m pip install --no-cache-dir -r requirements.txt && \ + rm -rf requirements.txt ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib" diff --git a/pytorch/docker-compose.yaml b/pytorch/docker-compose.yaml index f18afdf8d..b42f6d84a 100644 --- a/pytorch/docker-compose.yaml +++ b/pytorch/docker-compose.yaml @@ -77,7 +77,7 @@ services: dependency.apt.libglib2: true dependency.apt.python3-dev: true dependency.pip.apt.virtualenv: true - dependency.python.pip: multinode-requirements.txt + dependency.python.pip: multinode/requirements.txt org.opencontainers.base.name: "intel/intel-optimized-pytorch:${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-base" org.opencontainers.image.title: "Intel® Extension for PyTorch MultiNode Image" org.opencontainers.image.version: ${IPEX_VERSION:-2.2.0}-${PACKAGE_OPTION:-pip}-multinode diff --git a/pytorch/multinode-requirements.txt b/pytorch/multinode/requirements.txt similarity index 100% rename from pytorch/multinode-requirements.txt rename to pytorch/multinode/requirements.txt From 00feb7f8a2248ed332638e04cd765a5330c917aa Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 3 Jul 2024 16:05:40 -0700 Subject: [PATCH 10/11] cleanup unnecessary ssh configuration Signed-off-by: tylertitsworth --- pytorch/Dockerfile | 7 +------ pytorch/multinode/ssh_config | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index 4b7843641..64d12c611 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -100,12 +100,7 @@ RUN apt-get install -y --no-install-recommends --fix-missing \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Allow OpenSSH to talk to containers 
without asking for confirmation -# hadolint global ignore=SC2002 -RUN mkdir -p /var/run/sshd && \ - cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ - echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config +RUN mkdir -p /var/run/sshd ARG PYTHON_VERSION diff --git a/pytorch/multinode/ssh_config b/pytorch/multinode/ssh_config index 3d823709e..9ac730173 100644 --- a/pytorch/multinode/ssh_config +++ b/pytorch/multinode/ssh_config @@ -1,3 +1,4 @@ Host * Port 3022 IdentityFile ~/.ssh/id_rsa + StrictHostKeyChecking no From 4a9f47bcec35806bb02cc20786688242481ca0ee Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Mon, 8 Jul 2024 11:56:03 -0700 Subject: [PATCH 11/11] Update sshd_config Signed-off-by: Tyler Titsworth --- pytorch/multinode/sshd_config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch/multinode/sshd_config b/pytorch/multinode/sshd_config index 119073cdd..4796a48af 100644 --- a/pytorch/multinode/sshd_config +++ b/pytorch/multinode/sshd_config @@ -8,3 +8,5 @@ LogLevel DEBUG3 Port 3022 UsePAM yes Subsystem sftp /usr/lib/openssh/sftp-server +# https://ubuntu.com/security/CVE-2024-6387 +LoginGraceTime 0