From fa8bd928a3daa61839cbaeabfcd9d784b08db832 Mon Sep 17 00:00:00 2001 From: ma-pineda Date: Tue, 18 Jun 2024 22:03:14 -0700 Subject: [PATCH 1/7] Pinning all pip and conda dependencies Signed-off-by: ma-pineda --- preset/classical-ml/Dockerfile | 28 +++--- preset/data-analytics/Dockerfile | 30 +++--- preset/deep-learning/Dockerfile | 122 +++++++++++------------ preset/inference-optimization/Dockerfile | 8 +- 4 files changed, 94 insertions(+), 94 deletions(-) diff --git a/preset/classical-ml/Dockerfile b/preset/classical-ml/Dockerfile index 4f13c34f2..c17ca017c 100644 --- a/preset/classical-ml/Dockerfile +++ b/preset/classical-ml/Dockerfile @@ -67,10 +67,10 @@ RUN wget --progress=dot:giga --no-check-certificate https://github.com/conda-for conda config --add channels intel && \ conda init --all && \ conda install -y \ - jupyterlab \ - notebook \ - jupyterhub \ - jupyter-server-proxy \ + 'jupyterlab>=4.1.8' \ + 'notebook>=7.1.3' \ + 'jupyterhub>=4.1.5' \ + 'jupyter-server-proxy>=4.1.2' \ 'mako>=1.2.2' \ 'pyjwt>=2.4.0' \ 'cryptography>=42.0.5' \ @@ -99,15 +99,15 @@ RUN conda create -yn classical-ml -c ${INTEL_CHANNEL} -c conda-forge \ scikit-learn-intelex==${SKLEARNEX_VERSION} \ xgboost=${XGBOOST_VERSION} \ modin-ray=${MODIN_VERSION} \ - python-dotenv \ - tqdm \ - matplotlib-base \ - dataset_librarian \ - threadpoolctl \ - ipython \ - ipykernel \ - kernda \ - protobuf=4.24 \ + 'python-dotenv>=1.0.1' \ + 'tqdm>=4.66.2' \ + 'matplotlib-base>=3.4.3' \ + 'dataset_librarian>=1.0.4' \ + 'threadpoolctl>=3.3.0' \ + 'ipython>=8.18.1' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ + 'protobuf>=4.24' \ 'pillow>=10.2.0' \ 'tornado>=6.3.3' && \ conda clean -y --all @@ -116,7 +116,7 @@ RUN conda create -yn classical-ml -c ${INTEL_CHANNEL} -c conda-forge \ # PyPI packages RUN conda run -n classical-ml python -m pip install --no-deps --no-cache-dir \ - cloud-data-connector + 'cloud-data-connector=1.0.3' ENV PYTHONSTARTUP=~/.patch_sklearn.py diff --git a/preset/data-analytics/Dockerfile b/preset/data-analytics/Dockerfile index 7fdb37495..47059d623 100644 --- a/preset/data-analytics/Dockerfile +++ b/preset/data-analytics/Dockerfile @@ -68,16 +68,16 @@ RUN wget --progress=dot:giga --no-check-certificate "https://github.com/conda-fo conda config --add channels intel && \ conda init --all && \ conda install -y \ - jupyterlab \ - notebook \ - jupyterhub \ - jupyter-server-proxy \ + 'jupyterlab>=4.1.8' \ + 'notebook>=7.1.3' \ + 'jupyterhub>=4.1.5' \ + 'jupyter-server-proxy>=4.1.2' \ 'mako>=1.2.2' \ 'pyjwt>=2.4.0' \ 'cryptography>=42.0.5' \ + 'nodejs>=20.12.2' \ 'idna>=3.7' \ 'tqdm>=4.66.2' \ - 'nodejs>=20.12.2' \ && \ jupyter labextension disable "@jupyterlab/apputils-extension:announcements" && \ conda clean -y --all @@ -95,22 +95,22 @@ RUN conda create -yn data-analytics -c "${INTEL_CHANNEL}" -c conda-forge \ numpy="${NUMPY_VERSION}" \ python="${PYTHON_VERSION}" \ modin-ray="${MODIN_VERSION}" \ - python-dotenv \ - tqdm \ - matplotlib-base \ - dataset_librarian \ - threadpoolctl \ - ipython \ - ipykernel \ - kernda \ - protobuf=4.24 \ + 'python-dotenv>=1.0.1' \ + 'tqdm>=4.66.2' \ + 'matplotlib-base>=3.4.3' \ + 'dataset_librarian>=1.0.4' \ + 'threadpoolctl>=3.3.0' \ + 'ipython>=8.18.1' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ + 'protobuf>=4.24.4' \ 'pillow>=10.2.0' \ 'idna>=3.7' \ 'tornado>=6.3.3' && \ conda clean -y --all RUN conda run -n data-analytics python -m pip install --no-deps --no-cache-dir \ - cloud-data-connector + 'cloud-data-connector=1.0.3' FROM data-analytics-python as data-analytics-jupyter 
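A note on version-pin syntax, since the `cloud-data-connector` pins above pass a conda-style pin to pip: conda match specifications treat a single `=` as a fuzzy (prefix) version match and `==` as an exact match, whereas pip accepts only `==` for exact pins and rejects a lone `=` as invalid syntax (a later patch in this series corrects the pip pin accordingly). A minimal sketch of the distinction, reusing packages pinned above:

```bash
# conda: '=4.24' is a fuzzy match (any 4.24.* build); '==4.24.0' would be exact
conda install -y 'protobuf=4.24'

# pip: only '==' pins an exact version; 'cloud-data-connector=1.0.3' fails
# with "Invalid requirement"
python -m pip install --no-deps --no-cache-dir 'cloud-data-connector==1.0.3'
```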
diff --git a/preset/deep-learning/Dockerfile b/preset/deep-learning/Dockerfile index ef62392bf..e0e749a7d 100644 --- a/preset/deep-learning/Dockerfile +++ b/preset/deep-learning/Dockerfile @@ -118,10 +118,10 @@ RUN wget --progress=dot:giga --no-check-certificate "https://github.com/conda-fo conda config --add channels intel && \ conda init --all && \ conda install -c conda-forge \ - jupyterlab \ - notebook \ - jupyterhub \ - jupyter-server-proxy \ + 'jupyterlab>=4.1.8' \ + 'notebook>=7.1.3' \ + 'jupyterhub>=4.1.5' \ + 'jupyter-server-proxy>=4.1.2' \ 'mako>=1.2.2' \ 'pyjwt>=2.4.0' \ 'cryptography>=42.0.5' \ @@ -160,36 +160,36 @@ RUN conda create -yn pytorch-cpu -c intel/label/oneapi -c "${INTEL_CHANNEL}" -c intel-extension-for-pytorch="${IPEX_CPU_VERSION}" \ torchvision="${TORCHVISION_CPU_VERSION}" \ torchaudio="${TORCHAUDIO_CPU_VERSION}" \ - matplotlib-base \ - ipykernel \ - kernda \ + 'matplotlib-base>=3.4.3' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ 'pillow>=10.2.0' \ 'aiohttp>=3.9.0' \ 'tornado>=6.3.3' \ 'jinja2>=3.1.3' \ 'idna>=3.7' \ - onnx \ + 'onnx>=1.15.0' \ && \ conda clean -y --all # PyPI packages RUN conda run -n pytorch-cpu pip install --no-deps --no-cache-dir --ignore-installed \ - ninja \ - python-dotenv \ + 'ninja>=1.11.1.1' \ + 'python-dotenv>=1.0.1' \ 'tqdm>=4.66.2' \ - cloud-data-connector \ - dataset-librarian && \ + 'cloud-data-connector>=1.0.3' \ + 'dataset-librarian>=1.0.4' && \ conda run -n pytorch-cpu pip install --no-cache-dir --ignore-installed \ - transformers \ - datasets \ - evaluate && \ - conda run -n pytorch-cpu pip install --no-cache-dir -U accelerate && \ + 'transformers>=4.40.2' \ + 'datasets>=2.19.1' \ + 'evaluate>=0.4.2' && \ + conda run -n pytorch-cpu pip install --no-cache-dir -U 'accelerate>=0.30.0' && \ conda run -n pytorch-cpu pip install --no-cache-dir "git+https://github.com/huggingface/optimum-intel.git" && \ conda clean -y --all -RUN conda run -n pytorch-cpu conda install protobuf=4.24 -c conda-forge --override --force-reinstall -y +RUN conda run -n pytorch-cpu conda install 'protobuf=4.24' -c conda-forge --override --force-reinstall -y # PyTorch Installation ARG IDP_VERSION @@ -219,41 +219,41 @@ RUN conda create -yn pytorch-gpu -c intel/label/oneapi -c "${INTEL_CHANNEL}" -c intel-extension-for-pytorch="${IPEX_GPU_VERSION}" \ torchvision="${TORCHVISION_GPU_VERSION}" \ torchaudio="${TORCHAUDIO_GPU_VERSION}" \ - tensorboardx \ - matplotlib-base \ - pandas \ - ipython \ - ipykernel \ - kernda \ + 'tensorboardx>=2.6.2.2' \ + 'matplotlib-base>=3.4.3' \ + 'pandas>=2.2.2' \ + 'ipython>=8.18.1' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ 'pillow>=10.2.0' \ 'aiohttp>=3.9.0' \ 'tornado>=6.3.3' \ 'jinja2>=3.1.3' \ 'idna>=3.7' \ - onnx \ - packaging=23.2 \ - setuptools=69.1.0 \ + 'onnx>=1.15.0' \ + 'packaging=23.2' \ + 'setuptools=69.1.0' \ && \ conda clean -y --all # PyPI packages RUN conda run -n pytorch-gpu pip install --no-deps --no-cache-dir --ignore-installed \ - ninja \ - python-dotenv \ + 'ninja>=1.11.1.1' \ + 'python-dotenv>=1.0.1' \ 'tqdm>=4.66.2' \ - cloud-data-connector \ - dataset-librarian && \ + 'cloud-data-connector>=1.0.3' \ + 'dataset-librarian>=1.0.4' && \ conda run -n pytorch-gpu pip install --no-cache-dir --ignore-installed \ - transformers \ - datasets \ - evaluate && \ - conda run -n pytorch-gpu pip install --no-cache-dir -U accelerate && \ + 'transformers>=4.40.2' \ + 'datasets>=2.19.1' \ + 'evaluate>=0.4.2' && \ + conda run -n pytorch-gpu pip install --no-cache-dir -U 'accelerate>=0.30.0' && \ conda run -n pytorch-gpu pip 
install --no-cache-dir "git+https://github.com/huggingface/optimum-intel.git" && \ conda clean -y --all -RUN conda run -n pytorch-gpu conda install protobuf=4.24 -c conda-forge --override --force-reinstall -y +RUN conda run -n pytorch-gpu conda install 'protobuf=4.24' -c conda-forge --override --force-reinstall -y # TensorFlow Installation @@ -283,11 +283,11 @@ RUN conda create -yn tensorflow-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ intel-optimization-for-horovod="${INTEL_HOROVOD}" \ tensorflow="${TF_VERSION}" \ impi-devel="${IMPI_VERSION}" \ - matplotlib-base \ - dataset_librarian \ - ipython \ - ipykernel \ - kernda \ + 'matplotlib-base>=3.4.3' \ + 'dataset_librarian>=1.0.4' \ + 'ipython>=8.18.1' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ 'pillow>=10.2.0' \ 'cryptography>=42.0.4' \ 'werkzeug>=2.2.3' \ @@ -296,19 +296,19 @@ RUN conda create -yn tensorflow-cpu -c "${INTEL_CHANNEL}" -c conda-forge \ 'pyjwt>=2.8.0' \ 'oauthlib>=3.2.2' \ 'idna>=3.7' \ - onnx \ + 'onnx>=1.14.1' \ && \ conda clean -y --all # PyPI packages RUN conda run -n tensorflow-cpu pip install --no-cache-dir --ignore-installed \ - py-cpuinfo \ - requests \ - cryptography + 'py-cpuinfo>=9.0.0' \ + 'requests>=2.31.0' \ + 'cryptography>=42.0.7' RUN conda run -n tensorflow-cpu pip install --no-deps --no-cache-dir --ignore-installed \ - tensorflow_hub \ + 'tensorflow-hub>=0.16.1' \ 'tqdm>=4.66.2' \ - cloud-data-connector && \ + 'cloud-data-connector>=1.0.3' && \ conda clean -y --all # Tensorflow Env - conda packages @@ -322,11 +322,11 @@ RUN conda create -yn tensorflow-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ intel-optimization-for-horovod="${INTEL_HOROVOD}" \ tensorflow="${TF_VERSION}" \ impi-devel="${IMPI_VERSION}" \ - matplotlib-base \ - dataset_librarian \ - ipython \ - ipykernel \ - kernda \ + 'matplotlib-base>=3.4.3' \ + 'dataset_librarian>=1.0.4' \ + 'ipython>=8.18.1' \ + 'ipykernel>=6.29.3' \ + 'kernda>=0.3.0' \ 'pillow>=10.2.0' \ 'cryptography>=42.0.4' \ 'werkzeug>=2.2.3' \ @@ -335,21 +335,21 @@ RUN conda create -yn tensorflow-gpu -c "${INTEL_CHANNEL}" -c conda-forge \ 'pyjwt>=2.8.0' \ 'oauthlib>=3.2.2' \ 'idna>=3.7' \ - onnx \ - packaging=23.2 \ - setuptools=69.1.0 \ + 'onnx>=1.14.1' \ + 'packaging=23.2' \ + 'setuptools=69.1.0' \ && \ conda clean -y --all # PyPI packages RUN conda run -n tensorflow-gpu pip install --no-cache-dir --ignore-installed \ - py-cpuinfo \ - requests \ - cryptography + 'py-cpuinfo>=9.0.0' \ + 'requests>=2.31.0' \ + 'cryptography>=42.0.7' RUN conda run -n tensorflow-gpu pip install --no-deps --no-cache-dir --ignore-installed \ - tensorflow_hub \ + 'tensorflow-hub>=0.16.1' \ 'tqdm>=4.66.2' \ - cloud-data-connector && \ + 'cloud-data-connector>=1.0.3' && \ conda clean -y --all FROM deep-learning-python as deep-learning-jupyter @@ -408,11 +408,11 @@ USER dev RUN conda install -n pytorch-cpu -c intel/label/oneapi -c "${INTEL_CHANNEL}" -c conda-forge \ deepspeed="${DEEPSPEED_VERSION}" \ - tensorboardx + 'tensorboardx>=2.6.2.2' RUN conda install -n pytorch-gpu -c intel/label/oneapi -c "${INTEL_CHANNEL}" -c conda-forge \ deepspeed="${DEEPSPEED_VERSION}" \ - tensorboardx + 'tensorboardx>=2.6.2.2' COPY --chown=dev notebooks /home/dev/jupyter COPY --chown=dev tests /home/dev/sample-tests diff --git a/preset/inference-optimization/Dockerfile b/preset/inference-optimization/Dockerfile index 0d982ab2b..6689b4370 100644 --- a/preset/inference-optimization/Dockerfile +++ b/preset/inference-optimization/Dockerfile @@ -34,13 +34,13 @@ RUN conda install -yn tensorflow-gpu -c "${INTEL_CHANNEL}" -c 
conda-forge \ neural-compressor="${NEURAL_COMPRESSOR_VERSION}" RUN conda run -n tensorflow-cpu python -m pip install --no-deps --no-cache-dir \ - tf2onnx \ - onnxruntime && \ + 'tf2onnx>=1.16.1' \ + 'onnxruntime>=1.17.3' && \ conda clean -y --all RUN conda run -n tensorflow-gpu python -m pip install --no-deps --no-cache-dir \ - tf2onnx \ - onnxruntime && \ + 'tf2onnx>=1.16.1' \ + 'onnxruntime>=1.17.3' && \ conda clean -y --all COPY --chown=dev notebooks /home/dev/jupyter From b305ee73ff74ae2190f9f7b4839d17f13fa7d20c Mon Sep 17 00:00:00 2001 From: ma-pineda Date: Wed, 19 Jun 2024 15:16:02 -0700 Subject: [PATCH 2/7] Activating presets Signed-off-by: ma-pineda --- docs/scripts/hook.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/scripts/hook.py b/docs/scripts/hook.py index fffa2b00e..3b862bdfd 100644 --- a/docs/scripts/hook.py +++ b/docs/scripts/hook.py @@ -36,10 +36,10 @@ def create_support_matrix(): compose_to_csv("classical-ml", None) # get_repo(models) - # compose_to_csv("preset/data-analytics", "data_analytics") - # compose_to_csv("preset/classical-ml", "classical_ml") - # compose_to_csv("preset/deep-learning", "deep_learning") - # compose_to_csv("preset/inference-optimization", "inference_optimization") + compose_to_csv("preset/data-analytics", "data_analytics") + compose_to_csv("preset/classical-ml", "classical_ml") + compose_to_csv("preset/deep-learning", "deep_learning") + compose_to_csv("preset/inference-optimization", "inference_optimization") compose_to_csv("workflows/charts/huggingface-llm", "genai") From 2a4cbb534b739aee2debe488fceb5cbddf0675d6 Mon Sep 17 00:00:00 2001 From: ma-pineda Date: Wed, 19 Jun 2024 15:16:56 -0700 Subject: [PATCH 3/7] Adding labels to docker-compose files Signed-off-by: ma-pineda --- preset/classical-ml/docker-compose.yaml | 7 +++++++ preset/data-analytics/docker-compose.yaml | 9 ++++++++- preset/deep-learning/docker-compose.yaml | 7 +++++++ preset/inference-optimization/docker-compose.yaml | 14 ++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/preset/classical-ml/docker-compose.yaml b/preset/classical-ml/docker-compose.yaml index 1bc69b333..beb9f980f 100644 --- a/preset/classical-ml/docker-compose.yaml +++ b/preset/classical-ml/docker-compose.yaml @@ -37,6 +37,13 @@ services: https_proxy: ${https_proxy} no_proxy: '' context: . + labels: + docs: classical-ml + dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Classical ML" + org.opencontainers.base.name: "ubuntu:22.04" + org.opencontainers.image.name: "intel/classical-ml" + org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} target: classical-ml-jupyter command: | bash -c "conda run -n classical-ml python -c 'import sklearn; import xgboost; print(\"SciKit:\", sklearn.__version__, \" XGBoost:\",xgboost.__version__)' && \ diff --git a/preset/data-analytics/docker-compose.yaml b/preset/data-analytics/docker-compose.yaml index aeae270f4..a3808febf 100644 --- a/preset/data-analytics/docker-compose.yaml +++ b/preset/data-analytics/docker-compose.yaml @@ -17,7 +17,7 @@ version: '3' services: - classical-ml: + data-analytics: build: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} @@ -35,6 +35,13 @@ services: https_proxy: ${https_proxy} no_proxy: '' context: . 
+ labels: + docs: data-analytics + dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Data Analytics" + org.opencontainers.base.name: "ubuntu:22.04" + org.opencontainers.image.name: "intel/data-analytics" + org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} target: data-analytics-jupyter command: > bash -c "conda run -n data-analytics python -c 'import modin.pandas as pd, modin.config as cfg; cfg.Engine.put(\"Ray\"); df = pd.DataFrame([1]);print(df+1)'" diff --git a/preset/deep-learning/docker-compose.yaml b/preset/deep-learning/docker-compose.yaml index dc328c18e..4ccf7dcf7 100644 --- a/preset/deep-learning/docker-compose.yaml +++ b/preset/deep-learning/docker-compose.yaml @@ -54,6 +54,13 @@ services: https_proxy: ${https_proxy} no_proxy: '' context: . + labels: + docs: deep-learning + dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" + org.opencontainers.base.name: "ubuntu:22.04" + org.opencontainers.image.name: "intel/deep-learning" + org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} target: deep-learning-jupyter command: | bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ diff --git a/preset/inference-optimization/docker-compose.yaml b/preset/inference-optimization/docker-compose.yaml index 960fc1cac..fc57ee835 100644 --- a/preset/inference-optimization/docker-compose.yaml +++ b/preset/inference-optimization/docker-compose.yaml @@ -54,6 +54,13 @@ services: https_proxy: ${https_proxy} no_proxy: '' context: ../deep-learning + labels: + docs: deep-learning + dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" + org.opencontainers.base.name: "ubuntu:22.04" + org.opencontainers.image.name: "intel/deep-learning" + org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} target: deep-learning-jupyter environment: http_proxy: ${http_proxy} @@ -72,6 +79,13 @@ services: args: COMPOSE_PROJECT_NAME: ${COMPOSE_PROJECT_NAME:-preset} context: . 
+ labels: + docs: inference-optimization + dependency.python: ${PYTHON_VERSION:-3.10} + org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Inference Optimization" + org.opencontainers.base.name: "intel/deep-learning" + org.opencontainers.image.name: "intel/inference-optimization" + org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} target: inference-optimization depends_on: - dl-base From e71d9b02c0a7616e7d7164401ea7d05c58ae5dd2 Mon Sep 17 00:00:00 2001 From: ma-pineda Date: Fri, 28 Jun 2024 13:19:39 -0700 Subject: [PATCH 4/7] Removing signed extra commits Signed-off-by: ma-pineda --- .github/CODEOWNERS | 20 +-- .github/ISSUE_TEMPLATE/bug-report.yml | 2 + .github/action.yml | 24 ++++ .github/dependabot.yml | 8 ++ .github/scan/action.yml | 35 +++++ .github/workflows/chart-ci.yaml | 14 +- .github/workflows/container-ci.yaml | 89 +++++++++---- .github/workflows/dependency-review.yaml | 4 +- .github/workflows/docs.yaml | 15 +-- .github/workflows/integration-test.yaml | 8 +- .github/workflows/lint.yaml | 13 +- .github/workflows/merge_check.yaml | 47 ------- .github/workflows/scorecard.yaml | 2 +- .github/workflows/test-runner-ci.yaml | 75 ++--------- .github/workflows/weekly-test.yaml | 2 +- .gitignore | 1 + CONTRIBUTING.md | 105 +++++++++++++-- README.md | 11 +- docs/matrix.md | 8 +- preset/classical-ml/.actions.json | 1 - preset/classical-ml/Dockerfile | 2 +- preset/classical-ml/docker-compose.yaml | 50 ++++++- preset/classical-ml/requirements.txt | 1 + preset/data-analytics/.actions.json | 1 - preset/data-analytics/Dockerfile | 2 +- preset/data-analytics/docker-compose.yaml | 45 ++++++- preset/data-analytics/requirements.txt | 1 + preset/data-analytics/tests.yaml | 7 +- preset/deep-learning/.actions.json | 1 - preset/deep-learning/Dockerfile | 32 ++++- preset/deep-learning/docker-compose.yaml | 114 +++++++++++++++- preset/deep-learning/requirements.txt | 14 ++ preset/deep-learning/tests.yaml | 23 ++-- preset/inference-optimization/.actions.json | 1 - .../docker-compose.yaml | 123 ++++++++++++++++-- .../inference-optimization/requirements.txt | 16 +++ preset/inference-optimization/tests.yaml | 37 +++--- python/Dockerfile | 4 +- python/requirements.txt | 8 +- pytorch/Dockerfile | 59 +++++++-- pytorch/README.md | 95 +++++++++++++- pytorch/generate_ssh_keys.sh | 28 ++++ pytorch/tests/tests.yaml | 2 +- test-runner/README.md | 7 +- test-runner/action.yml | 10 -- test-runner/dev-requirements.txt | 3 +- test-runner/requirements.txt | 2 - test-runner/tests.yaml | 5 - test-runner/tests/utest.py | 45 +------ test-runner/utils/test.py | 100 +++----------- tox.ini | 21 +-- 51 files changed, 872 insertions(+), 471 deletions(-) create mode 100644 .github/scan/action.yml delete mode 100644 .github/workflows/merge_check.yaml create mode 100644 preset/classical-ml/requirements.txt create mode 100644 preset/data-analytics/requirements.txt create mode 100644 preset/deep-learning/requirements.txt create mode 100644 preset/inference-optimization/requirements.txt create mode 100755 pytorch/generate_ssh_keys.sh diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 548b0c6fb..e0218a48f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,9 +1,11 @@ -.github/ @tylertitsworth @sharvil10 @jitendra42 -classical-ml/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 -docs/ @tylertitsworth @jitendra42 -enterprise/ @sharvil10 -preset/ @ma-pineda @jafraustro -pytorch/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 -tensorflow/ @tylertitsworth 
@jitendra42 @sramakintel @sharvil10 -test-runner/ @tylertitsworth -workflows/ @tylertitsworth @dmsuehir +* @tylertitsworth @jitendra42 +/.github/ @tylertitsworth @sharvil10 @jitendra42 +/classical-ml/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 +/docs/ @tylertitsworth @jitendra42 +/enterprise/ @sharvil10 +/preset/ @ma-pineda @jafraustro +/python/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 +/pytorch/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 +/tensorflow/ @tylertitsworth @jitendra42 @sramakintel @sharvil10 +/test-runner/ @tylertitsworth +/workflows/ @tylertitsworth @dmsuehir diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index c2c794c30..94a21a6fc 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -64,6 +64,8 @@ body: required: false - label: test-runner required: false + - label: workflows + required: false - type: textarea id: versions attributes: diff --git a/.github/action.yml b/.github/action.yml index 921ae8bf0..99ea473c1 100644 --- a/.github/action.yml +++ b/.github/action.yml @@ -45,6 +45,10 @@ inputs: required: false default: false type: boolean +outputs: + container-group: + description: "Container Group" + value: ${{ steps.container-output.outputs.group }} runs: using: composite steps: @@ -69,6 +73,26 @@ runs: COMPOSE_PROJECT_NAME=${{ env.project-number }} \ ${{ inputs.env_overrides }} docker compose -p ${{ env.project-number }} up --build --force-recreate --always-recreate-deps --no-start working-directory: ${{ inputs.group_dir }} + - name: Print Containers + id: container-output + shell: bash + run: | + mkdir matrix + images=$(REGISTRY=${{ inputs.registry }} \ + REPO=${{ inputs.repo }} \ + COMPOSE_PROJECT_NAME=${{ env.project-number }} \ + ${{ inputs.env_overrides }} docker compose -p ${{ env.project-number }} images --format json) + for image in $(echo $images | jq -r --arg registry "$REGISTRY" '.[] | select(.Repository | contains($registry)) | .Tag'); do + echo "$image" > matrix/$image.txt + done + echo "group=${{ inputs.group_dir }}" | tr '/' '_' >> $GITHUB_OUTPUT + working-directory: ${{ inputs.group_dir }} + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.project-number }}-${{ steps.container-output.outputs.group }} + path: ${{ inputs.group_dir }}/matrix/* + retention-days: 1 + overwrite: true - name: Push Containers shell: bash if: ${{ !fromJson(inputs.no-push) }} diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f8923f663..24b1e6706 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -79,3 +79,11 @@ updates: package-ecosystem: pip schedule: interval: weekly + - directory: /preset + groups: + preset: + patterns: + - "*requirements.txt" + package-ecosystem: pip + schedule: + interval: weekly diff --git a/.github/scan/action.yml b/.github/scan/action.yml new file mode 100644 index 000000000..fde7f94c8 --- /dev/null +++ b/.github/scan/action.yml @@ -0,0 +1,35 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'Aqua Security Trivy' +description: 'Scans container images for vulnerabilities with Trivy without building the image. For use behind firewalls.' +author: 'tyler.titsworth@intel.com' +inputs: + image-ref: + description: 'image reference(for backward compatibility)' + required: true + output: + description: 'writes results to a file with the specified file name' + required: true +runs: + using: 'docker' + image: "docker://ghcr.io/aquasecurity/trivy" + entrypoint: trivy + args: + - '--timeout=30m' + - image + - '--format=sarif' + - '--no-progress' + - '--output=${{ inputs.output }}' + - ${{ inputs.image-ref }} diff --git a/.github/workflows/chart-ci.yaml b/.github/workflows/chart-ci.yaml index 5b00a4f80..af9afd5fc 100644 --- a/.github/workflows/chart-ci.yaml +++ b/.github/workflows/chart-ci.yaml @@ -14,10 +14,7 @@ name: Helm Chart CI on: - merge_group: null - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: paths: - 'workflows/charts/**' permissions: read-all @@ -33,15 +30,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} - with: - fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - uses: intel/ai-containers/workflows/charts@main with: kubeconfig_path: ${{ secrets.KUBECONFIG_PATH }} diff --git a/.github/workflows/container-ci.yaml b/.github/workflows/container-ci.yaml index cf9f929a5..b448ad851 100644 --- a/.github/workflows/container-ci.yaml +++ b/.github/workflows/container-ci.yaml @@ -25,11 +25,6 @@ on: description: 'Enter Bash Env Variable Overrides in `KEY=VAL KEY2=VAL2` format:' required: false type: string - ref: - description: 'Enter Git Ref:' - required: true - default: 'main' - type: string runner_label: description: 'Enter Validation Runner Label:' default: test-runner @@ -61,13 +56,10 @@ on: no_start: required: false type: boolean - ref: - required: true - type: string jobs: - #################################################################################################### - # Compose Build - #################################################################################################### +#################################################################################################### +# Compose Build +#################################################################################################### setup-build: outputs: matrix: ${{ steps.build-matrix.outputs.matrix }} @@ -78,8 +70,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - ref: ${{ inputs.ref }} - name: Set Matrix id: build-matrix run: echo "matrix=$(jq -c . 
< ${{ inputs.group_dir }}/.actions.json)" >> $GITHUB_OUTPUT @@ -89,14 +79,14 @@ jobs: build-containers: needs: [setup-build] env: ${{ matrix }} - runs-on: k8-runners + runs-on: ubuntu-latest strategy: matrix: ${{ fromJson(needs.setup-build.outputs.matrix) }} fail-fast: false + outputs: + group: ${{ steps.build-group.outputs.container-group }} steps: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - ref: ${{ inputs.ref }} if: ${{ !inputs.no_build }} - uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0 with: @@ -115,9 +105,63 @@ jobs: repo: ${{ secrets.REPO }} no-push: ${{ inputs.no_push }} no-start: ${{ inputs.no_start }} - #################################################################################################### - # Generic Test Runner - #################################################################################################### +#################################################################################################### +# Trivy Scan +#################################################################################################### + setup-scan: + needs: [build-containers] + if: ${{ github.event_name == 'pull_request' }} + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.scan-matrix.outputs.matrix }} + steps: + - name: Harden Runner + uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1 + with: + egress-policy: audit + - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + with: + path: matrix + - name: Set Matrix + id: scan-matrix + run: echo "matrix=$(cat matrix/*-${{ needs.build-containers.outputs.group }}/*.txt | jq -R '.' | jq -sc '. | unique')" >> $GITHUB_OUTPUT + scan-containers: + needs: [setup-scan] + if: ${{ !inputs.no_build && github.event_name == 'pull_request' }} + runs-on: ubuntu-latest + strategy: + matrix: + container: ${{ fromJSON(needs.setup-scan.outputs.matrix) }} + fail-fast: false + steps: + - name: Harden Runner + uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1 + with: + egress-policy: audit + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0 + with: + registry: ${{ secrets.REGISTRY }} + username: ${{ secrets.REGISTRY_USER }} + password: ${{ secrets.REGISTRY_TOKEN }} + - name: Pull Image + run: docker pull ${{ secrets.REGISTRY }}/${{ secrets.REPO }}:${{ matrix.container }} + - name: Scan Container + uses: intel/ai-containers/.github/scan@main + with: + image-ref: ${{ secrets.REGISTRY }}/${{ secrets.REPO }}:${{ matrix.container }} + output: ${{ matrix.container }}-scan.sarif + - name: Cleanup + if: always() + run: docker rmi -f ${{ secrets.REGISTRY }}/${{ secrets.REPO }}:${{ matrix.container }} + - uses: github/codeql-action/upload-sarif@23acc5c183826b7a8a97bce3cecc52db901f8251 # v3.25.10 + with: + sarif_file: '${{ matrix.container }}-scan.sarif' + category: '${{ matrix.container }}' + continue-on-error: true +#################################################################################################### +# Generic Test Runner +#################################################################################################### setup-test: needs: [build-containers] runs-on: ubuntu-latest @@ -129,8 +173,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - ref: ${{ inputs.ref }} - name: Get Recipes id: 
test-matrix run: echo "matrix=$(find ${{ inputs.group_dir }} -type f -name 'tests.yaml' -exec dirname {} \; | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT @@ -148,8 +190,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - ref: ${{ inputs.ref }} - uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0 with: registry: ${{ secrets.REGISTRY }} @@ -159,9 +199,8 @@ jobs: uses: intel/ai-containers/test-runner@main with: cache_registry: ${{ secrets.CACHE_REGISTRY }} - perf_repo: ${{ secrets.PERF_REPO }} recipe_dir: ${{ inputs.group_dir }} registry: ${{ secrets.REGISTRY }} repo: ${{ secrets.REPO }} test_dir: ${{ matrix.tests }} - token: ${{ secrets.ACTION_TOKEN || github.token }} + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml index 9621e35ce..f1bd3e8f9 100644 --- a/.github/workflows/dependency-review.yaml +++ b/.github/workflows/dependency-review.yaml @@ -22,9 +22,7 @@ # Source repository: https://github.com/actions/dependency-review-action name: 'Dependency Review' on: - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: null permissions: contents: read concurrency: diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index ce23cdb90..b89294f2c 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -15,9 +15,7 @@ name: Docs on: merge_group: null - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: null push: branches: - main @@ -38,17 +36,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'push' }} - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} - with: - fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: python-version: 3.8 diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 87cdb83a3..a5c335d5f 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -14,9 +14,7 @@ name: Integration Tests on: - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: null permissions: read-all concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -34,13 +32,12 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - name: Output Modified Group Directories id: group-list run: | # Get diff array filtered by specific filetypes DIFF=$(git diff --diff-filter=d \ - --name-only ${{ github.event.merge_group.base_sha || github.event.pull_request.base.sha }}...${{ github.event.merge_group.head_sha || github.event.pull_request.head.sha }} \ + --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \ -- '*/*Dockerfile' '*.py' '*.yaml' '*.yml' '*.sh' '*/*requirements.txt' '*.json' | \ jq -R '.' 
| jq -sc '.' \ ) @@ -71,7 +68,6 @@ jobs: uses: intel/ai-containers/.github/workflows/container-ci.yaml@main with: group_dir: ${{ matrix.group }} - ref: "refs/pull/${{ github.event.number }}/merge" secrets: inherit status-check: needs: [group-diff, pipeline-ci] diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 2957d79ee..cbaec176c 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -16,9 +16,7 @@ name: Lint permissions: read-all on: merge_group: null - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: null concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true @@ -32,19 +30,12 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} with: fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - uses: super-linter/super-linter/slim@88ea3923a7e1f89dd485d079f6eb5f5e8f937589 # v6.6.0 env: # To report GitHub Actions status checks - GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_ACTIONS_COMMAND_ARGS: '-ignore SC.*' VALIDATE_BASH_EXEC: false VALIDATE_CHECKOV: false diff --git a/.github/workflows/merge_check.yaml b/.github/workflows/merge_check.yaml deleted file mode 100644 index ca0569af6..000000000 --- a/.github/workflows/merge_check.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Merge Check -on: - merge_group: null -permissions: read-all -jobs: - status-check: - runs-on: ubuntu-latest - steps: - - uses: fountainhead/action-wait-for-check@5a908a24814494009c4bb27c242ea38c93c593be - id: helm-ci - with: - checkName: helm-ci - ref: ${{ github.event.merge_group.head_ref }} - timeoutSeconds: 1800 - token: ${{ secrets.ACTION_TOKEN }} - - uses: fountainhead/action-wait-for-check@5a908a24814494009c4bb27c242ea38c93c593be - if: ${{ steps.helm-ci.outputs.conclusion == 'success' }} - id: integration-test - with: - checkName: integration-test - ref: ${{ github.event.merge_group.head_ref }} - timeoutSeconds: 1800 - token: ${{ secrets.ACTION_TOKEN }} - - uses: fountainhead/action-wait-for-check@5a908a24814494009c4bb27c242ea38c93c593be - if: ${{ steps.integration-test.outputs.conclusion == 'success' }} - id: coverage - with: - checkName: coverage - ref: ${{ github.event.merge_group.head_ref }} - timeoutSeconds: 1800 - token: ${{ secrets.ACTION_TOKEN }} - - if: ${{ steps.coverage.outputs.conclusion != 'success' }} - run: exit 1 diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml index 78158e621..95e71d5d0 100644 --- a/.github/workflows/scorecard.yaml +++ b/.github/workflows/scorecard.yaml @@ -46,7 +46,7 @@ jobs: with: results_file: results.sarif results_format: sarif - repo_token: ${{ secrets.ACTION_TOKEN }} + repo_token: ${{ secrets.GITHUB_TOKEN }} publish_results: true - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 with: diff --git a/.github/workflows/test-runner-ci.yaml b/.github/workflows/test-runner-ci.yaml index 6eee4f518..48950cda3 100644 --- a/.github/workflows/test-runner-ci.yaml +++ b/.github/workflows/test-runner-ci.yaml @@ -14,19 +14,19 @@ name: Test Runner CI on: - merge_group: null - pull_request_target: - types: [opened, edited, reopened, synchronize] - branches: [main] + pull_request: paths: - 'test-runner/**' + push: + branches: + - main permissions: read-all concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: unit-test: - runs-on: k8-runners + runs-on: ubuntu-latest strategy: matrix: python: ["3.8", "3.9", "3.10", "3.11", "3.12"] @@ -37,20 +37,6 @@ jobs: with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} - with: - fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - path: models-perf - repository: ${{ secrets.PERF_REPO }} - token: ${{ secrets.ACTION_TOKEN }} - uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0 with: driver: docker @@ -68,15 +54,13 @@ jobs: run: python -m tox env: CACHE_REGISTRY: ${{ secrets.CACHE_REGISTRY }} + COVERALLS_FLAG_NAME: ${{ matrix.python }} + COVERALLS_PARALLEL: true FORCE_COLOR: 1 - GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PERF_REPO: ${{ secrets.PERF_REPO }} REGISTRY: ${{ secrets.REGISTRY }} REPO: ${{ secrets.REPO }} - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 - with: - name: covdata-${{ matrix.python }} - path: ${{ github.workspace }}/.coverage* coverage: needs: [unit-test] 
runs-on: ubuntu-latest @@ -85,51 +69,17 @@ jobs: uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1 with: egress-policy: audit - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} - with: - fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 - with: - python-version: "3.10" - - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 - with: - merge-multiple: true - - name: Coverage - run: | - python -m pip install coverage - python -m coverage combine --debug=pathmap - python -m coverage xml --skip-empty - - name: Comment on PR - if: ${{ github.event_name == 'pull_request' }} - uses: orgoro/coverage@4e3bc9302f715595336a40f7dd4a3b15c687bdb4 # v3.1 + - uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0 with: - coverageFile: coverage.xml - token: ${{ secrets.ACTION_TOKEN }} - treshholdAll: 80 + parallel-finished: true integration-test: - runs-on: k8-runners + runs-on: ubuntu-latest steps: - name: Harden Runner uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1 with: egress-policy: audit - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'pull_request_target' }} - with: - fetch-depth: 0 - ref: "refs/pull/${{ github.event.number }}/merge" - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - if: ${{ github.event_name == 'merge_group' }} - with: - fetch-depth: 0 - ref: ${{ github.event.merge_group.head_ref }} - uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0 with: driver: docker @@ -145,9 +95,8 @@ jobs: uses: intel/ai-containers/test-runner@main with: cache_registry: ${{ secrets.CACHE_REGISTRY }} - perf_repo: ${{ secrets.PERF_REPO }} recipe_dir: test-runner registry: ${{ secrets.REGISTRY }} repo: ${{ secrets.REPO }} test_dir: test-runner - token: ${{ secrets.ACTION_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/weekly-test.yaml b/.github/workflows/weekly-test.yaml index 924204734..8806a9350 100644 --- a/.github/workflows/weekly-test.yaml +++ b/.github/workflows/weekly-test.yaml @@ -80,5 +80,5 @@ jobs: fetch-depth: 0 - uses: gitleaks/gitleaks-action@44c470ffc35caa8b1eb3e8012ca53c2f9bea4eb5 # v2.3.6 env: - GITHUB_TOKEN: ${{ secrets.ACTION_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITLEAKS_LICENSE: ${{ secrets.GITLEAKS_LICENSE}} diff --git a/.gitignore b/.gitignore index 9684f76ea..f3e1d08fa 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .pytest* .secrets .tox +.venv/ .vscode **__pycache__** **.ipynb_checkpoints** diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a1d452b2e..56b2518d7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,11 @@ Thank you for considering contributing to Intel® AI Containers! We welcome your ## Getting Started -Before you start contributing, please take a moment to review the following guidelines. 
+Before you start contributing, submit a request to be added to the [Intel](https://github.com/intel) Organization on GitHub by contacting one of the [`ai-containers-maintain`](https://github.com/orgs/intel/teams/ai-containers-maintain) members. + +Once assigned to the [`ai-containers-write`](https://github.com/orgs/intel/teams/ai-containers-write) or [`ai-containers-maintain`](https://github.com/orgs/intel/teams/ai-containers-maintain) team depending on the scope of your contributions, you will be able to create branches and submit pull requests. + +Once you are added to the organization, you will be given access to ai-containers' Azure Container Registry (ACR). ## Code of Conduct @@ -12,17 +16,37 @@ This project follows the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT. ### How to Contribute -1. Fork the repository. -2. Create a new branch for your contribution: `git checkout -b feature/your-feature`. -3. Install [pre-commit](https://pre-commit.com/), [Docker](https://docs.docker.com/engine/install/), and [Python 3.8+](https://www.python.org/downloads/). -4. Follow the [Project Setup](README.md#project-setup) steps. -5. Install the third-party python dependencies necessary for pre-commit depending on the type of contribution you are making: - 1. If you are contributing to Test Runner, run `pip install -r test-runner/dev-requirements.txt`. - 2. If you are contributing to documentation, run `pip install -r docs/requirements.txt`. (This hook will always run) - 3. If you are contributing to helm charts, run `pip install -r workflows/charts/dev-requirements.txt`. Install [Helm](https://helm.sh/docs/intro/install/) and then [Chart Testing](https://github.com/helm/chart-testing) tool. -6. Make your changes, commit, and sign your changes: `git commit -s -m 'Add your feature'`. -7. Push to the branch: `git push origin feature/your-feature`. -8. Submit a pull request. +1. Create a new branch for your contribution: `git checkout -b username/your-feature`. +2. Install [pre-commit](https://pre-commit.com/), [Docker](https://docs.docker.com/engine/install/), and [Python 3.8+](https://www.python.org/downloads/). + 1. `pre-commit install` + 2. `sudo usermod -aG docker $USER` + 3. `sudo apt-get install -y python3-venv` +3. Follow the [Project Setup](README.md#project-setup) steps. +4. Install the third-party python dependencies necessary for pre-commit depending on the type of contribution you are making: + 1. Always install the documentation hook requirements: `pip install -r docs/requirements.txt`. + 2. If you are contributing to Test Runner, run `pip install -r test-runner/dev-requirements.txt`. + 3. If you are contributing to helm charts, run `pip install -r workflows/charts/dev-requirements.txt`. Install [Helm](https://helm.sh/docs/intro/install/) and then the [Chart Testing](https://github.com/helm/chart-testing) tool. +5. Make your changes, commit, and [sign](#sign-your-work) your changes: `git commit -s -m 'Add your feature'`. +6. Push to the branch: `git push origin username/your-feature`. +7. Submit a pull request. + +### Code Review + +All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult the [GitHub Help](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) for more information on using pull requests. + +In order to complete the review process, the following steps are required: + +1. All status checks pass. +2. 
All third-party dependencies are approved by the maintainers and no new vulnerabilities are introduced to the codebase. +3. At least one approval from a [codeowner](https://github.com/intel/ai-containers/blob/main/.github/CODEOWNERS) that maintains the area of the code you are changing. + +Depending on the size and complexity of the change, additional reviews may be required, and the change may be subject to additional requirements. For example, if you are submitting a contribution to [Test Runner](https://github.com/intel/ai-containers/tree/main/test-runner), you may be required to write unit tests that satisfy our coverage requirements. + +### Merge Queue + +Once your pull request has been approved, it will be added to the merge queue. The merge queue is a list of pull requests that are ready to be merged. The merge queue runs additional CI over your code to make sure no regressions were introduced into other areas of the codebase. + +If your pull request passes the merge queue, it will be merged into the main branch. Otherwise, it will be removed from the merge queue, and you will need to address the issues that caused the failure. ## Contribution Guidelines @@ -36,7 +60,6 @@ To ensure a smooth and effective contribution process, please follow these guide ### Making Changes -- Fork the repository and create a new branch for your changes. - Keep each pull request focused on a single feature or bugfix. - Write clear and descriptive commit messages. - Keep code changes concise and well-documented. @@ -65,6 +88,60 @@ To ensure a smooth and effective contribution process, please follow these guide - Reference the relevant issue(s) if applicable. - Be responsive to feedback and be ready to make further changes if necessary. + +### Sign your work + +Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open source patch. The rules are pretty simple: if you can certify +the below (from [developercertificate.org](http://developercertificate.org/)): + +```text +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. 
+``` + +Then you just add a line to every git commit message: + +```text +Signed-off-by: Joe Smith <joe.smith@email.com> +``` + +Use your real name (sorry, no pseudonyms or anonymous contributions.) + +If you set your `user.name` and `user.email` git configs, you can sign your +commit automatically with `git commit -s`. + ## License -Intel® AI Containers is licensed under the terms in [LICENSE](#license). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +Intel® AI Containers is licensed under the terms in [LICENSE](./LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. diff --git a/README.md b/README.md index 36ff5bc07..961aed49f 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8270/badge)](https://www.bestpractices.dev/projects/8270) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/intel/ai-containers/badge)](https://securityscorecards.dev/viewer/?uri=github.com/intel/ai-containers) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/intel/ai-containers/main.svg)](https://results.pre-commit.ci/latest/github/intel/ai-containers/main) +[![Coverage Status](https://coveralls.io/repos/github/intel/ai-containers/badge.svg?branch=main)](https://coveralls.io/github/intel/ai-containers?branch=main) [![CodeQL](https://github.com/intel/ai-containers/actions/workflows/github-code-scanning/codeql/badge.svg?branch=main)](https://github.com/intel/ai-containers/actions/workflows/github-code-scanning/codeql) [![Docs](https://github.com/intel/ai-containers/actions/workflows/docs.yaml/badge.svg?branch=main)](https://github.com/intel/ai-containers/actions/workflows/docs.yaml) [![Lint](https://github.com/intel/ai-containers/actions/workflows/lint.yaml/badge.svg?branch=main)](https://github.com/intel/ai-containers/actions/workflows/lint.yaml) @@ -20,12 +21,14 @@ Define your project's registry and repository each time you use the project: # REGISTRY/REPO:TAG export REGISTRY= export REPO= -``` - -The maintainers of Intel® AI Containers use [harbor](https://github.com/goharbor/harbor) to store containers. +docker login $REGISTRY -> [!NOTE] -> `REGISTRY` and `REPO` are used to authenticate with the private registry necessary to push completed container layers and saved them for testing and publication. For example: `REGISTRY=intel && REPO=intel-extension-for-pytorch` would become `intel/intel-extension-for-pytorch` as the name of the container image, followed by the tag generated from the service found in that project's compose file. +# Verify your access permissions +docker pull $REGISTRY/$REPO:latest +``` + +The maintainers of Intel® AI Containers use Azure to store containers, but an open source container registry like [harbor](https://github.com/goharbor/harbor) is preferred. > [!WARNING] > You can optionally skip this step and use some placeholder values, however some container groups depend on other images and will pull from a registry that you have not defined and result in an error. 
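To make the sign-off workflow described above concrete, here is a minimal sketch using the same placeholder identity as the DCO example; substitute your real name and email:

```bash
# One-time setup: the identity git embeds in the Signed-off-by trailer
git config user.name "Joe Smith"
git config user.email "joe.smith@email.com"

# -s appends "Signed-off-by: Joe Smith <joe.smith@email.com>" to the message
git commit -s -m "Add your feature"

# Verify the trailer on the most recent commit
git log -1 --format=%B
```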
diff --git a/docs/matrix.md b/docs/matrix.md index 6f5c47d82..0af0cf4c9 100644 --- a/docs/matrix.md +++ b/docs/matrix.md @@ -23,13 +23,13 @@ {{ read_csv('assets/max-pvc.csv') }} === "[Preset Containers](https://github.com/intel/ai-containers/blob/main/preset/README.md)" - === "Data Analytics" + === "[Data Analytics](https://hub.docker.com/r/intel/data-analytics)" {{ read_csv('assets/data_analytics.csv') }} - === "Classical ML" + === "[Classical ML](https://hub.docker.com/r/intel/classical-ml)" {{ read_csv('assets/classical_ml.csv') }} - === "Deep Learning" + === "[Deep Learning](https://hub.docker.com/r/intel/deep-learning)" {{ read_csv('assets/deep_learning.csv') }} - === "Inference Optimization" + === "[Inference Optimization](https://hub.docker.com/r/intel/inference-optimization)" {{ read_csv('assets/inference_optimization.csv') }} === "[Workflows](https://hub.docker.com/r/intel/ai-workflows)" diff --git a/preset/classical-ml/.actions.json b/preset/classical-ml/.actions.json index aa899a13a..639f025c2 100644 --- a/preset/classical-ml/.actions.json +++ b/preset/classical-ml/.actions.json @@ -1,6 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "RELEASE": ["2024.1.0"], "runner_label": ["PVC"] } diff --git a/preset/classical-ml/Dockerfile b/preset/classical-ml/Dockerfile index c17ca017c..01820bcc8 100644 --- a/preset/classical-ml/Dockerfile +++ b/preset/classical-ml/Dockerfile @@ -116,7 +116,7 @@ RUN conda create -yn classical-ml -c ${INTEL_CHANNEL} -c conda-forge \ # PyPI packages RUN conda run -n classical-ml python -m pip install --no-deps --no-cache-dir \ - 'cloud-data-connector=1.0.3' + 'cloud-data-connector==1.0.3' ENV PYTHONSTARTUP=~/.patch_sklearn.py diff --git a/preset/classical-ml/docker-compose.yaml b/preset/classical-ml/docker-compose.yaml index beb9f980f..f954d9c2d 100644 --- a/preset/classical-ml/docker-compose.yaml +++ b/preset/classical-ml/docker-compose.yaml @@ -38,17 +38,58 @@ services: no_proxy: '' context: . 
labels: - docs: classical-ml - dependency.python: ${PYTHON_VERSION:-3.10} + docs: classical_ml org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Classical ML" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/classical-ml" - org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} + dependency.python: ${PYTHON_VERSION:-3.10} + dependency.python.pip: requirements.txt + dependency.apt.bzip2: true + dependency.apt.ca-certificates: true + dependency.apt.diffutils: true + dependency.apt.gcc: true + dependency.apt.git: true + dependency.apt.gzip: true + dependency.apt.make: true + dependency.apt.patch: true + dependency.apt.rsync: true + dependency.apt.unzip: true + dependency.apt.wget: true + dependency.apt.xz-utils: true + dependency.conda.jupyterlab: '>=4.1.8' + dependency.conda.notebook: '>=7.1.3' + dependency.conda.jupyterhub: '>=4.1.5' + dependency.conda.jupyter-server-proxy: '>=4.1.2' + dependency.conda.mako: '>=1.2.2' + dependency.conda.pyjwt: '>=2.4.0' + dependency.conda.cryptography: '>=42.0.5' + dependency.conda.nodejs: '>=20.12.2' + dependency.conda.aiohttp: '>=3.9.4' + dependency.conda.idna: '>=3.7' + dependency.conda.oauthlib: '>=3.2.2' + dependency.conda.dpnp: '>=0.14.0' + dependency.conda.numpy: '>=1.26.4' + dependency.conda.python: "=${PYTHON_VERSION:-3.10}" + dependency.conda.scikit-learn-intelex: '>=2024.2.0' + dependency.conda.xgboost: '>=2.0.3' + dependency.conda.modin-ray: '>=0.26.1' + dependency.conda.python-dotenv: '>=1.0.1' + dependency.conda.tqdm: '>=4.66.2' + dependency.conda.matplotlib-base: '>=3.4.3' + dependency.conda.dataset_librarian: '>=1.0.4' + dependency.conda.threadpoolctl: '>=3.3.0' + dependency.conda.ipython: '>=8.18.1' + dependency.conda.ipykernel: '>=6.29.3' + dependency.conda.kernda: '>=0.3.0' + dependency.conda.protobuf: '>=4.24' + dependency.conda.pillow: '>=10.2.0' + dependency.conda.tornado: '>=6.3.3' target: classical-ml-jupyter command: | bash -c "conda run -n classical-ml python -c 'import sklearn; import xgboost; print(\"SciKit:\", sklearn.__version__, \" XGBoost:\",xgboost.__version__)' && \ conda run -n classical-ml python -c 'import modin.pandas as pd, modin.config as cfg; cfg.Engine.put(\"Ray\"); df = pd.DataFrame([1]);print(df+1)'" - image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-classical-ml-2024.1.0-py${PYTHON_VERSION:-3.10} environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} @@ -56,3 +97,4 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path + diff --git a/preset/classical-ml/requirements.txt b/preset/classical-ml/requirements.txt new file mode 100644 index 000000000..d231202db --- /dev/null +++ b/preset/classical-ml/requirements.txt @@ -0,0 +1 @@ +cloud-data-connector==1.0.3 diff --git a/preset/data-analytics/.actions.json b/preset/data-analytics/.actions.json index aa899a13a..639f025c2 100644 --- a/preset/data-analytics/.actions.json +++ b/preset/data-analytics/.actions.json @@ -1,6 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "RELEASE": ["2024.1.0"], "runner_label": ["PVC"] } diff --git a/preset/data-analytics/Dockerfile b/preset/data-analytics/Dockerfile index 47059d623..c3a50d6c6 100644 --- a/preset/data-analytics/Dockerfile +++ b/preset/data-analytics/Dockerfile @@ -110,7 +110,7 @@ RUN 
conda create -yn data-analytics -c "${INTEL_CHANNEL}" -c conda-forge \ conda clean -y --all RUN conda run -n data-analytics python -m pip install --no-deps --no-cache-dir \ - 'cloud-data-connector=1.0.3' + 'cloud-data-connector==1.0.3' FROM data-analytics-python as data-analytics-jupyter diff --git a/preset/data-analytics/docker-compose.yaml b/preset/data-analytics/docker-compose.yaml index a3808febf..259caceb2 100644 --- a/preset/data-analytics/docker-compose.yaml +++ b/preset/data-analytics/docker-compose.yaml @@ -36,16 +36,53 @@ services: no_proxy: '' context: . labels: - docs: data-analytics - dependency.python: ${PYTHON_VERSION:-3.10} + docs: data_analytics org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Data Analytics" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/data-analytics" - org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} + dependency.python: ${PYTHON_VERSION:-3.10} + dependency.python.pip: requirements.txt + dependency.apt.bzip2: true + dependency.apt.ca-certificates: true + dependency.apt.diffutils: true + dependency.apt.gcc: true + dependency.apt.git: true + dependency.apt.gzip: true + dependency.apt.make: true + dependency.apt.patch: true + dependency.apt.rsync: true + dependency.apt.unzip: true + dependency.apt.wget: true + dependency.apt.xz-utils: true + dependency.conda.jupyterlab: '>=4.1.8' + dependency.conda.notebook: '>=7.1.3' + dependency.conda.jupyterhub: '>=4.1.5' + dependency.conda.jupyter-server-proxy: '>=4.1.2' + dependency.conda.mako: '>=1.2.2' + dependency.conda.pyjwt: '>=2.4.0' + dependency.conda.cryptography: '>=42.0.5' + dependency.conda.nodejs: '>=20.12.2' + dependency.conda.idna: '>=3.7' + dependency.conda.tqdm: '>=4.66.2' + dependency.conda.dpnp: '>=0.14.0' + dependency.conda.numpy: '>=1.26.4' + dependency.conda.python: "=${PYTHON_VERSION:-3.10}" + dependency.conda.modin-ray: '>=0.26.1' + dependency.conda.python-dotenv: '>=1.0.1' + dependency.conda.matplotlib-base: '>=3.4.3' + dependency.conda.dataset_librarian: '>=1.0.4' + dependency.conda.threadpoolctl: '>=3.3.0' + dependency.conda.ipython: '>=8.18.1' + dependency.conda.ipykernel: '>=6.29.3' + dependency.conda.kernda: '>=0.3.0' + dependency.conda.protobuf: '>=4.24.4' + dependency.conda.pillow: '>=10.2.0' + dependency.conda.tornado: '>=6.3.3' target: data-analytics-jupyter command: > bash -c "conda run -n data-analytics python -c 'import modin.pandas as pd, modin.config as cfg; cfg.Engine.put(\"Ray\"); df = pd.DataFrame([1]);print(df+1)'" - image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-2024.1.0-py${PYTHON_VERSION:-3.10} environment: http_proxy: ${http_proxy} https_proxy: ${https_proxy} diff --git a/preset/data-analytics/requirements.txt b/preset/data-analytics/requirements.txt new file mode 100644 index 000000000..d231202db --- /dev/null +++ b/preset/data-analytics/requirements.txt @@ -0,0 +1 @@ +cloud-data-connector==1.0.3 diff --git a/preset/data-analytics/tests.yaml b/preset/data-analytics/tests.yaml index e73ec8aa4..5aff8c816 100644 --- a/preset/data-analytics/tests.yaml +++ b/preset/data-analytics/tests.yaml @@ -12,15 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
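# Illustrative expansion of the templated img fields below, assuming hypothetical
# values REGISTRY=ghcr.io/example, REPO=ai-containers, GITHUB_RUN_NUMBER=42, and
# PYTHON_VERSION=3.9 (RELEASE falls back to its 2024.1.0 default):
#   img: ghcr.io/example/ai-containers:b-42-data-analytics-2024.1.0-py3.9
# so every test in this file targets the image built for the current CI run.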
---- dataset-librarian-${PYTHON_VERSION:-3.9}: cmd: conda run -n data-analytics bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} modin-${PYTHON_VERSION:-3.9}: cmd: conda run -n data-analytics sample-tests/modin/test_modin.sh - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} shm_size: 10G modin-notebook-${PYTHON_VERSION:-3.9}: cmd: papermill --log-output jupyter/modin/IntelModin_Vs_Pandas.ipynb -k data-analytics - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-data-analytics-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True diff --git a/preset/deep-learning/.actions.json b/preset/deep-learning/.actions.json index aa899a13a..639f025c2 100644 --- a/preset/deep-learning/.actions.json +++ b/preset/deep-learning/.actions.json @@ -1,6 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "RELEASE": ["2024.1.0"], "runner_label": ["PVC"] } diff --git a/preset/deep-learning/Dockerfile b/preset/deep-learning/Dockerfile index e0e749a7d..c4f5e7f6c 100644 --- a/preset/deep-learning/Dockerfile +++ b/preset/deep-learning/Dockerfile @@ -76,13 +76,35 @@ RUN apt-get update && \ RUN apt-get update && \ apt-get install -y --no-install-recommends --fix-missing \ - intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ - libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ - libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ - mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo + intel-media-va-driver-non-free \ + libmfx1 \ + libmfxgen1 \ + libvpl2 \ + libegl-mesa0 \ + libegl1-mesa \ + libegl1-mesa-dev \ + libgbm1 \ + libgl1-mesa-dev \ + libgl1-mesa-dri \ + libglapi-mesa \ + libgles2-mesa-dev \ + libglx-mesa0 \ + libigdgmm12 \ + libxatracker2 \ + mesa-va-drivers \ + mesa-vdpau-drivers \ + mesa-vulkan-drivers \ + va-driver-all \ + vainfo \ + hwinfo \ + clinfo RUN apt-get install -y --no-install-recommends --fix-missing \ - libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev="${LEVEL_ZERO_DEV_VER}" && \ + libigc-dev \ + intel-igc-cm \ + libigdfcl-dev \ + libigfxcmrt-dev \ + level-zero-dev="${LEVEL_ZERO_DEV_VER}" && \ rm -rf /var/lib/apt/lists/* RUN rm /etc/apt/sources.list.d/*list diff --git a/preset/deep-learning/docker-compose.yaml b/preset/deep-learning/docker-compose.yaml index 4ccf7dcf7..b9490f94d 100644 --- a/preset/deep-learning/docker-compose.yaml +++ b/preset/deep-learning/docker-compose.yaml @@ -55,12 +55,118 @@ services: no_proxy: '' context: . 
labels: - docs: deep-learning - dependency.python: ${PYTHON_VERSION:-3.10} + docs: deep_learning org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" org.opencontainers.base.name: "ubuntu:22.04" org.opencontainers.image.name: "intel/deep-learning" - org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10} + dependency.python: ${PYTHON_VERSION:-3.10} + dependency.python.pip: requirements.txt + dependency.apt.apt-utils: true + dependency.apt.build-essential: true + dependency.apt.bzip2: true + dependency.apt.ca-certificates: true + dependency.apt.clinfo: true + dependency.apt.cmake: true + dependency.apt.diffutils: true + dependency.apt.g++: true + dependency.apt.gcc: true + dependency.apt.git: true + dependency.apt.gnupg2: true + dependency.apt.gpg-agent: true + dependency.apt.gzip: true + dependency.apt.hwinfo: true + dependency.apt.intel-igc-cm: true + dependency.apt.intel-level-zero-gpu: '=1.3.27642.40-803~22.04' + dependency.apt.intel-media-va-driver-non-free: true + dependency.apt.intel-opencl-icd: '=23.43.27642.40-803~22.04' + dependency.apt.level-zero: '=1.14.0-744~22.04' + dependency.apt.level-zero-dev: '=1.14.0-744~22.04' + dependency.apt.libegl1-mesa: true + dependency.apt.libegl1-mesa-dev: true + dependency.apt.libegl-mesa0: true + dependency.apt.libgbm1: true + dependency.apt.libgl1-mesa-dev: true + dependency.apt.libgl1-mesa-dri: true + dependency.apt.libglapi-mesa: true + dependency.apt.libgles2-mesa-dev: true + dependency.apt.libglx-mesa0: true + dependency.apt.libigc-dev: true + dependency.apt.libigdfcl-dev: true + dependency.apt.libigdgmm12: true + dependency.apt.libigfxcmrt-dev: true + dependency.apt.libmfx1: true + dependency.apt.libmfxgen1: true + dependency.apt.libopenmpi-dev: true + dependency.apt.libvpl2: true + dependency.apt.libxatracker2: true + dependency.apt.make: true + dependency.apt.mesa-va-drivers: true + dependency.apt.mesa-vdpau-drivers: true + dependency.apt.mesa-vulkan-drivers: true + dependency.apt.numactl: true + dependency.apt.openmpi-bin: true + dependency.apt.openmpi-common: true + dependency.apt.openssh-client: true + dependency.apt.openssh-server: true + dependency.apt.patch: true + dependency.apt.rsync: true + dependency.apt.sudo: true + dependency.apt.unzip: true + dependency.apt.va-driver-all: true + dependency.apt.vainfo: true + dependency.apt.wget: true + dependency.apt.xz-utils: true + dependency.conda.jupyterlab: '>=4.1.8' + dependency.conda.aiohttp: '>=3.9.0' + dependency.conda.cryptography: '>=42.0.4' + dependency.conda.dataset_librarian: '>=1.0.4' + dependency.conda.deepspeed: '=0.14.0' + dependency.conda.dpcpp_impl_linux-64: '=2024.1.0' + dependency.conda.dpcpp-cpp-rt: '=2024.1.0' + dependency.conda.dpnp: '=0.14.0' + dependency.conda.idna: '>=3.7' + dependency.conda.impi-devel: '=2021.12' + dependency.conda.intel-extension-for-pytorch_cpu: '=2.2.0=*cpu*' + dependency.conda.intel-extension-for-pytorch_gpu: '=2.1.20=*xpu*' + dependency.conda.intel-extension-for-tensorflow_cpu: '=2.15=*cpu*' + dependency.conda.intel-extension-for-tensorflow_gpu: '=2.15=*xpu*' + dependency.conda.intel-openmp: '=2024.1.0' + dependency.conda.intel-optimization-for-horovod: '=0.28.1.4' + dependency.conda.ipykernel: '>=6.29.3' + dependency.conda.ipython: '>=8.18.1' + dependency.conda.jinja2: '>=3.1.3' + dependency.conda.jupyterhub: '>=4.1.5' + dependency.conda.jupyter-server-proxy: '>=4.1.2' + dependency.conda.kernda: 
'>=0.3.0' + dependency.conda.mako: '>=1.2.2' + dependency.conda.matplotlib-base: '>=3.4.3' + dependency.conda.mkl-dpcpp: '2024.1.0' + dependency.conda.nodejs: '>=20.12.2' + dependency.conda.notebook: '>=7.1.3' + dependency.conda.numpy: '=1.26.4' + dependency.conda.oauthlib: '>=3.2.2' + dependency.conda.oneccl_bind_pt_cpu: '=2.2.0=*cpu*' + dependency.conda.oneccl_bind_pt_gpu: '=2.1.200=*xpu*' + dependency.conda.onnx: '>=1.14.1' + dependency.conda.packaging: '=23.2' + dependency.conda.pandas: '>=2.2.2' + dependency.conda.pillow: '>=10.2.0' + dependency.conda.protobuf: '=4.24' + dependency.conda.pyjwt: '>=2.4.0' + dependency.conda.python: "=${PYTHON_VERSION:-3.10}" + dependency.conda.pytorch_cpu: '=2.2.0=*cpu*' + dependency.conda.pytorch_gpu: '=2.1.0=*xpu*' + dependency.conda.setuptools: '=69.1.0' + dependency.conda.tensorboardx: '>=2.6.2.2' + dependency.conda.tensorflow: '=2.15' + dependency.conda.torchaudio_cpu: '=2.2.0=*cpu*' + dependency.conda.torchaudio_gpu: '=2.1.0=*xpu*' + dependency.conda.torchvision_cpu: '=0.17=*cpu*' + dependency.conda.torchvision_gpu: '=0.16.0=*xpu*' + dependency.conda.tornado: '>=6.3.3' + dependency.conda.tqdm: '>=4.66.2' + dependency.conda.werkzeug: '>=2.2.3' target: deep-learning-jupyter command: | bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ @@ -89,4 +195,4 @@ services: depends_on: - dl-base extends: dl-base - image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-2024.1.0-py${PYTHON_VERSION:-3.10} diff --git a/preset/deep-learning/requirements.txt b/preset/deep-learning/requirements.txt new file mode 100644 index 000000000..4122126b5 --- /dev/null +++ b/preset/deep-learning/requirements.txt @@ -0,0 +1,14 @@ +accelerate>=0.30.0 +cloud-data-connector>=1.0.3 +cryptography>=42.0.7 +dataset-librarian>=1.0.4 +datasets>=2.19.1 +evaluate>=0.4.2 +git+https://github.com/huggingface/optimum-intel.git +ninja>=1.11.1.1 +py-cpuinfo>=9.0.0 +python-dotenv>=1.0.1 +requests>=2.31.0 +tensorflow-hub>=0.16.1 +tqdm>=4.66.2 +transformers>=4.40.2 diff --git a/preset/deep-learning/tests.yaml b/preset/deep-learning/tests.yaml index de14c9a63..0b0cdcaeb 100644 --- a/preset/deep-learning/tests.yaml +++ b/preset/deep-learning/tests.yaml @@ -12,40 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. 
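# A sketch of the test-runner fields used below, as inferred from the entries in
# these files: each top-level key names a test, `cmd` is executed inside the image
# given by `img`, `notebook: True` routes the command through the papermill
# notebook flow, and `shm_size` sets the container's shared-memory allocation.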
---- deep-learning-ipex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device cpu --ipex - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-ipex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device xpu --ipex - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-ipex-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/ipex/ResNet50_Inference.ipynb -k pytorch-cpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True deep-learning-ipex-notebook-${PYTHON_VERSION:-3.9}-gpu: cmd: papermill --log-output jupyter/ipex/ResNet50_Inference.ipynb -k pytorch-gpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True deep-learning-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/ipex-quantization/IntelPytorch_Quantization.ipynb -k pytorch-cpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True deep-learning-itex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-itex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} deep-learning-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} diff --git a/preset/inference-optimization/.actions.json b/preset/inference-optimization/.actions.json index aa899a13a..639f025c2 100644 --- a/preset/inference-optimization/.actions.json +++ b/preset/inference-optimization/.actions.json @@ -1,6 +1,5 @@ { "PYTHON_VERSION": ["3.9", "3.10"], "experimental": [true], - "RELEASE": ["2024.1.0"], "runner_label": ["PVC"] } diff --git a/preset/inference-optimization/docker-compose.yaml b/preset/inference-optimization/docker-compose.yaml index fc57ee835..cc89dee7e 100644 --- a/preset/inference-optimization/docker-compose.yaml +++ b/preset/inference-optimization/docker-compose.yaml @@ -22,6 +22,7 @@ services: args: BASE_IMAGE: ${BASE_IMAGE:-ubuntu} BASE_TAG: ${BASE_TAG:-22.04} + DEEPSPEED_VERSION: ${DEEPSPEED_VERSION:-0.14.0} DEVICE: ${DEVICE:-flex} DPNP_VERSION: ${NUMBA_DPEX_VERSION:-0.14.0} HOROVOD_VERSION: ${HOROVOD_VERSION:-0.28.1.4} @@ -55,12 +56,7 @@ services: no_proxy: '' context: ../deep-learning labels: - docs: deep-learning - dependency.python: ${PYTHON_VERSION:-3.10} - org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" - org.opencontainers.base.name: "ubuntu:22.04" - org.opencontainers.image.name: "intel/deep-learning" - org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10} + docs: false target: deep-learning-jupyter environment: http_proxy: ${http_proxy} @@ -80,17 +76,124 @@ services: COMPOSE_PROJECT_NAME: ${COMPOSE_PROJECT_NAME:-preset} context: . 
       labels:
-        docs: inference-optimization
-        dependency.python: ${PYTHON_VERSION:-3.10}
+        docs: inference_optimization
         org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Inference Optimization"
         org.opencontainers.base.name: "intel/deep-learning"
         org.opencontainers.image.name: "intel/inference-optimization"
-        org.opencontainers.image.version: ${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10}
+        org.opencontainers.image.version: 2024.1.0-py${PYTHON_VERSION:-3.10}
+        dependency.python: ${PYTHON_VERSION:-3.10}
+        dependency.python.pip: requirements.txt
+        dependency.apt.apt-utils: true
+        dependency.apt.build-essential: true
+        dependency.apt.bzip2: true
+        dependency.apt.ca-certificates: true
+        dependency.apt.clinfo: true
+        dependency.apt.cmake: true
+        dependency.apt.diffutils: true
+        dependency.apt.g++: true
+        dependency.apt.gcc: true
+        dependency.apt.git: true
+        dependency.apt.gnupg2: true
+        dependency.apt.gpg-agent: true
+        dependency.apt.gzip: true
+        dependency.apt.hwinfo: true
+        dependency.apt.intel-igc-cm: true
+        dependency.apt.intel-level-zero-gpu: '1.3.27642.40-803~22.04'
+        dependency.apt.intel-media-va-driver-non-free: true
+        dependency.apt.intel-opencl-icd: '23.43.27642.40-803~22.04'
+        dependency.apt.level-zero: '1.14.0-744~22.04'
+        dependency.apt.level-zero-dev: '1.14.0-744~22.04'
+        dependency.apt.libegl1-mesa: true
+        dependency.apt.libegl1-mesa-dev: true
+        dependency.apt.libegl-mesa0: true
+        dependency.apt.libgbm1: true
+        dependency.apt.libgl1-mesa-dev: true
+        dependency.apt.libgl1-mesa-dri: true
+        dependency.apt.libglapi-mesa: true
+        dependency.apt.libgles2-mesa-dev: true
+        dependency.apt.libglx-mesa0: true
+        dependency.apt.libigc-dev: true
+        dependency.apt.libigdfcl-dev: true
+        dependency.apt.libigdgmm12: true
+        dependency.apt.libigfxcmrt-dev: true
+        dependency.apt.libmfx1: true
+        dependency.apt.libmfxgen1: true
+        dependency.apt.libopenmpi-dev: true
+        dependency.apt.libvpl2: true
+        dependency.apt.libxatracker2: true
+        dependency.apt.make: true
+        dependency.apt.mesa-va-drivers: true
+        dependency.apt.mesa-vdpau-drivers: true
+        dependency.apt.mesa-vulkan-drivers: true
+        dependency.apt.numactl: true
+        dependency.apt.openmpi-bin: true
+        dependency.apt.openmpi-common: true
+        dependency.apt.openssh-client: true
+        dependency.apt.openssh-server: true
+        dependency.apt.patch: true
+        dependency.apt.rsync: true
+        dependency.apt.sudo: true
+        dependency.apt.unzip: true
+        dependency.apt.va-driver-all: true
+        dependency.apt.vainfo: true
+        dependency.apt.wget: true
+        dependency.apt.xz-utils: true
+        dependency.conda.jupyterlab: '>=4.1.8'
+        dependency.conda.aiohttp: '>=3.9.0'
+        dependency.conda.cryptography: '>=42.0.4'
+        dependency.conda.dataset_librarian: '>=1.0.4'
+        dependency.conda.deepspeed: '>=0.14.0'
+        dependency.conda.dpcpp_impl_linux-64: '>=2024.1.0'
+        dependency.conda.dpcpp-cpp-rt: '>=2024.1.0'
+        dependency.conda.dpnp: '>=0.14.0'
+        dependency.conda.idna: '>=3.7'
+        dependency.conda.impi-devel: '>=2021.12'
+        dependency.conda.intel-extension-for-pytorch_cpu: '>=2.2.0=*cpu*'
+        dependency.conda.intel-extension-for-pytorch_gpu: '>=2.1.20=*xpu*'
+        dependency.conda.intel-extension-for-tensorflow_cpu: '>=2.15=*cpu*'
+        dependency.conda.intel-extension-for-tensorflow_gpu: '>=2.15=*xpu*'
+        dependency.conda.intel-openmp: '>=2024.1.0'
+        dependency.conda.intel-optimization-for-horovod: '>=0.28.1.4'
+        dependency.conda.ipykernel: '>=6.29.3'
+        dependency.conda.ipython: '>=8.18.1'
+        dependency.conda.jinja2: '>=3.1.3'
+        dependency.conda.jupyterhub: '>=4.1.5'
+        dependency.conda.jupyter-server-proxy: '>=4.1.2'
+        dependency.conda.kernda: '>=0.3.0'
+        dependency.conda.mako: '>=1.2.2'
+        dependency.conda.matplotlib-base: '>=3.4.3'
+        dependency.conda.mkl-dpcpp: '>=2024.1.0'
+        dependency.conda.neural-compressor: '>=2.4.1'
+        dependency.conda.nodejs: '>=20.12.2'
+        dependency.conda.notebook: '>=7.1.3'
+        dependency.conda.numpy: '>=1.26.4'
+        dependency.conda.oauthlib: '>=3.2.2'
+        dependency.conda.oneccl_bind_pt_cpu: '>=2.2.0=*cpu*'
+        dependency.conda.oneccl_bind_pt_gpu: '>=2.1.200=*xpu*'
+        dependency.conda.onnx: '>=1.14.1'
+        dependency.conda.packaging: '>=23.2'
+        dependency.conda.pandas: '>=2.2.2'
+        dependency.conda.pillow: '>=10.2.0'
+        dependency.conda.protobuf: '>=4.24'
+        dependency.conda.pyjwt: '>=2.4.0'
+        dependency.conda.python: "=${PYTHON_VERSION:-3.10}"
+        dependency.conda.pytorch_cpu: '>=2.2.0=*cpu*'
+        dependency.conda.pytorch_gpu: '>=2.1.0=*xpu*'
+        dependency.conda.setuptools: '>=69.1.0'
+        dependency.conda.tensorboardx: '>=2.6.2.2'
+        dependency.conda.tensorflow: '>=2.15'
+        dependency.conda.torchaudio_cpu: '>=2.2.0=*cpu*'
+        dependency.conda.torchaudio_gpu: '>=2.1.0=*xpu*'
+        dependency.conda.torchvision_cpu: '>=0.17=*cpu*'
+        dependency.conda.torchvision_gpu: '>=0.16.0=*xpu*'
+        dependency.conda.tornado: '>=6.3.3'
+        dependency.conda.tqdm: '>=4.66.2'
+        dependency.conda.werkzeug: '>=2.2.3'
       target: inference-optimization
     depends_on:
       - dl-base
     extends: dl-base
-    image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10}
+    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${IDP_VERSION:-2024.1.0}-py${PYTHON_VERSION:-3.10}
     command: |
       bash -c "conda run -n pytorch-cpu python -c 'import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \
       conda run -n pytorch-cpu python -c 'import neural_compressor;print(\"Neural Compressor Version:\", neural_compressor.__version__)' && \
diff --git a/preset/inference-optimization/requirements.txt b/preset/inference-optimization/requirements.txt
new file mode 100644
index 000000000..15dad7740
--- /dev/null
+++ b/preset/inference-optimization/requirements.txt
@@ -0,0 +1,16 @@
+accelerate>=0.30.0
+cloud-data-connector>=1.0.3
+cryptography>=42.0.7
+dataset-librarian>=1.0.4
+datasets>=2.19.1
+evaluate>=0.4.2
+git+https://github.com/huggingface/optimum-intel.git
+ninja>=1.11.1.1
+onnxruntime>=1.17.3
+py-cpuinfo>=9.0.0
+python-dotenv>=1.0.1
+requests>=2.31.0
+tensorflow-hub>=0.16.1
+tf2onnx>=1.16.1
+tqdm>=4.66.2
+transformers>=4.40.2
diff --git a/preset/inference-optimization/tests.yaml b/preset/inference-optimization/tests.yaml
index bb5ee4dd1..987310673 100644
--- a/preset/inference-optimization/tests.yaml
+++ b/preset/inference-optimization/tests.yaml
@@ -12,79 +12,78 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
---- inference-optimization-inc-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/inc-ipex-quantization/quantize_with_inc.ipynb result.ipynb -k pytorch-cpu --cwd jupyter/inc-ipex-quantization - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True inference-optimization-inc-ipex-quantization-notebook-${PYTHON_VERSION:-3.9}-gpu: cmd: papermill --log-output jupyter/inc-ipex-quantization/quantize_with_inc.ipynb result.ipynb -k pytorch-gpu --cwd jupyter/inc-ipex-quantization - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True inference-optimization-inc-itex-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/inc-itex/inc_sample_tensorflow.ipynb result.ipynb -k tensorflow-cpu --cwd jupyter/inc-itex - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True # Status: Commented due to out of resources error # inference-optimization-inc-itex-notebook-${PYTHON_VERSION:-3.9}-gpu: # cmd: papermill --log-output jupyter/inc-itex/inc_sample_tensorflow.ipynb result.ipynb -k tensorflow-gpu --cwd jupyter/inc-itex -# img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} +# img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} # notebook: True inference-optimization-inc-tensorflow-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu sample-tests/neural_compressor/tensorflow/run.sh cpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-inc-tensorflow-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu sample-tests/neural_compressor/tensorflow/run.sh gpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-inc-torch-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu sample-tests/neural_compressor/torch/run.sh cpu - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-ipex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device cpu --ipex - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-ipex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu python -W ignore sample-tests/intel_extension_for_pytorch/test_ipex.py --device xpu --ipex - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-itex-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-itex-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu python -W ignore sample-tests/intel_extension_for_tensorflow/test_itex.py - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-itex-inference-notebook-${PYTHON_VERSION:-3.9}-cpu: cmd: papermill --log-output jupyter/itex-inference/tutorial_optimize_TensorFlow_pretrained_model.ipynb result.ipynb -k tensorflow-cpu --cwd jupyter/itex-inference - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} notebook: True # Need update from TensorFlow v1 to V2 # inference-optimization-itex-inference-notebook-${PYTHON_VERSION:-3.9}-gpu: # cmd: papermill --log-output jupyter/itex-inference/tutorial_optimize_TensorFlow_pretrained_model.ipynb result.ipynb -k tensorflow-gpu --cwd jupyter/itex-inference -# img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} +# img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} # notebook: True inference-optimization-onnx-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu sample-tests/onnx/run.sh - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-onnx-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu sample-tests/onnx/run.sh - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n tensorflow-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: 
${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-tensorflow-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n tensorflow-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' inference-optimization-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-cpu: cmd: conda run -n pytorch-cpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} inference-optimization-torch-dataset-librarian-${PYTHON_VERSION:-3.9}-gpu: cmd: conda run -n pytorch-gpu bash -c 'yes | python -m dataset_librarian.dataset -n msmarco --download -d ~/msmarco' - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-inference-optimization-${RELEASE:-2024.1.0}-py${PYTHON_VERSION:-3.9} diff --git a/python/Dockerfile b/python/Dockerfile index 6f8bfafd2..422d35cde 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -67,12 +67,12 @@ ENV PATH /opt/conda/envs/idp/bin:/opt/conda/condabin:/opt/conda/bin/:${PATH} ENV LD_LIBRARY_PATH /lib64/:/usr/lib64/:/usr/local/lib64:/opt/conda/envs/idp/lib:${LD_LIBRARY_PATH} RUN echo "conda activate idp" >> ~/.bashrc -SHELL ["conda", "run", "-n", "idp", "/bin/bash", "-c"] WORKDIR / COPY idp-requirements.txt . -RUN conda run -n idp python -m pip install --no-cache-dir -r idp-requirements.txt +RUN conda install -y --file idp-requirements.txt && \ + conda clean -y --all RUN ln -sf /opt/conda/envs/idp/bin/python /usr/local/bin/python && \ ln -sf /opt/conda/envs/idp/bin/python /usr/local/bin/python3 && \ diff --git a/python/requirements.txt b/python/requirements.txt index 73df933f9..26dd2e557 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ numpy==1.26.4 setuptools==69.5.1 -psutil==5.9.8 -mkl==2024.1.0 -mkl-include==2024.1.0 -intel-openmp==2024.1.2 +psutil==6.0.0 +mkl==2024.2.0 +mkl-include==2024.2.0 +intel-openmp==2024.2.0 diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index d83877d4f..c02ca9b26 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -80,9 +80,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin gcc \ libgl1-mesa-glx \ libglib2.0-0 \ - virtualenv && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + virtualenv ENV SIGOPT_PROJECT=. @@ -91,17 +89,53 @@ COPY multinode-requirements.txt . 
 RUN python -m pip install --no-cache-dir -r multinode-requirements.txt

+ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib"
+
+RUN apt-get install -y --no-install-recommends --fix-missing \
+    openssh-client \
+    openssh-server && \
+    rm /etc/ssh/ssh_host_*_key \
+       /etc/ssh/ssh_host_*_key.pub && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+# hadolint global ignore=SC2002
+RUN mkdir -p /var/run/sshd && \
+    cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
+    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
+    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
 ARG PYTHON_VERSION

-RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.bashrc
+COPY generate_ssh_keys.sh .
+
+# modify generate_ssh_keys to be a helper script
+# print how to use helper script on bash startup
+# Avoids loop for further execution of the startup file
+RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/env/setvars.sh" >> ~/.startup && \
+    cat '/generate_ssh_keys.sh' >> ~/.startup && \
+    rm -rf /generate_ssh_keys.sh

-ENV I_MPI_ROOT="${I_MPI_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch"
-ENV CCL_ROOT="${CCL_ROOT}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch"
-ENV FI_PROVIDER_PATH="${FI_PROVIDER_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov"
-ENV LIBRARY_PATH="${LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib"
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/lib"
-ENV PATH="${PATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/bin"
-ENV CPATH="${CPATH}:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bindings_for_pytorch/include"
+# hadolint global ignore=SC3037
+RUN echo -e "#!/bin/bash \n\
+set -e \n\
+set -a \n\
+source ~/.startup \n\
+set +a \n\
+eval \"\$@\" \n\
+tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh && \
+    chmod +x /usr/local/bin/dockerd-entrypoint.sh
+
+RUN echo 'HostKey /etc/ssh/ssh_host_dsa_key' > /var/run/sshd_config && \
+    echo 'HostKey /etc/ssh/ssh_host_rsa_key' >> /var/run/sshd_config && \
+    echo 'HostKey /etc/ssh/ssh_host_ecdsa_key' >> /var/run/sshd_config && \
+    echo 'HostKey /etc/ssh/ssh_host_ed25519_key' >> /var/run/sshd_config && \
+    echo 'AuthorizedKeysFile /etc/ssh/authorized_keys' >> /var/run/sshd_config && \
+    echo '## Enable DEBUG log. You can ignore this but this may help you debug any issue while enabling SSHD for the first time' >> /var/run/sshd_config && \
+    echo 'LogLevel DEBUG3' >> /var/run/sshd_config && \
+    echo 'UsePAM yes' >> /var/run/sshd_config && \
+    echo 'Subsystem sftp /usr/lib/openssh/sftp-server' >> /var/run/sshd_config

 RUN mkdir -p /licensing

@@ -109,6 +143,9 @@ RUN wget -q --no-check-certificate https://raw.githubusercontent.com/oneapi-src
     wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/docker/third-party-programs-pytorch.txt -O /licensing/third-party-programs-pytorch.txt && \
     wget -q --no-check-certificate https://raw.githubusercontent.com/intel/neural-compressor/master/LICENSE -O /licensing/LICENSE

+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["bash"]
+
 FROM ${PYTHON_BASE} AS ipex-xpu-base

 RUN apt-get update && \
diff --git a/pytorch/README.md b/pytorch/README.md
index aaf70d67b..3e2d31774 100644
--- a/pytorch/README.md
+++ b/pytorch/README.md
@@ -97,7 +97,7 @@ docker run -it --rm \
     --net=host \
     -v $PWD/workspace:/workspace \
     -w /workspace \
-    intel/intel-extension-for-tensorflow:xpu-jupyter
+    intel/intel-extension-for-pytorch:xpu-jupyter
 ```

 After running the command above, copy the URL (something like `http://127.0.0.1:$PORT/?token=***`) into your browser to access the notebook server.

@@ -113,6 +113,99 @@ The images below additionally include [Intel® oneAPI Collective Communications
 | `2.1.0-pip-mulitnode` | [v2.1.0] | [v2.1.0+cpu] | [v2.1.0][ccl-v2.1.0] | [v2.3.1] | [v0.2.3] |
 | `2.0.0-pip-multinode` | [v2.0.0] | [v2.0.0+cpu] | [v2.0.0][ccl-v2.0.0] | [v2.1.1] | [v0.1.0] |

+> **Note:** Passwordless SSH is also enabled in the image.
+> The container does not ship with SSH ID keys; mount your keys at `/root/.ssh/id_rsa` and `/root/.ssh/id_rsa.pub`.
+> Append the content of `id_rsa.pub` to `/etc/ssh/authorized_keys` in the SSH server container.
+> Since the SSH key is not owned by the container's default user account, also run `chmod 644 id_rsa.pub; chmod 644 id_rsa` to grant that account read access.
+> You can also run `/usr/bin/ssh-keygen -t rsa -b 4096 -N '' -f ~/mnt/ssh_key/id_rsa` to generate a new SSH key inside the container.
+> On the SSH client container, mount a config file listing all hostnames at `/root/.ssh/config`.
+> Once all files are added, follow the setup steps below.
+
+#### Setup and Run IPEX Multi-Node Container
+
+Some additional assembly is required to utilize this container with OpenSSH. To perform any kind of DDP (Distributed Data Parallel) execution, containers are assigned the roles of launcher and worker respectively:
+
+SSH Server (Worker)
+
+1. *Authorized Keys* : `/etc/ssh/authorized_keys`
+
+SSH Client (Launcher)
+
+1. *Config File with Host IPs* : `/root/.ssh/config`
+2. *Private User Key* : `/root/.ssh/id_rsa`
+
+To add these files correctly, follow the steps described below.
+
+1. Setup ID Keys
+
+   You can use the commands provided below to [generate the Identity keys](https://www.ssh.com/academy/ssh/keygen#creating-an-ssh-key-pair-for-user-authentication) for OpenSSH.
+
+   ```bash
+   ssh-keygen -q -N "" -t rsa -b 4096 -f ./id_rsa
+   touch authorized_keys
+   cat id_rsa.pub >> authorized_keys
+   ```
+
+2. Add hosts to config
+
+   The launcher container needs a config file with all hostnames and ports specified. An example of a hostfile is provided below.
+ + ```bash + touch config + ``` + + ```txt + Host host1 + HostName + IdentitiesOnly yes + Port + Host host2 + HostName + IdentitiesOnly yes + Port + ... + ``` + +3. Configure the permissions and ownership for all of the files you have created so far. + + ```bash + chmod 600 id_rsa.pub id_rsa config authorized_keys + chown root:root id_rsa.pub id_rsa config authorized_keys + ``` + +4. Now start the workers and execute DDP on the launcher. + + 1. Worker run command: + + ```bash + export SSH_PORT= + docker run -it --rm \ + --net=host \ + -v $PWD/authorized_keys:/root/.ssh/authorized_keys \ + -v $PWD/tests:/workspace/tests \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c '/usr/sbin/sshd -D -p ${SSH_PORT} -f /var/run/sshd_config' + ``` + + 2. Launcher run command: + + ```bash + docker run -it --rm \ + --net=host \ + -v $PWD/id_rsa:/root/.ssh/id_rsa \ + -v $PWD/config:/root/.ssh/config \ + -v $PWD/tests:/workspace/tests \ + -w /workspace \ + -e SSH_PORT=${SSH_PORT} \ + intel/intel-extension-for-pytorch:2.3.0-pip-multinode \ + bash -c 'ipexrun cpu /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' + ``` + +> [!NOTE] +> [Intel MPI](https://www.intel.com/content/www/us/en/developer/tools/oneapi/mpi-library.html) can be configured based on your machine settings. If the above commands do not work for you, see the documentation for how to configure based on your network. + --- The images below are [TorchServe*] with CPU Optimizations: diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/generate_ssh_keys.sh new file mode 100755 index 000000000..0ee61398e --- /dev/null +++ b/pytorch/generate_ssh_keys.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +function gen_single_key() { + ALG_NAME=$1 + if [[ ! 
-f /etc/ssh/ssh_host_${ALG_NAME}_key ]]; then + ssh-keygen -q -N "" -t "${ALG_NAME}" -f "/etc/ssh/ssh_host_${ALG_NAME}_key" + fi +} + +gen_single_key dsa +gen_single_key rsa +gen_single_key ecdsa +gen_single_key ed25519 diff --git a/pytorch/tests/tests.yaml b/pytorch/tests/tests.yaml index 946fc0adb..7ebbed262 100644 --- a/pytorch/tests/tests.yaml +++ b/pytorch/tests/tests.yaml @@ -43,7 +43,7 @@ ipex-xpu-${PACKAGE_OPTION:-pip}: - dst: /tests src: $PWD/pytorch/tests ipex-xpu-jupyter-${PACKAGE_OPTION:-pip}: - img: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.20xpu}-xpu-jupyter + img: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.20xpu}-xpu-jupyter cmd: papermill --log-output /jupyter/xpu.ipynb -k python3 notebook: True oneccl-${PACKAGE_OPTION:-pip}: diff --git a/test-runner/README.md b/test-runner/README.md index 6d6fd4786..ab49a4cf6 100644 --- a/test-runner/README.md +++ b/test-runner/README.md @@ -248,10 +248,6 @@ Inputs for the action: description: 'Container Cache Registry URL' required: false type: string - perf_repo: - description: 'Performance Test Repo' - required: false - type: string recipe_dir: description: 'Path to Recipe Directory' required: true @@ -286,7 +282,6 @@ For testing [tests.yaml](tests.yaml) file, some variables need to be set export CACHE_REGISTRY= export REGISTRY= export REPO= -# optional -export PERF_REPO= + python test-runner/test_runner.py -f test-runner/tests.yaml -a test-runner/.actions.json ``` diff --git a/test-runner/action.yml b/test-runner/action.yml index bc14d0397..61732e7ff 100644 --- a/test-runner/action.yml +++ b/test-runner/action.yml @@ -20,10 +20,6 @@ inputs: description: 'Container Cache Registry URL' required: false type: string - perf_repo: - description: 'Performance Test Repo' - required: false - type: string recipe_dir: description: 'Path to Recipe Directory' required: true @@ -52,12 +48,6 @@ runs: ref: main repository: intel/ai-containers token: ${{ inputs.token }} - - uses: actions/checkout@v4 - with: - path: models-perf - ref: main - repository: ${{ inputs.perf_repo }} - token: ${{ inputs.token }} - name: Install Requirements shell: bash run: | diff --git a/test-runner/dev-requirements.txt b/test-runner/dev-requirements.txt index a40f45302..58d718897 100644 --- a/test-runner/dev-requirements.txt +++ b/test-runner/dev-requirements.txt @@ -1,9 +1,8 @@ black>=24.4.1 coverage>=7.5.0 +coveralls>=4.0.1 expandvars>=0.12.0 -gitpython>=3.1.43 hypothesis>=6.100.1 -Pint>=0.21.1 pydantic==2.7.4 pylint>=3.1.0 pytest>=8.1.1 diff --git a/test-runner/requirements.txt b/test-runner/requirements.txt index 60fd33ff4..f1757e509 100644 --- a/test-runner/requirements.txt +++ b/test-runner/requirements.txt @@ -1,6 +1,4 @@ expandvars>=0.12.0 -gitpython>=3.1.43 -Pint>=0.21.1 pydantic==2.7.4 python_on_whales>=0.70.1 pyyaml>=6.0.1 diff --git a/test-runner/tests.yaml b/test-runner/tests.yaml index 1e37c9a5c..f5361a9a1 100644 --- a/test-runner/tests.yaml +++ b/test-runner/tests.yaml @@ -46,8 +46,3 @@ test7: cmd: "echo 'world: hello'" mask: - world -test8: - cmd: "echo 'test: 123 throughput'" - mask: - - test - performance: perf/test.yaml:test diff --git a/test-runner/tests/utest.py b/test-runner/tests/utest.py index af8c75f04..e7dd72fb4 100644 --- a/test-runner/tests/utest.py +++ b/test-runner/tests/utest.py @@ 
-21,7 +21,7 @@ from hypothesis import given from hypothesis.strategies import dictionaries, text from test_runner import get_test_list, parse_args, set_log_filename -from utils.test import PerfException, Test +from utils.test import Test @pytest.fixture @@ -143,11 +143,6 @@ def test_get_test_list(test_args_input, test_json_input): "mask": ["hello"], }, "test7": {"cmd": "echo 'world: hello'", "mask": ["world"]}, - "test8": { - "cmd": "echo 'test: 123 throughput'", - "mask": ["test"], - "performance": "perf/test.yaml:test", - }, } test_fn, disable_masking = get_test_list(test_args_input, test_json_input) @@ -164,44 +159,6 @@ def test_masking(test_class_input): assert ": ***" in test.run() -def test_perf_thresholds(): - "test performance thresholds." - test_cases = [ - { - "cmd": "echo 'test: 123 throughput'", - "performance": "perf/test.yaml:test", - "expected_output": "test: 123 throughput", - "should_raise_exception": False, - }, - { - "cmd": "echo 'test: 121 throughput'", - "performance": "perf/test.yaml:test", - "should_raise_exception": True, - }, - { - "cmd": "echo 'test: 123 millithroughput'", - "performance": "perf/test.yaml:test", - "should_raise_exception": True, - }, - { - "cmd": "echo 'test: 125 throughput'", - "performance": "perf/test.yaml:not-test", - "should_raise_exception": True, - }, - ] - - for test_case in test_cases: - test = Test(name="test", **test_case) - if test_case["should_raise_exception"]: - try: - with pytest.raises(Exception, match="Failed") as exc_info: - test.run() - except: - assert isinstance(exc_info.value, PerfException) - else: - assert test_case["expected_output"] in test.run() - - @given(name=text(), arguments=dictionaries(text(), text())) def test_fuzz_container_run(name, arguments): "Fuzz container_run()." diff --git a/test-runner/utils/test.py b/test-runner/utils/test.py index 72f41efbd..41c2798f5 100644 --- a/test-runner/utils/test.py +++ b/test-runner/utils/test.py @@ -21,14 +21,9 @@ from subprocess import PIPE, Popen from typing import Dict, List, Optional -import pint from expandvars import expandvars -from git import Repo from pydantic import BaseModel from python_on_whales import DockerException, docker -from yaml import YAMLError, full_load - -units = pint.UnitRegistry() class PerfException(Exception): @@ -67,28 +62,12 @@ class Test(BaseModel): groups_add: Optional[List[str]] = ["109", "44"] hostname: Optional[str] = None ipc: Optional[str] = None - performance: Optional[str] = None privileged: Optional[bool] = False pull: Optional[str] = "missing" user: Optional[str] = None shm_size: Optional[str] = None workdir: Optional[str] = None - def __init__(self, **data): - super().__init__(**data) - if self.performance: - perf_repo = os.environ.get("PERF_REPO") - if perf_repo: - if not os.path.exists("models-perf"): - Repo.clone_from( - f"https://github.com/{perf_repo}", "models-perf", progress=None - ) - else: - logging.error( - "Performance mode enabled, but PERF_REPO environment variable not set" - ) - units.load_definitions("./models-perf/definitions.txt") - def get_path(self, name): """Given a filename, find that file from the users current working directory @@ -187,16 +166,19 @@ def notebook_run(self, img: str): docker.run(img, ["which", "papermill"]) except DockerException as papermill_not_found: logging.error("Papermill not found: %s", papermill_not_found) + default_env = { + "BASE_IMAGE_NAME": img.split(":")[0], + "BASE_IMAGE_TAG": img.split(":")[1], + } + if "http_proxy" in os.environ: + default_env["http_proxy"] = 
os.environ.get("http_proxy") + if "https_proxy" in os.environ: + default_env["https_proxy"] = os.environ.get("https_proxy") docker.build( # context path ".", # Image Input and Proxy Args - build_args={ - "BASE_IMAGE_NAME": img.split(":")[0], - "BASE_IMAGE_TAG": img.split(":")[1], - "http_proxy": os.environ.get("http_proxy"), - "https_proxy": os.environ.get("https_proxy"), - }, + build_args=default_env, # Input File file=self.get_path("Dockerfile.notebook"), # Output Tag = Input Tag @@ -205,54 +187,6 @@ def notebook_run(self, img: str): load=True, ) - def check_perf(self, content): - """ - Check the performance of the test against the thresholds. - - Args: - content (str): test output log - - Raises: - PerfException: if the performance does not meet the target performance - """ - with open( - f"models-perf/{self.performance.split(':')[0]}", "r", encoding="utf-8" - ) as file: - try: - thresholds = full_load(file) - except YAMLError as yaml_exc: - raise YAMLError(yaml_exc) - model_thresholds = [ - threshold - for threshold in thresholds - if self.performance.split(":")[1] == threshold["test_id"] - ] - for threshold in model_thresholds: - perf = re.search( - rf"{threshold['key']}[:]?\s+(.\d+[\s]?.*)", - content, - re.IGNORECASE, - ) - if perf: - if threshold["lower_is_better"]: - if units.Quantity(perf.group(1)) > units.Quantity( - f"{threshold['boundary']} {threshold['unit']}" - ): - if not self.mask: - logging.info("%s: %s", threshold["key"], perf.group(1)) - raise PerfException( - f"Performance Threshold {threshold['name']} did not meet the target performance." - ) - else: - if units.Quantity(perf.group(1)) < units.Quantity( - f"{threshold['boundary']} {threshold['unit']}" - ): - if not self.mask: - logging.info("%s: %s", threshold["key"], perf.group(1)) - raise PerfException( - f"Performance Threshold {threshold['name']} did not meet the target performance." - ) - def container_run(self): """Runs the docker container. 
@@ -269,11 +203,13 @@ def container_run(self): env = ( {key: expandvars(val) for key, val in self.env.items()} if self.env else {} ) - default_env = { - "http_proxy": os.environ.get("http_proxy"), - "https_proxy": os.environ.get("https_proxy"), - "no_proxy": os.environ.get("no_proxy"), - } + default_env = {} + if "http_proxy" in os.environ: + default_env["http_proxy"] = os.environ.get("http_proxy") + if "https_proxy" in os.environ: + default_env["https_proxy"] = os.environ.get("https_proxy") + if "no_proxy" in os.environ: + default_env["no_proxy"] = os.environ.get("no_proxy") # Always add proxies to the envs list env.update(default_env) img = expandvars(self.img, nounset=True) @@ -317,8 +253,6 @@ def container_run(self): log = "" for _, stream_content in output_generator: # All process logs will have the stream_type of stderr despite it being stdout - if self.performance: - self.check_perf(stream_content.decode("utf-8")) for item in self.mask: stream_content = re.sub( rf"({item}[:]?\s+)(.*)", @@ -355,8 +289,6 @@ def run(self): ) try: stdout, stderr = p.communicate() - if self.performance: - self.check_perf(stdout.decode("utf-8")) for item in self.mask: stdout = re.sub( rf"({item}[:]?\s+)(.*)", r"\1***", stdout.decode("utf-8") diff --git a/tox.ini b/tox.ini index 66af473bf..e527d325b 100644 --- a/tox.ini +++ b/tox.ini @@ -6,33 +6,22 @@ source = test-runner/* [testenv] +passenv = GITHUB_*, DOCKER_* deps = -r test-runner/dev-requirements.txt commands = - python -m coverage run -p -m pytest test-runner/tests/utest.py -W ignore::UserWarning + python -m coverage run -m pytest test-runner/tests/utest.py -W ignore::UserWarning + coveralls pythonpath = tests -passenv = DOCKER_* setenv = CACHE_REGISTRY = {env:CACHE_REGISTRY} + COVERALLS_REPO_TOKEN= {env:COVERALLS_REPO_TOKEN} PATH = {env:PATH}:/usr/local/bin/docker - PERF_REPO = {env:PERF_REPO} PWD = {env:PWD} REGISTRY = {env:REGISTRY} REPO = {env:REPO} log_cli = false -[testenv:coverage] -commands = - python -m coverage combine - python -m coverage report -i -m -depends = - 3.8 - 3.8 - 3.9 - 3.10 - 3.11 - 3.12 - [coverage:report] exclude_lines = if __name__ == "__main__": @@ -57,5 +46,5 @@ parallel_show_output = true [testenv:clean] allowlist_externals=/bin/bash commands = - /bin/bash -c "rm -rf .coverage* models-perf" + /bin/bash -c "rm -rf .coverage" ignore_errors = True From 7758dead54eb597b9f0f107fb350783621184afb Mon Sep 17 00:00:00 2001 From: ma-pineda Date: Fri, 28 Jun 2024 20:13:29 -0700 Subject: [PATCH 5/7] fix deep-learning documentation Signed-off-by: ma-pineda --- preset/deep-learning/docker-compose.yaml | 38 +++++++++++++----------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/preset/deep-learning/docker-compose.yaml b/preset/deep-learning/docker-compose.yaml index b9490f94d..fee40eff5 100644 --- a/preset/deep-learning/docker-compose.yaml +++ b/preset/deep-learning/docker-compose.yaml @@ -54,6 +54,23 @@ services: https_proxy: ${https_proxy} no_proxy: '' context: . 
+ labels: + docs: false + target: deep-learning-jupyter + command: | + bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ + conda run -n tensorflow-cpu python -c 'import tensorflow as tf; print(tf.__version__)'" + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + network_mode: host + shm_size: 12GB + volumes: + - /dev/dri/by-path:/dev/dri/by-path + + + deep-learning: + build: labels: docs: deep_learning org.opencontainers.image.title: "Intel® AI Tools Selector Preset Containers - Deep Learning" @@ -167,20 +184,11 @@ services: dependency.conda.tornado: '>=6.3.3' dependency.conda.tqdm: '>=4.66.2' dependency.conda.werkzeug: '>=2.2.3' - target: deep-learning-jupyter - command: | - bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ - conda run -n tensorflow-cpu python -c 'import tensorflow as tf; print(tf.__version__)'" - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - network_mode: host - shm_size: 12GB - volumes: - - /dev/dri/by-path:/dev/dri/by-path - deep-learning: - build: target: distributed-deep-learning + depends_on: + - dl-base + extends: dl-base + image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-2024.1.0-py${PYTHON_VERSION:-3.10} command: | bash -c "conda run -n pytorch-cpu python -c 'import torch;print(torch.__version__);import intel_extension_for_pytorch as ipex;print(ipex.__version__);' && \ conda run -n pytorch-cpu bash -c 'mpirun --version' && \ @@ -192,7 +200,3 @@ services: conda run -n tensorflow-gpu python -c 'from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())' && \ conda run -n tensorflow-gpu bash -c 'horovodrun --check-build && mpirun --version' && \ conda run -n tensorflow-gpu python -c 'import horovod.tensorflow as hvd;hvd.init();import horovod.tensorflow'" - depends_on: - - dl-base - extends: dl-base - image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-deep-learning-2024.1.0-py${PYTHON_VERSION:-3.10} From 6f7a125769fada277202b73bb3c4671fbf33bce8 Mon Sep 17 00:00:00 2001 From: Tyler Titsworth Date: Fri, 28 Jun 2024 21:09:28 -0700 Subject: [PATCH 6/7] Update docker-compose.yaml Signed-off-by: Tyler Titsworth --- preset/deep-learning/docker-compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/preset/deep-learning/docker-compose.yaml b/preset/deep-learning/docker-compose.yaml index fee40eff5..12de056fe 100644 --- a/preset/deep-learning/docker-compose.yaml +++ b/preset/deep-learning/docker-compose.yaml @@ -67,8 +67,6 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path - - deep-learning: build: labels: From a4ebaa5d0495209e479dc8f7057d4ee74d2d9f8d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 29 Jun 2024 04:09:40 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks --- preset/classical-ml/docker-compose.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/preset/classical-ml/docker-compose.yaml b/preset/classical-ml/docker-compose.yaml index f954d9c2d..a82fbd6d8 100644 --- a/preset/classical-ml/docker-compose.yaml +++ b/preset/classical-ml/docker-compose.yaml @@ -97,4 +97,3 @@ services: shm_size: 12GB volumes: - /dev/dri/by-path:/dev/dri/by-path -
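As a quick local sanity check on the pinned requirements files and relabeled compose services in this series, something like the following can be run from the repository root (a minimal sketch, assuming Docker Compose v2 and pip >= 22.2; the REGISTRY and REPO values are hypothetical placeholders):

```bash
# Hypothetical placeholder values for the variables the compose files interpolate.
export REGISTRY=ghcr.io/example REPO=ai-containers

# Confirm the relabeled compose files still parse.
for preset in classical-ml data-analytics deep-learning inference-optimization; do
    docker compose -f "preset/${preset}/docker-compose.yaml" config --quiet
done

# Resolve each new pinned requirements file without installing anything.
for req in preset/*/requirements.txt; do
    pip install --dry-run --no-deps -r "$req"
done
```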