From 4adb5581403e08d394c5a6ded5c31eae28feb570 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 06:43:15 +0000 Subject: [PATCH 01/12] add gpu tests and pipeline --- ...gpu-integ-test.xxx => gpu-integ-test.yaml} | 15 ++----- .../webservice_starlette.py | 2 +- tests/integ/test_container.py | 44 ++++++++++++------- 3 files changed, 31 insertions(+), 30 deletions(-) rename .github/workflows/{gpu-integ-test.xxx => gpu-integ-test.yaml} (89%) diff --git a/.github/workflows/gpu-integ-test.xxx b/.github/workflows/gpu-integ-test.yaml similarity index 89% rename from .github/workflows/gpu-integ-test.xxx rename to .github/workflows/gpu-integ-test.yaml index bf981e1f..63a7a2e8 100644 --- a/.github/workflows/gpu-integ-test.xxx +++ b/.github/workflows/gpu-integ-test.yaml @@ -1,10 +1,9 @@ name: GPU - Run Integration Tests on: - push: + pull_request: branches: - main - pull_request: workflow_dispatch: concurrency: @@ -62,11 +61,7 @@ jobs: - name: Build Docker run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test + run: RUN_SLOW=True make integ-test tensorflow-integration-test: needs: - start-runner @@ -82,11 +77,7 @@ jobs: - name: Build Docker run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu . 
- name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test + run: RUN_SLOW=True make integ-test stop-runner: name: Stop self-hosted EC2 runner diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index e999358b..933534b7 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -22,7 +22,7 @@ def config_logging(level=logging.INFO): - logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=logging.INFO) + logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=level) # disable uvicorn access logs to hide /health uvicorn_access = logging.getLogger("uvicorn.access") uvicorn_access.disabled = True diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index ff30bab7..b51c4ddc 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -6,12 +6,15 @@ import pytest import requests from docker.client import DockerClient -from huggingface_inference_toolkit.utils import _load_repository_from_hf +from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf from integ.config import task2input, task2model, task2output, task2validation -from transformers.testing_utils import require_torch, slow, require_tf +from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests client = docker.from_env() +IS_GPU = _run_slow_tests and _is_gpu_available() +IS_GPU = True +DEVICE = "gpu" if IS_GPU else "cpu" def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): @@ -90,12 +93,14 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework: "sentence-ranking", ], ) -def 
test_pt_cpu_container_remote_model(task) -> None: +def test_pt_container_remote_model(task) -> None: container_name = f"integration-test-{task}" - container_image = "starlette-transformers:cpu" + container_image = f"starlette-transformers:{DEVICE}" framework = "pytorch" model = task2model[task][framework] port = random.randint(5000, 6000) + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + make_sure_other_containers_are_stopped(client, container_name) container = client.containers.run( container_image, @@ -104,7 +109,7 @@ def test_pt_cpu_container_remote_model(task) -> None: environment={"HF_MODEL_ID": model, "HF_TASK": task}, detach=True, # GPU - # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + device_requests=device_request, ) # time.sleep(5) verify_task(container, task, port) @@ -141,12 +146,13 @@ def test_pt_cpu_container_remote_model(task) -> None: "sentence-ranking", ], ) -def test_pt_cpu_container_local_model(task) -> None: +def test_pt_container_local_model(task) -> None: container_name = f"integration-test-{task}" - container_image = "starlette-transformers:cpu" + container_image = f"starlette-transformers:{DEVICE}" framework = "pytorch" model = task2model[task][framework] port = random.randint(5000, 6000) + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] make_sure_other_containers_are_stopped(client, container_name) with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py @@ -159,7 +165,7 @@ def test_pt_cpu_container_local_model(task) -> None: volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, detach=True, # GPU - # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + device_requests=device_request, ) # time.sleep(5) verify_task(container, task, port) @@ -172,9 +178,11 @@ def 
test_pt_cpu_container_local_model(task) -> None: "repository_id", ["philschmid/custom-pipeline-text-classification"], ) -def test_pt_cpu_container_custom_pipeline(repository_id) -> None: +def test_pt_container_custom_pipeline(repository_id) -> None: container_name = "integration-test-custom" - container_image = "starlette-transformers:cpu" + container_image = f"starlette-transformers:{DEVICE}" + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + make_sure_other_containers_are_stopped(client, container_name) with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py @@ -189,7 +197,7 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None: volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, detach=True, # GPU - # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + device_requests=device_request, ) BASE_URL = "http://localhost:5000" wait_for_container_to_be_ready(BASE_URL) @@ -230,11 +238,12 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None: "sentence-ranking", ], ) -def test_tf_cpu_container_remote_model(task) -> None: +def test_tf_container_remote_model(task) -> None: container_name = f"integration-test-{task}" - container_image = "starlette-transformers:cpu" + container_image = f"starlette-transformers:{DEVICE}" framework = "tensorflow" model = task2model[task][framework] + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] if model is None: pytest.skip("no supported TF model") port = random.randint(5000, 6000) @@ -246,7 +255,7 @@ def test_tf_cpu_container_remote_model(task) -> None: environment={"HF_MODEL_ID": model, "HF_TASK": task}, detach=True, # GPU - # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + device_requests=device_request, ) # time.sleep(5) verify_task(container, task, port) @@ -283,11 +292,12 
@@ def test_tf_cpu_container_remote_model(task) -> None: "sentence-ranking", ], ) -def test_tf_cpu_container_local_model(task) -> None: +def test_tf_container_local_model(task) -> None: container_name = f"integration-test-{task}" - container_image = "starlette-transformers:cpu" + container_image = f"starlette-transformers:{DEVICE}" framework = "tensorflow" model = task2model[task][framework] + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] if model is None: pytest.skip("no supported TF model") port = random.randint(5000, 6000) @@ -303,7 +313,7 @@ def test_tf_cpu_container_local_model(task) -> None: volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, detach=True, # GPU - # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + device_requests=device_request, ) # time.sleep(5) verify_task(container, task, port) From 874292e5280ceaa383c937cf2ae6213fd8afea35 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 06:48:53 +0000 Subject: [PATCH 02/12] fix tests --- setup.py | 2 +- tests/integ/test_container.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index efcffed0..36ae01d0 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ install_requires = [ # transformers - "transformers[sklearn,sentencepiece]>=4.20.1", + "transformers[sklearn,sentencepiece]==4.20.1", # api stuff "orjson", # "robyn", diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index b51c4ddc..180bb6d6 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -13,7 +13,6 @@ client = docker.from_env() IS_GPU = _run_slow_tests and _is_gpu_available() -IS_GPU = True DEVICE = "gpu" if IS_GPU else "cpu" From d36387c4ca8130f9074d0f6e377e2ca1ddfae81b Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 06:58:38 +0000 Subject: [PATCH 03/12] fix tests --- setup.py | 4 ++-- 
src/huggingface_inference_toolkit/utils.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 36ae01d0..9200e5e8 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ install_requires = [ # transformers - "transformers[sklearn,sentencepiece]==4.20.1", + "transformers[sklearn,sentencepiece]>=4.20.1", # api stuff "orjson", # "robyn", @@ -69,7 +69,7 @@ install_requires=install_requires, extras_require=extras, entry_points={"console_scripts": "serve=sagemaker_huggingface_inference_toolkit.serving:main"}, - python_requires=">=3.9.0", + python_requires=">=3.8.0", license="Apache License 2.0", classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 83f0bd5f..de71f7fb 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -6,7 +6,7 @@ from typing import Optional, Union from huggingface_hub import HfApi -from huggingface_hub._snapshot_download import _filter_repo_files +from huggingface_hub.utils import filter_repo_objects from huggingface_hub.file_download import cached_download, hf_hub_url from transformers import pipeline from transformers.file_utils import is_tf_available, is_torch_available @@ -149,9 +149,9 @@ def _load_repository_from_hf( token=hf_hub_token, ) # apply regex to filter out non-framework specific weights if args.framework is set - filtered_repo_files = _filter_repo_files( - repo_files=[f.rfilename for f in repo_info.siblings], - ignore_regex=ignore_regex, + filtered_repo_files = filter_repo_objects( + items=[f.rfilename for f in repo_info.siblings], + ignore_patterns=ignore_regex, ) # iterate over all files and download them From f5d18b81bb5208a7a0c80d92fa31c5fa5f541941 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:07:10 +0000 Subject: [PATCH 04/12] make quality --- src/huggingface_inference_toolkit/utils.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index de71f7fb..b8e6cf5e 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -6,8 +6,8 @@ from typing import Optional, Union from huggingface_hub import HfApi -from huggingface_hub.utils import filter_repo_objects from huggingface_hub.file_download import cached_download, hf_hub_url +from huggingface_hub.utils import filter_repo_objects from transformers import pipeline from transformers.file_utils import is_tf_available, is_torch_available from transformers.pipelines import Conversation, Pipeline From 29c7a6e2d7bf00881f7fc0ca2d18a67193835318 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:07:36 +0000 Subject: [PATCH 05/12] fixed tests --- makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index 6a632d3a..fec9bc02 100644 --- a/makefile +++ b/makefile @@ -5,10 +5,10 @@ check_dirs := src # run tests unit-test: - python -m pytest -s -v ./tests/unit + python3 -m pytest -s -v ./tests/unit integ-test: - python -m pytest -s -v ./tests/integ/ + python3 -m pytest -s -v ./tests/integ/ # Check that source code meets quality standards From 2b9b28d50ef9c02479de45c24c112063a835e9f3 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:17:07 +0000 Subject: [PATCH 06/12] update hub version --- starlette_requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/starlette_requirements.txt b/starlette_requirements.txt index 041003dd..e07d24d8 100644 --- a/starlette_requirements.txt +++ b/starlette_requirements.txt @@ -1,4 +1,5 @@ orjson starlette uvicorn -pandas \ No newline at end of file +pandas +huggingface_hub>=0.9.0 \ No newline at end of file From a253439dd6caca3f7b5c99f8f28d9b4882f798e1 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:30:37 +0000 
Subject: [PATCH 07/12] setup python --- .github/workflows/gpu-integ-test.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 63a7a2e8..79a3c876 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -56,6 +56,10 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 - name: Install Python dependencies run: pip install -e .[test,dev,torch] - name: Build Docker @@ -72,6 +76,10 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 - name: Install Python dependencies run: pip install -e .[test,dev,tensorflow] - name: Build Docker From 22eedb692a13766ce6681dead821de148209b28e Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:50:34 +0000 Subject: [PATCH 08/12] test TF --- .github/workflows/gpu-integ-test.yaml | 40 ++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 79a3c876..ba8fe4eb 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -48,28 +48,28 @@ jobs: {"Key": "Name", "Value": "ec2-optimum-github-runner"}, {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} ] - pytorch-integration-test: - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build 
-t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . - - name: Run Integration Tests - run: RUN_SLOW=True make integ-test + # pytorch-integration-test: + # needs: start-runner # required to start the main job when the runner is ready + # runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + # env: + # AWS_REGION: us-east-1 + # steps: + # - name: Checkout + # uses: actions/checkout@v2 + # - name: Set up Python 3.9 + # uses: actions/setup-python@v2 + # with: + # python-version: 3.9 + # - name: Install Python dependencies + # run: pip install -e .[test,dev,torch] + # - name: Build Docker + # run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . + # - name: Run Integration Tests + # run: RUN_SLOW=True make integ-test tensorflow-integration-test: needs: - start-runner - - pytorch-integration-test + # - pytorch-integration-test runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner env: AWS_REGION: us-east-1 @@ -80,6 +80,8 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.9 + - name: Uninstall pytorch + run: pip uninstall torch torchvision -y - name: Install Python dependencies run: pip install -e .[test,dev,tensorflow] - name: Build Docker From c9383c1ee24cd386b59d5105e990fcb1ae2a03db Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 07:51:17 +0000 Subject: [PATCH 09/12] fix --- .github/workflows/gpu-integ-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index ba8fe4eb..ca7e2cad 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -93,7 +93,7 @@ jobs: name: Stop self-hosted EC2 runner needs: - start-runner - - pytorch-integration-test + # - pytorch-integration-test - tensorflow-integration-test runs-on: ubuntu-latest env: From 
b1546d92bd02b7bc687c8d17538cd016c683979f Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 08:03:28 +0000 Subject: [PATCH 10/12] test tf again --- tests/integ/test_container.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index 180bb6d6..a0415923 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -10,10 +10,11 @@ from integ.config import task2input, task2model, task2output, task2validation from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests +IS_GPU = _run_slow_tests +DEVICE = "gpu" if IS_GPU else "cpu" client = docker.from_env() -IS_GPU = _run_slow_tests and _is_gpu_available() -DEVICE = "gpu" if IS_GPU else "cpu" + def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): From 6e6311073c43e5789a85521506c0079534874e83 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 11:00:53 +0000 Subject: [PATCH 11/12] changed models --- tests/integ/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integ/config.py b/tests/integ/config.py index cad44af2..8a9b5610 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -47,11 +47,11 @@ }, "translation_xx_to_yy": { "pytorch": "hf-internal-testing/tiny-random-t5", - "tensorflow": "hf-internal-testing/tiny-random-t5", + "tensorflow": "hf-internal-testing/tiny-random-marian", }, "text2text-generation": { "pytorch": "hf-internal-testing/tiny-random-t5", - "tensorflow": "hf-internal-testing/tiny-random-t5", + "tensorflow": "hf-internal-testing/tiny-random-bart", }, "text-generation": { "pytorch": "hf-internal-testing/tiny-random-gpt2", From 1911ce77aa615a3ac6f7f27f371b4baaa2e1c825 Mon Sep 17 00:00:00 2001 From: philschmid Date: Wed, 24 Aug 2022 11:19:48 +0000 Subject: [PATCH 12/12] added pt again --- .github/workflows/gpu-integ-test.yaml | 36 +++++++++++++-------------- 1 file
changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index ca7e2cad..bf2eaf99 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -48,24 +48,24 @@ jobs: {"Key": "Name", "Value": "ec2-optimum-github-runner"}, {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} ] - # pytorch-integration-test: - # needs: start-runner # required to start the main job when the runner is ready - # runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - # env: - # AWS_REGION: us-east-1 - # steps: - # - name: Checkout - # uses: actions/checkout@v2 - # - name: Set up Python 3.9 - # uses: actions/setup-python@v2 - # with: - # python-version: 3.9 - # - name: Install Python dependencies - # run: pip install -e .[test,dev,torch] - # - name: Build Docker - # run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . - # - name: Run Integration Tests - # run: RUN_SLOW=True make integ-test + pytorch-integration-test: + needs: start-runner # required to start the main job when the runner is ready + runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + env: + AWS_REGION: us-east-1 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Install Python dependencies + run: pip install -e .[test,dev,torch] + - name: Build Docker + run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . + - name: Run Integration Tests + run: RUN_SLOW=True make integ-test tensorflow-integration-test: needs: - start-runner