diff --git a/.github/workflows/gpu-integ-test.xxx b/.github/workflows/gpu-integ-test.yaml
similarity index 88%
rename from .github/workflows/gpu-integ-test.xxx
rename to .github/workflows/gpu-integ-test.yaml
index bf981e1f..bf2eaf99 100644
--- a/.github/workflows/gpu-integ-test.xxx
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -1,10 +1,9 @@
 name: GPU - Run Integration Tests
 
 on:
-  push:
+  pull_request:
     branches:
       - main
-  pull_request:
   workflow_dispatch:
 
 concurrency:
@@ -57,42 +56,44 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
       - name: Install Python dependencies
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
         run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
       - name: Run Integration Tests
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: us-east-1
-        run: make integ-test
+        run: RUN_SLOW=True make integ-test
   tensorflow-integration-test:
     needs:
       - start-runner
-      - pytorch-integration-test
+      # - pytorch-integration-test
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     env:
      AWS_REGION: us-east-1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Uninstall pytorch
+        run: pip uninstall torch torchvision -y
       - name: Install Python dependencies
         run: pip install -e .[test,dev,tensorflow]
       - name: Build Docker
         run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
       - name: Run Integration Tests
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: us-east-1
-        run: make integ-test
+        run: RUN_SLOW=True make integ-test
   stop-runner:
     name: Stop self-hosted EC2 runner
     needs:
       - start-runner
-      - pytorch-integration-test
+      # - pytorch-integration-test
       - tensorflow-integration-test
     runs-on: ubuntu-latest
     env:
diff --git a/makefile b/makefile
index 6a632d3a..fec9bc02 100644
--- a/makefile
+++ b/makefile
@@ -5,10 +5,10 @@ check_dirs := src
 
 # run tests
 
 unit-test:
-	python -m pytest -s -v ./tests/unit
+	python3 -m pytest -s -v ./tests/unit
 
 integ-test:
-	python -m pytest -s -v ./tests/integ/
+	python3 -m pytest -s -v ./tests/integ/
 
 # Check that source code meets quality standards
diff --git a/setup.py b/setup.py
index efcffed0..9200e5e8 100644
--- a/setup.py
+++ b/setup.py
@@ -69,7 +69,7 @@
     install_requires=install_requires,
     extras_require=extras,
     entry_points={"console_scripts": "serve=sagemaker_huggingface_inference_toolkit.serving:main"},
-    python_requires=">=3.9.0",
+    python_requires=">=3.8.0",
     license="Apache License 2.0",
     classifiers=[
         "Development Status :: 5 - Production/Stable",
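Note on the `RUN_SLOW=True make integ-test` invocations above: the integration tests are gated with the slow-test marker from `transformers.testing_utils` (imported in `tests/integ/test_container.py` below), which only runs `@slow`-decorated tests when the `RUN_SLOW` environment variable is set. A minimal sketch of that kind of env-flag gate, as an illustration of the pattern rather than transformers' exact implementation:

```python
# Illustrative sketch of a RUN_SLOW-style gate; the real logic lives in
# transformers.testing_utils (_run_slow_tests / slow). Pattern only.
import os
import unittest

_run_slow_tests = os.environ.get("RUN_SLOW", "0").lower() in ("1", "true", "yes")

def slow(test_case):
    # Skip the decorated test unless slow tests were explicitly enabled.
    return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case)
```

Because the flag is exported for the whole `make integ-test` run on the GPU runner, every slow-marked integration test is included there.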
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 83f0bd5f..b8e6cf5e 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -6,8 +6,8 @@
 from typing import Optional, Union
 
 from huggingface_hub import HfApi
-from huggingface_hub._snapshot_download import _filter_repo_files
 from huggingface_hub.file_download import cached_download, hf_hub_url
+from huggingface_hub.utils import filter_repo_objects
 from transformers import pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
 from transformers.pipelines import Conversation, Pipeline
@@ -149,9 +149,9 @@ def _load_repository_from_hf(
         token=hf_hub_token,
     )
     # apply regex to filter out non-framework specific weights if args.framework is set
-    filtered_repo_files = _filter_repo_files(
-        repo_files=[f.rfilename for f in repo_info.siblings],
-        ignore_regex=ignore_regex,
+    filtered_repo_files = filter_repo_objects(
+        items=[f.rfilename for f in repo_info.siblings],
+        ignore_patterns=ignore_regex,
     )
 
     # iterate over all files and download them
diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py
index e999358b..933534b7 100644
--- a/src/huggingface_inference_toolkit/webservice_starlette.py
+++ b/src/huggingface_inference_toolkit/webservice_starlette.py
@@ -22,7 +22,7 @@
 
 
 def config_logging(level=logging.INFO):
-    logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=logging.INFO)
+    logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=level)
     # disable uvicorn access logs to hide /health
     uvicorn_access = logging.getLogger("uvicorn.access")
     uvicorn_access.disabled = True
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
index 041003dd..e07d24d8 100644
--- a/starlette_requirements.txt
+++ b/starlette_requirements.txt
@@ -1,4 +1,5 @@
 orjson
 starlette
 uvicorn
-pandas
\ No newline at end of file
+pandas
+huggingface_hub>=0.9.0
\ No newline at end of file
diff --git a/tests/integ/config.py b/tests/integ/config.py
index cad44af2..8a9b5610 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -47,11 +47,11 @@
     },
     "translation_xx_to_yy": {
         "pytorch": "hf-internal-testing/tiny-random-t5",
-        "tensorflow": "hf-internal-testing/tiny-random-t5",
+        "tensorflow": "hf-internal-testing/tiny-random-marian",
     },
     "text2text-generation": {
         "pytorch": "hf-internal-testing/tiny-random-t5",
-        "tensorflow": "hf-internal-testing/tiny-random-t5",
+        "tensorflow": "hf-internal-testing/tiny-random-bart",
     },
     "text-generation": {
         "pytorch": "hf-internal-testing/tiny-random-gpt2",
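For context on the `huggingface_hub` change above: `_filter_repo_files` was a private helper that newer `huggingface_hub` releases no longer expose, and `filter_repo_objects` from `huggingface_hub.utils` is the public replacement (hence the `huggingface_hub>=0.9.0` pin). A minimal sketch of the new call, with made-up file names and ignore patterns:

```python
# Sketch of the filter_repo_objects call used in _load_repository_from_hf;
# the file list and pattern below are examples, not taken from the repo.
from huggingface_hub.utils import filter_repo_objects

repo_files = ["pytorch_model.bin", "tf_model.h5", "config.json"]

# Yields every item that does not match the ignore patterns, e.g. drop the
# TensorFlow weights when only the PyTorch framework was requested.
filtered = list(filter_repo_objects(items=repo_files, ignore_patterns=["*.h5"]))
print(filtered)  # ['pytorch_model.bin', 'config.json']
```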
diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index ff30bab7..a0415923 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -6,14 +6,17 @@
 import pytest
 import requests
 from docker.client import DockerClient
-from huggingface_inference_toolkit.utils import _load_repository_from_hf
+from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf
 from integ.config import task2input, task2model, task2output, task2validation
-from transformers.testing_utils import require_torch, slow, require_tf
+from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests
 
+IS_GPU = _run_slow_tests
+DEVICE = "gpu" if IS_GPU else "cpu"
 
 client = docker.from_env()
 
 
+
 def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str):
     try:
         previous = client.containers.get(container_name)
@@ -90,12 +93,14 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework:
         "sentence-ranking",
     ],
 )
-def test_pt_cpu_container_remote_model(task) -> None:
+def test_pt_container_remote_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "pytorch"
     model = task2model[task][framework]
     port = random.randint(5000, 6000)
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+
     make_sure_other_containers_are_stopped(client, container_name)
     container = client.containers.run(
         container_image,
@@ -104,7 +109,7 @@ def test_pt_cpu_container_remote_model(task) -> None:
         environment={"HF_MODEL_ID": model, "HF_TASK": task},
         detach=True,
         # GPU
-        # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        device_requests=device_request,
     )
     # time.sleep(5)
     verify_task(container, task, port)
@@ -141,12 +146,13 @@ def test_pt_cpu_container_remote_model(task) -> None:
         "sentence-ranking",
     ],
 )
-def test_pt_cpu_container_local_model(task) -> None:
+def test_pt_container_local_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "pytorch"
     model = task2model[task][framework]
     port = random.randint(5000, 6000)
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
@@ -159,7 +165,7 @@ def test_pt_cpu_container_local_model(task) -> None:
             volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
         )
         # time.sleep(5)
         verify_task(container, task, port)
@@ -172,9 +178,11 @@ def test_pt_cpu_container_local_model(task) -> None:
     "repository_id",
     ["philschmid/custom-pipeline-text-classification"],
 )
-def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
+def test_pt_container_custom_pipeline(repository_id) -> None:
     container_name = "integration-test-custom"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
@@ -189,7 +197,7 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
             volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
         )
         BASE_URL = "http://localhost:5000"
         wait_for_container_to_be_ready(BASE_URL)
@@ -230,11 +238,12 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
         "sentence-ranking",
     ],
 )
-def test_tf_cpu_container_remote_model(task) -> None:
+def test_tf_container_remote_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "tensorflow"
     model = task2model[task][framework]
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     if model is None:
         pytest.skip("no supported TF model")
     port = random.randint(5000, 6000)
@@ -246,7 +255,7 @@ def test_tf_cpu_container_remote_model(task) -> None:
         environment={"HF_MODEL_ID": model, "HF_TASK": task},
         detach=True,
         # GPU
-        # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        device_requests=device_request,
     )
     # time.sleep(5)
     verify_task(container, task, port)
@@ -283,11 +292,12 @@ def test_tf_cpu_container_remote_model(task) -> None:
         "sentence-ranking",
     ],
 )
-def test_tf_cpu_container_local_model(task) -> None:
+def test_tf_container_local_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "tensorflow"
     model = task2model[task][framework]
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     if model is None:
         pytest.skip("no supported TF model")
     port = random.randint(5000, 6000)
@@ -303,7 +313,7 @@ def test_tf_cpu_container_local_model(task) -> None:
             volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
         )
         # time.sleep(5)
         verify_task(container, task, port)
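The `device_requests` plumbing above is how the tests hand the host GPUs to the containers through docker-py when `IS_GPU` is set; passing an empty list (the CPU branch) leaves the container without GPU access, so the same test code covers both the `:cpu` and `:gpu` images. A standalone sketch of the same pattern, assuming the NVIDIA container runtime is installed on the host (the CUDA image tag is only illustrative):

```python
# Sketch of requesting all host GPUs for a container via docker-py, mirroring
# the device_requests usage in the tests above. The image is a placeholder.
import docker

client = docker.from_env()
device_request = docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])  # count=-1 -> all GPUs

container = client.containers.run(
    "nvidia/cuda:11.8.0-base-ubuntu22.04",  # placeholder CUDA image
    "nvidia-smi",
    device_requests=[device_request],       # roughly `docker run --gpus all`
    detach=True,
)
container.wait()
print(container.logs().decode())
container.remove()
```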