@@ -1,10 +1,9 @@
 name: GPU - Run Integration Tests
 
 on:
-  push:
+  pull_request:
     branches:
       - main
-  pull_request:
   workflow_dispatch:
 
 concurrency:
@@ -57,42 +56,44 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v2
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
           python-version: 3.9
       - name: Install Python dependencies
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
         run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
       - name: Run Integration Tests
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: us-east-1
-        run: make integ-test
+        run: RUN_SLOW=True make integ-test
   tensorflow-integration-test:
     needs:
       - start-runner
-      - pytorch-integration-test
+      # - pytorch-integration-test
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     env:
       AWS_REGION: us-east-1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
           python-version: 3.9
       - name: Uninstall pytorch
         run: pip uninstall torch torchvision -y
       - name: Install Python dependencies
         run: pip install -e .[test,dev,tensorflow]
       - name: Build Docker
         run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
       - name: Run Integration Tests
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: us-east-1
-        run: make integ-test
+        run: RUN_SLOW=True make integ-test
 
   stop-runner:
     name: Stop self-hosted EC2 runner
     needs:
       - start-runner
-      - pytorch-integration-test
+      # - pytorch-integration-test
       - tensorflow-integration-test
     runs-on: ubuntu-latest
     env:
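Note: the only functional change to the test jobs is prefixing make integ-test with RUN_SLOW=True. That environment variable is what transformers' testing utilities read to decide whether @slow-marked tests actually execute. A simplified sketch of that gating, assuming the usual parse_flag_from_env helper from transformers.testing_utils (not the literal implementation):

# Simplified sketch of how transformers.testing_utils gates @slow tests
# via the RUN_SLOW environment variable.
import os
import unittest

def parse_flag_from_env(key: str, default: bool = False) -> bool:
    # Accept common truthy spellings: "1", "True", "yes", "on"
    return os.environ.get(key, str(default)).upper() in ("1", "TRUE", "YES", "ON")

_run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False)

def slow(test_case):
    """Skip the decorated test unless RUN_SLOW=True is set."""
    return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case)

tests/integ/test_container.py below imports the same _run_slow_tests flag to decide whether to run against the GPU image.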
4 changes: 2 additions & 2 deletions makefile
@@ -5,10 +5,10 @@ check_dirs := src
 # run tests
 
 unit-test:
-	python -m pytest -s -v ./tests/unit
+	python3 -m pytest -s -v ./tests/unit
 
 integ-test:
-	python -m pytest -s -v ./tests/integ/
+	python3 -m pytest -s -v ./tests/integ/
 
 # Check that source code meets quality standards
 
2 changes: 1 addition & 1 deletion setup.py
@@ -69,7 +69,7 @@
     install_requires=install_requires,
     extras_require=extras,
     entry_points={"console_scripts": "serve=sagemaker_huggingface_inference_toolkit.serving:main"},
-    python_requires=">=3.9.0",
+    python_requires=">=3.8.0",
     license="Apache License 2.0",
     classifiers=[
         "Development Status :: 5 - Production/Stable",
8 changes: 4 additions & 4 deletions src/huggingface_inference_toolkit/utils.py
@@ -6,8 +6,8 @@
 from typing import Optional, Union
 
 from huggingface_hub import HfApi
-from huggingface_hub._snapshot_download import _filter_repo_files
 from huggingface_hub.file_download import cached_download, hf_hub_url
+from huggingface_hub.utils import filter_repo_objects
 from transformers import pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
 from transformers.pipelines import Conversation, Pipeline
@@ -149,9 +149,9 @@ def _load_repository_from_hf(
         token=hf_hub_token,
     )
     # apply regex to filter out non-framework specific weights if args.framework is set
-    filtered_repo_files = _filter_repo_files(
-        repo_files=[f.rfilename for f in repo_info.siblings],
-        ignore_regex=ignore_regex,
+    filtered_repo_files = filter_repo_objects(
+        items=[f.rfilename for f in repo_info.siblings],
+        ignore_patterns=ignore_regex,
     )
 
     # iterate over all files and download them
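For reference, a minimal usage sketch of the replacement helper: filter_repo_objects is the public successor to the private _filter_repo_files, takes any iterable of items, filters with fnmatch-style allow/ignore patterns, and returns a generator (which is presumably why starlette_requirements.txt below pins huggingface_hub>=0.9.0):

# Minimal sketch of huggingface_hub.utils.filter_repo_objects usage.
from huggingface_hub.utils import filter_repo_objects

files = ["pytorch_model.bin", "tf_model.h5", "config.json", "vocab.txt"]

# Drop TensorFlow weights; wrap in list() since a generator is returned.
pt_only = list(filter_repo_objects(items=files, ignore_patterns=["*.h5"]))
print(pt_only)  # ['pytorch_model.bin', 'config.json', 'vocab.txt']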
2 changes: 1 addition & 1 deletion src/huggingface_inference_toolkit/webservice_starlette.py
@@ -22,7 +22,7 @@
 
 
 def config_logging(level=logging.INFO):
-    logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=logging.INFO)
+    logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", datefmt="", level=level)
     # disable uvicorn access logs to hide /health
     uvicorn_access = logging.getLogger("uvicorn.access")
     uvicorn_access.disabled = True
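This one-word change is a genuine bug fix: the level parameter was previously shadowed by the hardcoded logging.INFO, so callers could never change verbosity. With the fix, for example:

# Before the fix this still logged at INFO; now DEBUG records appear.
config_logging(level=logging.DEBUG)
logging.debug("only visible once the level argument is honored")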
3 changes: 2 additions & 1 deletion starlette_requirements.txt
@@ -1,4 +1,5 @@
 orjson
 starlette
 uvicorn
-pandas
+pandas
+huggingface_hub>=0.9.0
4 changes: 2 additions & 2 deletions tests/integ/config.py
@@ -47,11 +47,11 @@
     },
     "translation_xx_to_yy": {
         "pytorch": "hf-internal-testing/tiny-random-t5",
-        "tensorflow": "hf-internal-testing/tiny-random-t5",
+        "tensorflow": "hf-internal-testing/tiny-random-marian",
     },
     "text2text-generation": {
         "pytorch": "hf-internal-testing/tiny-random-t5",
-        "tensorflow": "hf-internal-testing/tiny-random-t5",
+        "tensorflow": "hf-internal-testing/tiny-random-bart",
     },
     "text-generation": {
         "pytorch": "hf-internal-testing/tiny-random-gpt2",
44 changes: 27 additions & 17 deletions tests/integ/test_container.py
@@ -6,14 +6,17 @@
 import pytest
 import requests
 from docker.client import DockerClient
-from huggingface_inference_toolkit.utils import _load_repository_from_hf
+from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf
 from integ.config import task2input, task2model, task2output, task2validation
-from transformers.testing_utils import require_torch, slow, require_tf
+from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests
 
+IS_GPU = _run_slow_tests
+DEVICE = "gpu" if IS_GPU else "cpu"
+
 client = docker.from_env()
 
 
 def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str):
     try:
         previous = client.containers.get(container_name)
@@ -90,12 +93,14 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework:
         "sentence-ranking",
     ],
 )
-def test_pt_cpu_container_remote_model(task) -> None:
+def test_pt_container_remote_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "pytorch"
     model = task2model[task][framework]
     port = random.randint(5000, 6000)
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+
     make_sure_other_containers_are_stopped(client, container_name)
     container = client.containers.run(
         container_image,
@@ -104,7 +109,7 @@ def test_pt_cpu_container_remote_model(task) -> None:
         environment={"HF_MODEL_ID": model, "HF_TASK": task},
         detach=True,
         # GPU
-        # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        device_requests=device_request,
     )
     # time.sleep(5)
     verify_task(container, task, port)
@@ -141,12 +146,13 @@ def test_pt_cpu_container_remote_model(task) -> None:
         "sentence-ranking",
     ],
 )
-def test_pt_cpu_container_local_model(task) -> None:
+def test_pt_container_local_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "pytorch"
     model = task2model[task][framework]
     port = random.randint(5000, 6000)
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
@@ -159,7 +165,7 @@ def test_pt_cpu_container_local_model(task) -> None:
             volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
         )
         # time.sleep(5)
         verify_task(container, task, port)
@@ -172,9 +178,11 @@ def test_pt_cpu_container_local_model(task) -> None:
     "repository_id",
     ["philschmid/custom-pipeline-text-classification"],
 )
-def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
+def test_pt_container_custom_pipeline(repository_id) -> None:
     container_name = "integration-test-custom"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
@@ -189,7 +197,7 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
             volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
         )
         BASE_URL = "http://localhost:5000"
         wait_for_container_to_be_ready(BASE_URL)
@@ -230,11 +238,12 @@ def test_pt_cpu_container_custom_pipeline(repository_id) -> None:
         "sentence-ranking",
     ],
 )
-def test_tf_cpu_container_remote_model(task) -> None:
+def test_tf_container_remote_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "tensorflow"
     model = task2model[task][framework]
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     if model is None:
         pytest.skip("no supported TF model")
     port = random.randint(5000, 6000)
@@ -246,7 +255,7 @@ def test_tf_cpu_container_remote_model(task) -> None:
         environment={"HF_MODEL_ID": model, "HF_TASK": task},
         detach=True,
         # GPU
-        # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        device_requests=device_request,
     )
     # time.sleep(5)
     verify_task(container, task, port)
@@ -283,11 +292,12 @@ def test_tf_cpu_container_remote_model(task) -> None:
         "sentence-ranking",
     ],
 )
-def test_tf_cpu_container_local_model(task) -> None:
+def test_tf_container_local_model(task) -> None:
     container_name = f"integration-test-{task}"
-    container_image = "starlette-transformers:cpu"
+    container_image = f"starlette-transformers:{DEVICE}"
     framework = "tensorflow"
     model = task2model[task][framework]
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
     if model is None:
         pytest.skip("no supported TF model")
     port = random.randint(5000, 6000)
@@ -303,7 +313,7 @@ def test_tf_cpu_container_local_model(task) -> None:
             volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}},
             detach=True,
             # GPU
-            # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])]
+            device_requests=device_request,
        )
         # time.sleep(5)
         verify_task(container, task, port)
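The recurring device_request line is what lets each test body serve both images: an empty device_requests list is a no-op for the :cpu image, while DeviceRequest(count=-1, capabilities=[["gpu"]]) exposes every host GPU to the :gpu image. A standalone sketch of the pattern, with use_gpu standing in for the _run_slow_tests flag:

# Sketch of the GPU/CPU container gating used in the tests above.
import docker

client = docker.from_env()
use_gpu = True  # the tests derive this from transformers' _run_slow_tests

# count=-1 requests all available GPUs; an empty list requests none.
device_request = (
    [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if use_gpu else []
)

container = client.containers.run(
    f"starlette-transformers:{'gpu' if use_gpu else 'cpu'}",
    detach=True,
    device_requests=device_request,
)
container.stop()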