diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml index 24159c11..ec642af2 100644 --- a/.github/workflows/build-container.yaml +++ b/.github/workflows/build-container.yaml @@ -14,23 +14,43 @@ concurrency: cancel-in-progress: true jobs: - starlette-cpu: + starlette-pytorch-cpu: uses: ./.github/workflows/docker-build-action.yaml with: context: ./ repository: huggingface - image: hf-endpoints-inference-cpu - dockerfile: dockerfiles/starlette/Dockerfile.cpu + image: hf-endpoints-inference-pytorch-cpu + dockerfile: dockerfiles/starlette/pytorch/Dockerfile.cpu secrets: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} - starlette-gpu: + starlette-pytorch-gpu: uses: ./.github/workflows/docker-build-action.yaml with: context: ./ repository: huggingface - image: hf-endpoints-inference-gpu - dockerfile: dockerfiles/starlette/Dockerfile.gpu + image: hf-endpoints-inference-pytorch-gpu + dockerfile: dockerfiles/starlette/pytorch/Dockerfile.gpu + secrets: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + starlette-tensorflow-cpu: + uses: ./.github/workflows/docker-build-action.yaml + with: + context: ./ + repository: huggingface + image: hf-endpoints-inference-tensorflow-cpu + dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.cpu + secrets: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + starlette-tensorflow-gpu: + uses: ./.github/workflows/docker-build-action.yaml + with: + context: ./ + repository: huggingface + image: hf-endpoints-inference-tensorflow-gpu + dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.gpu secrets: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} diff --git a/.github/workflows/gpu-integ-test.xxx b/.github/workflows/gpu-integ-test.xxx new file mode 100644 index 00000000..bf981e1f --- /dev/null +++ b/.github/workflows/gpu-integ-test.xxx @@ -0,0 +1,114 @@ +name: GPU - Run Integration Tests + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + +jobs: + start-runner: + name: Start self-hosted EC2 runner + runs-on: ubuntu-latest + env: + AWS_REGION: us-east-1 + EC2_AMI_ID: ami-0dc1c26161f869ed1 + EC2_INSTANCE_TYPE: g4dn.xlarge + EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 + EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 + EC2_IAM_ROLE: optimum-ec2-github-actions-role + outputs: + label: ${{ steps.start-ec2-runner.outputs.label }} + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + - name: Start EC2 runner + id: start-ec2-runner + uses: philschmid/philschmid-ec2-github-runner@main + with: + mode: start + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + ec2-image-id: ${{ env.EC2_AMI_ID }} + ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} + subnet-id: ${{ env.EC2_SUBNET_ID }} + security-group-id: ${{ env.EC2_SECURITY_GROUP }} + iam-role-name: ${{ env.EC2_IAM_ROLE }} + aws-resource-tags: > # optional, requires additional permissions + [ + {"Key": "Name", "Value": 
"ec2-optimum-github-runner"}, + {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} + ] + pytorch-integration-test: + needs: start-runner # required to start the main job when the runner is ready + runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + env: + AWS_REGION: us-east-1 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Python dependencies + run: pip install -e .[test,dev,torch] + - name: Build Docker + run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . + - name: Run Integration Tests + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-1 + run: make integ-test + tensorflow-integration-test: + needs: + - start-runner + - pytorch-integration-test + runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + env: + AWS_REGION: us-east-1 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Python dependencies + run: pip install -e .[test,dev,tensorflow] + - name: Build Docker + run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu . + - name: Run Integration Tests + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-1 + run: make integ-test + + stop-runner: + name: Stop self-hosted EC2 runner + needs: + - start-runner + - pytorch-integration-test + - tensorflow-integration-test + runs-on: ubuntu-latest + env: + AWS_REGION: us-east-1 + if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + - name: Stop EC2 runner + uses: philschmid/philschmid-ec2-github-runner@main + with: + mode: stop + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + label: ${{ needs.start-runner.outputs.label }} + ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} \ No newline at end of file diff --git a/.github/workflows/integ-test.yml b/.github/workflows/integ-test.yaml similarity index 50% rename from .github/workflows/integ-test.yml rename to .github/workflows/integ-test.yaml index 61dadab1..e77d4a32 100644 --- a/.github/workflows/integ-test.yml +++ b/.github/workflows/integ-test.yaml @@ -1,4 +1,4 @@ -name: Run Integration Tests +name: CPU - Run Integration Tests on: push: @@ -13,7 +13,7 @@ concurrency: jobs: - test: + pytorch-integration-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -24,7 +24,25 @@ jobs: - name: Install Python dependencies run: pip install -e .[test,dev,torch] - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/Dockerfile.cpu . + run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu . 
+ - name: Run Integration Tests + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-1 + run: make integ-test + tensorflow-integration-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Install Python dependencies + run: pip install -e .[test,dev,tensorflow] + - name: Build Docker + run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu . - name: Run Integration Tests env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yaml similarity index 100% rename from .github/workflows/quality.yml rename to .github/workflows/quality.yaml diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yaml similarity index 52% rename from .github/workflows/unit-test.yml rename to .github/workflows/unit-test.yaml index 1bc17baf..f02f455f 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yaml @@ -12,7 +12,7 @@ concurrency: cancel-in-progress: true jobs: - test: + pytorch-unit-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -21,7 +21,7 @@ jobs: with: python-version: 3.9.12 - name: Install Python dependencies - run: pip install -e .[test,dev,torch] + run: pip install -e .[test,dev,torch,st] - name: Run Unit test_const run: python -m pytest -s -v ./tests/unit/test_const.py - name: Run Unit test_handler @@ -32,5 +32,25 @@ jobs: run: python -m pytest -s -v ./tests/unit/test_serializer.py - name: Run Unit test_utils run: python -m pytest -s -v ./tests/unit/test_utils.py - # - name: Run Unit Tests - # run: make unit test \ No newline at end of file + tensorflow-unit-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9.12 + - name: Install Python dependencies + run: pip install -e .[test,dev,tensorflow] + - name: Run Unit test_const + run: python -m pytest -s -v ./tests/unit/test_const.py + - name: Run Unit test_handler + run: python -m pytest -s -v ./tests/unit/test_handler.py + - name: Run Unit test_sentence_transformers + run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py + - name: Run Unit test_serializer + run: python -m pytest -s -v ./tests/unit/test_serializer.py + - name: Run Unit test_utils + run: python -m pytest -s -v ./tests/unit/test_utils.py + + diff --git a/README.md b/README.md index 22fde9b3..852be949 100644 --- a/README.md +++ b/README.md @@ -17,21 +17,21 @@ mkdir tmp2/ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000 ``` -HF_MODEL_ID=RobertoFont/pegasus-large-samsum HF_MODEL_DIR=tmp HF_TASK=text2text-generation uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000 - ### Container -1. build the preferred container for either CPU or GPU. +1. build the preferred container for either CPU or GPU for PyTorch or TensorFlow. _cpu images_ ```bash -docker build -t starlette-transformers:cpu -f dockerfiles/starlette/Dockerfile.cpu . +docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu . +docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu . 
``` _gpu images_ ```bash -docker build -t starlette-transformers:gpu -f dockerfiles/starlette/Dockerfile.gpu . +docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu . +docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu . ``` 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. diff --git a/dockerfiles/starlette/pytorch/Dockerfile.cpu b/dockerfiles/starlette/pytorch/Dockerfile.cpu new file mode 100644 index 00000000..877ea3a6 --- /dev/null +++ b/dockerfiles/starlette/pytorch/Dockerfile.cpu @@ -0,0 +1,16 @@ +FROM huggingface/transformers-inference:4.20.1-pt1.11-cpu + +# install starlette framework +COPY starlette_requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +# Think about a better solution -> base container has pt 1.11, that's why torchvision needs to stay below 0.13 +RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0" + +# copy application +COPY src/huggingface_inference_toolkit huggingface_inference_toolkit +COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py + +# run app +ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"] + +
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu new file mode 100644 index 00000000..6da7b455 --- /dev/null +++ b/dockerfiles/starlette/pytorch/Dockerfile.gpu @@ -0,0 +1,14 @@ +FROM huggingface/transformers-inference:4.20.1-pt1.11-cuda11.5 + +# install starlette framework +COPY starlette_requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +# Think about a better solution -> base container has pt 1.11, that's why torchvision needs to stay below 0.13 +RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0" + +# copy application +COPY src/huggingface_inference_toolkit huggingface_inference_toolkit +COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py + +# run app
+ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"] \ No newline at end of file diff --git a/dockerfiles/starlette/Dockerfile.cpu b/dockerfiles/starlette/tensorflow/Dockerfile.cpu similarity index 88% rename from dockerfiles/starlette/Dockerfile.cpu rename to dockerfiles/starlette/tensorflow/Dockerfile.cpu index ce09f056..d551c47d 100644 --- a/dockerfiles/starlette/Dockerfile.cpu +++ b/dockerfiles/starlette/tensorflow/Dockerfile.cpu @@ -1,4 +1,4 @@ -FROM huggingface/transformers-inference:4.20.1-pt1.11-cpu +FROM huggingface/transformers-inference:4.21.1-tf2.9-cpu # install starlette framework COPY starlette_requirements.txt /tmp/requirements.txt diff --git a/dockerfiles/starlette/Dockerfile.gpu b/dockerfiles/starlette/tensorflow/Dockerfile.gpu similarity index 87% rename from dockerfiles/starlette/Dockerfile.gpu rename to dockerfiles/starlette/tensorflow/Dockerfile.gpu index 5d96bee6..5ace9ec8 100644 --- a/dockerfiles/starlette/Dockerfile.gpu +++ b/dockerfiles/starlette/tensorflow/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM huggingface/transformers-inference:4.20.1-pt1.11-cuda11.5 +FROM huggingface/transformers-inference:4.21.1-tf2.9-cuda11.2 # install starlette framework COPY starlette_requirements.txt /tmp/requirements.txt diff --git a/setup.py b/setup.py index 40f4cd3d..efcffed0 100644 --- a/setup.py +++ b/setup.py @@ -26,13 +26,13 @@ "librosa", "pyctcdecode>=0.3.0", "phonemizer", - # sentence transformers - "sentence_transformers", - "torchvision<=0.12.0", ] extras = {} +extras["st"] = ["sentence_transformers"] + + # Hugging Face specific dependencies # framework specific dependencies extras["torch"] = ["torch>=1.8.0", "torchaudio"] diff --git a/src/huggingface_inference_toolkit/const.py b/src/huggingface_inference_toolkit/const.py index c73f02ac..958afcd9 100644 --- a/src/huggingface_inference_toolkit/const.py +++ b/src/huggingface_inference_toolkit/const.py @@ -5,7 +5,7 @@ HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model") HF_MODEL_ID = os.environ.get("HF_MODEL_ID", None) HF_TASK = os.environ.get("HF_TASK", None) -HF_FRAMEWORK = os.environ.get("HF_FRAMEWORK", "pytorch") +HF_FRAMEWORK = os.environ.get("HF_FRAMEWORK", None) HF_REVISION = os.environ.get("HF_REVISION", None) HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", None) # custom handler consts diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index b72c6816..a3f79ded 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -1,4 +1,15 @@ -from sentence_transformers import CrossEncoder, SentenceTransformer, util +import importlib.util + + +_sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None + + +def is_sentence_transformers_available(): + return _sentence_transformers + + +if is_sentence_transformers_available(): + from sentence_transformers import CrossEncoder, SentenceTransformer, util class SentenceSimilarityPipeline: diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index af6a984c..83f0bd5f 100644 ---
a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -13,7 +13,10 @@ from transformers.pipelines import Conversation, Pipeline from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME -from huggingface_inference_toolkit.sentence_transformers_utils import get_sentence_transformers_pipeline +from huggingface_inference_toolkit.sentence_transformers_utils import ( + get_sentence_transformers_pipeline, + is_sentence_transformers_available, +) logger = logging.getLogger(__name__) @@ -27,7 +30,6 @@ import torch _optimum_available = importlib.util.find_spec("optimum") is not None -_sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None def is_optimum_available(): @@ -36,10 +38,6 @@ def is_optimum_available(): # return _optimum_available -def is_sentence_transformers(): - return _sentence_transformers - - framework2weight = { "pytorch": "pytorch*", "tensorflow": "tf*", @@ -49,10 +47,13 @@ def is_sentence_transformers(): "rust": "rust*", "onnx": "*onnx", } -ignore_regex_list = ["pytorch*", "tf*", "flax*", "rust*", "*onnx"] def create_artifact_filter(framework): + """ + Returns a list of regex patterns based on the DL framework, which will be used to ignore files when downloading from the Hub + """ + ignore_regex_list = ["pytorch*", "tf*", "flax*", "rust*", "*onnx"] pattern = framework2weight.get(framework, None) if pattern in ignore_regex_list: ignore_regex_list.remove(pattern) @@ -62,6 +63,10 @@ def create_artifact_filter(framework): def wrap_conversation_pipeline(pipeline): + """ + Wrap a Conversation pipeline with a helper for a better UX when using the REST API + """ + def wrapped_pipeline(inputs, *args, **kwargs): converted_input = Conversation( inputs["text"], @@ -122,6 +127,9 @@ def _load_repository_from_hf( """ Load a model from huggingface hub. 
""" + if framework is None: + framework = _get_framework() + if isinstance(target_dir, str): target_dir = Path(target_dir) @@ -238,7 +246,11 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: # TODO: add check for optimum accelerated pipeline logger.info("Optimum is not implement yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) - elif is_sentence_transformers() and task in ["sentence-similarity", "sentence-embeddings", "sentence-ranking"]: + elif is_sentence_transformers_available() and task in [ + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + ]: hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) else: hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) diff --git a/starlette_requirements.txt b/starlette_requirements.txt index 1fe11c75..041003dd 100644 --- a/starlette_requirements.txt +++ b/starlette_requirements.txt @@ -1,6 +1,4 @@ orjson starlette uvicorn -pandas -sentence-transformers -torchvision==0.12.0 # needed for st and should be installed in the main one \ No newline at end of file +pandas \ No newline at end of file diff --git a/tests/integ/config.py b/tests/integ/config.py index 627e53a7..cad44af2 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -23,7 +23,7 @@ }, "zero-shot-classification": { "pytorch": "hf-internal-testing/tiny-random-bart", - "tensorflow": "hf-internal-testing/tiny-random-bart", + "tensorflow": "typeform/distilbert-base-uncased-mnli", }, "feature-extraction": { "pytorch": "hf-internal-testing/tiny-random-bert", @@ -63,7 +63,7 @@ }, "automatic-speech-recognition": { "pytorch": "hf-internal-testing/tiny-random-wav2vec2", - "tensorflow": "hf-internal-testing/tiny-random-wav2vec2", + "tensorflow": None, }, "audio-classification": { "pytorch": "hf-internal-testing/tiny-random-wavlm", diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index fb1406fe..ff30bab7 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -8,6 +8,7 @@ from docker.client import DockerClient from huggingface_inference_toolkit.utils import _load_repository_from_hf from integ.config import task2input, task2model, task2output, task2validation +from transformers.testing_utils import require_torch, slow, require_tf client = docker.from_env() @@ -60,6 +61,7 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework: assert task2validation[task](result=prediction, snapshot=task2output[task]) is True +@require_torch @pytest.mark.parametrize( "task", [ @@ -88,31 +90,29 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework: "sentence-ranking", ], ) -def test_cpu_container_remote_model(task) -> None: +def test_pt_cpu_container_remote_model(task) -> None: container_name = f"integration-test-{task}" container_image = "starlette-transformers:cpu" framework = "pytorch" model = task2model[task][framework] port = random.randint(5000, 6000) make_sure_other_containers_are_stopped(client, container_name) - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(model, tmpdirname, framework="pytorch") - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_ID": model, "HF_TASK": task}, - detach=True, - # GPU - # 
device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] - ) - # time.sleep(5) - verify_task(container, task, port) - container.stop() - container.remove() + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_ID": model, "HF_TASK": task}, + detach=True, + # GPU + # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() +@require_torch @pytest.mark.parametrize( "task", [ @@ -141,7 +141,7 @@ def test_cpu_container_remote_model(task) -> None: "sentence-ranking", ], ) -def test_cpu_container_local_model(task) -> None: +def test_pt_cpu_container_local_model(task) -> None: container_name = f"integration-test-{task}" container_image = "starlette-transformers:cpu" framework = "pytorch" @@ -167,11 +167,12 @@ def test_cpu_container_local_model(task) -> None: container.remove() +@require_torch @pytest.mark.parametrize( "repository_id", ["philschmid/custom-pipeline-text-classification"], ) -def test_cpu_container_custom_pipeline(repository_id) -> None: +def test_pt_cpu_container_custom_pipeline(repository_id) -> None: container_name = "integration-test-custom" container_image = "starlette-transformers:cpu" make_sure_other_containers_are_stopped(client, container_name) @@ -198,3 +199,147 @@ def test_cpu_container_custom_pipeline(repository_id) -> None: # time.sleep(5) container.stop() container.remove() + + +@require_tf +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + ], +) +def test_tf_cpu_container_remote_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = "starlette-transformers:cpu" + framework = "tensorflow" + model = task2model[task][framework] + if model is None: + pytest.skip("no supported TF model") + port = random.randint(5000, 6000) + make_sure_other_containers_are_stopped(client, container_name) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_ID": model, "HF_TASK": task}, + detach=True, + # GPU + # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() + + +@require_tf +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", 
+ "sentence-embeddings", + "sentence-ranking", + ], +) +def test_tf_cpu_container_local_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = "starlette-transformers:cpu" + framework = "tensorflow" + model = task2model[task][framework] + if model is None: + pytest.skip("no supported TF model") + port = random.randint(5000, 6000) + make_sure_other_containers_are_stopped(client, container_name) + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(model, tmpdirname, framework=framework) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() + + +# @require_tf +# @pytest.mark.parametrize( +# "repository_id", +# ["philschmid/custom-pipeline-text-classification"], +# ) +# def test_tf_cpu_container_custom_pipeline(repository_id) -> None: +# container_name = "integration-test-custom" +# container_image = "starlette-transformers:cpu" +# make_sure_other_containers_are_stopped(client, container_name) +# with tempfile.TemporaryDirectory() as tmpdirname: +# # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py +# storage_dir = _load_repository_from_hf("philschmid/custom-pipeline-text-classification", tmpdirname) +# container = client.containers.run( +# container_image, +# name=container_name, +# ports={"5000": "5000"}, +# environment={ +# "HF_MODEL_DIR": tmpdirname, +# }, +# volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, +# detach=True, +# # GPU +# # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] +# ) +# BASE_URL = "http://localhost:5000" +# wait_for_container_to_be_ready(BASE_URL) +# payload = {"inputs": "this is a test"} +# prediction = requests.post(f"{BASE_URL}", json=payload).json() +# assert prediction == payload +# # time.sleep(5) +# container.stop() +# container.remove() diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index d01d034d..9306cdc3 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,6 +1,6 @@ import tempfile -from transformers.testing_utils import require_torch, slow +from transformers.testing_utils import require_torch, slow, require_tf import pytest from huggingface_inference_toolkit.handler import ( @@ -8,7 +8,7 @@ get_inference_handler_either_custom_or_default_handler, ) -from huggingface_inference_toolkit.utils import _load_repository_from_hf +from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf TASK = "text-classification" @@ -17,7 +17,7 @@ @require_torch -def test_get_device(): +def test_pt_get_device(): import torch with tempfile.TemporaryDirectory() as tmpdirname: @@ -31,7 +31,7 @@ def test_get_device(): @require_torch -def test_predict_call(): +def test_pt_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") @@ -43,7 +43,7 @@ def test_predict_call(): @require_torch -def test_custom_pipeline(): +def 
test_pt_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" @@ -53,7 +53,7 @@ def test_custom_pipeline(): @require_torch -def test_sentence_transformers_pipeline(): +def test_pt_sentence_transformers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" @@ -61,3 +61,52 @@ def test_sentence_transformers_pipeline(): h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") pred = h(INPUT) assert isinstance(pred["embeddings"], list) + + +@require_tf +def test_tf_get_device(): + import tensorflow as tf + + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) + if _is_gpu_available(): + assert h.pipeline.device == 0 + else: + assert h.pipeline.device == -1 + + +@require_tf +def test_tf_predict_call(): + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) + + prediction = h(INPUT) + assert "label" in prediction[0] + assert "score" in prediction[0] + + +@require_tf +def test_tf_custom_pipeline(): + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf( + "philschmid/custom-pipeline-text-classification", tmpdirname, framework="tensorflow" + ) + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") + assert h(INPUT) == INPUT + + +@require_tf +def test_tf_sentence_transformers_pipeline(): + # TODO should fail! 
because TF is not supported yet + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf( + "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" + ) + with pytest.raises(Exception) as exc_info: + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") + + assert "Unknown task sentence-embeddings" in str(exc_info.value) diff --git a/tests/unit/test_sentence_transformers.py b/tests/unit/test_sentence_transformers.py index b2cf6d32..233da490 100644 --- a/tests/unit/test_sentence_transformers.py +++ b/tests/unit/test_sentence_transformers.py @@ -16,6 +16,7 @@ ) +@require_torch def test_get_sentence_transformers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( @@ -25,6 +26,7 @@ def test_get_sentence_transformers_pipeline(): assert isinstance(pipe, SentenceEmbeddingPipeline) +@require_torch def test_sentence_embedding_task(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( @@ -35,6 +37,7 @@ def test_sentence_embedding_task(): assert isinstance(res["embeddings"], list) +@require_torch def test_sentence_similarity(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( @@ -45,6 +48,7 @@ def test_sentence_similarity(): assert isinstance(res["similarities"], list) +@require_torch def test_sentence_ranking(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname, framework="pytorch") diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index feecd3bb..7ed732ef 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import tempfile from transformers import pipeline @@ -28,9 +29,6 @@ def test_load_revision_repository_from_hf(): storage_folder = _load_repository_from_hf(MODEL, tmpdirname, revision=REVISION) # folder contains all config files and pytorch_model.bin folder_contents = os.listdir(storage_folder) - assert "pytorch_model.bin" in folder_contents - # filter framework - assert "tf_model.h5" in folder_contents # revision doesn't have tokenizer assert "tokenizer_config.json" not in folder_contents @@ -39,7 +37,10 @@ def test_load_revision_repository_from_hf(): def test_load_tensorflow_repository_from_hf(): MODEL = "lysandre/tiny-bert-random" with tempfile.TemporaryDirectory() as tmpdirname: - storage_folder = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + tf_tmp = Path(tmpdirname).joinpath("tf") + tf_tmp.mkdir(parents=True, exist_ok=True) + + storage_folder = _load_repository_from_hf(MODEL, tf_tmp, framework="tensorflow") # folder contains all config files and pytorch_model.bin folder_contents = os.listdir(storage_folder) assert "pytorch_model.bin" not in folder_contents @@ -54,7 +55,10 @@ def test_load_tensorflow_repository_from_hf(): def test_load_onnx_repository_from_hf(): MODEL = "philschmid/distilbert-onnx-banking77" with tempfile.TemporaryDirectory() as tmpdirname: - storage_folder = _load_repository_from_hf(MODEL, tmpdirname, framework="onnx") + ox_tmp = Path(tmpdirname).joinpath("onnx") + ox_tmp.mkdir(parents=True, exist_ok=True) + + storage_folder = _load_repository_from_hf(MODEL, ox_tmp, framework="onnx") # folder contains all config files and pytorch_model.bin folder_contents = os.listdir(storage_folder) assert "pytorch_model.bin" not in folder_contents @@ -74,7 +78,10 
@@ def test_load_onnx_repository_from_hf(): def test_load_pytorch_repository_from_hf(): MODEL = "lysandre/tiny-bert-random" with tempfile.TemporaryDirectory() as tmpdirname: - storage_folder = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") + pt_tmp = Path(tmpdirname).joinpath("pt") + pt_tmp.mkdir(parents=True, exist_ok=True) + + storage_folder = _load_repository_from_hf(MODEL, pt_tmp, framework="pytorch") # folder contains all config files and pytorch_model.bin folder_contents = os.listdir(storage_folder) assert "pytorch_model.bin" in folder_contents @@ -107,6 +114,7 @@ def test_get_framework_tensorflow(): assert framework == "tensorflow" +@require_torch def test_get_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") @@ -154,7 +162,7 @@ def test_local_custom_pipeline(): pipeline = check_and_register_custom_pipeline_from_directory(model_dir) payload = "test" assert pipeline.path == model_dir - assert pipeline(payload) == payload[::-1] + assert pipeline(payload) == payload[::-1] def test_remote_custom_pipeline():