diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..2cb0b490 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.github +.pytest_cache +.ruff_cache +.tox +.venv +.gitignore +makefile +__pycache__ +tests +.vscode diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml index 031207c0..fe12fbf6 100644 --- a/.github/workflows/build-container.yaml +++ b/.github/workflows/build-container.yaml @@ -19,7 +19,8 @@ jobs: uses: ./.github/workflows/docker-build-action.yaml with: image: inference-pytorch-cpu - dockerfile: dockerfiles/pytorch/cpu/Dockerfile + dockerfile: dockerfiles/pytorch/Dockerfile + build_args: "BASE_IMAGE=ubuntu:22.04" secrets: TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }} REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} @@ -28,7 +29,7 @@ jobs: uses: ./.github/workflows/docker-build-action.yaml with: image: inference-pytorch-gpu - dockerfile: dockerfiles/pytorch/gpu/Dockerfile + dockerfile: dockerfiles/pytorch/Dockerfile secrets: TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }} REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml deleted file mode 100644 index ede153ea..00000000 --- a/.github/workflows/gpu-integ-test.yaml +++ /dev/null @@ -1,116 +0,0 @@ -name: GPU - Run Integration Tests - -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] - pytorch-integration-test: - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . 
- - name: Run Integration Tests - run: RUN_SLOW=True make integ-test - tensorflow-integration-test: - needs: - - start-runner - - pytorch-integration-test - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Uninstall pytorch - run: pip uninstall torch torchvision -y - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . - - name: Run Integration Tests - run: RUN_SLOW=True make integ-test - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - pytorch-integration-test - - tensorflow-integration-test - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} \ No newline at end of file diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml deleted file mode 100644 index f6f6bba0..00000000 --- a/.github/workflows/integ-test.yaml +++ /dev/null @@ -1,51 +0,0 @@ -name: CPU - Run Integration Tests - -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - pytorch-integration-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . - - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test - tensorflow-integration-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . 
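The per-workflow `docker build` / `make integ-test` steps being removed here are superseded by pytest fixtures that drive the freshly built images through the Docker SDK (see `tests/integ/conftest.py` later in this diff). Below is a minimal sketch of that container round-trip; the image tag, model id, task and port are illustrative assumptions rather than values taken from the CI configuration, while the `/health` endpoint and `integration-test-pytorch:*` tag follow the helpers and makefile in this PR.

```python
# Sketch of the container round-trip the new pytest fixtures perform.
import time

import docker
import requests

client = docker.from_env()
container = client.containers.run(
    "integration-test-pytorch:cpu",  # built via `make inference-pytorch-cpu`
    ports={"5000": 5000},            # expose the Starlette webservice
    environment={
        "HF_MODEL_ID": "hf-internal-testing/tiny-random-distilbert",  # illustrative
        "HF_TASK": "text-classification",                             # illustrative
    },
    detach=True,
)
try:
    # poll the health endpoint until the model is loaded
    for _ in range(30):
        try:
            if requests.get("http://localhost:5000/health").status_code == 200:
                break
        except requests.exceptions.ConnectionError:
            pass
        time.sleep(2)
    # send one inference request against the running container
    prediction = requests.post("http://localhost:5000", json={"inputs": "I like you."}).json()
    print(prediction)
finally:
    container.stop()
    container.remove()
```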
- - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test \ No newline at end of file diff --git a/.github/workflows/integration-test-action.yaml b/.github/workflows/integration-test-action.yaml new file mode 100644 index 00000000..2e3479fc --- /dev/null +++ b/.github/workflows/integration-test-action.yaml @@ -0,0 +1,69 @@ +on: + workflow_call: + inputs: + region: + type: string + required: false + default: "us-east-1" + hf_home: + required: false + type: string + default: "/mnt/hf_cache/" + hf_hub_cache: + required: false + type: string + default: "/mnt/hf_cache/hub" + run_slow: + required: false + type: string + default: "True" + test_path: + type: string + required: true + test_parallelism: + type: string + required: false + default: "4" + build_img_cmd: + type: string + required: false + default: "make inference-pytorch-gpu" + log_level: + type: string + required: false + default: "ERROR" + log_format: + type: string + required: false + default: "%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s" + runs_on: + type: string + required: false + default: '["single-gpu", "nvidia-gpu", "t4", "ci"]' + +jobs: + pytorch-integration-tests: + runs-on: ${{ fromJson(inputs.runs_on) }} + env: + AWS_REGION: ${{ inputs.region }} + HF_HOME: ${{ inputs.hf_home }} + HF_HUB_CACHE: ${{ inputs.hf_hub_cache }} + RUN_SLOW: ${{ inputs.run_slow }} + steps: + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: ${{ inputs.build_img_cmd }} + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install dependencies + run: pip install ".[torch, test]" + - name: Run local integration tests + run: | + python -m pytest \ + ${{ inputs.test_path }} -n ${{ inputs.test_parallelism }} \ + --log-cli-level='${{ inputs.log_level }}' \ + --log-format='${{ inputs.log_format }}' \ No newline at end of file diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml new file mode 100644 index 00000000..7aa1aa2f --- /dev/null +++ b/.github/workflows/integration-test.yaml @@ -0,0 +1,44 @@ +name: Run Integration Tests + +on: + push: + paths-ignore: + - 'README.md' + - '.github/workflows/unit-test.yaml' + - '.github/workflows/quality.yaml' + branches: + - main + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + pytorch-integration-local-gpu: + name: Local Integration Tests - GPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_local_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-gpu: + name: Remote Integration Tests - GPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_remote_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-cpu: + name: Remote Integration Tests - CPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_remote_cpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" + pytorch-integration-local-cpu: + name: Local Integration Tests - CPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: 
"tests/integ/test_pytorch_local_cpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" \ No newline at end of file diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 6c7e6c57..09929fde 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -2,6 +2,8 @@ name: Quality Check on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: @@ -16,10 +18,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.11 - name: Install Python dependencies run: pip install -e .[quality] - name: Run Quality check diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 599b8f7f..a15cca96 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -2,57 +2,47 @@ name: Run Unit-Tests on: push: + paths-ignore: + - 'README.md' branches: - - main + - main pull_request: workflow_dispatch: +env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: pytorch-unit-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9.12 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch,st] - - uses: FedericoCarboni/setup-ffmpeg@v2 - id: setup-ffmpeg - - name: Run Unit test_const - run: python -m pytest -s -v ./tests/unit/test_const.py - - name: Run Unit test_handler - run: python -m pytest -s -v ./tests/unit/test_handler.py - - name: Run Unit test_sentence_transformers - run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py - - name: Run Unit test_serializer - run: python -m pytest -s -v ./tests/unit/test_serializer.py - - name: Run Unit test_utils - run: python -m pytest -s -v ./tests/unit/test_utils.py - tensorflow-unit-test: - runs-on: ubuntu-latest + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 + CACHE_TEST_DIR: /mnt/hf_cache/hf-inference-toolkit-tests + RUN_SLOW: True steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9.12 - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Run Unit test_const - run: python -m pytest -s -v ./tests/unit/test_const.py - - name: Run Unit test_handler - run: python -m pytest -s -v ./tests/unit/test_handler.py - - name: Run Unit test_sentence_transformers - run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py - - name: Run Unit test_serializer - run: python -m pytest -s -v ./tests/unit/test_serializer.py - - name: Run Unit test_utils - run: python -m pytest -s -v ./tests/unit/test_utils.py - - + - uses: actions/checkout@v4.1.1 + - name: Copy unit tests to cache mount + run: | + rm -rf ${{ env.CACHE_TEST_DIR }} && \ + mkdir ${{ env.CACHE_TEST_DIR }} && \ + cp -r tests ${{ env.CACHE_TEST_DIR }} + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-gpu + - name: Run unit tests + run: | + docker run \ + -e RUN_SLOW='${{ env.RUN_SLOW }}' \ + --gpus all \ + -e CACHE_TEST_DIR='${{ env.CACHE_TEST_DIR }}' \ + -v ./tests:${{ env.CACHE_TEST_DIR }} \ + --entrypoint /bin/bash \ + integration-test-pytorch:gpu \ + -c "pip install 
'.[test, st, diffusers]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4042db87..bb0c387b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,15 @@ # Docker project generated files to ignore # if you want to ignore files created by your editor/tools, # please consider a global .gitignore https://help.github.com/articles/ignoring-files +.gitignore +.egg-info +.ruff_cache .vagrant* +.hcl +.terraform.lock.hcl +.terraform +pip-unpack-* +__pycache__ bin docker/docker .*.swp @@ -27,6 +35,9 @@ Vagrantfile __pycache__/ *.py[cod] *$py.class +.vscode +.make +tox.ini # C extensions *.so @@ -166,4 +177,6 @@ cython_debug/ .sagemaker model tests/tmp -tmp/ \ No newline at end of file +tmp/ +act.sh +.act \ No newline at end of file diff --git a/README.md b/README.md index fb469b1a..f2f66b40 100644 --- a/README.md +++ b/README.md @@ -24,25 +24,23 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK _cpu images_ ```bash -docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . -docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . +make inference-pytorch-cpu ``` _gpu images_ ```bash -docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . -docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . +make inference-pytorch-gpu ``` 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. ```bash -docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository starlette-transformers:cpu +docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering integration-test-pytorch:cpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository integration-test-pytorch:cpu ``` @@ -184,7 +182,17 @@ Below you ll find a list of supported and tested transformers and sentence trans --- ## 🤝 Contributing -TBD. 
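To complement the `docker run` commands above, here is a hedged sketch of how a client might query the running webservice. The question/context strings and the `sample.jpg` file are illustrative assumptions; the `{"inputs": ...}` payload shape and the `image/x-image` content type mirror the integration-test helpers added in this PR.

```python
import requests

base_url = "http://localhost:5000"  # port mapped by the `docker run` commands above

# JSON tasks post an {"inputs": ...} payload (question-answering shown here)
payload = {
    "inputs": {
        "question": "Where does the toolkit run?",
        "context": "The Hugging Face Inference Toolkit serves models inside a Docker container.",
    }
}
print(requests.post(base_url, json=payload).json())

# binary tasks (image/audio) send raw bytes with an explicit content type
with open("sample.jpg", "rb") as f:  # any local test image
    print(
        requests.post(
            base_url, data=f.read(), headers={"content-type": "image/x-image"}
        ).json()
    )
```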
+### Development + +* Recommended Python version: 3.11 +* We recommend `pyenv` for easily switching between different Python versions +* There are two options for unit and integration tests: + * `Make` - see `makefile` + +#### Testing with Make + +* Unit Testing: `make unit-test` +* Integration testing: `make integ-test` --- ## 📜 License diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile new file mode 100644 index 00000000..8e4c4d35 --- /dev/null +++ b/dockerfiles/pytorch/Dockerfile @@ -0,0 +1,48 @@ +ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04 + +FROM $BASE_IMAGE +SHELL ["/bin/bash", "-c"] + +LABEL maintainer="Hugging Face" + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app + +RUN apt-get update && \ + apt-get install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \ + apt-get install -y \ + build-essential \ + bzip2 \ + curl \ + git \ + git-lfs \ + tar \ + gcc \ + g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3-dev \ + python3-pip \ + python3.11 \ + libsndfile1-dev \ + ffmpeg \ + && apt-get clean autoremove --yes \ + && rm -rf /var/lib/{apt,dpkg,cache,log} +# Copying only necessary files as filtered by .dockerignore +COPY . . + +# install wheel and setuptools +RUN pip install --no-cache-dir -U pip ".[torch, st, diffusers]" + +# copy application +COPY src/huggingface_inference_toolkit huggingface_inference_toolkit +COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py + +# copy entrypoint and change permissions +COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh + +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile deleted file mode 100644 index 61e573b4..00000000 --- a/dockerfiles/pytorch/cpu/Dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -FROM ubuntu:22.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/pytorch/cpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] diff --git a/dockerfiles/pytorch/cpu/environment.yaml 
b/dockerfiles/pytorch/cpu/environment.yaml deleted file mode 100644 index 4bd1b693..00000000 --- a/dockerfiles/pytorch/cpu/environment.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- pytorch::pytorch=1.13.1=py3.9_cpu_0 -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile deleted file mode 100644 index 1a3941a7..00000000 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" - -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/pytorch/gpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml deleted file mode 100644 index 8c1012f7..00000000 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- nvidia::cudatoolkit=11.7 -- pytorch::pytorch=1.13.1=py3.9_cuda11.7* -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile index c52abf13..d16010bb 100644 --- a/dockerfiles/tensorflow/cpu/Dockerfile +++ b/dockerfiles/tensorflow/cpu/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ @@ -49,4 +50,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index d989111c..02018371 100644 --- 
a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -15,6 +15,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ @@ -33,6 +34,11 @@ RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin WORKDIR /app +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + source .venv/bin/activate && \ + ls -all + # install base python dependencies COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml RUN micromamba install -y -n base -f environment.yaml \ @@ -43,6 +49,9 @@ RUN micromamba install -y -n base -f environment.yaml \ COPY requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +# copy tests +COPY . /tmp/hf-inference-test + # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py @@ -52,4 +61,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/makefile b/makefile index 49855723..a9490428 100644 --- a/makefile +++ b/makefile @@ -5,10 +5,10 @@ check_dirs := src # run tests unit-test: - python3 -m pytest -s -v ./tests/unit + RUN_SLOW=True python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='ERROR' integ-test: - python3 -m pytest -s -v ./tests/integ/ + python3 -m pytest -s -v tests/integ/ # Check that source code meets quality standards @@ -18,4 +18,13 @@ quality: # Format source code automatically style: - ruff $(check_dirs) --fix \ No newline at end of file + ruff $(check_dirs) --fix + +inference-pytorch-gpu: + docker build -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu . + +inference-pytorch-cpu: + docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu . + +stop-all: + docker stop $$(docker ps -a -q) && docker container prune --force \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 96ef9084..a692967f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ no_implicit_optional = true scripts_are_modules = true [tool.ruff] -select = [ +lint.select = [ "E", # pycodestyle errors "W", # pycodestyle warnings "F", # pyflakes @@ -12,8 +12,8 @@ select = [ "C", # flake8-comprehensions "B", # flake8-bugbear ] -ignore = [ - "E501", # line too long, handled by black +lint.ignore = [ + "E501", # Line length (handled by ruff-format) "B008", # do not perform function calls in argument defaults "C901", # too complex ] @@ -21,14 +21,13 @@ ignore = [ line-length = 119 # Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.8. -target-version = "py39" +# Assume Python 3.11. 
+target-version = "py311" -[tool.ruff.per-file-ignores] -"__init__.py" = ["F401"] +lint.per-file-ignores = {"__init__.py" = ["F401"]} [tool.isort] profile = "black" -known_third_party = ["transforemrs", "starlette", "huggingface_hub"] +known_third_party = ["transformers", "starlette", "huggingface_hub"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8a178f8d..e69de29b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +0,0 @@ -orjson -starlette -uvicorn -pandas -huggingface_hub>=0.13.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 924033ba..21085086 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,6 @@ known_third_party = torch robyn - line_length = 119 lines_after_imports = 2 multi_line_output = 3 diff --git a/setup.py b/setup.py index 92132915..bdd64fba 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,11 @@ from __future__ import absolute_import -import os from datetime import date from setuptools import find_packages, setup # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.1.0" +VERSION = "0.3.0" # Ubuntu packages @@ -15,33 +14,33 @@ # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg install_requires = [ - # transformers - "transformers[sklearn,sentencepiece]>=4.25.1", - "huggingface_hub>=0.13.3", - # api stuff + "wheel==0.42.0", + "setuptools==69.1.0", + "cmake==3.28.3", + "transformers[sklearn,sentencepiece, audio, vision]==4.38.1", + "huggingface_hub==0.20.3", "orjson", - # "robyn", # vision "Pillow", - # speech + torchaudio "librosa", + # speech + torchaudio "pyctcdecode>=0.3.0", "phonemizer", + "ffmpeg", + # web api + "starlette", + "uvicorn", + "pandas" ] extras = {} -extras["st"] = ["sentence_transformers"] -extras["diffusers"] = ["diffusers==0.8.1", "accelerate==0.14.0"] - - -# Hugging Face specific dependencies -# framework specific dependencies -extras["torch"] = ["torch>=1.8.0", "torchaudio"] -extras["tensorflow"] = ["tensorflow==2.9.0"] -# test and quality +extras["st"] = ["sentence_transformers==2.4.0"] +extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] +extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] +extras["tensorflow"] = ["tensorflow"] extras["test"] = [ - "pytest", + "pytest==7.2.1", "pytest-xdist", "parameterized", "psutil", @@ -50,12 +49,11 @@ "mock==2.0.0", "docker", "requests", + "tenacity" ] extras["quality"] = [ - "black", "isort", - "flake8", - "ruff", + "ruff" ] setup( @@ -63,9 +61,6 @@ version=VERSION, author="HuggingFace", description=".", - # long_description=open("README.md", "r", encoding="utf-8").read(), - # long_description_content_type="text/markdown", - # keywords="NLP deep-learning transformer pytorch tensorflow BERT GPT GPT-2 AWS Amazon SageMaker Cloud", url="", package_dir={"": "src"}, packages=find_packages(where="src"), @@ -82,7 +77,7 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], ) diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 7068df9d..521a85df 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -32,6 +32,7 @@ def __init__(self, model_dir: str, 
device: str = None): # needs "cuda" for GPU self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config) except Exception: pass + self.pipeline.to(device) def __call__( diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 097a12c9..08368326 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -10,11 +10,16 @@ class HuggingFaceHandler: """ - A Default Hugging Face Inference Handler which works with all transformers pipelines, Sentence Transformers and Optimum. + A Default Hugging Face Inference Handler which works with all + transformers pipelines, Sentence Transformers and Optimum. """ - def __init__(self, model_dir: Union[str, Path], task=None): - self.pipeline = get_pipeline(model_dir=model_dir, task=task) + def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): + self.pipeline = get_pipeline( + model_dir=model_dir, + task=task, + framework=framework + ) def __call__(self, data): """ @@ -25,6 +30,7 @@ def __call__(self, data): """ inputs = data.pop("inputs", data) parameters = data.pop("parameters", None) + # pass inputs with all kwargs in data if parameters is not None: prediction = self.pipeline(inputs, **parameters) @@ -34,7 +40,10 @@ def __call__(self, data): return prediction -def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None): +def get_inference_handler_either_custom_or_default_handler( + model_dir: Path, + task: Optional[str] = None +): """ get inference handler either custom or default Handler """ diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index 2a3c0055..72bb2ee2 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -47,7 +47,12 @@ def __call__(self, inputs): } -def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **kwargs): +def get_sentence_transformers_pipeline( + task=None, + model_dir=None, + device=-1, + **kwargs +): device = "cuda" if device == 0 else "cpu" pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device) return pipeline diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py index eb965b64..dc7d6839 100644 --- a/src/huggingface_inference_toolkit/serialization/base.py +++ b/src/huggingface_inference_toolkit/serialization/base.py @@ -42,15 +42,21 @@ def get_deserializer(content_type): if content_type in content_type_mapping: return content_type_mapping[content_type] else: - raise Exception( - f'Content type "{content_type}" not supported. Supported content types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Content type "{content_type}" not supported. + Supported content types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) @staticmethod def get_serializer(accept): if accept in content_type_mapping: return content_type_mapping[accept] else: - raise Exception( - f'Accept type "{accept}" not supported. Supported accept types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Accept type "{accept}" not supported. 
+ Supported accept types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index ffe8d2c3..1570317b 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -7,7 +7,7 @@ from huggingface_hub import HfApi, login, snapshot_download from transformers import WhisperForConditionalGeneration, pipeline from transformers.file_utils import is_tf_available, is_torch_available -from transformers.pipelines import Conversation, Pipeline +from transformers.pipelines import Pipeline from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME from huggingface_inference_toolkit.diffusers_utils import ( @@ -75,19 +75,12 @@ def wrap_conversation_pipeline(pipeline): """ def wrapped_pipeline(inputs, *args, **kwargs): - converted_input = Conversation( - inputs["text"], - past_user_inputs=inputs.get("past_user_inputs", []), - generated_responses=inputs.get("generated_responses", []), - ) - prediction = pipeline(converted_input, *args, **kwargs) - return { - "generated_text": prediction.generated_responses[-1], - "conversation": { - "past_user_inputs": prediction.past_user_inputs, - "generated_responses": prediction.generated_responses, - }, - } + logger.info(f"Inputs: {inputs}") + logger.info(f"Args: {args}") + logger.info(f"KWArgs: {kwargs}") + prediction = pipeline(inputs, *args, **kwargs) + logger.info(f"Prediction: {prediction}") + return list(prediction) return wrapped_pipeline @@ -112,6 +105,7 @@ def _get_framework(): """ extracts which DL framework is used for inference, if both are installed use pytorch """ + if is_torch_available(): return "pytorch" elif is_tf_available(): @@ -134,6 +128,7 @@ def _load_repository_from_hf( """ Load a model from huggingface hub. """ + if hf_hub_token is not None: login(token=hf_hub_token) @@ -157,13 +152,14 @@ def _load_repository_from_hf( ignore_regex = create_artifact_filter(framework) logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") - # Download the repository to the workdir and filter out non-framework specific weights + # Download the repository to the workdir and filter out non-framework + # specific weights snapshot_download( - repository_id, - revision=revision, - local_dir=str(target_dir), - local_dir_use_symlinks=False, - ignore_patterns=ignore_regex, + repo_id = repository_id, + revision = revision, + local_dir = str(target_dir), + local_dir_use_symlinks = False, + ignore_patterns = ignore_regex, ) return target_dir @@ -188,9 +184,12 @@ def check_and_register_custom_pipeline_from_directory(model_dir): spec.loader.exec_module(handler) # init custom handler with model_dir custom_pipeline = handler.EndpointHandler(model_dir) + elif legacy_module.is_file(): logger.warning( - "You are using a legacy custom pipeline with. Please update to the new format. See documentation for more information." + """You are using a legacy custom pipeline. + Please update to the new format. + See documentation for more information.""" ) spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: @@ -212,13 +211,20 @@ def get_device(): """ The get device function will return the device for the DL Framework. 
""" - if _is_gpu_available(): + gpu = _is_gpu_available() + + if gpu: return 0 else: return -1 -def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: +def get_pipeline( + task: str, + model_dir: Path, + framework = "pytorch", + **kwargs, +) -> Pipeline: """ create pipeline class for a specific task based on local saved model """ @@ -229,7 +235,8 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: raise EnvironmentError( "The task for this model is not set: Please set one: https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined" ) - # define tokenizer or feature extractor as kwargs to load it the pipeline correctly + # define tokenizer or feature extractor as kwargs to load it the pipeline + # correctly if task in { "automatic-speech-recognition", "image-segmentation", @@ -244,37 +251,50 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: else: kwargs["tokenizer"] = model_dir - # add check for optimum accelerated pipeline if is_optimum_available(): - # TODO: add check for optimum accelerated pipeline - logger.info("Optimum is not implement yet using default pipeline.") + logger.info("Optimum is not implemented yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) elif is_sentence_transformers_available() and task in [ "sentence-similarity", "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_sentence_transformers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_diffusers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) else: - hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) + hf_pipeline = pipeline( + task=task, + model=model_dir, + device=device, + **kwargs + ) - # wrapp specific pipeline to support better ux + # wrap specific pipeline to support better ux if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) - elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): + + elif task == "automatic-speech-recognition" and isinstance( + hf_pipeline.model, + WhisperForConditionalGeneration + ): # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - hf_pipeline._preprocess_params["ignore_warning"] = True - # set decoder to english by default - # TODO: replace when transformers 4.26.0 is release with - # hf_pipeline.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe") - hf_pipeline.tokenizer.language = "english" - hf_pipeline.tokenizer.task = "transcribe" - hf_pipeline.model.config.forced_decoder_ids = [ - (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:]) - ] + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids( + language="english", + task="transcribe" + ) + return hf_pipeline diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py index a1c437af..5aeaf605 100644 --- a/src/huggingface_inference_toolkit/webservice_robyn.py +++ 
b/src/huggingface_inference_toolkit/webservice_robyn.py @@ -21,7 +21,10 @@ # if empty_directory_or_not_hf_remote_id is None or task is None: # raise ValueError( -# f"Can't initialize model. Please set correct model id and task. provided values are model_id:{model_id_or_path} and task:{task}" +# f"""Can't initialize model. +# Please set correct model id and task. +# Provided values are model_id: +# {model_id_or_path} and task:{task}""" # ) # logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}") diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 64935925..8bc68b2e 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -49,7 +49,10 @@ async def some_startup_task(): ) else: raise ValueError( - f"Can't initialize model. Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. Provided values are HF_MODEL_DIR:{HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}" + f"""Can't initialize model. + Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. + Provided values are: + HF_MODEL_DIR: {HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}""" ) logger.info(f"Initializing model from directory:{HF_MODEL_DIR}") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integ/config.py b/tests/integ/config.py index 467afde2..b1d4d605 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -1,6 +1,6 @@ import os -from integ.utils import ( +from tests.integ.utils import ( validate_automatic_speech_recognition, validate_classification, validate_feature_extraction, @@ -14,6 +14,8 @@ validate_text_to_image, validate_translation, validate_zero_shot_classification, + validate_custom, + validate_conversational ) @@ -63,32 +65,25 @@ "tensorflow": "hf-internal-testing/tiny-random-vit", }, "automatic-speech-recognition": { - "pytorch": "hf-internal-testing/tiny-random-wav2vec2", + "pytorch": "hf-internal-testing/tiny-random-Wav2Vec2Model", "tensorflow": None, }, "audio-classification": { - "pytorch": "hf-internal-testing/tiny-random-wavlm", + "pytorch": "hf-internal-testing/tiny-random-WavLMModel", "tensorflow": None, }, "object-detection": { "pytorch": "hustvl/yolos-tiny", "tensorflow": None, }, - "image-segmentation": { - "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", - "tensorflow": None, - }, - "table-question-answering": { - "pytorch": "philschmid/tapex-tiny", - "tensorflow": None, - }, "zero-shot-image-classification": { "pytorch": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification", "tensorflow": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification", }, "conversational": { - "pytorch": "hf-internal-testing/tiny-random-blenderbot", - "tensorflow": "hf-internal-testing/tiny-random-blenderbot", + #"pytorch": "hf-internal-testing/tiny-random-blenderbot-small", + "pytorch": "microsoft/DialoGPT-small", + "tensorflow": None, }, "sentence-similarity": { "pytorch": "sentence-transformers/all-MiniLM-L6-v2", @@ -106,6 +101,14 @@ "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch", "tensorflow": None, }, + "table-question-answering": { + "pytorch": "philschmid/tapex-tiny", + "tensorflow": None, + }, + "image-segmentation": { + "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", + "tensorflow": None, + }, } @@ -149,19 +152,27 @@ }, } }, - "conversational": { - "inputs": { - "past_user_inputs": ["Which movie is 
the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", + "conversational": {"inputs": [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" } - }, + ]}, "sentence-similarity": { "inputs": {"source_sentence": "Lets create an embedding", "sentences": ["Lets create an embedding"]} }, "sentence-embeddings": {"inputs": "Lets create an embedding"}, "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]}, "text-to-image": {"inputs": "a man on a horse jumps over a broken down airplane."}, + "custom": {"inputs": "this is a test"} } task2output = { @@ -206,11 +217,17 @@ "object-detection": [{"score": 0.9143241047859192, "label": "cat", "box": {}}], "image-segmentation": [{"score": 0.9143241047859192, "label": "cat", "mask": {}}], "table-question-answering": {"answer": "36542"}, - "conversational": {"generated_text": "", "conversation": {}}, + "conversational": [ + {'role': 'user', 'content': 'Which movie is the best ?'}, + {'role': 'assistant', 'content': "It's Die Hard for sure."}, + {'role': 'user', 'content': 'Can you explain why?'}, + {'role': 'assistant', 'content': "It's a great movie."}, + ], "sentence-similarity": {"similarities": ""}, "sentence-embeddings": {"embeddings": ""}, "sentence-ranking": {"scores": ""}, "text-to-image": bytes, + "custom": {"inputs": "this is a test"} } @@ -232,9 +249,10 @@ "object-detection": validate_object_detection, "image-segmentation": validate_object_detection, "table-question-answering": validate_zero_shot_classification, - "conversational": validate_zero_shot_classification, + "conversational": validate_conversational, "sentence-similarity": validate_zero_shot_classification, "sentence-embeddings": validate_zero_shot_classification, "sentence-ranking": validate_zero_shot_classification, "text-to-image": validate_text_to_image, + "custom": validate_custom } diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py new file mode 100644 index 00000000..ec282ea8 --- /dev/null +++ b/tests/integ/conftest.py @@ -0,0 +1,159 @@ +import docker +import pytest +import random +import logging +from tests.integ.config import task2model +import tenacity +import time +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from transformers.testing_utils import ( + slow, + _run_slow_tests +) +import uuid +import socket +import os + +HF_HUB_CACHE = os.environ.get("HF_HUB_CACHE", "/home/ubuntu/.cache/huggingface/hub") +IS_GPU = _run_slow_tests +DEVICE = "gpu" if IS_GPU else "cpu" + +@tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(10) +) +@pytest.fixture(scope = "function") +def remote_container( + device, + task, + framework +): + time.sleep(random.randint(1, 5)) + #client = docker.DockerClient(base_url='unix://var/run/docker.sock') + client = docker.from_env() + container_name = f"integration-test-{framework}-{task}-{device}" + container_image = f"integration-test-{framework}:{device}" + port = random.randint(5000, 9000) + model = task2model[task][framework] + + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + + logging.debug(f"Image: 
{container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest( + count=-1, + capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + yield client.containers.run( + image = container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_ID": model, + "HF_TASK": task, + "CUDA_LAUNCH_BLOCKING": 1 + }, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + + +@tenacity.retry( + stop = tenacity.stop_after_attempt(10), + reraise = True +) +@pytest.fixture(scope = "function") +def local_container( + device, + task, + repository_id, + framework +): + try: + time.sleep(random.randint(1, 5)) + id = uuid.uuid4() + if not (task == "custom"): + model = task2model[task][framework] + id = task + else: + model = repository_id + + logging.info(f"Starting container with model: {model}") + + if not model: + message = f"No model supported for {framework}" + logging.error(message) + raise ValueError(message) + + logging.info(f"Starting container with Model = {model}") + client = docker.from_env() + container_name = f"integration-test-{framework}-{id}-{device}" + container_image = f"integration-test-{framework}:{device}" + + port = random.randint(5000, 9000) + + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + object_id = model.replace("/", "--") + model_dir = f"{HF_HUB_CACHE}/{object_id}" + + storage_dir = _load_repository_from_hf( + repository_id = model, + target_dir = model_dir, + framework = framework + ) + + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_DIR": "/opt/huggingface/model", + "HF_TASK": task + }, + volumes = { + model_dir: { + "bind": "/opt/huggingface/model", + "mode": "ro" + } + }, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + except Exception as exception: + logging.error(f"Error starting container: {str(exception)}") + raise exception + diff --git a/tests/integ/test_container.py b/tests/integ/helpers.py similarity index 79% rename from tests/integ/test_container.py rename to tests/integ/helpers.py index 6c343c6a..0dae2598 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/helpers.py @@ -1,20 +1,35 @@ import random import tempfile import time - import docker import pytest import requests -from docker.client import DockerClient -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf -from integ.config import task2input, task2model, task2output, task2validation -from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + require_tf, + _run_slow_tests +) +import tenacity 
+from docker import DockerClient +import logging +import traceback +import urllib3 IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" -client = docker.from_env() - +client = docker.DockerClient(base_url='unix://var/run/docker.sock') def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): try: @@ -25,44 +40,89 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: return None -def wait_for_container_to_be_ready(base_url): - t = 0 - while t < 10: +#@tenacity.retry( +# retry = tenacity.retry_if_exception(ValueError), +# stop = tenacity.stop_after_attempt(10), +# reraise = True +#) +def wait_for_container_to_be_ready( + base_url, + time_between_retries = 1, + max_retries = 30 +): + + retries = 0 + error = None + + while retries < max_retries: + time.sleep(time_between_retries) try: response = requests.get(f"{base_url}/health") if response.status_code == 200: - break - except Exception: - pass - finally: - t += 1 - time.sleep(2) - return True - + logging.info("Container ready!") + return True + else: + raise ConnectionError(f"Error: {response.status_code}") + except Exception as exception: + error = exception + logging.warning(f"Container at {base_url} not ready, trying again...") + retries += 1 + + logging.error(f"Unable to start container: {str(error)}") + raise error -def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): +def verify_task( + #container: DockerClient, + task: str, + port: int = 5000, + framework: str = "pytorch" +): BASE_URL = f"http://localhost:{port}" + logging.info(f"Base URL: {BASE_URL}") + logging.info(f"Port: {port}") input = task2input[task] - # health check - wait_for_container_to_be_ready(BASE_URL) - if ( - task == "image-classification" - or task == "object-detection" - or task == "image-segmentation" - or task == "zero-shot-image-classification" - ): - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} - ).json() - elif task == "automatic-speech-recognition" or task == "audio-classification": - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} - ).json() - elif task == "text-to-image": - prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content - else: - prediction = requests.post(f"{BASE_URL}", json=input).json() - assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + + try: + # health check + wait_for_container_to_be_ready(BASE_URL) + if ( + task == "image-classification" + or task == "object-detection" + or task == "image-segmentation" + or task == "zero-shot-image-classification" + ): + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} + ).json() + elif task == "automatic-speech-recognition" or task == "audio-classification": + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} + ).json() + elif task == "text-to-image": + prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content + + else: + prediction = requests.post(f"{BASE_URL}", json=input).json() + + logging.info(f"Input: {input}") + logging.info(f"Prediction: {prediction}") + logging.info(f"Snapshot: {task2output[task]}") + + if task == "conversational": + for message in prediction: + assert "error" not in message.keys() + else: + assert 
task2validation[task]( + result=prediction, + snapshot=task2output[task] + ) + except Exception as exception: + logging.error(f"Base URL: {BASE_URL}") + logging.error(f"Task: {task}") + logging.error(f"Input: {input}") + logging.error(f"Error: {str(exception)}") + logging.error(f"Stack: {traceback.format_exc()}") + raise exception @require_torch @@ -114,9 +174,9 @@ def test_pt_container_remote_model(task) -> None: # GPU device_requests=device_request, ) - # time.sleep(5) + time.sleep(5) - verify_task(container, task, port) + verify_task(task = task, port = port) container.stop() container.remove() diff --git a/tests/integ/test_pytorch_local_cpu.py b/tests/integ/test_pytorch_local_cpu.py new file mode 100644 index 00000000..17e651e9 --- /dev/null +++ b/tests/integ/test_pytorch_local_cpu.py @@ -0,0 +1,127 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest + +class TestPytorchLocal: + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "repository_id", + [""] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: + + verify_task( + task = task, + port = local_container[1], + ) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + self, + local_container, + repository_id, + device, + task + ) -> None: + + verify_task(task = task, port = local_container[1]) diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py new file mode 100644 index 00000000..15ffebde --- /dev/null +++ b/tests/integ/test_pytorch_local_gpu.py @@ -0,0 +1,127 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + 
slow, + _run_slow_tests +) +import pytest + +class TestPytorchLocal: + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "repository_id", + [""] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: + + verify_task( + task = task, + port = local_container[1], + ) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + self, + local_container, + repository_id, + device, + task + ) -> None: + + verify_task(task = task, port = local_container[1]) diff --git a/tests/integ/test_pytorch_remote_cpu.py b/tests/integ/test_pytorch_remote_cpu.py new file mode 100644 index 00000000..14001dda --- /dev/null +++ b/tests/integ/test_pytorch_remote_cpu.py @@ -0,0 +1,62 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity +import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(5), + reraise = True + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + 
verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/test_pytorch_remote_gpu.py b/tests/integ/test_pytorch_remote_gpu.py new file mode 100644 index 00000000..ec79f4a5 --- /dev/null +++ b/tests/integ/test_pytorch_remote_gpu.py @@ -0,0 +1,62 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity +import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(5), + reraise = True + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/utils.py b/tests/integ/utils.py index 813ba751..2b826cdb 100644 --- a/tests/integ/utils.py +++ b/tests/integ/utils.py @@ -1,21 +1,21 @@ import logging -import re -import signal from contextlib import contextmanager from time import time -LOGGER = logging.getLogger("timeout") - def validate_classification(result=None, snapshot=None): for idx, _ in enumerate(result): assert result[idx].keys() == snapshot[idx].keys() - # assert result[idx]["score"] >= snapshot[idx]["score"] return True +def validate_conversational(result=None, snapshot=None): + assert len(result) >= len(snapshot) + def validate_zero_shot_classification(result=None, snapshot=None): + logging.info(f"Result: {result}") + logging.info(f"Snapshot: {snapshot}") assert result.keys() == snapshot.keys() # assert result["labels"] == snapshot["labels"] # assert result["sequence"] == snapshot["sequence"] @@ -84,3 +84,8 @@ def validate_object_detection(result=None, snapshot=None): def validate_text_to_image(result=None, snapshot=None): assert isinstance(result, snapshot) return True + +def validate_custom(result=None, snapshot=None): + logging.info(f"Validate custom task - result: {result}, snapshot: {snapshot}") + assert result == snapshot + return True diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 00000000..ddba0442 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,7 @@ +import os +import logging +import pytest + +@pytest.fixture(scope = "session") +def cache_test_dir(): + yield os.environ.get("CACHE_TEST_DIR", "./tests") \ No newline at end of file diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py index 32b10cf0..0f2890a8 100644 --- a/tests/unit/test_diffusers.py +++ b/tests/unit/test_diffusers.py @@ -7,12 +7,17 @@ from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, IEAutoPipelineForText2Image from huggingface_inference_toolkit.utils import _load_repository_from_hf, 
get_pipeline +import logging + +logging.basicConfig(level="DEBUG") @require_torch def test_get_diffusers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) assert isinstance(pipe, IEAutoPipelineForText2Image) @@ -23,17 +28,25 @@ def test_get_diffusers_pipeline(): def test_pipe_on_gpu(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) - pipe = get_pipeline("text-to-image", storage_dir.as_posix()) - assert pipe.device.type == "cuda" + pipe = get_pipeline( + "text-to-image", + storage_dir.as_posix() + ) + logging.error(f"Pipe: {pipe.pipeline}") + assert pipe.pipeline.device.type == "cuda" @require_torch def test_text_to_image_task(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) res = pipe("Lets create an embedding") diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 9306cdc3..d1a0a561 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,14 +1,19 @@ import tempfile - -from transformers.testing_utils import require_torch, slow, require_tf - +from transformers.testing_utils import ( + require_tf, + require_torch, + slow +) import pytest from huggingface_inference_toolkit.handler import ( HuggingFaceHandler, get_inference_handler_either_custom_or_default_handler, ) -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) TASK = "text-classification" @@ -19,7 +24,6 @@ @require_torch def test_pt_get_device(): import torch - with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") @@ -34,7 +38,11 @@ def test_pt_get_device(): def test_pt_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="pytorch" + ) h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) prediction = h(INPUT) @@ -46,7 +54,9 @@ def test_pt_predict_call(): def test_pt_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") assert h(INPUT) == INPUT @@ -56,7 +66,9 @@ def test_pt_custom_pipeline(): def test_pt_sentence_transformers_pipeline(): with 
tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") pred = h(INPUT) @@ -65,11 +77,14 @@ def test_pt_sentence_transformers_pipeline(): @require_tf def test_tf_get_device(): - import tensorflow as tf with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="tensorflow" + ) h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) if _is_gpu_available(): assert h.pipeline.device == 0 @@ -81,10 +96,18 @@ def test_tf_get_device(): def test_tf_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") - h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="tensorflow" + ) + handler = HuggingFaceHandler( + model_dir=str(storage_dir), + task=TASK, + framework="tf" + ) - prediction = h(INPUT) + prediction = handler(INPUT) assert "label" in prediction[0] assert "score" in prediction[0] @@ -104,9 +127,12 @@ def test_tf_sentence_transformers_pipeline(): # TODO should fail! because TF is not supported yet with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="tensorflow" ) with pytest.raises(Exception) as exc_info: - h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") - - assert "Unknown task sentence-embeddings" in str(exc_info.value) + h = get_inference_handler_either_custom_or_default_handler( + str(storage_dir), + task="sentence-embeddings" + ) diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py index 98e528e5..07dfd5c1 100644 --- a/tests/unit/test_serializer.py +++ b/tests/unit/test_serializer.py @@ -3,10 +3,13 @@ import numpy as np import pytest import os -from huggingface_inference_toolkit.serialization import Jsoner, Audioer, Imager +from huggingface_inference_toolkit.serialization import ( + Jsoner, + Audioer, + Imager +) from PIL import Image - def test_json_serialization(): t = {"res": np.array([2.0]), "text": "I like you.", "float": 1.2} assert b'{"res":[2.0],"text":"I like you.","float":1.2}' == Jsoner.serialize(t) @@ -30,9 +33,10 @@ def test_json_deserialization(): raw_content = b'{\n\t"inputs": "i like you"\n}' assert {"inputs": "i like you"} == Jsoner.deserialize(raw_content) +@pytest.mark.usefixtures('cache_test_dir') +def test_image_deserialization(cache_test_dir): -def test_image_deserialization(): - image_files_path = os.path.join(os.getcwd(), "tests/resources/image") + image_files_path = f"{cache_test_dir}/resources/image" for image_file in os.listdir(image_files_path): image_bytes = open(os.path.join(image_files_path, image_file), "rb").read() @@ -41,9 +45,10 @@ def test_image_deserialization(): assert isinstance(decoded_data, dict) assert 
isinstance(decoded_data["inputs"], Image.Image) +@pytest.mark.usefixtures('cache_test_dir') +def test_audio_deserialization(cache_test_dir): -def test_audio_deserialization(): - audio_files_path = os.path.join(os.getcwd(), "tests/resources/audio") + audio_files_path = f"{cache_test_dir}/resources/audio" for audio_file in os.listdir(audio_files_path): audio_bytes = open(os.path.join(audio_files_path, audio_file), "rb").read() diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9d5052ee..79cff93d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -2,6 +2,7 @@ from pathlib import Path import tempfile + from transformers import pipeline from transformers.file_utils import is_torch_available from transformers.testing_utils import require_tf, require_torch, slow @@ -16,6 +17,7 @@ wrap_conversation_pipeline, ) +import logging MODEL = "lysandre/tiny-bert-random" TASK = "text-classification" @@ -112,17 +114,33 @@ def test_get_framework_tensorflow(): def test_get_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") - pipe = get_pipeline(TASK, storage_dir.as_posix()) + pipe = get_pipeline( + task = TASK, + model_dir = storage_dir.as_posix(), + framework = "pytorch" + ) res = pipe("Life is good, Life is bad") assert "score" in res[0] @require_torch -def test_whisper_long_audio(): +def test_whisper_long_audio(cache_test_dir): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf("openai/whisper-tiny", tmpdirname, framework="pytorch") - pipe = get_pipeline("automatic-speech-recognition", storage_dir.as_posix()) - res = pipe(os.path.join(os.getcwd(), "tests/resources/audio", "long_sample.mp3")) + storage_dir = _load_repository_from_hf( + repository_id = "openai/whisper-tiny", + target_dir = tmpdirname, + framework = "pytorch", + revision = "be0ba7c2f24f0127b27863a23a08002af4c2c279" + ) + logging.info(f"Temp dir: {tmpdirname}") + logging.info(f"POSIX Path: {storage_dir.as_posix()}") + logging.info(f"Contents: {os.listdir(tmpdirname)}") + pipe = get_pipeline( + task = "automatic-speech-recognition", + model_dir = storage_dir.as_posix(), + framework = "safetensors" + ) + res = pipe(f"{cache_test_dir}/resources/audio/long_sample.mp3") assert len(res["text"]) > 700 @@ -136,33 +154,57 @@ def test_wrap_conversation_pipeline(): framework="pt", ) conv_pipe = wrap_conversation_pipeline(init_pipeline) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" 
+ } + ] res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res + logging.info(f"Response: {res}") + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] @require_torch def test_wrapped_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf("microsoft/DialoGPT-small", tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + repository_id = "microsoft/DialoGPT-small", + target_dir = tmpdirname, + framework="pytorch" + ) conv_pipe = get_pipeline("conversational", storage_dir.as_posix()) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } - res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res - - -def test_local_custom_pipeline(): - model_dir = os.path.join(os.getcwd(), "tests/resources/custom_handler") + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" + } + ] + res = conv_pipe(data, max_new_tokens = 100) + logging.info(f"Response: {res}") + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] + + +def test_local_custom_pipeline(cache_test_dir): + model_dir = f"{cache_test_dir}/resources/custom_handler" pipeline = check_and_register_custom_pipeline_from_directory(model_dir) payload = "test" assert pipeline.path == model_dir @@ -172,7 +214,9 @@ def test_local_custom_pipeline(): def test_remote_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = check_and_register_custom_pipeline_from_directory(str(storage_dir)) payload = "test" @@ -183,7 +227,9 @@ def test_remote_custom_pipeline(): def test_get_inference_handler_either_custom_or_default_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = get_inference_handler_either_custom_or_default_handler(str(storage_dir)) payload = "test"
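
Note on the readiness polling added in tests/integ/helpers.py: the hunk above is hard to read in flattened form, so here is the same helper reconstructed as a standalone sketch (same names, defaults, and behavior as the patch; the loop is written with `for` instead of the patch's manual counter, and logging configuration is assumed to happen elsewhere):

import logging
import time

import requests


def wait_for_container_to_be_ready(base_url, time_between_retries=1, max_retries=30):
    """Poll GET {base_url}/health until it answers HTTP 200 or retries are exhausted."""
    error = None
    for _ in range(max_retries):
        time.sleep(time_between_retries)
        try:
            response = requests.get(f"{base_url}/health")
            if response.status_code == 200:
                logging.info("Container ready!")
                return True
            # Any non-200 answer counts as "not ready yet" and triggers another attempt.
            raise ConnectionError(f"Error: {response.status_code}")
        except Exception as exception:
            error = exception
            logging.warning(f"Container at {base_url} not ready, trying again...")
    logging.error(f"Unable to start container: {error}")
    raise error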
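
Note on the tenacity decorator used by both remote test classes: tenacity's retry_if_exception expects a callable predicate, while retry_if_exception_type takes the exception class itself, so retry_if_exception(docker.errors.APIError) passes a class where a predicate is expected and does not behave as a plain "retry on APIError" filter. If the intent is to retry only on Docker API errors, a sketch of the class-based spelling follows (the helper name and the fixed wait are illustrative assumptions, not part of the patch):

import docker
import tenacity


@tenacity.retry(
    # retry_if_exception_type filters on the exception class;
    # retry_if_exception would expect a predicate function instead.
    retry=tenacity.retry_if_exception_type(docker.errors.APIError),
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_fixed(2),  # assumption: short fixed back-off between attempts
    reraise=True,
)
def run_container(client: docker.DockerClient, image: str):
    # Hypothetical helper: the Docker API call is the step that can raise transient APIErrors.
    return client.containers.run(image, detach=True)

Keeping the retry on a small helper like this, rather than stacked on the parametrized test method, also keeps the pytest marks and the retry concern separate.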
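
Note on the conversational changes: the tests and validators now exchange chat-style messages (an ordered list of role/content dicts) instead of the legacy past_user_inputs/generated_responses payload. A minimal sketch of the expected shape, combining the new validate_conversational length check with the assistant-turn assertion used in tests/unit/test_utils.py (example messages taken from the patch):

from typing import Dict, List

# Chat-style request: an ordered list of role/content messages, as used by the updated tests.
messages: List[Dict[str, str]] = [
    {"role": "user", "content": "Which movie is the best ?"},
    {"role": "assistant", "content": "It's Die Hard for sure."},
    {"role": "user", "content": "Can you explain why?"},
]


def validate_conversational(result=None, snapshot=None):
    # The pipeline should return at least as many messages as it was given,
    # with the generated turn(s) appended by the assistant.
    assert len(result) >= len(snapshot)
    assert result[-1]["role"] == "assistant"
    return True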