From 682b4876fcb27015d6e2734ccc84daf2f2929b61 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 5 Feb 2024 08:08:57 +0000 Subject: [PATCH 001/173] test --- dockerfiles/pytorch/gpu/Dockerfile | 2 +- dockerfiles/pytorch/gpu/environment.yaml | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 1a3941a7..9ec97284 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 LABEL maintainer="Hugging Face" diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml index 8c1012f7..7f4ebf79 100644 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ b/dockerfiles/pytorch/gpu/environment.yaml @@ -3,12 +3,11 @@ channels: - conda-forge dependencies: - python=3.9.13 -- nvidia::cudatoolkit=11.7 -- pytorch::pytorch=1.13.1=py3.9_cuda11.7* +- nvidia::pytorch-cuda=12.1 +- pytorch::pytorch=2.1.2=py3.9_cuda12.1* - pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file + - transformers[sklearn,sentencepiece,audio,vision]==4.37.2 + - sentence_transformers==2.3.1 + - torchvision==0.16.2 + - diffusers==0.26.1 + - accelerate==0.26.1 \ No newline at end of file From 43dd281b2a29fe6e5c51ed95dd3ef7dcefede23b Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 5 Feb 2024 08:17:33 +0000 Subject: [PATCH 002/173] to 4.36 --- dockerfiles/pytorch/gpu/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml index 7f4ebf79..00537355 100644 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ b/dockerfiles/pytorch/gpu/environment.yaml @@ -6,7 +6,7 @@ dependencies: - nvidia::pytorch-cuda=12.1 - pytorch::pytorch=2.1.2=py3.9_cuda12.1* - pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.37.2 + - transformers[sklearn,sentencepiece,audio,vision]==4.36.2 - sentence_transformers==2.3.1 - torchvision==0.16.2 - diffusers==0.26.1 From 6584b7341e8f1f7138579a254fba3f7419643426 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 5 Feb 2024 08:43:02 +0000 Subject: [PATCH 003/173] build image --- dockerfiles/pytorch/gpu/Dockerfile | 1 + dockerfiles/pytorch/gpu/environment.yaml | 8 +++++--- setup.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 9ec97284..0b262487 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -7,6 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update \ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ && apt-get install -y \ + build-essential \ bzip2 \ curl \ git \ diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml index 00537355..fdd39421 100644 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ b/dockerfiles/pytorch/gpu/environment.yaml @@ -1,13 +1,15 @@ name: base channels: - conda-forge +- pytorch +- nvidia dependencies: - python=3.9.13 -- nvidia::pytorch-cuda=12.1 -- pytorch::pytorch=2.1.2=py3.9_cuda12.1* +- pytorch-cuda=12.1 +- pytorch=2.1.2 +- torchvision==0.16.2 - pip: - transformers[sklearn,sentencepiece,audio,vision]==4.36.2 - sentence_transformers==2.3.1 - 
- torchvision==0.16.2 - diffusers==0.26.1 - accelerate==0.26.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 92132915..509abb2c 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ extras = {} -extras["st"] = ["sentence_transformers"] -extras["diffusers"] = ["diffusers==0.8.1", "accelerate==0.14.0"] +extras["st"] = ["sentence_transformers==2.3.1==2.3.1"] +extras["diffusers"] = ["diffusers==0.26.1", "accelerate==0.26.1"] # Hugging Face specific dependencies From 1cda02f8e3c759bada2067eba0a83a2adbcd41d3 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 5 Feb 2024 08:48:03 +0000 Subject: [PATCH 004/173] fxi --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 509abb2c..1b567d4b 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ extras = {} -extras["st"] = ["sentence_transformers==2.3.1==2.3.1"] +extras["st"] = ["sentence_transformers==2.3.1"] extras["diffusers"] = ["diffusers==0.26.1", "accelerate==0.26.1"] From b262224516c359172d134b307c557c72bc2507f0 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Tue, 13 Feb 2024 13:18:01 +0100 Subject: [PATCH 005/173] Move GPU to EKS --- .github/workflows/gpu-integ-test.yaml | 67 +-------------------------- 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index ede153ea..e85f5498 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -13,45 +13,8 @@ concurrency: jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] pytorch-integration-test: - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -69,9 +32,8 @@ jobs: run: RUN_SLOW=True make integ-test tensorflow-integration-test: needs: - - start-runner - pytorch-integration-test - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -89,28 +51,3 @@ jobs: run: docker build -t 
starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . - name: Run Integration Tests run: RUN_SLOW=True make integ-test - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - pytorch-integration-test - - tensorflow-integration-test - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} \ No newline at end of file From 8271cc77b7449bee24a545e5da5a85e5115bfbd5 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Tue, 13 Feb 2024 13:09:17 +0000 Subject: [PATCH 006/173] cuda 12, remove conda --- dockerfiles/pytorch/gpu/Dockerfile | 25 +++++++++--------------- dockerfiles/pytorch/gpu/requirements.txt | 6 ++++++ dockerfiles/tensorflow/gpu/Dockerfile | 8 +++++++- makefile | 11 ++++++++++- scripts/entrypoint.sh | 2 +- 5 files changed, 33 insertions(+), 19 deletions(-) create mode 100644 dockerfiles/pytorch/gpu/requirements.txt diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 0b262487..c22c06ea 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 LABEL maintainer="Hugging Face" @@ -15,29 +15,22 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3 \ + python3-pip \ # audio libsndfile1-dev \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" - -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - WORKDIR /app -# install base python dependencies -COPY dockerfiles/pytorch/gpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes +# install dependencies +COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt # install huggingface inference toolkit COPY requirements.txt /tmp/requirements.txt diff --git a/dockerfiles/pytorch/gpu/requirements.txt b/dockerfiles/pytorch/gpu/requirements.txt new file mode 100644 index 00000000..165f27b8 --- /dev/null +++ b/dockerfiles/pytorch/gpu/requirements.txt @@ -0,0 +1,6 @@ +torch==2.1.2 +torchvision==0.16.2 +transformers[sklearn,sentencepiece,audio,vision]==4.37.2 +sentence_transformers==2.3.1 +diffusers==0.26.1 +accelerate==0.26.1 \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index d989111c..6b87b265 100644 --- 
a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -39,9 +39,15 @@ RUN micromamba install -y -n base -f environment.yaml \ && rm environment.yaml \ && micromamba clean --all --yes +# install dependencies +COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt + # install huggingface inference toolkit COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt + + # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit diff --git a/makefile b/makefile index 49855723..beaae9d8 100644 --- a/makefile +++ b/makefile @@ -18,4 +18,13 @@ quality: # Format source code automatically style: - ruff $(check_dirs) --fix \ No newline at end of file + ruff $(check_dirs) --fix + +build-torch-gpu: + docker build -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . + +build-torch-cpu: + docker build -f dockerfiles/pytorch/cpu/Dockerfile -t starlette-transformers:cpu . + +run-classification: + docker run -e HF_MODEL="hf-internal-testing/tiny-random-distilbert" -e HF_MODEL_DIR="/tmp2" -e HF_TASK="text-classification" --gpus all starlette-transformers:gpu \ No newline at end of file diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 8544a63c..53b6e4d0 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -10,4 +10,4 @@ if [[ ! -z "${HF_MODEL_DIR}" ]]; then fi # start the server -uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000 \ No newline at end of file +python3 -m uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000 \ No newline at end of file From f514a5e9e6ccdd3b243f033d7ce23087eb71d5c7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 10:55:11 +0000 Subject: [PATCH 007/173] integ test 2.0 --- .github/workflows/gpu-integ-test.yaml | 47 +++--- .github/workflows/gpu-integration-2.0.yaml | 164 +++++++++++++++++++++ dockerfiles/pytorch/gpu/environment.yaml | 15 -- tests/integ/test_container.py | 2 +- 4 files changed, 185 insertions(+), 43 deletions(-) create mode 100644 .github/workflows/gpu-integration-2.0.yaml delete mode 100644 dockerfiles/pytorch/gpu/environment.yaml diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index e85f5498..74b603a3 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -13,41 +13,34 @@ concurrency: jobs: - pytorch-integration-test: + pytorch-build-image-gpu: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - name: Checkout uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 + - name: Build and export + uses: docker/build-push-action@v2 with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . - - name: Run Integration Tests - run: RUN_SLOW=True make integ-test - tensorflow-integration-test: - needs: - - pytorch-integration-test + context: . 
+ file: dockerfiles/pytorch/gpu/Dockerfile + tags: starlette-transformers:gpu + outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar + - name: Upload starlette-gpu image as artifact + uses: actions/upload-artifact@v2 + with: + name: starlette-transformers:gpu + path: /tmp/starlette-transformers-gpu.tar + pytorch-integration-test-gpu: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Uninstall pytorch - run: pip uninstall torch torchvision -y - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . - - name: Run Integration Tests - run: RUN_SLOW=True make integ-test + - name: Download artifacts (Docker images) from previous workflows + uses: actions/download-artifact@v2 + - name: Load Docker images from previous workflows + run: | + docker load --input /tmp/starlette-transformers-gpu.tar + - run: docker image ls + \ No newline at end of file diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml new file mode 100644 index 00000000..fce7f9a6 --- /dev/null +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -0,0 +1,164 @@ +name: GPU integrationt ests + +on: + workflow_dispatch: + +env: + HF_HOME: /mnt/cache + +jobs: + setup: + name: Setup + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + container: + image: huggingface/transformers-all-latest-gpu + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + outputs: + folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} + slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} + steps: + - name: Update clone + working-directory: /transformers + run: | + git fetch && git checkout ${{ github.sha }} + + - name: Cleanup + working-directory: /transformers + run: | + rm -rf tests/__pycache__ + rm -rf tests/models/__pycache__ + rm -rf reports + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - id: set-matrix + name: Identify models to test + working-directory: /transformers/tests + run: | + echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT + echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT + + - name: NVIDIA-SMI + run: | + nvidia-smi + + run_tests_gpu: + name: " " + needs: setup + strategy: + fail-fast: false + matrix: + machine_type: [single-gpu, multi-gpu] + slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} + uses: ./.github/workflows/model_jobs.yml + with: + folder_slices: ${{ needs.setup.outputs.folder_slices }} + machine_type: ${{ matrix.machine_type }} + slice_id: ${{ matrix.slice_id }} + secrets: inherit + + run_examples_gpu: + name: Examples directory + strategy: + fail-fast: false + matrix: + machine_type: [single-gpu] + runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + container: + image: huggingface/transformers-all-latest-gpu + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + needs: setup + steps: + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.sha }} + + - 
name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run examples tests on GPU + working-directory: /transformers + run: | + pip install -r examples/pytorch/_tests_requirements.txt + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt + + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.machine_type }}_run_examples_gpu + path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu + + run_pipelines_torch_gpu: + name: PyTorch pipelines + strategy: + fail-fast: false + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + container: + image: huggingface/transformers-pytorch-gpu + options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + needs: setup + steps: + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.sha }} + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
+ + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all pipeline tests on GPU + working-directory: /transformers + run: | + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt + + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu + path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml deleted file mode 100644 index fdd39421..00000000 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: base -channels: -- conda-forge -- pytorch -- nvidia -dependencies: -- python=3.9.13 -- pytorch-cuda=12.1 -- pytorch=2.1.2 -- torchvision==0.16.2 -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.36.2 - - sentence_transformers==2.3.1 - - diffusers==0.26.1 - - accelerate==0.26.1 \ No newline at end of file diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index 6c343c6a..9197d606 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -13,7 +13,7 @@ IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" -client = docker.from_env() +client = docker.DockerClient(base_url='unix://var/run/docker.sock') def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): From b164b31d9ef1f1b1898cd487fa5fd549b7e41bfc Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:00:14 +0000 Subject: [PATCH 008/173] 2.0 --- .github/workflows/gpu-integ-test.yaml | 46 ++--- .github/workflows/gpu-integration-2.0.yaml | 189 ++++----------------- 2 files changed, 62 insertions(+), 173 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 74b603a3..058d6ad9 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -13,34 +13,42 @@ concurrency: jobs: - pytorch-build-image-gpu: + pytorch-integration-test: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - name: Checkout uses: actions/checkout@v2 - - name: Build and export - uses: docker/build-push-action@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 with: - context: . - file: dockerfiles/pytorch/gpu/Dockerfile - tags: starlette-transformers:gpu - outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar - - name: Upload starlette-gpu image as artifact - uses: actions/upload-artifact@v2 - with: - name: starlette-transformers:gpu - path: /tmp/starlette-transformers-gpu.tar - pytorch-integration-test-gpu: + python-version: 3.9 + - name: Install Python dependencies + run: pip install -e .[test,dev,torch] + - name: Build Docker + run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . 
+ - name: Run Integration Tests + run: RUN_SLOW=True make integ-test + tensorflow-integration-test: + needs: + - pytorch-integration-test runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v2 - - name: Load Docker images from previous workflows - run: | - docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker image ls + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Uninstall pytorch + run: pip uninstall torch torchvision -y + - name: Install Python dependencies + run: pip install -e .[test,dev,tensorflow] + - name: Build Docker + run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . + - name: Run Integration Tests + run: RUN_SLOW=True make integ-test \ No newline at end of file diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index fce7f9a6..da4f50cc 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -1,164 +1,45 @@ -name: GPU integrationt ests +name: GPU - Run Integration Tests on: + push: + branches: + - main + pull_request: workflow_dispatch: -env: - HF_HOME: /mnt/cache +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true -jobs: - setup: - name: Setup - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-all-latest-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} - slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} - steps: - - name: Update clone - working-directory: /transformers - run: | - git fetch && git checkout ${{ github.sha }} - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - id: set-matrix - name: Identify models to test - working-directory: /transformers/tests - run: | - echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT - echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - - - name: NVIDIA-SMI - run: | - nvidia-smi - - run_tests_gpu: - name: " " - needs: setup - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} - uses: ./.github/workflows/model_jobs.yml - with: - folder_slices: ${{ needs.setup.outputs.folder_slices }} - machine_type: ${{ matrix.machine_type }} - slice_id: ${{ matrix.slice_id }} - secrets: inherit - - run_examples_gpu: - name: Examples directory - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-all-latest-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup +jobs: + pytorch-build-image-gpu: + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 steps: - - name: 
Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run examples tests on GPU - working-directory: /transformers - run: | - pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 + - name: Checkout + uses: actions/checkout@v2 + - name: Build and export + uses: docker/build-push-action@v2 with: - name: ${{ matrix.machine_type }}_run_examples_gpu - path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu - - run_pipelines_torch_gpu: - name: PyTorch pipelines - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] - container: - image: huggingface/transformers-pytorch-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup + context: . + file: dockerfiles/pytorch/gpu/Dockerfile + tags: starlette-transformers:gpu + outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar + - name: Upload starlette-gpu image as artifact + uses: actions/upload-artifact@v2 + with: + name: starlette-transformers:gpu + path: /tmp/starlette-transformers-gpu.tar + pytorch-integration-test-gpu: + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: NVIDIA-SMI + - name: Download artifacts (Docker images) from previous workflows + uses: actions/download-artifact@v2 + - name: Load Docker images from previous workflows run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu - path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu \ No newline at end of file + docker load --input /tmp/starlette-transformers-gpu.tar + - run: docker image ls \ No newline at end of file From 70d6003ca8e62d63096ce1f7c234670473be85a2 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:01:24 +0000 Subject: [PATCH 009/173] fix --- .github/workflows/gpu-integration-2.0.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index da4f50cc..64e6543b 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -39,7 +39,7 @@ jobs: steps: - name: Download artifacts (Docker images) from previous workflows uses: actions/download-artifact@v2 - - name: Load Docker images from previous workflows - run: | - docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker image ls \ No newline at end of file + - name: Load Docker images from previous workflows + run: | + docker load --input /tmp/starlette-transformers-gpu.tar + - run: docker image ls \ No newline at end of file From b1cc6a2a4e2a427ebbfa9a8c5ab041fb3ebbde52 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:02:16 +0000 Subject: [PATCH 010/173] indent --- .github/workflows/gpu-integration-2.0.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index 64e6543b..01dffa08 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -38,7 +38,7 @@ jobs: AWS_REGION: us-east-1 steps: - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v2 - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar From c728190b84ab9471371660fb2b3d6f0587c29f11 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:03:24 +0000 Subject: [PATCH 011/173] docker buildx --- .github/workflows/gpu-integration-2.0.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index 01dffa08..8dfcd74d 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -20,6 +20,8 @@ jobs: steps: - 
name: Checkout uses: actions/checkout@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 - name: Build and export uses: docker/build-push-action@v2 with: From b5ba045ffcf4bd2d9378bd912bb3a5fad6cb7b16 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:04:46 +0000 Subject: [PATCH 012/173] depends on --- .github/workflows/gpu-integration-2.0.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index 8dfcd74d..ee9b5c02 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -35,6 +35,7 @@ jobs: name: starlette-transformers:gpu path: /tmp/starlette-transformers-gpu.tar pytorch-integration-test-gpu: + needs: pytorch-build-image-gpu runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 From 10b62b7c81d0969908f05cce6ab7bd3b64d20e5c Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:08:04 +0000 Subject: [PATCH 013/173] name --- .github/workflows/gpu-integration-2.0.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index ee9b5c02..bbb5d4c7 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -1,10 +1,6 @@ -name: GPU - Run Integration Tests +name: GPU - Run Integration Tests 2.0 on: - push: - branches: - - main - pull_request: workflow_dispatch: concurrency: From 383dab3085745e2517a60e95b1541fd490c933d4 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:10:39 +0000 Subject: [PATCH 014/173] indent --- .github/workflows/gpu-integration-2.0.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integration-2.0.yaml index bbb5d4c7..d3f28f13 100644 --- a/.github/workflows/gpu-integration-2.0.yaml +++ b/.github/workflows/gpu-integration-2.0.yaml @@ -36,9 +36,9 @@ jobs: env: AWS_REGION: us-east-1 steps: - - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v2 - - name: Load Docker images from previous workflows - run: | - docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker image ls \ No newline at end of file + - name: Download artifacts (Docker images) from previous workflows + uses: actions/download-artifact@v2 + - name: Load Docker images from previous workflows + run: | + docker load --input /tmp/starlette-transformers-gpu.tar + - run: docker image ls \ No newline at end of file From 0d162de206a12601335f7623630b26bbd950f961 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:12:20 +0000 Subject: [PATCH 015/173] name --- .../workflows/{gpu-integration-2.0.yaml => gpu-integ-new.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{gpu-integration-2.0.yaml => gpu-integ-new.yaml} (100%) diff --git a/.github/workflows/gpu-integration-2.0.yaml b/.github/workflows/gpu-integ-new.yaml similarity index 100% rename from .github/workflows/gpu-integration-2.0.yaml rename to .github/workflows/gpu-integ-new.yaml From b22370bf7daf1982c0d7185627228ede8d743289 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:14:39 +0000 Subject: [PATCH 016/173] trigger --- .github/workflows/gpu-integ-new.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gpu-integ-new.yaml 
b/.github/workflows/gpu-integ-new.yaml index d3f28f13..16ed4077 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -1,6 +1,10 @@ name: GPU - Run Integration Tests 2.0 on: + push: + branches: + - main + pull_request: workflow_dispatch: concurrency: From db90673603df9121e9c8791d62aa148618cf6801 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:26:00 +0000 Subject: [PATCH 017/173] colon --- .github/workflows/build-container.yaml | 14 +++++++------- .github/workflows/gpu-integ-new.yaml | 2 +- .github/workflows/gpu-integ-test.yaml | 8 ++++---- .github/workflows/integ-test.yaml | 8 ++++---- .github/workflows/quality.yaml | 8 ++++---- .github/workflows/unit-test.yaml | 8 ++++---- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml index 031207c0..24ffdab5 100644 --- a/.github/workflows/build-container.yaml +++ b/.github/workflows/build-container.yaml @@ -1,13 +1,13 @@ name: "Build applications images" on: - push: - branches: - - main - paths: - - "src/**" - - "dockerfiles/**" - - "scripts/**" + #push: + # branches: + # - main + # paths: + # - "src/**" + # - "dockerfiles/**" + # - "scripts/**" workflow_dispatch: concurrency: diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 16ed4077..9a996b83 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -32,7 +32,7 @@ jobs: - name: Upload starlette-gpu image as artifact uses: actions/upload-artifact@v2 with: - name: starlette-transformers:gpu + name: starlette-transformers-gpu path: /tmp/starlette-transformers-gpu.tar pytorch-integration-test-gpu: needs: pytorch-build-image-gpu diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 058d6ad9..036cdefc 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -1,10 +1,10 @@ name: GPU - Run Integration Tests on: - push: - branches: - - main - pull_request: + #push: + # branches: + # - main + #pull_request: workflow_dispatch: concurrency: diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml index f6f6bba0..97546f5b 100644 --- a/.github/workflows/integ-test.yaml +++ b/.github/workflows/integ-test.yaml @@ -1,10 +1,10 @@ name: CPU - Run Integration Tests on: - push: - branches: - - main - pull_request: + #push: + # branches: + # - main + #pull_request: workflow_dispatch: concurrency: diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 6c7e6c57..b393d203 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -1,10 +1,10 @@ name: Quality Check on: - push: - branches: - - main - pull_request: + #push: + # branches: + # - main + #pull_request: workflow_dispatch: concurrency: diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 599b8f7f..7a344a53 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -1,10 +1,10 @@ name: Run Unit-Tests on: - push: - branches: - - main - pull_request: + #push: + # branches: + # - main + #pull_request: workflow_dispatch: concurrency: From 90875ba2dc5d3d1efd7c1d33ea6521553dde1e6e Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 11:43:42 +0000 Subject: [PATCH 018/173] v4 --- .github/workflows/gpu-integ-new.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 9a996b83..30f3fd0a 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -30,7 +30,9 @@ jobs: tags: starlette-transformers:gpu outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar - name: Upload starlette-gpu image as artifact - uses: actions/upload-artifact@v2 + env: + ACTIONS_STEP_DEBUG: true + uses: actions/upload-artifact@v4 with: name: starlette-transformers-gpu path: /tmp/starlette-transformers-gpu.tar @@ -39,6 +41,7 @@ jobs: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 + ACTIONS_STEP_DEBUG: true steps: - name: Download artifacts (Docker images) from previous workflows uses: actions/download-artifact@v2 From 9066cc8129af5ba191aaf29a8e684c4554f46427 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 12:01:43 +0000 Subject: [PATCH 019/173] download --- .github/workflows/gpu-integ-new.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 30f3fd0a..0c419557 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -44,7 +44,10 @@ jobs: ACTIONS_STEP_DEBUG: true steps: - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 + with: + name: starlette-transformers-gpu + path: /tmp/starlette-transformers-gpu.tar - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar From 6fa3cb03caa62155f9b731a6775be7e49c69e2a2 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 13:06:37 +0000 Subject: [PATCH 020/173] ls --- .github/workflows/gpu-integ-new.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 0c419557..ded26a36 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -43,6 +43,8 @@ jobs: AWS_REGION: us-east-1 ACTIONS_STEP_DEBUG: true steps: + - run: | + ls -all /tmp/starlette-transformers-gpu.tar - name: Download artifacts (Docker images) from previous workflows uses: actions/download-artifact@v4 with: From 667299a0917daf9e5db2d69231cb821efcc4c783 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 13:12:34 +0000 Subject: [PATCH 021/173] indent --- .github/workflows/gpu-integ-new.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index ded26a36..4de779a3 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -44,7 +44,7 @@ jobs: ACTIONS_STEP_DEBUG: true steps: - run: | - ls -all /tmp/starlette-transformers-gpu.tar + ls -all /tmp/starlette-transformers-gpu.tar - name: Download artifacts (Docker images) from previous workflows uses: actions/download-artifact@v4 with: From e64a76a6f275ab36054b014120149846c09f920a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 13:28:23 +0000 Subject: [PATCH 022/173] cache --- .github/workflows/gpu-integ-new.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 4de779a3..6ecd6f85 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -23,12 +23,14 @@ jobs: - 
name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Build and export - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v4 with: context: . file: dockerfiles/pytorch/gpu/Dockerfile tags: starlette-transformers:gpu outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - name: Upload starlette-gpu image as artifact env: ACTIONS_STEP_DEBUG: true @@ -43,13 +45,13 @@ jobs: AWS_REGION: us-east-1 ACTIONS_STEP_DEBUG: true steps: - - run: | - ls -all /tmp/starlette-transformers-gpu.tar - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v3 with: name: starlette-transformers-gpu path: /tmp/starlette-transformers-gpu.tar + - run: | + ls -all /tmp/starlette-transformers-gpu.tar - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar From 6b6b33c6e3224851a5af513f5cce935316b19dd9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 13:51:02 +0000 Subject: [PATCH 023/173] v4 --- .github/workflows/gpu-integ-new.yaml | 2 +- dockerfiles/pytorch/gpu/Dockerfile | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 6ecd6f85..d26105a2 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -46,7 +46,7 @@ jobs: ACTIONS_STEP_DEBUG: true steps: - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: starlette-transformers-gpu path: /tmp/starlette-transformers-gpu.tar diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index c22c06ea..c2fe400a 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,4 +1,11 @@ -FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as base + +WORKDIR /app + +ARG PYTHON_WHEEL_PATH=/var/cache/python/wheels +RUN mkdir -p ${PYTHON_WHEEL_PATH} + +FROM base as builder LABEL maintainer="Hugging Face" @@ -26,11 +33,14 @@ RUN apt-get update \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} -WORKDIR /app - # install dependencies COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +RUN pip install \ + --no-cache-dir \ + --no-index \ + --find-links=${PYTHON_WHEEL_PATH} + -r /tmp/requirements.txt && \ + rm /tmp/requirements.txt # install huggingface inference toolkit COPY requirements.txt /tmp/requirements.txt From 9c223fea66b7805d359fdc199ade49336428a28a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 13:55:12 +0000 Subject: [PATCH 024/173] revert --- dockerfiles/pytorch/gpu/Dockerfile | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index c2fe400a..f2918636 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,11 +1,4 @@ -FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as base - -WORKDIR /app - -ARG PYTHON_WHEEL_PATH=/var/cache/python/wheels -RUN mkdir -p ${PYTHON_WHEEL_PATH} - -FROM base as builder +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 LABEL 
maintainer="Hugging Face" @@ -33,14 +26,11 @@ RUN apt-get update \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} +WORKDIR /app + # install dependencies COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt -RUN pip install \ - --no-cache-dir \ - --no-index \ - --find-links=${PYTHON_WHEEL_PATH} - -r /tmp/requirements.txt && \ - rm /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt # install huggingface inference toolkit COPY requirements.txt /tmp/requirements.txt @@ -55,4 +45,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file From 6036a44a8ac391af5e7afed76c2224a97f6f956f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 15:03:57 +0000 Subject: [PATCH 025/173] path --- .github/workflows/gpu-integ-new.yaml | 14 +++++++------- makefile | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index d26105a2..6f3fc9db 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -21,14 +21,14 @@ jobs: - name: Checkout uses: actions/checkout@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Build and export - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . file: dockerfiles/pytorch/gpu/Dockerfile tags: starlette-transformers:gpu - outputs: type=oci,dest=/tmp/starlette-transformers-gpu.tar + outputs: type=docker,dest=/tmp/starlette-transformers-gpu.tar cache-from: type=local,src=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - name: Upload starlette-gpu image as artifact @@ -37,7 +37,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: starlette-transformers-gpu - path: /tmp/starlette-transformers-gpu.tar + path: /tmp/ pytorch-integration-test-gpu: needs: pytorch-build-image-gpu runs-on: [single-gpu, nvidia-gpu, t4, ci] @@ -49,10 +49,10 @@ jobs: uses: actions/download-artifact@v4 with: name: starlette-transformers-gpu - path: /tmp/starlette-transformers-gpu.tar + path: /tmp - run: | - ls -all /tmp/starlette-transformers-gpu.tar + ls /tmp/ *.tar - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker image ls \ No newline at end of file + - run: docker image ls -a \ No newline at end of file diff --git a/makefile b/makefile index beaae9d8..fa4bdb82 100644 --- a/makefile +++ b/makefile @@ -20,10 +20,10 @@ quality: style: ruff $(check_dirs) --fix -build-torch-gpu: - docker build -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . +torch-gpu: + docker build --no-cache -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . -build-torch-cpu: +torch-cpu: docker build -f dockerfiles/pytorch/cpu/Dockerfile -t starlette-transformers:cpu . 
run-classification: From 3731fd41c433630159c3dafb7fe2c06d4372aaf7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 15:20:24 +0000 Subject: [PATCH 026/173] slash --- .github/workflows/gpu-integ-new.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 6f3fc9db..280bd3a2 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -37,7 +37,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: starlette-transformers-gpu - path: /tmp/ + path: /tmp pytorch-integration-test-gpu: needs: pytorch-build-image-gpu runs-on: [single-gpu, nvidia-gpu, t4, ci] From ecde720675a8249158275c7ec142dc813ee85ae9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 15:23:39 +0000 Subject: [PATCH 027/173] tar --- .github/workflows/gpu-integ-new.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 280bd3a2..f09356c7 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -37,7 +37,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: starlette-transformers-gpu - path: /tmp + path: /tmp/starlette-transformers-gpu.tar pytorch-integration-test-gpu: needs: pytorch-build-image-gpu runs-on: [single-gpu, nvidia-gpu, t4, ci] From 85a2996df74d184fd37be05aad35418b28183afe Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 14 Feb 2024 15:41:25 +0000 Subject: [PATCH 028/173] path --- .github/workflows/gpu-integ-new.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index f09356c7..51ab33ee 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -50,8 +50,6 @@ jobs: with: name: starlette-transformers-gpu path: /tmp - - run: | - ls /tmp/ *.tar - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar From cf2b0ae33e3f53bfa605364655a62bc6c147ca5d Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 15 Feb 2024 12:12:27 +0000 Subject: [PATCH 029/173] reduce image size --- dockerfiles/pytorch/gpu/Dockerfile | 41 ++- dockerfiles/pytorch/gpu/requirements.txt | 3 + makefile | 2 +- pyproject.toml | 6 +- scripts/entrypoint.sh | 2 +- setup.py | 2 +- tests/integ/test_container_new.py | 399 +++++++++++++++++++++++ tox.ini | 0 8 files changed, 441 insertions(+), 14 deletions(-) create mode 100644 tests/integ/test_container_new.py create mode 100644 tox.ini diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index f2918636..52af380d 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,9 +1,12 @@ -FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as builder +SHELL ["/bin/bash", "-c"] LABEL maintainer="Hugging Face" ENV DEBIAN_FRONTEND=noninteractive +WORKDIR /build + RUN apt-get update \ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ && apt-get install -y \ @@ -20,21 +23,44 @@ RUN apt-get update \ protobuf-compiler \ python3 \ python3-pip \ + python3.10-venv \ # audio libsndfile1-dev \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} +# install dependencies +COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt +COPY requirements.txt requirements-toolkit.txt + +# install wheel 
and setuptools +RUN pip install -U pip && \ + pip download --dest ./wheels -r requirements-docker.txt && \ + pip download --dest ./wheels -r requirements-toolkit.txt && \ + pip install --no-index --find-links=./wheels -r requirements-docker.txt && \ + pip install --no-index --find-links=./wheels -r requirements-toolkit.txt + +### Runner + +FROM nvidia/cuda:12.3.1-base-ubuntu22.04 as runner +SHELL ["/bin/bash", "-c"] + WORKDIR /app +COPY --from=builder /build/wheels /app/wheels + +RUN apt-get update -y && apt-get upgrade -y && \ + apt-get install -y \ + python3 \ + python3-pip # install dependencies -COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt +COPY requirements.txt requirements-toolkit.txt -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +RUN pip install -U pip && \ + pip install --no-index --find-links=/app/wheels -r requirements-docker.txt && \ + pip install --no-index --find-links=/app/wheels -r requirements-toolkit.txt # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit @@ -44,5 +70,4 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/requirements.txt b/dockerfiles/pytorch/gpu/requirements.txt index 165f27b8..04d440db 100644 --- a/dockerfiles/pytorch/gpu/requirements.txt +++ b/dockerfiles/pytorch/gpu/requirements.txt @@ -1,3 +1,6 @@ +cmake==3.28.3 +wheel==0.42.0 +setuptools==69.1.0 torch==2.1.2 torchvision==0.16.2 transformers[sklearn,sentencepiece,audio,vision]==4.37.2 diff --git a/makefile b/makefile index fa4bdb82..3f3dbb6c 100644 --- a/makefile +++ b/makefile @@ -21,7 +21,7 @@ style: ruff $(check_dirs) --fix torch-gpu: - docker build --no-cache -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . + docker build -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . torch-cpu: docker build -f dockerfiles/pytorch/cpu/Dockerfile -t starlette-transformers:cpu . diff --git a/pyproject.toml b/pyproject.toml index 96ef9084..2627f501 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,12 @@ line-length = 119 # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.8. -target-version = "py39" +# Assume Python 3.11. +target-version = "py311" [tool.ruff.per-file-ignores] "__init__.py" = ["F401"] [tool.isort] profile = "black" -known_third_party = ["transforemrs", "starlette", "huggingface_hub"] +known_third_party = ["transformers", "starlette", "huggingface_hub"] \ No newline at end of file diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 53b6e4d0..8544a63c 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -10,4 +10,4 @@ if [[ ! 
-z "${HF_MODEL_DIR}" ]]; then fi # start the server -python3 -m uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000 \ No newline at end of file +uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000 \ No newline at end of file diff --git a/setup.py b/setup.py index 1b567d4b..c4e6dc97 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ # Hugging Face specific dependencies # framework specific dependencies -extras["torch"] = ["torch>=1.8.0", "torchaudio"] +extras["torch"] = ["torch>=2.1.2", "torchaudio"] extras["tensorflow"] = ["tensorflow==2.9.0"] # test and quality extras["test"] = [ diff --git a/tests/integ/test_container_new.py b/tests/integ/test_container_new.py new file mode 100644 index 00000000..9197d606 --- /dev/null +++ b/tests/integ/test_container_new.py @@ -0,0 +1,399 @@ +import random +import tempfile +import time + +import docker +import pytest +import requests +from docker.client import DockerClient +from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf +from integ.config import task2input, task2model, task2output, task2validation +from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests + +IS_GPU = _run_slow_tests +DEVICE = "gpu" if IS_GPU else "cpu" + +client = docker.DockerClient(base_url='unix://var/run/docker.sock') + + +def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): + try: + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + except Exception: + return None + + +def wait_for_container_to_be_ready(base_url): + t = 0 + while t < 10: + try: + response = requests.get(f"{base_url}/health") + if response.status_code == 200: + break + except Exception: + pass + finally: + t += 1 + time.sleep(2) + return True + + +def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): + BASE_URL = f"http://localhost:{port}" + input = task2input[task] + # health check + wait_for_container_to_be_ready(BASE_URL) + if ( + task == "image-classification" + or task == "object-detection" + or task == "image-segmentation" + or task == "zero-shot-image-classification" + ): + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} + ).json() + elif task == "automatic-speech-recognition" or task == "audio-classification": + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} + ).json() + elif task == "text-to-image": + prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content + else: + prediction = requests.post(f"{BASE_URL}", json=input).json() + assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + + +@require_torch +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + # diffusers + "text-to-image", + ], +) +def 
test_pt_container_remote_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = f"starlette-transformers:{DEVICE}" + framework = "pytorch" + model = task2model[task][framework] + port = random.randint(5000, 6000) + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + + make_sure_other_containers_are_stopped(client, container_name) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_ID": model, "HF_TASK": task}, + detach=True, + # GPU + device_requests=device_request, + ) + # time.sleep(5) + + verify_task(container, task, port) + container.stop() + container.remove() + + +@require_torch +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + # diffusers + "text-to-image", + ], +) +def test_pt_container_local_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = f"starlette-transformers:{DEVICE}" + framework = "pytorch" + model = task2model[task][framework] + port = random.randint(5000, 6000) + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + make_sure_other_containers_are_stopped(client, container_name) + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(model, tmpdirname, framework="pytorch") + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() + + +@require_torch +@pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], +) +def test_pt_container_custom_handler(repository_id) -> None: + container_name = "integration-test-custom" + container_image = f"starlette-transformers:{DEVICE}" + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + port = random.randint(5000, 6000) + + make_sure_other_containers_are_stopped(client, container_name) + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(repository_id, tmpdirname) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_DIR": tmpdirname, + }, + volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ) + BASE_URL = f"http://localhost:{port}" + wait_for_container_to_be_ready(BASE_URL) + payload = 
{"inputs": "this is a test"} + prediction = requests.post(f"{BASE_URL}", json=payload).json() + assert prediction == payload + # time.sleep(5) + container.stop() + container.remove() + + +@require_torch +@pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], +) +def test_pt_container_legacy_custom_pipeline(repository_id) -> None: + container_name = "integration-test-custom" + container_image = f"starlette-transformers:{DEVICE}" + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + port = random.randint(5000, 6000) + + make_sure_other_containers_are_stopped(client, container_name) + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(repository_id, tmpdirname) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_DIR": tmpdirname, + }, + volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ) + BASE_URL = f"http://localhost:{port}" + wait_for_container_to_be_ready(BASE_URL) + payload = {"inputs": "this is a test"} + prediction = requests.post(f"{BASE_URL}", json=payload).json() + assert prediction == payload + # time.sleep(5) + container.stop() + container.remove() + + +@require_tf +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + ], +) +def test_tf_container_remote_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = f"starlette-transformers:{DEVICE}" + framework = "tensorflow" + model = task2model[task][framework] + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + if model is None: + pytest.skip("no supported TF model") + port = random.randint(5000, 6000) + make_sure_other_containers_are_stopped(client, container_name) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_ID": model, "HF_TASK": task}, + detach=True, + # GPU + device_requests=device_request, + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() + + +@require_tf +@pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + # TODO currently not supported due to multimodality input + # "visual-question-answering", + # "zero-shot-image-classification", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + 
], +) +def test_tf_container_local_model(task) -> None: + container_name = f"integration-test-{task}" + container_image = f"starlette-transformers:{DEVICE}" + framework = "tensorflow" + model = task2model[task][framework] + device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] + if model is None: + pytest.skip("no supported TF model") + port = random.randint(5000, 6000) + make_sure_other_containers_are_stopped(client, container_name) + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(model, tmpdirname, framework=framework) + container = client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ) + # time.sleep(5) + verify_task(container, task, port) + container.stop() + container.remove() + + +# @require_tf +# @pytest.mark.parametrize( +# "repository_id", +# ["philschmid/custom-pipeline-text-classification"], +# ) +# def test_tf_cpu_container_custom_pipeline(repository_id) -> None: +# container_name = "integration-test-custom" +# container_image = "starlette-transformers:cpu" +# make_sure_other_containers_are_stopped(client, container_name) +# with tempfile.TemporaryDirectory() as tmpdirname: +# # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py +# storage_dir = _load_repository_from_hf("philschmid/custom-pipeline-text-classification", tmpdirname) +# container = client.containers.run( +# container_image, +# name=container_name, +# ports={"5000": "5000"}, +# environment={ +# "HF_MODEL_DIR": tmpdirname, +# }, +# volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, +# detach=True, +# # GPU +# # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] +# ) +# BASE_URL = "http://localhost:5000" +# wait_for_container_to_be_ready(BASE_URL) +# payload = {"inputs": "this is a test"} +# prediction = requests.post(f"{BASE_URL}", json=payload).json() +# assert prediction == payload +# # time.sleep(5) +# container.stop() +# container.remove() diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..e69de29b From 356c81375f06cf77d3da0c714320927050768917 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 15 Feb 2024 12:46:01 +0000 Subject: [PATCH 030/173] test_integ_new --- .github/workflows/gpu-integ-new.yaml | 10 +- tests/integ/test_container_new.py | 307 +-------------------------- 2 files changed, 14 insertions(+), 303 deletions(-) diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml index 51ab33ee..5aa05d89 100644 --- a/.github/workflows/gpu-integ-new.yaml +++ b/.github/workflows/gpu-integ-new.yaml @@ -53,4 +53,12 @@ jobs: - name: Load Docker images from previous workflows run: | docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker image ls -a \ No newline at end of file + - run: docker run -d starlette-transformers + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install Python dependencies + run: pip install -e .[test,dev,torch] + - name: Run Integration Tests + run: RUN_SLOW=True pytest tests/integ/test_container_new.py \ No newline at end of file diff --git a/tests/integ/test_container_new.py b/tests/integ/test_container_new.py 
index 9197d606..5de924c5 100644 --- a/tests/integ/test_container_new.py +++ b/tests/integ/test_container_new.py @@ -13,8 +13,7 @@ IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" -client = docker.DockerClient(base_url='unix://var/run/docker.sock') - +client = docker.from_env() def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): try: @@ -40,11 +39,10 @@ def wait_for_container_to_be_ready(base_url): return True -def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): +def verify_task(task: str, port: int = 5000, framework: str = "pytorch"): BASE_URL = f"http://localhost:{port}" input = task2input[task] - # health check - wait_for_container_to_be_ready(BASE_URL) + if ( task == "image-classification" or task == "object-detection" @@ -97,303 +95,8 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework: ], ) def test_pt_container_remote_model(task) -> None: - container_name = f"integration-test-{task}" - container_image = f"starlette-transformers:{DEVICE}" - framework = "pytorch" - model = task2model[task][framework] - port = random.randint(5000, 6000) - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - - make_sure_other_containers_are_stopped(client, container_name) - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_ID": model, "HF_TASK": task}, - detach=True, - # GPU - device_requests=device_request, - ) - # time.sleep(5) - verify_task(container, task, port) - container.stop() - container.remove() - - -@require_torch -@pytest.mark.parametrize( - "task", - [ - "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - # TODO currently not supported due to multimodality input - # "visual-question-answering", - # "zero-shot-image-classification", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - # diffusers - "text-to-image", - ], -) -def test_pt_container_local_model(task) -> None: - container_name = f"integration-test-{task}" - container_image = f"starlette-transformers:{DEVICE}" framework = "pytorch" - model = task2model[task][framework] - port = random.randint(5000, 6000) - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - make_sure_other_containers_are_stopped(client, container_name) - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(model, tmpdirname, framework="pytorch") - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, - volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ) - # time.sleep(5) - verify_task(container, task, port) - container.stop() - container.remove() - - -@require_torch -@pytest.mark.parametrize( - "repository_id", - ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], -) -def 
test_pt_container_custom_handler(repository_id) -> None: - container_name = "integration-test-custom" - container_image = f"starlette-transformers:{DEVICE}" - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - port = random.randint(5000, 6000) - - make_sure_other_containers_are_stopped(client, container_name) - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(repository_id, tmpdirname) - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={ - "HF_MODEL_DIR": tmpdirname, - }, - volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ) - BASE_URL = f"http://localhost:{port}" - wait_for_container_to_be_ready(BASE_URL) - payload = {"inputs": "this is a test"} - prediction = requests.post(f"{BASE_URL}", json=payload).json() - assert prediction == payload - # time.sleep(5) - container.stop() - container.remove() - - -@require_torch -@pytest.mark.parametrize( - "repository_id", - ["philschmid/custom-pipeline-text-classification"], -) -def test_pt_container_legacy_custom_pipeline(repository_id) -> None: - container_name = "integration-test-custom" - container_image = f"starlette-transformers:{DEVICE}" - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - port = random.randint(5000, 6000) - - make_sure_other_containers_are_stopped(client, container_name) - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(repository_id, tmpdirname) - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={ - "HF_MODEL_DIR": tmpdirname, - }, - volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ) - BASE_URL = f"http://localhost:{port}" - wait_for_container_to_be_ready(BASE_URL) - payload = {"inputs": "this is a test"} - prediction = requests.post(f"{BASE_URL}", json=payload).json() - assert prediction == payload - # time.sleep(5) - container.stop() - container.remove() - - -@require_tf -@pytest.mark.parametrize( - "task", - [ - "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - # TODO currently not supported due to multimodality input - # "visual-question-answering", - # "zero-shot-image-classification", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - ], -) -def test_tf_container_remote_model(task) -> None: - container_name = f"integration-test-{task}" - container_image = f"starlette-transformers:{DEVICE}" - framework = "tensorflow" - model = task2model[task][framework] - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - if model is None: - pytest.skip("no supported TF model") - port = random.randint(5000, 6000) - make_sure_other_containers_are_stopped(client, container_name) - container = client.containers.run( - 
container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_ID": model, "HF_TASK": task}, - detach=True, - # GPU - device_requests=device_request, - ) - # time.sleep(5) - verify_task(container, task, port) - container.stop() - container.remove() - - -@require_tf -@pytest.mark.parametrize( - "task", - [ - "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - # TODO currently not supported due to multimodality input - # "visual-question-answering", - # "zero-shot-image-classification", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - ], -) -def test_tf_container_local_model(task) -> None: - container_name = f"integration-test-{task}" - container_image = f"starlette-transformers:{DEVICE}" - framework = "tensorflow" - model = task2model[task][framework] - device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else [] - if model is None: - pytest.skip("no supported TF model") - port = random.randint(5000, 6000) - make_sure_other_containers_are_stopped(client, container_name) - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(model, tmpdirname, framework=framework) - container = client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, - volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ) - # time.sleep(5) - verify_task(container, task, port) - container.stop() - container.remove() - + port = 5000 #random.randint(5000, 6000) -# @require_tf -# @pytest.mark.parametrize( -# "repository_id", -# ["philschmid/custom-pipeline-text-classification"], -# ) -# def test_tf_cpu_container_custom_pipeline(repository_id) -> None: -# container_name = "integration-test-custom" -# container_image = "starlette-transformers:cpu" -# make_sure_other_containers_are_stopped(client, container_name) -# with tempfile.TemporaryDirectory() as tmpdirname: -# # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py -# storage_dir = _load_repository_from_hf("philschmid/custom-pipeline-text-classification", tmpdirname) -# container = client.containers.run( -# container_image, -# name=container_name, -# ports={"5000": "5000"}, -# environment={ -# "HF_MODEL_DIR": tmpdirname, -# }, -# volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}}, -# detach=True, -# # GPU -# # device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] -# ) -# BASE_URL = "http://localhost:5000" -# wait_for_container_to_be_ready(BASE_URL) -# payload = {"inputs": "this is a test"} -# prediction = requests.post(f"{BASE_URL}", json=payload).json() -# assert prediction == payload -# # time.sleep(5) -# container.stop() -# container.remove() + verify_task(task, port, framework) \ No newline at end of file From c2f661814783d9e4a47068421b146ddd2324a755 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 15 Feb 2024 13:21:31 +0000 Subject: [PATCH 031/173] tenacity --- 
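The next two patches iterate on tenacity's decorator API before settling on a working call: patch 032 passes an exception class from the wrong module, and patch 033 fixes the module path but still uses `retry_if_exception`, which expects a predicate function rather than an exception type (`retry_if_exception_type` is the type-based variant). A minimal sketch of the retry the series is converging on, with illustrative names and values:

import requests
import tenacity

@tenacity.retry(
    wait=tenacity.wait_random(min=1, max=2),
    retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError),
    stop=tenacity.stop_after_attempt(5),
    reraise=True,
)
def probe(url: str) -> dict:
    # Re-issues the request while the container is not yet accepting
    # connections; gives up and re-raises after five attempts.
    return requests.post(url, json={"inputs": "this is a test"}).json()
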
.github/workflows/gpu-integ-new.yaml | 64 ---------------- .github/workflows/gpu-integ-test.yaml | 4 +- setup.py | 1 + tests/integ/test_container.py | 3 +- tests/integ/test_container_new.py | 102 -------------------------- 5 files changed, 5 insertions(+), 169 deletions(-) delete mode 100644 .github/workflows/gpu-integ-new.yaml delete mode 100644 tests/integ/test_container_new.py diff --git a/.github/workflows/gpu-integ-new.yaml b/.github/workflows/gpu-integ-new.yaml deleted file mode 100644 index 5aa05d89..00000000 --- a/.github/workflows/gpu-integ-new.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: GPU - Run Integration Tests 2.0 - -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - pytorch-build-image-gpu: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Build and export - uses: docker/build-push-action@v5 - with: - context: . - file: dockerfiles/pytorch/gpu/Dockerfile - tags: starlette-transformers:gpu - outputs: type=docker,dest=/tmp/starlette-transformers-gpu.tar - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - - name: Upload starlette-gpu image as artifact - env: - ACTIONS_STEP_DEBUG: true - uses: actions/upload-artifact@v4 - with: - name: starlette-transformers-gpu - path: /tmp/starlette-transformers-gpu.tar - pytorch-integration-test-gpu: - needs: pytorch-build-image-gpu - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - ACTIONS_STEP_DEBUG: true - steps: - - name: Download artifacts (Docker images) from previous workflows - uses: actions/download-artifact@v4 - with: - name: starlette-transformers-gpu - path: /tmp - - name: Load Docker images from previous workflows - run: | - docker load --input /tmp/starlette-transformers-gpu.tar - - run: docker run -d starlette-transformers - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Run Integration Tests - run: RUN_SLOW=True pytest tests/integ/test_container_new.py \ No newline at end of file diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 036cdefc..d13146ae 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -20,10 +20,10 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.11 - name: Install Python dependencies run: pip install -e .[test,dev,torch] - name: Build Docker diff --git a/setup.py b/setup.py index c4e6dc97..b22b0ef0 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ "mock==2.0.0", "docker", "requests", + "tenacity" ] extras["quality"] = [ "black", diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index 9197d606..c1baaf70 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -9,6 +9,7 @@ from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf from integ.config import task2input, task2model, task2output, task2validation from transformers.testing_utils import require_torch, slow, require_tf, 
_run_slow_tests +from tenacity import retry IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -39,7 +40,7 @@ def wait_for_container_to_be_ready(base_url): time.sleep(2) return True - +@retry def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): BASE_URL = f"http://localhost:{port}" input = task2input[task] diff --git a/tests/integ/test_container_new.py b/tests/integ/test_container_new.py deleted file mode 100644 index 5de924c5..00000000 --- a/tests/integ/test_container_new.py +++ /dev/null @@ -1,102 +0,0 @@ -import random -import tempfile -import time - -import docker -import pytest -import requests -from docker.client import DockerClient -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf -from integ.config import task2input, task2model, task2output, task2validation -from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests - -IS_GPU = _run_slow_tests -DEVICE = "gpu" if IS_GPU else "cpu" - -client = docker.from_env() - -def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): - try: - previous = client.containers.get(container_name) - previous.stop() - previous.remove() - except Exception: - return None - - -def wait_for_container_to_be_ready(base_url): - t = 0 - while t < 10: - try: - response = requests.get(f"{base_url}/health") - if response.status_code == 200: - break - except Exception: - pass - finally: - t += 1 - time.sleep(2) - return True - - -def verify_task(task: str, port: int = 5000, framework: str = "pytorch"): - BASE_URL = f"http://localhost:{port}" - input = task2input[task] - - if ( - task == "image-classification" - or task == "object-detection" - or task == "image-segmentation" - or task == "zero-shot-image-classification" - ): - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} - ).json() - elif task == "automatic-speech-recognition" or task == "audio-classification": - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} - ).json() - elif task == "text-to-image": - prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content - else: - prediction = requests.post(f"{BASE_URL}", json=input).json() - assert task2validation[task](result=prediction, snapshot=task2output[task]) is True - - -@require_torch -@pytest.mark.parametrize( - "task", - [ - "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - # TODO currently not supported due to multimodality input - # "visual-question-answering", - # "zero-shot-image-classification", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - # diffusers - "text-to-image", - ], -) -def test_pt_container_remote_model(task) -> None: - - framework = "pytorch" - port = 5000 #random.randint(5000, 6000) - - verify_task(task, port, framework) \ No newline at end of file From 79ee67e83947b95d62744c1c7266f96b9ebca11e Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 15 Feb 2024 13:55:54 +0000 Subject: [PATCH 032/173] retry if --- tests/integ/test_container.py | 11 +++++++++-- 
1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index c1baaf70..14c55930 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -9,7 +9,10 @@ from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf from integ.config import task2input, task2model, task2output, task2validation from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests -from tenacity import retry +import tenacity +import logging + +logging.basicConfig(level = "DEBUG") IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -40,7 +43,11 @@ def wait_for_container_to_be_ready(base_url): time.sleep(2) return True -@retry +@tenacity.retry( + wait = tenacity.wait_random(min=1, max=2), + retry = tenacity.retry_if_exception(requests.exception.ConnectionError), + stop = tenacity.retry.stop_after_attempt(5) +) def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): BASE_URL = f"http://localhost:{port}" input = task2input[task] From 12af852c0731b5868311d963e6082a1dafb0f603 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 15 Feb 2024 14:00:47 +0000 Subject: [PATCH 033/173] retry config --- tests/integ/test_container.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py index 14c55930..5b9cb793 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/test_container.py @@ -19,7 +19,6 @@ client = docker.DockerClient(base_url='unix://var/run/docker.sock') - def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): try: previous = client.containers.get(container_name) @@ -45,8 +44,8 @@ def wait_for_container_to_be_ready(base_url): @tenacity.retry( wait = tenacity.wait_random(min=1, max=2), - retry = tenacity.retry_if_exception(requests.exception.ConnectionError), - stop = tenacity.retry.stop_after_attempt(5) + retry = tenacity.retry_if_exception(requests.exceptions.ConnectionError), + stop = tenacity.stop_after_attempt(5) ) def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): BASE_URL = f"http://localhost:{port}" From 86977238a0427cd2516b3723e9310c246026fa74 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 16 Feb 2024 11:36:20 +0000 Subject: [PATCH 034/173] uv & venv --- dockerfiles/pytorch/gpu/Dockerfile | 30 ++++++++++++++++-------------- setup.py | 4 ++-- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 52af380d..f4ddb60c 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -5,7 +5,7 @@ LABEL maintainer="Hugging Face" ENV DEBIAN_FRONTEND=noninteractive -WORKDIR /build +WORKDIR /app RUN apt-get update \ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ @@ -35,11 +35,12 @@ COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt COPY requirements.txt requirements-toolkit.txt # install wheel and setuptools -RUN pip install -U pip && \ - pip download --dest ./wheels -r requirements-docker.txt && \ - pip download --dest ./wheels -r requirements-toolkit.txt && \ - pip install --no-index --find-links=./wheels -r requirements-docker.txt && \ - pip install --no-index --find-links=./wheels -r requirements-toolkit.txt +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + uv 
venv && \ + source .venv/bin/activate && \ + uv pip install --no-cache-dir -r requirements-docker.txt && \ + uv pip install --no-cache-dir -r requirements-toolkit.txt ### Runner @@ -47,20 +48,21 @@ FROM nvidia/cuda:12.3.1-base-ubuntu22.04 as runner SHELL ["/bin/bash", "-c"] WORKDIR /app -COPY --from=builder /build/wheels /app/wheels RUN apt-get update -y && apt-get upgrade -y && \ apt-get install -y \ python3 \ - python3-pip + python3-pip \ + python3.10-venv \ + curl # install dependencies -COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt -COPY requirements.txt requirements-toolkit.txt +COPY --from=builder /app . -RUN pip install -U pip && \ - pip install --no-index --find-links=/app/wheels -r requirements-docker.txt && \ - pip install --no-index --find-links=/app/wheels -r requirements-toolkit.txt +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + source .venv/bin/activate && \ + ls -all # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit @@ -70,4 +72,4 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh -ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"] \ No newline at end of file diff --git a/setup.py b/setup.py index b22b0ef0..c95037c8 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.1.0" +VERSION = "0.2.0" # Ubuntu packages @@ -16,7 +16,7 @@ install_requires = [ # transformers - "transformers[sklearn,sentencepiece]>=4.25.1", + "transformers[sklearn,sentencepiece]>=4.37.2", "huggingface_hub>=0.13.3", # api stuff "orjson", From 3587eda272066f5ed459f0c85931b49837df50be Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 17 Feb 2024 10:44:54 +0000 Subject: [PATCH 035/173] fast unit tests passing --- .gitignore | 2 ++ requirements.txt | 2 +- setup.cfg | 1 - setup.py | 14 +++----- .../diffusers_utils.py | 1 + src/huggingface_inference_toolkit/utils.py | 25 +++++++------ tests/unit/test_diffusers.py | 23 +++++++++--- tox.ini | 35 +++++++++++++++++++ 8 files changed, 77 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 4042db87..c2013bef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ # Docker project generated files to ignore # if you want to ignore files created by your editor/tools, # please consider a global .gitignore https://help.github.com/articles/ignoring-files +.egg-info .vagrant* +__pycache__ bin docker/docker .*.swp diff --git a/requirements.txt b/requirements.txt index 8a178f8d..0437bb78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ orjson starlette uvicorn pandas -huggingface_hub>=0.13.2 \ No newline at end of file +huggingface_hub>=0.20.3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 924033ba..21085086 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,6 @@ known_third_party = torch robyn - line_length = 119 lines_after_imports = 2 multi_line_output = 3 diff --git a/setup.py b/setup.py index c95037c8..f35c623c 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ install_requires = [ # transformers "transformers[sklearn,sentencepiece]>=4.37.2", - "huggingface_hub>=0.13.3", + "huggingface_hub>=0.20.3", # api stuff "orjson", # "robyn", @@ -27,12 +27,13 @@ "librosa", 
"pyctcdecode>=0.3.0", "phonemizer", + "ffmpeg" ] extras = {} extras["st"] = ["sentence_transformers==2.3.1"] -extras["diffusers"] = ["diffusers==0.26.1", "accelerate==0.26.1"] +extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] # Hugging Face specific dependencies @@ -53,10 +54,8 @@ "tenacity" ] extras["quality"] = [ - "black", "isort", - "flake8", - "ruff", + "ruff" ] setup( @@ -64,9 +63,6 @@ version=VERSION, author="HuggingFace", description=".", - # long_description=open("README.md", "r", encoding="utf-8").read(), - # long_description_content_type="text/markdown", - # keywords="NLP deep-learning transformer pytorch tensorflow BERT GPT GPT-2 AWS Amazon SageMaker Cloud", url="", package_dir={"": "src"}, packages=find_packages(where="src"), @@ -83,7 +79,7 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], ) diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 7068df9d..d8bf9542 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -32,6 +32,7 @@ def __init__(self, model_dir: str, device: str = None): # needs "cuda" for GPU self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config) except Exception: pass + self.pipeline.to(device) def __call__( diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index ffe8d2c3..5d85b80b 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -20,7 +20,7 @@ ) logger = logging.getLogger(__name__) -logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) +#logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) if is_tf_available(): @@ -99,6 +99,7 @@ def _is_gpu_available(): if is_tf_available(): return True if len(tf.config.list_physical_devices("GPU")) > 0 else False elif is_torch_available(): + logger.info(f"CUDA: {torch.cuda.is_available()}") return torch.cuda.is_available() else: raise RuntimeError( @@ -212,7 +213,10 @@ def get_device(): """ The get device function will return the device for the DL Framework. 
""" - if _is_gpu_available(): + gpu = _is_gpu_available() + logger.info(f"GPU Available: {gpu}") + + if gpu: return 0 else: return -1 @@ -264,17 +268,18 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): + + language = kwargs.get("language") + if not language: + # If no lang parameter was passed, english is defult + language = "english" + # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - hf_pipeline._preprocess_params["ignore_warning"] = True + #hf_pipeline._preprocess_params["ignore_warning"] = True # set decoder to english by default - # TODO: replace when transformers 4.26.0 is release with - # hf_pipeline.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe") - hf_pipeline.tokenizer.language = "english" - hf_pipeline.tokenizer.task = "transcribe" - hf_pipeline.model.config.forced_decoder_ids = [ - (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:]) - ] + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe") + return hf_pipeline diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py index 32b10cf0..0f2890a8 100644 --- a/tests/unit/test_diffusers.py +++ b/tests/unit/test_diffusers.py @@ -7,12 +7,17 @@ from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, IEAutoPipelineForText2Image from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline +import logging + +logging.basicConfig(level="DEBUG") @require_torch def test_get_diffusers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) assert isinstance(pipe, IEAutoPipelineForText2Image) @@ -23,17 +28,25 @@ def test_get_diffusers_pipeline(): def test_pipe_on_gpu(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) - pipe = get_pipeline("text-to-image", storage_dir.as_posix()) - assert pipe.device.type == "cuda" + pipe = get_pipeline( + "text-to-image", + storage_dir.as_posix() + ) + logging.error(f"Pipe: {pipe.pipeline}") + assert pipe.pipeline.device.type == "cuda" @require_torch def test_text_to_image_task(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) res = pipe("Lets create an embedding") diff --git a/tox.ini b/tox.ini index e69de29b..e8bfb6c4 100644 --- a/tox.ini +++ b/tox.ini @@ -0,0 +1,35 @@ +[tox] +envlist = test_service +skipsdist = true + +[testenv] +deps = -r requirements.txt +allowlist_externals = rm +install_command = + pip install -U pip + pip 
install -e ./src + pip install {opts} {packages} +setenv = + PYTHONPATH=. + +[testenv:lint] +basepython = python +commands = ruff src + +[testenv:fix] +basepython = python +commands = ruff src --fix + +# TODO: Add separate sections for different test cases + +[testenv:diffusers] +deps = -e ".[diffusers]" + +commands = + pytest \ + {tty:--color=yes} \ + tests/{posargs} \ + --log-cli-level=DEBUG \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' \ + --cov=src \ + --cov-report xml \ No newline at end of file From e0f5ea201618f13b67a15ff9a5fd4aaab28da860 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 17 Feb 2024 12:29:16 +0000 Subject: [PATCH 036/173] pass short unit --- setup.py | 12 ++---- src/huggingface_inference_toolkit/handler.py | 1 + src/huggingface_inference_toolkit/utils.py | 37 ++++++++++++------- tests/unit/test_handler.py | 39 ++++++++++++++------ 4 files changed, 55 insertions(+), 34 deletions(-) diff --git a/setup.py b/setup.py index f35c623c..b46bc35a 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -import os from datetime import date from setuptools import find_packages, setup @@ -16,7 +15,7 @@ install_requires = [ # transformers - "transformers[sklearn,sentencepiece]>=4.37.2", + "transformers[sklearn,sentencepiece]==4.27.0", "huggingface_hub>=0.20.3", # api stuff "orjson", @@ -32,15 +31,10 @@ extras = {} -extras["st"] = ["sentence_transformers==2.3.1"] +extras["st"] = ["sentence_transformers==2.2.1"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] - - -# Hugging Face specific dependencies -# framework specific dependencies -extras["torch"] = ["torch>=2.1.2", "torchaudio"] +extras["torch"] = ["torch>=1.8.0", "torchaudio"] extras["tensorflow"] = ["tensorflow==2.9.0"] -# test and quality extras["test"] = [ "pytest", "pytest-xdist", diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 097a12c9..993e4967 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -25,6 +25,7 @@ def __call__(self, data): """ inputs = data.pop("inputs", data) parameters = data.pop("parameters", None) + # pass inputs with all kwargs in data if parameters is not None: prediction = self.pipeline(inputs, **parameters) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 5d85b80b..23b4b3bd 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -189,9 +189,10 @@ def check_and_register_custom_pipeline_from_directory(model_dir): spec.loader.exec_module(handler) # init custom handler with model_dir custom_pipeline = handler.EndpointHandler(model_dir) + elif legacy_module.is_file(): logger.warning( - "You are using a legacy custom pipeline with. Please update to the new format. See documentation for more information." + "You are using a legacy custom pipeline. Please update to the new format. See documentation for more information." 
) spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: @@ -248,9 +249,7 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: else: kwargs["tokenizer"] = model_dir - # add check for optimum accelerated pipeline if is_optimum_available(): - # TODO: add check for optimum accelerated pipeline logger.info("Optimum is not implement yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) elif is_sentence_transformers_available() and task in [ @@ -258,9 +257,19 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_sentence_transformers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_diffusers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) else: hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) @@ -268,17 +277,19 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): - - language = kwargs.get("language") - if not language: - # If no lang parameter was passed, english is defult - language = "english" - + # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - #hf_pipeline._preprocess_params["ignore_warning"] = True + hf_pipeline._preprocess_params["ignore_warning"] = True # set decoder to english by default - hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe") + # TODO: replace when transformers 4.26.0 is release with + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(language="english", task="transcribe") + """" + hf_pipeline.tokenizer.language = "english" + hf_pipeline.tokenizer.task = "transcribe" + hf_pipeline.model.config.forced_decoder_ids = [ + (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:]) + ]""" return hf_pipeline diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 9306cdc3..0fdfb15b 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,14 +1,16 @@ import tempfile - +import torch from transformers.testing_utils import require_torch, slow, require_tf - import pytest from huggingface_inference_toolkit.handler import ( HuggingFaceHandler, get_inference_handler_either_custom_or_default_handler, ) -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) TASK = "text-classification" @@ -18,7 +20,6 @@ @require_torch def test_pt_get_device(): - import torch with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py @@ -34,7 +35,11 @@ def test_pt_get_device(): def test_pt_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # 
https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="pytorch" + ) h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) prediction = h(INPUT) @@ -46,7 +51,9 @@ def test_pt_predict_call(): def test_pt_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") assert h(INPUT) == INPUT @@ -56,7 +63,9 @@ def test_pt_custom_pipeline(): def test_pt_sentence_transformers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") pred = h(INPUT) @@ -65,7 +74,6 @@ def test_pt_sentence_transformers_pipeline(): @require_tf def test_tf_get_device(): - import tensorflow as tf with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py @@ -81,10 +89,17 @@ def test_tf_get_device(): def test_tf_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") - h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="tensorflow" + ) + handler = HuggingFaceHandler( + model_dir=str(storage_dir), + task=TASK + ) - prediction = h(INPUT) + prediction = handler(INPUT) assert "label" in prediction[0] assert "score" in prediction[0] @@ -109,4 +124,4 @@ def test_tf_sentence_transformers_pipeline(): with pytest.raises(Exception) as exc_info: h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") - assert "Unknown task sentence-embeddings" in str(exc_info.value) + assert "Use `from_tf=True` to load this model from those weights." in str(exc_info.value) From cd508717605abde73ac2385f62999ef8ae88e471 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 17 Feb 2024 13:17:27 +0000 Subject: [PATCH 037/173] tensorflow --- setup.py | 2 +- tox.ini | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index b46bc35a..9324f0cb 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ extras["st"] = ["sentence_transformers==2.2.1"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] extras["torch"] = ["torch>=1.8.0", "torchaudio"] -extras["tensorflow"] = ["tensorflow==2.9.0"] +extras["tensorflow"] = ["tensorflow==2.9.3"] extras["test"] = [ "pytest", "pytest-xdist", diff --git a/tox.ini b/tox.ini index e8bfb6c4..a000006c 100644 --- a/tox.ini +++ b/tox.ini @@ -4,11 +4,9 @@ skipsdist = true [testenv] deps = -r requirements.txt -allowlist_externals = rm install_command = pip install -U pip - pip install -e ./src - pip install {opts} {packages} + pip install -e . setenv = PYTHONPATH=. 
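The slow/GPU suites here key off transformers' RUN_SLOW convention: `transformers.testing_utils._run_slow_tests` parses the RUN_SLOW environment variable, which the tox environments in the next hunks export. A minimal sketch of the same gating in plain pytest, assuming nothing beyond the environment variable:

import os
import pytest

# Mirrors transformers.testing_utils: slow tests are opt-in via RUN_SLOW.
RUN_SLOW = os.environ.get("RUN_SLOW", "false").lower() in {"1", "true", "yes"}

slow = pytest.mark.skipif(not RUN_SLOW, reason="set RUN_SLOW=True to run slow tests")

@slow
def test_gpu_inference():
    ...
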
@@ -22,14 +20,22 @@ commands = ruff src --fix # TODO: Add separate sections for different test cases -[testenv:diffusers] -deps = -e ".[diffusers]" +[testenv:unit-torch] +install_command = pip install -e ".[torch, st, diffusers]" +allowlist_externals = pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/unit/{posargs} \ + --log-cli-level=DEBUG \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +[testenv:unit-tensorflow] +install_command = pip install -e ".[tensorflow, st, diffusers]" +allowlist_externals = pytest commands = pytest \ {tty:--color=yes} \ - tests/{posargs} \ + tests/unit/{posargs} \ --log-cli-level=DEBUG \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' \ - --cov=src \ - --cov-report xml \ No newline at end of file + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' \ No newline at end of file From 600edb01121ffb7c3f3eaa84e50531d080f02334 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 17 Feb 2024 13:27:31 +0000 Subject: [PATCH 038/173] tox --- tox.ini | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index a000006c..73ae4181 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = test_service +envlist = py39 skipsdist = true [testenv] @@ -27,9 +27,21 @@ commands = pytest \ {tty:--color=yes} \ tests/unit/{posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +[testenv:unit-torch-slow] +install_command = pip install -e ".[torch, st, diffusers]" +allowlist_externals = pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/unit/{posargs} \ + --log-cli-level=INFO \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +setenv = + RUN_SLOW=True + [testenv:unit-tensorflow] install_command = pip install -e ".[tensorflow, st, diffusers]" allowlist_externals = pytest @@ -37,5 +49,5 @@ commands = pytest \ {tty:--color=yes} \ tests/unit/{posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' \ No newline at end of file From 7e5708521e5216a2ecd93faa7f9ad1bd366d9a32 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 19 Feb 2024 10:10:41 +0000 Subject: [PATCH 039/173] cpu images --- .gitignore | 4 + dockerfiles/pytorch/cpu/Dockerfile | 1 + dockerfiles/pytorch/gpu/Dockerfile | 3 +- dockerfiles/tensorflow/cpu/Dockerfile | 1 + dockerfiles/tensorflow/gpu/Dockerfile | 63 ++++++---- dockerfiles/tensorflow/gpu/environment.yaml | 9 -- dockerfiles/tensorflow/gpu/requirements.txt | 8 ++ makefile | 18 ++- tests/__init__.py | 0 tests/integ/config.py | 2 +- tests/integ/fixtures/__init__.py | 0 tests/integ/fixtures/docker.py | 52 +++++++++ tests/integ/{test_container.py => helpers.py} | 110 +++++++++++------- tests/integ/test_text_classification.py | 38 ++++++ tox.ini | 26 ++++- 15 files changed, 250 insertions(+), 85 deletions(-) delete mode 100644 dockerfiles/tensorflow/gpu/environment.yaml create mode 100644 dockerfiles/tensorflow/gpu/requirements.txt create mode 100644 tests/__init__.py create mode 100644 tests/integ/fixtures/__init__.py create mode 100644 tests/integ/fixtures/docker.py rename tests/integ/{test_container.py => helpers.py} (84%) create mode 100644 tests/integ/test_text_classification.py diff --git a/.gitignore b/.gitignore index c2013bef..78b208e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,12 @@ # Docker project 
generated files to ignore # if you want to ignore files created by your editor/tools, # please consider a global .gitignore https://help.github.com/articles/ignoring-files +.gitignore .egg-info .vagrant* +.hcl +.terraform.lock.hcl +.terraform __pycache__ bin docker/docker diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile index 61e573b4..53faf0ef 100644 --- a/dockerfiles/pytorch/cpu/Dockerfile +++ b/dockerfiles/pytorch/cpu/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index f4ddb60c..4742810e 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -54,7 +54,8 @@ RUN apt-get update -y && apt-get upgrade -y && \ python3 \ python3-pip \ python3.10-venv \ - curl + curl \ + ffmpeg # install dependencies COPY --from=builder /app . diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile index c52abf13..82f3ea7d 100644 --- a/dockerfiles/tensorflow/cpu/Dockerfile +++ b/dockerfiles/tensorflow/cpu/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index 6b87b265..e66f62d1 100644 --- a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -1,13 +1,16 @@ -FROM nvidia/cuda:11.2.2-base-ubuntu20.04 +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as builder +SHELL ["/bin/bash", "-c"] LABEL maintainer="Hugging Face" ENV DEBIAN_FRONTEND=noninteractive -ENV CONDA_OVERRIDE_CUDA="11.2" + +WORKDIR /app RUN apt-get update \ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ && apt-get install -y \ + build-essential \ bzip2 \ curl \ git \ @@ -15,39 +18,52 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3 \ + python3-pip \ + python3.10-venv \ # audio libsndfile1-dev \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" +# install dependencies +COPY dockerfiles/tensorflow/gpu/requirements.txt requirements-docker.txt +COPY requirements.txt requirements-toolkit.txt -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc +# install wheel and setuptools +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + uv venv && \ + source .venv/bin/activate && \ + uv pip install --no-cache-dir -r requirements-docker.txt && \ + uv pip install --no-cache-dir -r requirements-toolkit.txt -WORKDIR /app +### Runner -# install base python dependencies -COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes +FROM nvidia/cuda:12.3.1-base-ubuntu22.04 as runner +SHELL ["/bin/bash", "-c"] -# install dependencies -COPY dockerfiles/pytorch/gpu/requirements.txt /tmp/requirements.txt -RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt +WORKDIR /app -# install huggingface inference toolkit -COPY 
requirements.txt /tmp/requirements.txt -RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt +RUN apt-get update -y && apt-get upgrade -y && \ + apt-get install -y \ + python3 \ + python3-pip \ + python3.10-venv \ + curl \ + ffmpeg +# install dependencies +COPY --from=builder /app . +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + source .venv/bin/activate && \ + ls -all # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit @@ -57,5 +73,4 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml deleted file mode 100644 index 1d886795..00000000 --- a/dockerfiles/tensorflow/gpu/environment.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- nvidia::cudatoolkit=11.7 -- tensorflow=2.9.1=*cuda112*py39* -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.27.2 \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/requirements.txt b/dockerfiles/tensorflow/gpu/requirements.txt new file mode 100644 index 00000000..dfb9d127 --- /dev/null +++ b/dockerfiles/tensorflow/gpu/requirements.txt @@ -0,0 +1,8 @@ +cmake==3.28.3 +wheel==0.42.0 +setuptools==69.1.0 +tensorflow==2.9.3 +transformers[sklearn,sentencepiece,audio,vision]==4.37.2 +sentence_transformers==2.3.1 +diffusers==0.26.1 +accelerate==0.26.1 \ No newline at end of file diff --git a/makefile b/makefile index 3f3dbb6c..1f1d05b9 100644 --- a/makefile +++ b/makefile @@ -20,11 +20,17 @@ quality: style: ruff $(check_dirs) --fix -torch-gpu: - docker build -f dockerfiles/pytorch/gpu/Dockerfile -t starlette-transformers:gpu . +inference-pytorch-gpu: + docker build -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu . -torch-cpu: - docker build -f dockerfiles/pytorch/cpu/Dockerfile -t starlette-transformers:cpu . +inference-pytorch-cpu: + docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu . -run-classification: - docker run -e HF_MODEL="hf-internal-testing/tiny-random-distilbert" -e HF_MODEL_DIR="/tmp2" -e HF_TASK="text-classification" --gpus all starlette-transformers:gpu \ No newline at end of file +inference-tensorflow-gpu: + docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu . + +inference-tensorflow-cpu: + docker build -f dockerfiles/tensorflow/cpu/Dockerfile -t integration-test-tensorflow:cpu . 
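
The build targets above all tag their images as integration-test-<framework>:<device>, and the Docker fixtures introduced later in this patch derive both the image tag and the container name from the same (framework, task, device) triple. A minimal sketch of that shared convention (helper names here are illustrative only, not part of the toolkit):

    # Illustrative helpers mirroring the naming scheme shared by the make
    # targets and the pytest fixtures below; not a published API.
    def image_tag(framework: str, device: str) -> str:
        # e.g. "integration-test-pytorch:gpu"
        return f"integration-test-{framework}:{device}"

    def container_name(framework: str, task: str, device: str) -> str:
        # e.g. "integration-test-pytorch-text-classification-gpu"
        return f"integration-test-{framework}-{task}-{device}"

Note that the stop-all target just below stops every container on the host, not only the integration-test ones.
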
+ +stop-all: + docker stop $$(docker ps -a -q) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integ/config.py b/tests/integ/config.py index 467afde2..e174e1f5 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -1,6 +1,6 @@ import os -from integ.utils import ( +from tests.integ.utils import ( validate_automatic_speech_recognition, validate_classification, validate_feature_extraction, diff --git a/tests/integ/fixtures/__init__.py b/tests/integ/fixtures/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integ/fixtures/docker.py b/tests/integ/fixtures/docker.py new file mode 100644 index 00000000..39aadcc1 --- /dev/null +++ b/tests/integ/fixtures/docker.py @@ -0,0 +1,52 @@ +import docker +import pytest +import random +import time +import logging + + +@pytest.fixture(scope = "module") +def start_container( + device, + task, + model, + framework +): + client = docker.DockerClient(base_url='unix://var/run/docker.sock') + container_name = f"integration-test-{framework}-{task}-{device}" + container_image = f"integration-test-{framework}:{device}" + port = random.randint(5000, 6000) + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + previous = client.containers.get(container_name) + if previous: + previous.stop() + previous.remove() + + device_request = [ + docker.types.DeviceRequest( + count=-1, + capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + container = client.containers.run( + image = container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_ID": model, "HF_TASK": task}, + detach=True, + # GPU + device_requests=device_request, + ) + + return container_name, port + +def stop_container(container_name): + + client = docker.DockerClient(base_url='unix://var/run/docker.sock') + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + diff --git a/tests/integ/test_container.py b/tests/integ/helpers.py similarity index 84% rename from tests/integ/test_container.py rename to tests/integ/helpers.py index 5b9cb793..98bb4d35 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/helpers.py @@ -1,18 +1,24 @@ import random import tempfile import time - import docker import pytest import requests -from docker.client import DockerClient -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf -from integ.config import task2input, task2model, task2output, task2validation +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests import tenacity +from docker import DockerClient import logging - -logging.basicConfig(level = "DEBUG") +import traceback IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -28,48 +34,66 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: return None +#@tenacity.retry( +# retry = tenacity.retry_if_exception(ValueError), +# stop = tenacity.stop_after_attempt(10), +# reraise = True +#) def wait_for_container_to_be_ready(base_url): - t = 0 - while t < 10: - try: - response = requests.get(f"{base_url}/health") - if response.status_code == 200: - break - except Exception: - pass - finally: - t += 1 - time.sleep(2) - return True + + while 
True: + response = requests.get(f"{base_url}/health") + if response.status_code == 200: + logging.info("Container ready!") + return True + else: + logging.info("Container not ready; trying again...") @tenacity.retry( - wait = tenacity.wait_random(min=1, max=2), + wait = tenacity.wait_random(min = 1, max = 10), retry = tenacity.retry_if_exception(requests.exceptions.ConnectionError), - stop = tenacity.stop_after_attempt(5) + stop = tenacity.stop_after_attempt(5), + reraise = True ) -def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): +def verify_task( + #container: DockerClient, + task: str, + port: int = 5000, + framework: str = "pytorch" +): BASE_URL = f"http://localhost:{port}" + logging.info(f"Base URL: {BASE_URL}") + logging.info(f"Port: {port}") input = task2input[task] - # health check - wait_for_container_to_be_ready(BASE_URL) - if ( - task == "image-classification" - or task == "object-detection" - or task == "image-segmentation" - or task == "zero-shot-image-classification" - ): - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} - ).json() - elif task == "automatic-speech-recognition" or task == "audio-classification": - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} - ).json() - elif task == "text-to-image": - prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content - else: - prediction = requests.post(f"{BASE_URL}", json=input).json() - assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + + try: + # health check + #wait_for_container_to_be_ready(BASE_URL) + if ( + task == "image-classification" + or task == "object-detection" + or task == "image-segmentation" + or task == "zero-shot-image-classification" + ): + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} + ).json() + elif task == "automatic-speech-recognition" or task == "audio-classification": + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} + ).json() + elif task == "text-to-image": + prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content + else: + prediction = requests.post(f"{BASE_URL}", json=input).json() + assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + except Exception as exception: + logging.error(f"Base URL: {BASE_URL}") + logging.error(f"Task: {task}") + logging.error(f"Input: {input}") + logging.error(f"Error: {str(exception)}") + logging.error(f"Stack: {traceback.format_exc()}") + assert False @require_torch @@ -121,9 +145,9 @@ def test_pt_container_remote_model(task) -> None: # GPU device_requests=device_request, ) - # time.sleep(5) + time.sleep(5) - verify_task(container, task, port) + verify_task(task = task, port = port) container.stop() container.remove() diff --git a/tests/integ/test_text_classification.py b/tests/integ/test_text_classification.py new file mode 100644 index 00000000..69e7b710 --- /dev/null +++ b/tests/integ/test_text_classification.py @@ -0,0 +1,38 @@ +from tests.integ.fixtures.docker import start_container, stop_container +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +import pytest +import time +import tenacity + +class TestTextClassification: + + 
@pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "task", + ["text-classification"] + ) + @pytest.mark.parametrize( + "model", + [task2model["text-classification"]["pytorch"]] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + def test_classification(start_container): + + time.sleep(5) + verify_task( + task = "text-classification", + port = start_container[1] + ) + diff --git a/tox.ini b/tox.ini index 73ae4181..de29d5a6 100644 --- a/tox.ini +++ b/tox.ini @@ -50,4 +50,28 @@ commands = {tty:--color=yes} \ tests/unit/{posargs} \ --log-cli-level=INFO \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' \ No newline at end of file + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' + +[testenv:unit-tensorflow-slow] +install_command = pip install -e ".[tensorflow, st, diffusers]" +allowlist_externals = pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/unit/{posargs} \ + --log-cli-level=INFO \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +setenv = + RUN_SLOW=True + +[testenv:integration-torch-gpu] +allowlist_externals = + pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/integ/{posargs} \ + --log-cli-level=DEBUG \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +setenv = + RUN_SLOW=True \ No newline at end of file From 16dc0f498536eb3b10e02ecb8adbe74b5a98ecd5 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 19 Feb 2024 15:02:19 +0000 Subject: [PATCH 040/173] conversational passing integration --- dockerfiles/pytorch/gpu/Dockerfile | 7 ++- dockerfiles/pytorch/gpu/requirements.txt | 4 +- makefile | 2 +- setup.py | 2 +- tests/integ/config.py | 6 ++- .../integ/{fixtures/docker.py => conftest.py} | 27 ++++------ tests/integ/fixtures/__init__.py | 0 tests/integ/helpers.py | 28 +++++----- tests/integ/test_pytorch.py | 52 +++++++++++++++++++ tests/integ/test_tensorflow.py | 52 +++++++++++++++++++ tests/integ/test_text_classification.py | 38 -------------- tests/integ/utils.py | 6 +-- tox.ini | 4 +- 13 files changed, 147 insertions(+), 81 deletions(-) rename tests/integ/{fixtures/docker.py => conftest.py} (66%) delete mode 100644 tests/integ/fixtures/__init__.py create mode 100644 tests/integ/test_pytorch.py create mode 100644 tests/integ/test_tensorflow.py delete mode 100644 tests/integ/test_text_classification.py diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 4742810e..90c070cc 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -1,9 +1,10 @@ -FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as builder +FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder SHELL ["/bin/bash", "-c"] LABEL maintainer="Hugging Face" ENV DEBIAN_FRONTEND=noninteractive +ENV TORCH_USE_CUDA_DSA=1 WORKDIR /app @@ -44,11 +45,13 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ ### Runner -FROM nvidia/cuda:12.3.1-base-ubuntu22.04 as runner +FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as runner SHELL ["/bin/bash", "-c"] WORKDIR /app +ENV TORCH_USE_CUDA_DSA=1 + RUN apt-get update -y && apt-get upgrade -y && \ apt-get install -y \ python3 \ diff --git a/dockerfiles/pytorch/gpu/requirements.txt b/dockerfiles/pytorch/gpu/requirements.txt index 04d440db..b6ca030e 100644 --- a/dockerfiles/pytorch/gpu/requirements.txt +++ b/dockerfiles/pytorch/gpu/requirements.txt @@ -1,8 +1,8 @@ cmake==3.28.3 wheel==0.42.0 setuptools==69.1.0 -torch==2.1.2 -torchvision==0.16.2 
+torch==2.2.0 +torchvision transformers[sklearn,sentencepiece,audio,vision]==4.37.2 sentence_transformers==2.3.1 diffusers==0.26.1 diff --git a/makefile b/makefile index 1f1d05b9..13b5b4fb 100644 --- a/makefile +++ b/makefile @@ -33,4 +33,4 @@ inference-tensorflow-cpu: docker build -f dockerfiles/tensorflow/cpu/Dockerfile -t integration-test-tensorflow:cpu . stop-all: - docker stop $$(docker ps -a -q) \ No newline at end of file + docker stop $$(docker ps -a -q) && docker container prune --force \ No newline at end of file diff --git a/setup.py b/setup.py index 9324f0cb..2ec9f028 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ extras["st"] = ["sentence_transformers==2.2.1"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] -extras["torch"] = ["torch>=1.8.0", "torchaudio"] +extras["torch"] = ["torch==2.2.0", "torchaudio"] extras["tensorflow"] = ["tensorflow==2.9.3"] extras["test"] = [ "pytest", diff --git a/tests/integ/config.py b/tests/integ/config.py index e174e1f5..b370c8e2 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -87,8 +87,10 @@ "tensorflow": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification", }, "conversational": { - "pytorch": "hf-internal-testing/tiny-random-blenderbot", - "tensorflow": "hf-internal-testing/tiny-random-blenderbot", + "pytorch": "microsoft/DialoGPT-small", + "tensorflow": "microsoft/DialoGPT-small", + #"pytorch": "hf-internal-testing/tiny-random-blenderbot", + #"tensorflow": "hf-internal-testing/tiny-random-blenderbot", }, "sentence-similarity": { "pytorch": "sentence-transformers/all-MiniLM-L6-v2", diff --git a/tests/integ/fixtures/docker.py b/tests/integ/conftest.py similarity index 66% rename from tests/integ/fixtures/docker.py rename to tests/integ/conftest.py index 39aadcc1..dcaab938 100644 --- a/tests/integ/fixtures/docker.py +++ b/tests/integ/conftest.py @@ -1,51 +1,46 @@ import docker import pytest import random -import time import logging +from tests.integ.config import task2model -@pytest.fixture(scope = "module") +@pytest.fixture(scope = "function") def start_container( device, task, - model, framework ): client = docker.DockerClient(base_url='unix://var/run/docker.sock') container_name = f"integration-test-{framework}-{task}-{device}" container_image = f"integration-test-{framework}:{device}" port = random.randint(5000, 6000) + model = task2model[task][framework] logging.debug(f"Image: {container_image}") logging.debug(f"Port: {port}") - previous = client.containers.get(container_name) - if previous: - previous.stop() - previous.remove() - device_request = [ docker.types.DeviceRequest( count=-1, capabilities=[["gpu"]]) ] if device == "gpu" else [] - container = client.containers.run( + yield client.containers.run( image = container_image, name=container_name, ports={"5000": port}, - environment={"HF_MODEL_ID": model, "HF_TASK": task}, + environment={ + "HF_MODEL_ID": model, + "HF_TASK": task, + "CUDA_LAUNCH_BLOCKING": 1 + }, detach=True, # GPU device_requests=device_request, - ) - - return container_name, port + ), port -def stop_container(container_name): - - client = docker.DockerClient(base_url='unix://var/run/docker.sock') + #Teardown previous = client.containers.get(container_name) previous.stop() previous.remove() diff --git a/tests/integ/fixtures/__init__.py b/tests/integ/fixtures/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index 98bb4d35..e453875b 100644 --- a/tests/integ/helpers.py +++ 
b/tests/integ/helpers.py @@ -19,6 +19,7 @@ from docker import DockerClient import logging import traceback +import urllib3 IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -42,19 +43,17 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: def wait_for_container_to_be_ready(base_url): while True: - response = requests.get(f"{base_url}/health") - if response.status_code == 200: - logging.info("Container ready!") - return True - else: - logging.info("Container not ready; trying again...") + time.sleep(1) + try: + response = requests.get(f"{base_url}/health") + if response.status_code == 200: + logging.info("Container ready!") + return True + else: + logging.info("Container not ready; trying again...") + except: + logging.error(f"Container not ready; trying again...") -@tenacity.retry( - wait = tenacity.wait_random(min = 1, max = 10), - retry = tenacity.retry_if_exception(requests.exceptions.ConnectionError), - stop = tenacity.stop_after_attempt(5), - reraise = True -) def verify_task( #container: DockerClient, task: str, @@ -68,7 +67,7 @@ def verify_task( try: # health check - #wait_for_container_to_be_ready(BASE_URL) + wait_for_container_to_be_ready(BASE_URL) if ( task == "image-classification" or task == "object-detection" @@ -86,6 +85,9 @@ def verify_task( prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content else: prediction = requests.post(f"{BASE_URL}", json=input).json() + + logging.info(f"Prediction: {prediction}") + logging.info(f"Snapshot: {task2output[task]}") assert task2validation[task](result=prediction, snapshot=task2output[task]) is True except Exception as exception: logging.error(f"Base URL: {BASE_URL}") diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py new file mode 100644 index 00000000..beeb0405 --- /dev/null +++ b/tests/integ/test_pytorch.py @@ -0,0 +1,52 @@ +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +import pytest + +class TestPytorchInference: + + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + #"text-classification", + #"zero-shot-classification", + #"ner", + #"question-answering", + #"fill-mask", + #"summarization", + #"translation_xx_to_yy", + #"text2text-generation", + #"text-generation", + #"feature-extraction", + #"image-classification", + #"automatic-speech-recognition", + #"audio-classification", + #"object-detection", + #"image-segmentation", + #"table-question-answering", + "conversational" + #"sentence-similarity", + #"sentence-embeddings", + #"sentence-ranking", + #"text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('start_container') + def test_classification(self, start_container, task, framework, device): + + verify_task( + task = task, + port = start_container[1] + ) diff --git a/tests/integ/test_tensorflow.py b/tests/integ/test_tensorflow.py new file mode 100644 index 00000000..b7699117 --- /dev/null +++ b/tests/integ/test_tensorflow.py @@ -0,0 +1,52 @@ +from tests.integ.fixtures.docker import start_container, stop_container +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +import pytest + +class TestTensorflowInference: + + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + 
"text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking" + ] + ) + @pytest.mark.parametrize( + "framework", + ["tensorflow"] + ) + def test_classification(self, start_container, task): + + verify_task( + task = task, + port = start_container[1] + ) + diff --git a/tests/integ/test_text_classification.py b/tests/integ/test_text_classification.py deleted file mode 100644 index 69e7b710..00000000 --- a/tests/integ/test_text_classification.py +++ /dev/null @@ -1,38 +0,0 @@ -from tests.integ.fixtures.docker import start_container, stop_container -from tests.integ.helpers import verify_task -from tests.integ.config import ( - task2input, - task2model, - task2output, - task2validation -) -import pytest -import time -import tenacity - -class TestTextClassification: - - @pytest.mark.parametrize( - "device", - ["gpu"] - ) - @pytest.mark.parametrize( - "task", - ["text-classification"] - ) - @pytest.mark.parametrize( - "model", - [task2model["text-classification"]["pytorch"]] - ) - @pytest.mark.parametrize( - "framework", - ["pytorch"] - ) - def test_classification(start_container): - - time.sleep(5) - verify_task( - task = "text-classification", - port = start_container[1] - ) - diff --git a/tests/integ/utils.py b/tests/integ/utils.py index 813ba751..7fd0ab5b 100644 --- a/tests/integ/utils.py +++ b/tests/integ/utils.py @@ -1,12 +1,8 @@ import logging -import re -import signal from contextlib import contextmanager from time import time -LOGGER = logging.getLogger("timeout") - def validate_classification(result=None, snapshot=None): for idx, _ in enumerate(result): @@ -16,6 +12,8 @@ def validate_classification(result=None, snapshot=None): def validate_zero_shot_classification(result=None, snapshot=None): + logging.info(f"Result: {result}") + logging.info(f"Snapshot: {snapshot}") assert result.keys() == snapshot.keys() # assert result["labels"] == snapshot["labels"] # assert result["sequence"] == snapshot["sequence"] diff --git a/tox.ini b/tox.ini index de29d5a6..be3fc7a1 100644 --- a/tox.ini +++ b/tox.ini @@ -64,14 +64,14 @@ commands = setenv = RUN_SLOW=True -[testenv:integration-torch-gpu] +[testenv:integration-torch] allowlist_externals = pytest commands = pytest \ {tty:--color=yes} \ tests/integ/{posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True \ No newline at end of file From 286a877890c53ee24a0d24be1583fece64035d28 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 19 Feb 2024 16:39:45 +0000 Subject: [PATCH 041/173] tox multiprocess --- tests/integ/conftest.py | 10 ++++++-- tests/integ/helpers.py | 10 ++++---- tests/integ/test_pytorch.py | 48 +++++++++++++++++++++---------------- tox.ini | 2 +- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index dcaab938..285069d5 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -3,18 +3,24 @@ import random import logging from tests.integ.config import task2model +import tenacity +import time - +@tenacity.retry( + retry = 
tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(3) +) @pytest.fixture(scope = "function") def start_container( device, task, framework ): + time.sleep(random.randint(1, 5)) client = docker.DockerClient(base_url='unix://var/run/docker.sock') container_name = f"integration-test-{framework}-{task}-{device}" container_image = f"integration-test-{framework}:{device}" - port = random.randint(5000, 6000) + port = random.randint(5000, 7000) model = task2model[task][framework] logging.debug(f"Image: {container_image}") diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index e453875b..8036923f 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -40,9 +40,10 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: # stop = tenacity.stop_after_attempt(10), # reraise = True #) -def wait_for_container_to_be_ready(base_url): +def wait_for_container_to_be_ready(base_url, max_retries = 100): - while True: + retries = 0 + while retries < max_retries: time.sleep(1) try: response = requests.get(f"{base_url}/health") @@ -50,9 +51,10 @@ def wait_for_container_to_be_ready(base_url): logging.info("Container ready!") return True else: - logging.info("Container not ready; trying again...") + raise ConnectionError() except: - logging.error(f"Container not ready; trying again...") + logging.warning(f"Container not ready; trying again...") + retries += 1 def verify_task( #container: DockerClient, diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index beeb0405..bfda8e6a 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -6,9 +6,15 @@ task2validation ) import pytest +import tenacity +import docker class TestPytorchInference: + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(3) + ) @pytest.mark.parametrize( "device", ["gpu", "cpu"] @@ -16,27 +22,27 @@ class TestPytorchInference: @pytest.mark.parametrize( "task", [ - #"text-classification", - #"zero-shot-classification", - #"ner", - #"question-answering", - #"fill-mask", - #"summarization", - #"translation_xx_to_yy", - #"text2text-generation", - #"text-generation", - #"feature-extraction", - #"image-classification", - #"automatic-speech-recognition", - #"audio-classification", - #"object-detection", - #"image-segmentation", - #"table-question-answering", - "conversational" - #"sentence-similarity", - #"sentence-embeddings", - #"sentence-ranking", - #"text-to-image" + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" ] ) @pytest.mark.parametrize( diff --git a/tox.ini b/tox.ini index be3fc7a1..71c70e0a 100644 --- a/tox.ini +++ b/tox.ini @@ -68,7 +68,7 @@ setenv = allowlist_externals = pytest commands = - pytest \ + pytest -s -v -n 8 \ {tty:--color=yes} \ tests/integ/{posargs} \ --log-cli-level=INFO \ From 54d110b80896af33dd35b6374330ce1e01e60a14 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 19 Feb 2024 17:14:56 +0000 Subject: [PATCH 042/173] remove tf from integration test in tox.ini --- tests/integ/test_pytorch.py | 7 
++--- tests/integ/test_tensorflow.py | 56 +++++++++++++++++++++++++++++----- tox.ini | 4 +-- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index bfda8e6a..092fb07a 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -50,9 +50,6 @@ class TestPytorchInference: ["pytorch"] ) @pytest.mark.usefixtures('start_container') - def test_classification(self, start_container, task, framework, device): + def test_inference(self, start_container, task, framework, device): - verify_task( - task = task, - port = start_container[1] - ) + verify_task(task = task, port = start_container[1]) diff --git a/tests/integ/test_tensorflow.py b/tests/integ/test_tensorflow.py index b7699117..a831108e 100644 --- a/tests/integ/test_tensorflow.py +++ b/tests/integ/test_tensorflow.py @@ -1,4 +1,3 @@ -from tests.integ.fixtures.docker import start_container, stop_container from tests.integ.helpers import verify_task from tests.integ.config import ( task2input, @@ -7,12 +6,18 @@ task2validation ) import pytest +import tenacity +import docker class TestTensorflowInference: + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(3) + ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["gpu"] ) @pytest.mark.parametrize( "task", @@ -43,10 +48,47 @@ class TestTensorflowInference: "framework", ["tensorflow"] ) - def test_classification(self, start_container, task): + @pytest.mark.usefixtures('start_container') + def test_inference_gpu(self, start_container, task, framework, device): + + verify_task(task = task, port = start_container[1]) - verify_task( - task = task, - port = start_container[1] - ) +""" + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking" + ] + ) + @pytest.mark.parametrize( + "framework", + ["tensorflow"] + ) + @pytest.mark.usefixtures('start_container') + def test_inference_cpu(self, start_container, task, framework, device): + verify_task(task = task, port = start_container[1]) +""" \ No newline at end of file diff --git a/tox.ini b/tox.ini index 71c70e0a..37cf3b90 100644 --- a/tox.ini +++ b/tox.ini @@ -64,13 +64,13 @@ commands = setenv = RUN_SLOW=True -[testenv:integration-torch] +[testenv:integration] allowlist_externals = pytest commands = pytest -s -v -n 8 \ {tty:--color=yes} \ - tests/integ/{posargs} \ + tests/integ/test_pytorch.py{posargs} \ --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = From 749093bd5143b642987d0063b3d53b1d8ebd07e5 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 20 Feb 2024 10:10:29 +0000 Subject: [PATCH 043/173] local container tests --- dockerfiles/tensorflow/cpu/Dockerfile | 2 +- dockerfiles/tensorflow/gpu/Dockerfile | 60 ++++++----------- dockerfiles/tensorflow/gpu/environment.yaml | 14 ++++ dockerfiles/tensorflow/gpu/requirements.txt | 8 --- tests/integ/conftest.py | 62 +++++++++++++++++- tests/integ/helpers.py | 7 +- tests/integ/test_pytorch.py 
| 71 ++++++++++++++++++++- 7 files changed, 170 insertions(+), 54 deletions(-) create mode 100644 dockerfiles/tensorflow/gpu/environment.yaml delete mode 100644 dockerfiles/tensorflow/gpu/requirements.txt diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile index 82f3ea7d..d16010bb 100644 --- a/dockerfiles/tensorflow/cpu/Dockerfile +++ b/dockerfiles/tensorflow/cpu/Dockerfile @@ -50,4 +50,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index e66f62d1..462f7a83 100644 --- a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -1,16 +1,13 @@ -FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as builder -SHELL ["/bin/bash", "-c"] +FROM nvidia/cuda:11.2.2-base-ubuntu20.04 LABEL maintainer="Hugging Face" ENV DEBIAN_FRONTEND=noninteractive - -WORKDIR /app +ENV CONDA_OVERRIDE_CUDA="11.2" RUN apt-get update \ && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ && apt-get install -y \ - build-essential \ bzip2 \ curl \ git \ @@ -19,51 +16,33 @@ RUN apt-get update \ gcc \ g++ \ cmake \ - libprotobuf-dev \ - protobuf-compiler \ - python3 \ - python3-pip \ - python3.10-venv \ # audio libsndfile1-dev \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} -# install dependencies -COPY dockerfiles/tensorflow/gpu/requirements.txt requirements-docker.txt -COPY requirements.txt requirements-toolkit.txt - -# install wheel and setuptools -RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ - source $HOME/.cargo/env && \ - uv venv && \ - source .venv/bin/activate && \ - uv pip install --no-cache-dir -r requirements-docker.txt && \ - uv pip install --no-cache-dir -r requirements-toolkit.txt +# install micromamba +ENV MAMBA_ROOT_PREFIX=/opt/conda +ENV PATH=/opt/conda/bin:$PATH +ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" -### Runner - -FROM nvidia/cuda:12.3.1-base-ubuntu22.04 as runner -SHELL ["/bin/bash", "-c"] +RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ + && touch /root/.bashrc \ + && ./bin/micromamba shell init -s bash -p /opt/conda \ + && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc WORKDIR /app -RUN apt-get update -y && apt-get upgrade -y && \ - apt-get install -y \ - python3 \ - python3-pip \ - python3.10-venv \ - curl \ - ffmpeg - -# install dependencies -COPY --from=builder /app . 
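
This hunk reverts the TensorFlow GPU image from the uv/venv multi-stage build back to the earlier single-stage micromamba layout on the CUDA 11.2 base image. The environment file restored below still carries the old PyTorch/CUDA 11.7 pins at this point; a later patch in the series swaps them for TensorFlow 2.9 built against CUDA 11.2. Once those pins land, GPU visibility can be sanity-checked with the standard tf.config API alone (a sketch, assuming the image tag produced by the makefile):

    # Illustrative check; run inside the rebuilt image, for example:
    #   docker run --rm --gpus all --entrypoint python3 \
    #       integration-test-tensorflow:gpu -c '<this snippet>'
    import tensorflow as tf

    gpus = tf.config.list_physical_devices("GPU")
    print(f"Visible GPUs: {gpus}")
    assert gpus, "No GPU visible; check the CUDA base image and host driver"
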
+# install base python dependencies +COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml +RUN micromamba install -y -n base -f environment.yaml \ + && rm environment.yaml \ + && micromamba clean --all --yes -RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ - source $HOME/.cargo/env && \ - source .venv/bin/activate && \ - ls -all +# install huggingface inference toolkit +COPY requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit @@ -73,4 +52,5 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh -ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"] \ No newline at end of file +# run app +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml new file mode 100644 index 00000000..8c1012f7 --- /dev/null +++ b/dockerfiles/tensorflow/gpu/environment.yaml @@ -0,0 +1,14 @@ +name: base +channels: +- conda-forge +dependencies: +- python=3.9.13 +- nvidia::cudatoolkit=11.7 +- pytorch::pytorch=1.13.1=py3.9_cuda11.7* +- pip: + - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 + - sentence_transformers==2.2.2 + - torchvision==0.14.1 + - diffusers==0.20.0 + - accelerate==0.21.0 + - safetensors \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/requirements.txt b/dockerfiles/tensorflow/gpu/requirements.txt deleted file mode 100644 index dfb9d127..00000000 --- a/dockerfiles/tensorflow/gpu/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -cmake==3.28.3 -wheel==0.42.0 -setuptools==69.1.0 -tensorflow==2.9.3 -transformers[sklearn,sentencepiece,audio,vision]==4.37.2 -sentence_transformers==2.3.1 -diffusers==0.26.1 -accelerate==0.26.1 \ No newline at end of file diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 285069d5..8acfd0ab 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -5,13 +5,27 @@ from tests.integ.config import task2model import tenacity import time +import tempfile +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from transformers.testing_utils import ( + require_torch, + slow, + require_tf, + _run_slow_tests +) + +IS_GPU = _run_slow_tests +DEVICE = "gpu" if IS_GPU else "cpu" @tenacity.retry( retry = tenacity.retry_if_exception(docker.errors.APIError), stop = tenacity.stop_after_attempt(3) ) @pytest.fixture(scope = "function") -def start_container( +def remote_container( device, task, framework @@ -51,3 +65,49 @@ def start_container( previous.stop() previous.remove() + +@tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(3) +) +@pytest.fixture(scope = "function") +def local_container( + device, + task, + framework +): + time.sleep(random.randint(1, 5)) + client = docker.DockerClient(base_url='unix://var/run/docker.sock') + container_name = f"integration-test-{framework}-{task}-{device}" + container_image = f"integration-test-{framework}:{device}" + + + port = random.randint(5000, 7000) + model = task2model[task][framework] + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) + ] if IS_GPU else 
[] + + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf(model, tmpdirname, framework="pytorch") + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index 8036923f..4f6439ae 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -14,7 +14,12 @@ task2output, task2validation ) -from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests +from transformers.testing_utils import ( + require_torch, + slow, + require_tf, + _run_slow_tests +) import tenacity from docker import DockerClient import logging diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index 092fb07a..c3c0fa7e 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -1,3 +1,4 @@ +import tempfile from tests.integ.helpers import verify_task from tests.integ.config import ( task2input, @@ -5,6 +6,11 @@ task2output, task2validation ) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) import pytest import tenacity import docker @@ -49,7 +55,66 @@ class TestPytorchInference: "framework", ["pytorch"] ) - @pytest.mark.usefixtures('start_container') - def test_inference(self, start_container, task, framework, device): + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + verify_task(task = task, port = remote_container[1]) + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model(self, local_container, task, framework, device) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler(self, local_container, task, device, repository_id) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + local_container, + repository_id, + device, + task + ) -> None: - verify_task(task = task, port = start_container[1]) + verify_task(task = task, port = local_container[1]) From 3daec64e874ed84b85de9f936e6598a9043ee8e0 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 20 Feb 2024 
11:19:33 +0000 Subject: [PATCH 044/173] torch integ local passing --- tests/integ/config.py | 4 + tests/integ/conftest.py | 16 +++- tests/integ/helpers.py | 2 +- ...{test_pytorch.py => test_pytorch_local.py} | 95 ++++++++++--------- tests/integ/test_pytorch_remote.py | 61 ++++++++++++ tests/integ/utils.py | 5 + tox.ini | 16 +++- 7 files changed, 147 insertions(+), 52 deletions(-) rename tests/integ/{test_pytorch.py => test_pytorch_local.py} (62%) create mode 100644 tests/integ/test_pytorch_remote.py diff --git a/tests/integ/config.py b/tests/integ/config.py index b370c8e2..421fb7d6 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -14,6 +14,7 @@ validate_text_to_image, validate_translation, validate_zero_shot_classification, + validate_custom ) @@ -164,6 +165,7 @@ "sentence-embeddings": {"inputs": "Lets create an embedding"}, "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]}, "text-to-image": {"inputs": "a man on a horse jumps over a broken down airplane."}, + "custom": {"inputs": "this is a test"} } task2output = { @@ -213,6 +215,7 @@ "sentence-embeddings": {"embeddings": ""}, "sentence-ranking": {"scores": ""}, "text-to-image": bytes, + "custom": {"inputs": "this is a test"} } @@ -239,4 +242,5 @@ "sentence-embeddings": validate_zero_shot_classification, "sentence-ranking": validate_zero_shot_classification, "text-to-image": validate_text_to_image, + "custom": validate_custom } diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 8acfd0ab..64a5342d 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -16,6 +16,7 @@ require_tf, _run_slow_tests ) +import uuid IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -74,16 +75,23 @@ def remote_container( def local_container( device, task, + repository_id, framework ): time.sleep(random.randint(1, 5)) + + id = uuid.uuid4() + if not (task == "custom"): + model = task2model[task][framework] + id = task + else: + model = repository_id + client = docker.DockerClient(base_url='unix://var/run/docker.sock') - container_name = f"integration-test-{framework}-{task}-{device}" + container_name = f"integration-test-{framework}-{id}-{device}" container_image = f"integration-test-{framework}:{device}" - port = random.randint(5000, 7000) - model = task2model[task][framework] logging.debug(f"Image: {container_image}") logging.debug(f"Port: {port}") @@ -94,7 +102,7 @@ def local_container( with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(model, tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf(model, tmpdirname, framework=framework) yield client.containers.run( container_image, name=container_name, diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index 4f6439ae..85091424 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -95,7 +95,7 @@ def verify_task( logging.info(f"Prediction: {prediction}") logging.info(f"Snapshot: {task2output[task]}") - assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + assert task2validation[task](result=prediction, snapshot=task2output[task]) except Exception as exception: logging.error(f"Base URL: {BASE_URL}") logging.error(f"Task: {task}") diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch_local.py similarity index 62% rename from tests/integ/test_pytorch.py rename to tests/integ/test_pytorch_local.py index 
c3c0fa7e..564cf23d 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch_local.py @@ -12,28 +12,20 @@ _run_slow_tests ) import pytest -import tenacity -import docker + class TestPytorchInference: - @tenacity.retry( - retry = tenacity.retry_if_exception(docker.errors.APIError), - stop = tenacity.stop_after_attempt(3) - ) - @pytest.mark.parametrize( - "device", - ["gpu", "cpu"] - ) + @require_torch @pytest.mark.parametrize( "task", [ "text-classification", "zero-shot-classification", + "ner", "question-answering", "fill-mask", "summarization", - "ner", "translation_xx_to_yy", "text2text-generation", "text-generation", @@ -48,47 +40,29 @@ class TestPytorchInference: "sentence-similarity", "sentence-embeddings", "sentence-ranking", - "text-to-image" - ] + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] ) @pytest.mark.parametrize( "framework", ["pytorch"] ) - @pytest.mark.usefixtures('remote_container') - def test_inference_remote(self, remote_container, task, framework, device): - - verify_task(task = task, port = remote_container[1]) - - @require_torch @pytest.mark.parametrize( - "task", - [ - "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - "text-to-image", - ], + "repository_id", + [""] ) @pytest.mark.usefixtures('local_container') - def test_pt_container_local_model(self, local_container, task, framework, device) -> None: + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: verify_task(task = task, port = local_container[1]) @@ -98,8 +72,26 @@ def test_pt_container_local_model(self, local_container, task, framework, device "repository_id", ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) @pytest.mark.usefixtures('local_container') - def test_pt_container_custom_handler(self, local_container, task, device, repository_id) -> None: + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: verify_task(task = task, port = local_container[1]) @@ -109,8 +101,21 @@ def test_pt_container_custom_handler(self, local_container, task, device, reposi "repository_id", ["philschmid/custom-pipeline-text-classification"], ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) @pytest.mark.usefixtures('local_container') def test_pt_container_legacy_custom_pipeline( + self, local_container, repository_id, device, diff --git a/tests/integ/test_pytorch_remote.py b/tests/integ/test_pytorch_remote.py new file mode 100644 index 00000000..33a26a4a --- /dev/null +++ b/tests/integ/test_pytorch_remote.py @@ -0,0 +1,61 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( 
+ require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity +import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(3) + ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/utils.py b/tests/integ/utils.py index 7fd0ab5b..042aa233 100644 --- a/tests/integ/utils.py +++ b/tests/integ/utils.py @@ -82,3 +82,8 @@ def validate_object_detection(result=None, snapshot=None): def validate_text_to_image(result=None, snapshot=None): assert isinstance(result, snapshot) return True + +def validate_custom(result=None, snapshot=None): + logging.info(f"Validate custom task - result: {result}, snapshot: {snapshot}") + assert result == snapshot + return True diff --git a/tox.ini b/tox.ini index 37cf3b90..b9df08df 100644 --- a/tox.ini +++ b/tox.ini @@ -64,13 +64,25 @@ commands = setenv = RUN_SLOW=True -[testenv:integration] +[testenv:torch-integration-remote] allowlist_externals = pytest commands = pytest -s -v -n 8 \ {tty:--color=yes} \ - tests/integ/test_pytorch.py{posargs} \ + tests/integ/test_pytorch_remote.py{posargs} \ + --log-cli-level=INFO \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +setenv = + RUN_SLOW=True + +[testenv:torch-integration-local] +allowlist_externals = + pytest +commands = + pytest -s -v -n 8 \ + {tty:--color=yes} \ + tests/integ/test_pytorch_local.py{posargs} \ --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = From de58ba546ee4f4fcf6151326953ef3c07fd01063 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 20 Feb 2024 15:39:12 +0000 Subject: [PATCH 045/173] tf local pass --- dockerfiles/tensorflow/gpu/environment.yaml | 9 +- makefile | 16 +++- src/huggingface_inference_toolkit/utils.py | 14 ++- tests/integ/conftest.py | 78 ++++++++++------- tests/integ/helpers.py | 16 ++-- tests/integ/test_pytorch_local.py | 2 +- tests/integ/test_tensorflow.py | 94 --------------------- tests/integ/test_tensorflow_local.py | 61 +++++++++++++ tests/integ/test_tensorflow_remote.py | 61 +++++++++++++ tox.ini | 40 +++++++-- 10 files changed, 240 insertions(+), 151 deletions(-) delete mode 100644 tests/integ/test_tensorflow.py create mode 100644 tests/integ/test_tensorflow_local.py create mode 100644 tests/integ/test_tensorflow_remote.py diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml index 8c1012f7..1d886795 100644 --- a/dockerfiles/tensorflow/gpu/environment.yaml +++ b/dockerfiles/tensorflow/gpu/environment.yaml @@ -4,11 +4,6 @@ channels: dependencies: - python=3.9.13 - nvidia::cudatoolkit=11.7 -- 
pytorch::pytorch=1.13.1=py3.9_cuda11.7* +- tensorflow=2.9.1=*cuda112*py39* - pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file + - transformers[sklearn,sentencepiece,audio,vision]==4.27.2 \ No newline at end of file diff --git a/makefile b/makefile index 13b5b4fb..09da51ce 100644 --- a/makefile +++ b/makefile @@ -27,10 +27,22 @@ inference-pytorch-cpu: docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu . inference-tensorflow-gpu: - docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu . + docker build --no-cache -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu . inference-tensorflow-cpu: docker build -f dockerfiles/tensorflow/cpu/Dockerfile -t integration-test-tensorflow:cpu . stop-all: - docker stop $$(docker ps -a -q) && docker container prune --force \ No newline at end of file + docker stop $$(docker ps -a -q) && docker container prune --force + +run-tensorflow-remote-gpu: + docker run -e HF_TASK=text-classification -e HF_MODEL_ID=distilbert/distilbert-base-uncased integration-test-tensorflow:gpu + +run-tensorflow-local-gpu: + rm -rf /tmp/distilbert && \ + huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert && \ + docker run --gpus all \ + -v /tmp/distilbert:/opt/huggingface/model \ + -e HF_MODEL_DIR=/opt/huggingface/model \ + -e HF_TASK=text-classification \ + integration-test-tensorflow:gpu \ No newline at end of file diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 23b4b3bd..84c358a3 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -20,8 +20,7 @@ ) logger = logging.getLogger(__name__) -#logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) - +logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) if is_tf_available(): import tensorflow as tf @@ -271,7 +270,16 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: **kwargs ) else: - hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) + logging.info(f"Task: {task}") + logging.info(f"Model: {model_dir}") + logging.info(f"Device: {device}") + logging.info(f"Args: {kwargs}") + hf_pipeline = pipeline( + task=task, + model=model_dir, + device=device, + **kwargs + ) # wrapp specific pipeline to support better ux if task == "conversational": diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 64a5342d..120109a7 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -11,9 +11,7 @@ _load_repository_from_hf ) from transformers.testing_utils import ( - require_torch, slow, - require_tf, _run_slow_tests ) import uuid @@ -87,35 +85,51 @@ def local_container( else: model = repository_id - client = docker.DockerClient(base_url='unix://var/run/docker.sock') - container_name = f"integration-test-{framework}-{id}-{device}" - container_image = f"integration-test-{framework}:{device}" + logging.info(f"Starting container with model: {model}") - port = random.randint(5000, 7000) - - logging.debug(f"Image: {container_image}") - logging.debug(f"Port: {port}") - - device_request = [ - docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) - ] if IS_GPU else [] - - with tempfile.TemporaryDirectory() as 
tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(model, tmpdirname, framework=framework) - yield client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, - volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ), port - - #Teardown - previous = client.containers.get(container_name) - previous.stop() - previous.remove() + if not model: + logging.info(f"No model supported for {framework}") + yield None + else: + try: + logging.info(f"Starting container with Model = {model}") + client = docker.DockerClient(base_url='unix://var/run/docker.sock') + container_name = f"integration-test-{framework}-{id}-{device}" + container_image = f"integration-test-{framework}:{device}" + + port = random.randint(5000, 7000) + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) + ] if IS_GPU else [] + + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf( + repository_id = model, + target_dir = tmpdirname, + framework = framework + ) + logging.info(f"Temp dir name: {tmpdirname}") + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + except Exception as exception: + logging.error(f"Error starting container: {str(exception)}") + raise exception diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index 85091424..3083b5e6 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -45,20 +45,24 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: # stop = tenacity.stop_after_attempt(10), # reraise = True #) -def wait_for_container_to_be_ready(base_url, max_retries = 100): +def wait_for_container_to_be_ready( + base_url, + time_between_retries = 1, + max_retries = 30 +): retries = 0 while retries < max_retries: - time.sleep(1) + time.sleep(time_between_retries) try: response = requests.get(f"{base_url}/health") if response.status_code == 200: logging.info("Container ready!") return True else: - raise ConnectionError() - except: - logging.warning(f"Container not ready; trying again...") + raise ConnectionError(f"Error: {response.status_code}") + except Exception as exception: + logging.warning(f"Container at {base_url} not ready, trying again...") retries += 1 def verify_task( @@ -102,7 +106,7 @@ def verify_task( logging.error(f"Input: {input}") logging.error(f"Error: {str(exception)}") logging.error(f"Stack: {traceback.format_exc()}") - assert False + raise exception @require_torch diff --git a/tests/integ/test_pytorch_local.py b/tests/integ/test_pytorch_local.py index 564cf23d..c48bf29d 100644 --- a/tests/integ/test_pytorch_local.py +++ b/tests/integ/test_pytorch_local.py @@ -14,7 +14,7 @@ import pytest -class TestPytorchInference: +class TestPytorchLocal: @require_torch @pytest.mark.parametrize( diff --git 
diff --git a/tests/integ/test_tensorflow.py b/tests/integ/test_tensorflow.py
deleted file mode 100644
index a831108e..00000000
--- a/tests/integ/test_tensorflow.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from tests.integ.helpers import verify_task
-from tests.integ.config import (
-    task2input,
-    task2model,
-    task2output,
-    task2validation
-)
-import pytest
-import tenacity
-import docker
-
-class TestTensorflowInference:
-
-    @tenacity.retry(
-        retry = tenacity.retry_if_exception(docker.errors.APIError),
-        stop = tenacity.stop_after_attempt(3)
-    )
-    @pytest.mark.parametrize(
-        "device",
-        ["gpu"]
-    )
-    @pytest.mark.parametrize(
-        "task",
-        [
-            "text-classification",
-            "zero-shot-classification",
-            "ner",
-            "question-answering",
-            "fill-mask",
-            "summarization",
-            "translation_xx_to_yy",
-            "text2text-generation",
-            "text-generation",
-            "feature-extraction",
-            "image-classification",
-            "automatic-speech-recognition",
-            "audio-classification",
-            "object-detection",
-            "image-segmentation",
-            "table-question-answering",
-            "conversational",
-            "sentence-similarity",
-            "sentence-embeddings",
-            "sentence-ranking"
-        ]
-    )
-    @pytest.mark.parametrize(
-        "framework",
-        ["tensorflow"]
-    )
-    @pytest.mark.usefixtures('start_container')
-    def test_inference_gpu(self, start_container, task, framework, device):
-
-        verify_task(task = task, port = start_container[1])
-
-"""
-    @pytest.mark.parametrize(
-        "device",
-        ["cpu"]
-    )
-    @pytest.mark.parametrize(
-        "task",
-        [
-            "text-classification",
-            "zero-shot-classification",
-            "ner",
-            "question-answering",
-            "fill-mask",
-            "summarization",
-            "translation_xx_to_yy",
-            "text2text-generation",
-            "text-generation",
-            "feature-extraction",
-            "image-classification",
-            "automatic-speech-recognition",
-            "audio-classification",
-            "object-detection",
-            "image-segmentation",
-            "table-question-answering",
-            "conversational",
-            "sentence-similarity",
-            "sentence-embeddings",
-            "sentence-ranking"
-        ]
-    )
-    @pytest.mark.parametrize(
-        "framework",
-        ["tensorflow"]
-    )
-    @pytest.mark.usefixtures('start_container')
-    def test_inference_cpu(self, start_container, task, framework, device):
-
-        verify_task(task = task, port = start_container[1])
-"""
\ No newline at end of file
diff --git a/tests/integ/test_tensorflow_local.py b/tests/integ/test_tensorflow_local.py
new file mode 100644
index 00000000..45d37526
--- /dev/null
+++ b/tests/integ/test_tensorflow_local.py
@@ -0,0 +1,61 @@
+import tempfile
+from tests.integ.helpers import verify_task
+from tests.integ.config import (
+    task2input,
+    task2model,
+    task2output,
+    task2validation
+)
+from transformers.testing_utils import (
+    require_tf,
+    slow,
+    _run_slow_tests
+)
+import pytest
+
+
+class TestTensorflowLocal:
+
+    @pytest.mark.parametrize(
+        "task",
+        [
+            "text-classification",
+            "zero-shot-classification",
+            "ner",
+            "question-answering",
+            "fill-mask",
+            "summarization",
+            "translation_xx_to_yy",
+            "text2text-generation",
+            "text-generation",
+            "feature-extraction",
+            "image-classification",
+            "conversational",
+        ],
+    )
+    @pytest.mark.parametrize(
+        "device",
+        ["gpu", "cpu"]
+    )
+    @pytest.mark.parametrize(
+        "framework",
+        ["tensorflow"]
+    )
+    @pytest.mark.parametrize(
+        "repository_id",
+        [""]
+    )
+    @pytest.mark.usefixtures('local_container')
+    def test_tf_container_local_model(
+        self,
+        local_container,
+        task,
+        framework,
+        device
+    ) -> None:
+
+        verify_task(
+            task = task,
+            port = local_container[1],
+            framework = framework
+        )
diff --git a/tests/integ/test_tensorflow_remote.py b/tests/integ/test_tensorflow_remote.py
new file mode 100644
index 00000000..347f8e20
--- /dev/null
+++ b/tests/integ/test_tensorflow_remote.py
@@ -0,0 +1,61 @@
+import tempfile
+from tests.integ.helpers import verify_task
+from tests.integ.config import (
+    task2input,
+    task2model,
+    task2output,
+    task2validation
+)
+from transformers.testing_utils import (
+    require_tf,
+    slow,
+    _run_slow_tests
+)
+import pytest
+import tenacity
+import docker
+
+class TestTensorflowRemote:
+
+    @tenacity.retry(
+        retry = tenacity.retry_if_exception(docker.errors.APIError),
+        stop = tenacity.stop_after_attempt(3)
+    )
+    @pytest.mark.parametrize(
+        "device",
+        ["gpu", "cpu"]
+    )
+    @pytest.mark.parametrize(
+        "task",
+        [
+            "text-classification",
+            "zero-shot-classification",
+            "question-answering",
+            "fill-mask",
+            "summarization",
+            "ner",
+            "translation_xx_to_yy",
+            "text2text-generation",
+            "text-generation",
+            "feature-extraction",
+            "image-classification",
+            "automatic-speech-recognition",
+            "audio-classification",
+            "object-detection",
+            "image-segmentation",
+            "table-question-answering",
+            "conversational",
+            "sentence-similarity",
+            "sentence-embeddings",
+            "sentence-ranking",
+            "text-to-image"
+        ]
+    )
+    @pytest.mark.parametrize(
+        "framework",
+        ["tensorflow"]
+    )
+    @pytest.mark.usefixtures('remote_container')
+    def test_inference_remote(self, remote_container, task, framework, device):
+
+        verify_task(task = task, port = remote_container[1])
diff --git a/tox.ini b/tox.ini
index b9df08df..211aec09 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,7 +21,7 @@ commands = ruff src --fix
 
 # TODO: Add separate sections for different test cases
 [testenv:unit-torch]
-install_command = pip install -e ".[torch, st, diffusers]"
+install_command = pip install -e ".[torch]"
 allowlist_externals = pytest
 commands =
     pytest \
@@ -31,7 +31,7 @@ commands =
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-torch-slow]
-install_command = pip install -e ".[torch, st, diffusers]"
+install_command = pip install -e ".[torch]"
 allowlist_externals = pytest
 commands =
     pytest \
@@ -43,7 +43,7 @@ setenv =
     RUN_SLOW=True
 
 [testenv:unit-tensorflow]
-install_command = pip install -e ".[tensorflow, st, diffusers]"
+install_command = pip install -e ".[tensorflow]"
 allowlist_externals = pytest
 commands =
     pytest \
@@ -53,7 +53,7 @@ commands =
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-tensorflow-slow]
-install_command = pip install -e ".[tensorflow, st, diffusers]"
+install_command = pip install -e ".[tensorflow]"
 allowlist_externals = pytest
 commands =
     pytest \
@@ -65,10 +65,11 @@ setenv =
     RUN_SLOW=True
 
 [testenv:torch-integration-remote]
+install_command = pip install -e ".[torch]"
 allowlist_externals =
     pytest
 commands =
-    pytest -s -v -n 8 \
+    pytest -s -v -n 12 \
     {tty:--color=yes} \
     tests/integ/test_pytorch_remote.py{posargs} \
     --log-cli-level=INFO \
@@ -77,13 +78,40 @@ setenv =
     RUN_SLOW=True
 
 [testenv:torch-integration-local]
+install_command = pip install -e ".[torch]"
 allowlist_externals =
     pytest
 commands =
-    pytest -s -v -n 8 \
+    pytest -s -v -n 12 \
     {tty:--color=yes} \
     tests/integ/test_pytorch_local.py{posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
+setenv =
+    RUN_SLOW=True
+
+[testenv:tf-integration-remote]
+install_command = pip install -e ".[tensorflow]"
+allowlist_externals =
+    pytest
+commands =
+    pytest -s -v -n 4 \
+    {tty:--color=yes} \
+    tests/integ/test_tensorflow_remote.py{posargs} \
+    --log-cli-level=ERROR \
+    --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
+setenv =
+    RUN_SLOW=True
+
+[testenv:tf-integration-local]
+install_command = pip install -e ".[tensorflow]"
+allowlist_externals =
+    pytest
+commands =
+    pytest -v \
+    {tty:--color=yes} \
+    tests/integ/test_tensorflow_local.py{posargs} \
+    --log-cli-level=INFO \
+    --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
     RUN_SLOW=True
\ No newline at end of file

From bbdd3a0066abecba2826a1cfc74256c8e5f9cb98 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 15:53:05 +0000
Subject: [PATCH 046/173] tf remote pass

---
 tests/integ/test_tensorflow_remote.py | 19 +++++++------------
 tox.ini                               |  6 +++---
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/tests/integ/test_tensorflow_remote.py b/tests/integ/test_tensorflow_remote.py
index 347f8e20..a0c32342 100644
--- a/tests/integ/test_tensorflow_remote.py
+++ b/tests/integ/test_tensorflow_remote.py
@@ -7,7 +7,7 @@
     task2validation
 )
 from transformers.testing_utils import (
-    require_tf,
+    require_torch,
     slow,
     _run_slow_tests
 )
@@ -30,25 +30,16 @@ class TestTensorflowRemote:
         [
             "text-classification",
             "zero-shot-classification",
+            "ner",
             "question-answering",
             "fill-mask",
             "summarization",
-            "ner",
             "translation_xx_to_yy",
             "text2text-generation",
             "text-generation",
             "feature-extraction",
             "image-classification",
-            "automatic-speech-recognition",
-            "audio-classification",
-            "object-detection",
-            "image-segmentation",
-            "table-question-answering",
             "conversational",
-            "sentence-similarity",
-            "sentence-embeddings",
-            "sentence-ranking",
-            "text-to-image"
         ]
     )
     @pytest.mark.parametrize(
@@ -58,4 +49,8 @@ class TestTensorflowRemote:
     @pytest.mark.usefixtures('remote_container')
     def test_inference_remote(self, remote_container, task, framework, device):
 
-        verify_task(task = task, port = remote_container[1])
+        verify_task(
+            task = task,
+            port = remote_container[1],
+            framework = framework
+        )
diff --git a/tox.ini b/tox.ini
index 211aec09..27b0503c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -95,10 +95,10 @@ install_command = pip install -e ".[tensorflow]"
 allowlist_externals =
     pytest
 commands =
-    pytest -s -v -n 4 \
+    pytest -n 2 \
     {tty:--color=yes} \
     tests/integ/test_tensorflow_remote.py{posargs} \
-    --log-cli-level=ERROR \
+    --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
     RUN_SLOW=True
@@ -108,7 +108,7 @@ install_command = pip install -e ".[tensorflow]"
 allowlist_externals =
     pytest
 commands =
-    pytest -v \
+    pytest -n 2 \
     {tty:--color=yes} \
     tests/integ/test_tensorflow_local.py{posargs} \
     --log-cli-level=INFO \

From dde132ec2b523e8707f9bf8ff63d45c192ca4ad3 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 15:59:51 +0000
Subject: [PATCH 047/173] tox

---
 .github/workflows/gpu-integ-test.yaml | 20 ++++++++++----------
 tox.ini                               | 24 ++++++++++++------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index d13146ae..9b0bf103 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -13,26 +13,25 @@
 
 jobs:
 
-  pytorch-integration-test:
+  pytorch-integration-test-local:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
     env:
       AWS_REGION: us-east-1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-      - name: Set up Python 3.11
+      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
-          python-version: 3.11
+          python-version: 3.9
       - name: Install Python dependencies
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
-        run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
+        run: docker build -t integration-test-pytorch:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
       - name: Run Integration Tests
-        run: RUN_SLOW=True make integ-test
-  tensorflow-integration-test:
-    needs:
-      - pytorch-integration-test
+        run: tox -e
+  pytorch-integration-test-remote:
+  tensorflow-integration-test-local:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
     env:
       AWS_REGION: us-east-1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
           python-version: 3.9
       - name: Uninstall pytorch
         run: pip uninstall torch torchvision -y
-      - name: Install Python dependencies
-        run: pip install -e .[test,dev,tensorflow]
+      - name: Install Tox
+        run: pip install tox
+      - name: "Run tox: local
       - name: Build Docker
         run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile .
       - name: Run Integration Tests
         run: RUN_SLOW=True make integ-test
diff --git a/tox.ini b/tox.ini
index 27b0503c..23670f08 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,7 +26,7 @@ allowlist_externals = pytest
 commands =
     pytest \
     {tty:--color=yes} \
-    tests/unit/{posargs} \
+    tests/unit/ {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
@@ -36,7 +36,7 @@ allowlist_externals = pytest
 commands =
     pytest \
     {tty:--color=yes} \
-    tests/unit/{posargs} \
+    tests/unit/ {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -48,7 +48,7 @@ allowlist_externals = pytest
 commands =
     pytest \
     {tty:--color=yes} \
-    tests/unit/{posargs} \
+    tests/unit/ {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
@@ -58,7 +58,7 @@ allowlist_externals = pytest
 commands =
     pytest \
     {tty:--color=yes} \
-    tests/unit/{posargs} \
+    tests/unit/ {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -69,9 +69,9 @@ install_command = pip install -e ".[torch]"
 allowlist_externals =
     pytest
 commands =
-    pytest \
+    pytest \
     {tty:--color=yes} \
-    tests/integ/test_pytorch_remote.py{posargs} \
+    tests/integ/test_pytorch_remote.py {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -82,9 +82,9 @@ install_command = pip install -e ".[torch]"
 allowlist_externals =
     pytest
 commands =
-    pytest \
+    pytest \
     {tty:--color=yes} \
-    tests/integ/test_pytorch_local.py{posargs} \
+    tests/integ/test_pytorch_local.py {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -95,9 +95,9 @@ install_command = pip install -e ".[tensorflow]"
 allowlist_externals =
     pytest
 commands =
-    pytest \
+    pytest \
     {tty:--color=yes} \
-    tests/integ/test_tensorflow_remote.py{posargs} \
+    tests/integ/test_tensorflow_remote.py {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -108,9 +108,9 @@ install_command = pip install -e ".[tensorflow]"
 allowlist_externals =
     pytest
 commands =
-    pytest \
+    pytest \
     {tty:--color=yes} \
-    tests/integ/test_tensorflow_local.py{posargs} \
+    tests/integ/test_tensorflow_local.py {posargs} \
     --log-cli-level=INFO \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
From daeae063bfc28acb4e469cc85b16d2e49b2dae6c Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:11:39 +0000
Subject: [PATCH 048/173] require_tf

---
 .github/workflows/unit-test.yaml             | 56 ++++++++------------
 src/huggingface_inference_toolkit/handler.py | 13 ++++-
 src/huggingface_inference_toolkit/utils.py   |  8 ++-
 tests/unit/test_handler.py                   | 15 ++++--
 tests/unit/test_utils.py                     |  2 +-
 5 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 7a344a53..1ab50167 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -1,10 +1,10 @@
 name: Run Unit-Tests
 
 on:
-  #push:
-  #  branches:
-  #    - main
-  #pull_request:
+  push:
+    branches:
+      - main
+  pull_request:
   workflow_dispatch:
 
 concurrency:
@@ -16,43 +16,31 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.9
+      - name: Set up Python 3.9.18
         uses: actions/setup-python@v2
         with:
-          python-version: 3.9.12
-      - name: Install Python dependencies
-        run: pip install -e .[test,dev,torch,st]
-      - uses: FedericoCarboni/setup-ffmpeg@v2
-        id: setup-ffmpeg
-      - name: Run Unit test_const
-        run: python -m pytest -s -v ./tests/unit/test_const.py
-      - name: Run Unit test_handler
-        run: python -m pytest -s -v ./tests/unit/test_handler.py
-      - name: Run Unit test_sentence_transformers
-        run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py
-      - name: Run Unit test_serializer
-        run: python -m pytest -s -v ./tests/unit/test_serializer.py
-      - name: Run Unit test_utils
-        run: python -m pytest -s -v ./tests/unit/test_utils.py
+          python-version: 3.9.18
+      - name: Install Tox
+        run: pip install tox
+      - uses: Install FFMPEG
+        run: |
+          sudo apt-get update -y &&
+          sudo apt-get upgrade -y &&
+          sudo apt-get install -y ffmpeg
+      - name: Run unit tests for Pytorch
+        run: tox -e unit-torch-slow -- -n 4
   tensorflow-unit-test:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.9
+      - name: Set up Python 3.9.18
         uses: actions/setup-python@v2
         with:
-          python-version: 3.9.12
-      - name: Install Python dependencies
-        run: pip install -e .[test,dev,tensorflow]
-      - name: Run Unit test_const
-        run: python -m pytest -s -v ./tests/unit/test_const.py
-      - name: Run Unit test_handler
-        run: python -m pytest -s -v ./tests/unit/test_handler.py
-      - name: Run Unit test_sentence_transformers
-        run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py
-      - name: Run Unit test_serializer
-        run: python -m pytest -s -v ./tests/unit/test_serializer.py
-      - name: Run Unit test_utils
-        run: python -m pytest -s -v ./tests/unit/test_utils.py
+          python-version: 3.9.18
+      - name: Install Tox
+        run: pip install tox
+      - name: Run unit tests for Tensorflow
+        run: tox -e unit-tensorflow-slow -- -n 4
+
+
diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py
index 993e4967..521d3a8a 100644
--- a/src/huggingface_inference_toolkit/handler.py
+++ b/src/huggingface_inference_toolkit/handler.py
@@ -13,8 +13,17 @@ class HuggingFaceHandler:
     A Default Hugging Face Inference Handler which works with all
    transformers pipelines, Sentence Transformers and Optimum.
     """
 
-    def __init__(self, model_dir: Union[str, Path], task=None):
-        self.pipeline = get_pipeline(model_dir=model_dir, task=task)
+    def __init__(
+        self,
+        model_dir: Union[str, Path],
+        task=None,
+        framework="pt"
+    ):
+        self.pipeline = get_pipeline(
+            model_dir=model_dir,
+            task=task,
+            framework=framework
+        )
 
     def __call__(self, data):
         """
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 84c358a3..68236e87 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -112,6 +112,7 @@ def _get_framework():
     """
     extracts which DL framework is used for inference, if both are installed use pytorch
     """
+
     if is_torch_available():
         return "pytorch"
     elif is_tf_available():
@@ -250,7 +251,12 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline:
 
     if is_optimum_available():
         logger.info("Optimum is not implement yet using default pipeline.")
-        hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)
+        hf_pipeline = pipeline(
+            task=task,
+            model=model_dir,
+            device=device,
+            **kwargs
+        )
     elif is_sentence_transformers_available() and task in [
         "sentence-similarity",
         "sentence-embeddings",
diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py
index 0fdfb15b..42e65b2d 100644
--- a/tests/unit/test_handler.py
+++ b/tests/unit/test_handler.py
@@ -1,6 +1,10 @@
 import tempfile
 import torch
-from transformers.testing_utils import require_torch, slow, require_tf
+from transformers.testing_utils import (
+    require_tf,
+    require_torch,
+    slow
+)
 import pytest
 from huggingface_inference_toolkit.handler import (
     HuggingFaceHandler,
@@ -77,7 +81,11 @@ def test_tf_get_device():
 
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
-        storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow")
+        storage_dir = _load_repository_from_hf(
+            MODEL,
+            tmpdirname,
+            framework="tensorflow"
+        )
         h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK)
         if _is_gpu_available():
             assert h.pipeline.device == 0
@@ -96,7 +104,8 @@ def test_tf_predict_call():
         )
         handler = HuggingFaceHandler(
             model_dir=str(storage_dir),
-            task=TASK
+            task=TASK,
+            framework="tf"
         )
 
         prediction = handler(INPUT)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 9d5052ee..b60ad038 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 import tempfile
 
+
 from transformers import pipeline
 from transformers.file_utils import is_torch_available
 from transformers.testing_utils import require_tf, require_torch, slow
@@ -16,7 +17,6 @@
     wrap_conversation_pipeline,
 )
 
-
 MODEL = "lysandre/tiny-bert-random"
 TASK = "text-classification"
 TASK_MODEL = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"

From 3c174521bc0d113e41f4256a744b3f7165aea082 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:12:47 +0000
Subject: [PATCH 049/173] workflow

---
 .github/workflows/unit-test.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 1ab50167..f1d241af 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -40,7 +40,4 @@ jobs:
       - name: Install Tox
         run: pip install tox
       - name: Run unit tests for Tensorflow
-        run: tox -e unit-tensorflow-slow -- -n 4
-
-
-
+        run: tox -e unit-tensorflow-slow -- -n 4
\ No newline at end of file
From e01ea5c1207eb6d0cc3523bce1ae25286f02398f Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:18:06 +0000
Subject: [PATCH 050/173] gpu integ

---
 .github/workflows/gpu-integ-test.yaml | 46 ++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 9b0bf103..920c38df 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -28,9 +28,25 @@ jobs:
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
         run: docker build -t integration-test-pytorch:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
-      - name: Run Integration Tests
-        run: tox -e
+      - name: "Run Integration Tests: Torch Local"
+        run: tox -e torch-integration-local -- -n 4
   pytorch-integration-test-remote:
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Install Python dependencies
+        run: pip install -e .[test,dev,torch]
+      - name: Build Docker
+        run: docker build -t integration-test-pytorch:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
+      - name: "Run Integration Tests: Torch Remote"
+        run: tox -e torch-integration-remote -- -n 4
   tensorflow-integration-test-local:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
     env:
       AWS_REGION: us-east-1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
           python-version: 3.9
-      - name: Uninstall pytorch
-        run: pip uninstall torch torchvision -y
+      - name: Build Docker
+        run: docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu .
       - name: Install Tox
         run: pip install tox
-      - name: "Run tox: local
+      - name: "Run Integration Tests: TF Local"
+        run: tox -e tensorflow-integration-local -- -n 4
+  tensorflow-integration-test-remote:
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
       - name: Build Docker
-        run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile .
-      - name: Run Integration Tests
-        run: RUN_SLOW=True make integ-test
+        run: docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu .
+      - name: Install Tox
+        run: pip install tox
+      - name: "Run Integration Tests: TF Remote"
+        run: tox -e tensorflow-integration-remote -- -n 4
\ No newline at end of file

From 09adb51b2f44672b4c1457d4c6f415e469ed9523 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:19:36 +0000
Subject: [PATCH 051/173] unit

---
 .github/workflows/unit-test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index f1d241af..d9254b06 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -22,7 +22,7 @@ jobs:
         run: pip install tox
-      - uses: Install FFMPEG
+      - name: Install FFMPEG
         run: |
           sudo apt-get update -y &&
           sudo apt-get upgrade -y &&
           sudo apt-get install -y ffmpeg

From 6e11450267d1525dcb1081913585e7b4dc43197d Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:29:54 +0000
Subject: [PATCH 052/173] log level

---
 tox.ini | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index 23670f08..d83735c8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,7 +27,7 @@ commands =
     pytest \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=INFO \
+    --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-torch-slow]
@@ -37,7 +37,7 @@ commands =
     pytest \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=INFO \
+    --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
     RUN_SLOW=True

From 591ae0aff6d499d30e5b893578e87620428d17b2 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:33:14 +0000
Subject: [PATCH 053/173] verbose

---
 tox.ini | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tox.ini b/tox.ini
index d83735c8..ee695986 100644
--- a/tox.ini
+++ b/tox.ini
@@ -24,7 +24,7 @@ commands = ruff src --fix
 install_command = pip install -e ".[torch]"
 allowlist_externals = pytest
 commands =
-    pytest \
+    pytest -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \
@@ -46,10 +46,10 @@ setenv =
 install_command = pip install -e ".[tensorflow]"
 allowlist_externals = pytest
 commands =
-    pytest \
+    pytest -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=INFO \
+    --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-tensorflow-slow]

From 73ae3fe29082d6bd89b4a181ed682221e98c8ddb Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:34:23 +0000
Subject: [PATCH 054/173] ffmpeg

---
 .github/workflows/unit-test.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index d9254b06..68132773 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -24,8 +24,6 @@ jobs:
         run: pip install tox
       - name: Install FFMPEG
         run: |
-          sudo apt-get update -y &&
-          sudo apt-get upgrade -y &&
           sudo apt-get install -y ffmpeg
       - name: Run unit tests for Pytorch
         run: tox -e unit-torch-slow -- -n 4
From fa24df0af17c27e648b7357b7132d645b316b043 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:35:31 +0000
Subject: [PATCH 055/173] update

---
 .github/workflows/unit-test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 68132773..b354316f 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -24,7 +24,7 @@ jobs:
         run: pip install tox
       - name: Install FFMPEG
         run: |
-          sudo apt-get install -y ffmpeg
+          sudo apt-get update -y && sudo apt-get install -y ffmpeg
       - name: Run unit tests for Pytorch
         run: tox -e unit-torch-slow -- -n 4
   tensorflow-unit-test:

From 2e5efd098f214e477f41a26d4a42f37aacefbcdb Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:41:18 +0000
Subject: [PATCH 056/173] level:

---
 tox.ini | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tox.ini b/tox.ini
index ee695986..61ba0fac 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,20 +21,20 @@ commands = ruff src --fix
 
 # TODO: Add separate sections for different test cases
 [testenv:unit-torch]
-install_command = pip install -e ".[torch]"
+install_command = pip install -e ".[torch, st, diffusers]"
 allowlist_externals = pytest
 commands =
-    pytest -v \
+    pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-torch-slow]
-install_command = pip install -e ".[torch]"
+install_command = pip install -e ".[torch, st, diffusers]"
 allowlist_externals = pytest
 commands =
-    pytest \
+    pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \
@@ -43,23 +43,23 @@ setenv =
     RUN_SLOW=True
 
 [testenv:unit-tensorflow]
-install_command = pip install -e ".[tensorflow]"
+install_command = pip install -e ".[tensorflow, st, diffusers]"
 allowlist_externals = pytest
 commands =
-    pytest -v \
+    pytest -l -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-tensorflow-slow]
-install_command = pip install -e ".[tensorflow]"
+install_command = pip install -e ".[tensorflow, st, diffusers]"
 allowlist_externals = pytest
 commands =
-    pytest \
+    pytest -l -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=INFO \
+    --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
     RUN_SLOW=True

From 65c6f160c391da1c23fde2b42bdec82492c1a0fd Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:47:26 +0000
Subject: [PATCH 057/173] debug

---
 .github/workflows/unit-test.yaml | 4 ++++
 tox.ini                          | 8 ++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index b354316f..7dcde317 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -7,6 +7,10 @@ on:
   pull_request:
   workflow_dispatch:
 
+env:
+  ACTIONS_RUNNER_DEBUG: True
+  ACTIONS_STEP_DEBUG: True
+
 concurrency:
diff --git a/tox.ini b/tox.ini
index 61ba0fac..af955ed3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -43,20 +43,20 @@ setenv =
     RUN_SLOW=True
 
 [testenv:unit-tensorflow]
-install_command = pip install -e ".[tensorflow, st, diffusers]"
+install_command = pip install -e ".[tensorflow]"
 allowlist_externals = pytest
 commands =
-    pytest -l -v \
+    pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 
 [testenv:unit-tensorflow-slow]
-install_command = pip install -e ".[tensorflow, st, diffusers]"
+install_command = pip install -e ".[tensorflow]"
 allowlist_externals = pytest
 commands =
-    pytest -l -v \
+    pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
     --log-cli-level=ERROR \

From 3014c042763ece6ef827eef5524a79dad3b21b7d Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:48:56 +0000
Subject: [PATCH 058/173] true

---
 .github/workflows/unit-test.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 7dcde317..37861f41 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -8,8 +8,8 @@ on:
   workflow_dispatch:
 
 env:
-  ACTIONS_RUNNER_DEBUG: True
-  ACTIONS_STEP_DEBUG: True
+  ACTIONS_RUNNER_DEBUG: true
+  ACTIONS_STEP_DEBUG: true
 
 concurrency:

From cff49c921e7b66605eb51be4e714df9e49c0e05a Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 17:57:30 +0000
Subject: [PATCH 059/173] install command

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index af955ed3..c66b3dee 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@
 deps = -r requirements.txt
 install_command =
     pip install -U pip
-    pip install -e .
+    pip install -e ".[test]"
 setenv =
     PYTHONPATH=.

From ca4a96499b64bd5abc55c2175797eb0063ce753d Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 18:00:18 +0000
Subject: [PATCH 060/173] deps

---
 .github/workflows/unit-test.yaml | 8 ++++----
 tox.ini                          | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 37861f41..017b20cd 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -24,8 +24,8 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.9.18
-      - name: Install Tox
-        run: pip install tox
+      - name: Install Tox & Dependencies
+        run: pip install tox ".[test]"
       - name: Install FFMPEG
         run: |
           sudo apt-get update -y && sudo apt-get install -y ffmpeg
@@ -39,7 +39,7 @@ jobs:
-      - name: Install Tox
-        run: pip install tox
+      - name: Install Tox & Dependencies
+        run: pip install tox ".[test]"
       - name: Run unit tests for Tensorflow
         run: tox -e unit-tensorflow-slow -- -n 4
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index c66b3dee..c07a0026 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@
 deps = -r requirements.txt
 install_command =
     pip install -U pip
-    pip install -e ".[test]"
+    pip install -e ".[test, quality]"
 setenv =
     PYTHONPATH=.
From 70fb4016d5d218884fa3cfc2a15a9bd24354eff0 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 18:06:34 +0000
Subject: [PATCH 061/173] torch

---
 tests/unit/test_handler.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py
index 42e65b2d..3addba8f 100644
--- a/tests/unit/test_handler.py
+++ b/tests/unit/test_handler.py
@@ -1,5 +1,4 @@
 import tempfile
-import torch
 from transformers.testing_utils import (
     require_tf,
     require_torch,
     slow
 )
@@ -24,7 +23,7 @@
 
 @require_torch
 def test_pt_get_device():
-
+    import torch
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
         storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch")

From 1cecf47408258031546d950e30d08ce4777c7422 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 18:08:10 +0000
Subject: [PATCH 062/173] runs on

---
 .github/workflows/unit-test.yaml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 017b20cd..10beae7f 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -17,7 +17,9 @@ concurrency:
 
 jobs:
   pytorch-unit-test:
-    runs-on: ubuntu-latest
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    env:
+      AWS_REGION: us-east-1
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python 3.9.18
@@ -32,7 +34,9 @@ jobs:
   tensorflow-unit-test:
-    runs-on: ubuntu-latest
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    env:
+      AWS_REGION: us-east-1
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python 3.9.18

From 231efa524164044a034534dda8559c3c8ea47015 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 18:12:41 +0000
Subject: [PATCH 063/173] unit

---
 .github/workflows/unit-test.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 10beae7f..8b953b5b 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -32,7 +32,7 @@ jobs:
         run: |
           sudo apt-get update -y && sudo apt-get install -y ffmpeg
       - name: Run unit tests for Pytorch
-        run: tox -e unit-torch-slow -- -n 4
+        run: tox -e unit-torch -- -n 4
   tensorflow-unit-test:
@@ -46,4 +46,4 @@ jobs:
       - name: Install Tox & Dependencies
         run: pip install tox ".[test]"
       - name: Run unit tests for Tensorflow
-        run: tox -e unit-tensorflow-slow -- -n 4
\ No newline at end of file
+        run: tox -e unit-tensorflow -- -n 4
\ No newline at end of file
From c094365a7b9b331c64096f4b7dcafa3a325ec652 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 19:36:40 +0000
Subject: [PATCH 064/173] workflow

---
 .github/workflows/unit-test.yaml | 12 +++++++-----
 requirements-test.txt            | 11 +++++++++++
 2 files changed, 18 insertions(+), 5 deletions(-)
 create mode 100644 requirements-test.txt

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 8b953b5b..c82ae6c5 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -21,14 +21,16 @@ jobs:
     env:
       AWS_REGION: us-east-1
     steps:
-      - uses: actions/checkout@v2
+      - name: nvidia-smi
+        run: nvidia-smi
+      - uses: actions/checkout@v4.1.1
       - name: Set up Python 3.9.18
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: 3.9.18
-      - name: Install Tox & Dependencies
-        run: pip install tox ".[test]"
-      - name: Install FFMPEG
+      - name: Install test dependencies
+        run: pip install -U pip -r requirements-test.txt
+      - name: Install ffmpeg
         run: |
           sudo apt-get update -y && sudo apt-get install -y ffmpeg
       - name: Run unit tests for Pytorch
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 00000000..97d215a3
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,11 @@
+tox
+pytest
+pytest-xdist
+parameterized
+psutil
+datasets
+pytest-sugar
+mock==2.0.0
+docker
+requests
+tenacity
\ No newline at end of file

From 5f35e46109fad3c89e1f6559395ac7959ad798f5 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 19:41:32 +0000
Subject: [PATCH 065/173] install

---
 .github/workflows/unit-test.yaml | 2 ++
 tox.ini                          | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index c82ae6c5..5a5dbaf8 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -21,6 +21,8 @@ jobs:
     env:
       AWS_REGION: us-east-1
     steps:
+      - name: cuda-toolkit
+        uses: Jimver/cuda-toolkit@v0.2.14
       - name: nvidia-smi
         run: nvidia-smi
       - uses: actions/checkout@v4.1.1
diff --git a/tox.ini b/tox.ini
index c07a0026..af22782a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@
 deps = -r requirements.txt
 install_command =
     pip install -U pip
-    pip install -e ".[test, quality]"
+    pip install -e . ".[test, quality]"
 setenv =
     PYTHONPATH=.
@@ -27,7 +27,7 @@ commands =
     pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=ERROR \
+    --log-cli-level=DEBUG \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'

From e2691f56affbdbf4071bea71c9847e03a8180249 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 19:46:44 +0000
Subject: [PATCH 066/173] cuda

---
 .github/workflows/unit-test.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 5a5dbaf8..194b5499 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -21,8 +21,9 @@ jobs:
     env:
       AWS_REGION: us-east-1
     steps:
-      - name: cuda-toolkit
-        uses: Jimver/cuda-toolkit@v0.2.14
+      - name: Install CUDA
+        run: |
+          sudo apt update -y && sudo apt install nvidia-cuda-toolkit -y
       - name: nvidia-smi
         run: nvidia-smi
       - uses: actions/checkout@v4.1.1
From 60cc692b2f2f584160c62fe2e80f403e51e675e6 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 19:52:54 +0000
Subject: [PATCH 067/173] cuda & transformers

---
 .github/workflows/unit-test.yaml | 5 ++++-
 tox.ini                          | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 194b5499..9d397e9f 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -23,7 +23,10 @@ jobs:
     steps:
       - name: Install CUDA
         run: |
-          sudo apt update -y && sudo apt install nvidia-cuda-toolkit -y
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get -y install cuda-toolkit-12-3
       - name: nvidia-smi
         run: nvidia-smi
       - uses: actions/checkout@v4.1.1
diff --git a/tox.ini b/tox.ini
index af22782a..fe0b0b02 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,7 +21,9 @@ commands = ruff src --fix
 
 # TODO: Add separate sections for different test cases
 [testenv:unit-torch]
-install_command = pip install -e ".[torch, st, diffusers]"
+install_command =
+    pip install -e .
+    pip install -e ".[torch, st, diffusers]"
 allowlist_externals = pytest
 commands =
     pytest -s -v \

From 5c7d2db8e3443b7f59d2f7d4b5b6ecd9c3bbca6b Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 20:03:45 +0000
Subject: [PATCH 068/173] dependencies

---
 pyproject.toml                               | 13 +++----
 .../diffusers_utils.py                       |  2 +-
 src/huggingface_inference_toolkit/handler.py | 13 +------
 src/huggingface_inference_toolkit/utils.py   | 37 +++++--------------
 tox.ini                                      |  6 +--
 5 files changed, 20 insertions(+), 51 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2627f501..14cf8939 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ no_implicit_optional = true
 scripts_are_modules = true
 
 [tool.ruff]
-select = [
+lint.select = [
     "E",  # pycodestyle errors
     "W",  # pycodestyle warnings
     "F",  # pyflakes
@@ -12,7 +12,7 @@ select = [
     "C",  # flake8-comprehensions
     "B",  # flake8-bugbear
 ]
-ignore = [
+lint.ignore = [
     "E501",  # line too long, handled by black
     "B008",  # do not perform function calls in argument defaults
     "C901",  # too complex
@@ -21,13 +21,12 @@ ignore = [
 line-length = 119
 
 # Allow unused variables when underscore-prefixed.
-dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
-# Assume Python 3.11.
-target-version = "py311"
+# Assume Python 3.9.
+target-version = "py39"
 
-[tool.ruff.per-file-ignores]
-"__init__.py" = ["F401"]
+lint.per-file-ignores = {"__init__.py" = ["F401"]}
 
 [tool.isort]
 profile = "black"
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index d8bf9542..521a85df 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -32,7 +32,7 @@ def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
             self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
         except Exception:
             pass
-        
+
         self.pipeline.to(device)
 
     def __call__(
diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py
index 521d3a8a..c7f9fccb 100644
--- a/src/huggingface_inference_toolkit/handler.py
+++ b/src/huggingface_inference_toolkit/handler.py
@@ -13,17 +13,8 @@ class HuggingFaceHandler:
     A Default Hugging Face Inference Handler which works with all
    transformers pipelines, Sentence Transformers and Optimum.
     """
 
-    def __init__(
-        self,
-        model_dir: Union[str, Path],
-        task=None,
-        framework="pt"
-    ):
-        self.pipeline = get_pipeline(
-            model_dir=model_dir,
-            task=task,
-            framework=framework
-        )
+    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
+        self.pipeline = get_pipeline(model_dir=model_dir, task=task, framework=framework)
 
     def __call__(self, data):
         """
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 68236e87..6e2f9dfd 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -112,7 +112,7 @@ def _get_framework():
     """
     extracts which DL framework is used for inference, if both are installed use pytorch
     """
-    
+
     if is_torch_available():
         return "pytorch"
     elif is_tf_available():
@@ -251,60 +251,41 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline:
 
     if is_optimum_available():
         logger.info("Optimum is not implement yet using default pipeline.")
-        hf_pipeline = pipeline(
-            task=task,
-            model=model_dir,
-            device=device,
-            **kwargs
-        )
+        hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)
     elif is_sentence_transformers_available() and task in [
         "sentence-similarity",
         "sentence-embeddings",
         "sentence-ranking",
     ]:
-        hf_pipeline = get_sentence_transformers_pipeline(
-            task=task,
-            model_dir=model_dir,
-            device=device,
-            **kwargs
-        )
+        hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
     elif is_diffusers_available() and task == "text-to-image":
-        hf_pipeline = get_diffusers_pipeline(
-            task=task,
-            model_dir=model_dir,
-            device=device,
-            **kwargs
-        )
+        hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
     else:
         logging.info(f"Task: {task}")
         logging.info(f"Model: {model_dir}")
         logging.info(f"Device: {device}")
         logging.info(f"Args: {kwargs}")
-        hf_pipeline = pipeline(
-            task=task,
-            model=model_dir,
-            device=device,
-            **kwargs
-        )
+        hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)
 
     # wrapp specific pipeline to support better ux
     if task == "conversational":
         hf_pipeline = wrap_conversation_pipeline(hf_pipeline)
     elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration):
-        # set chunk length to 30s for whisper to enable long audio files
         hf_pipeline._preprocess_params["chunk_length_s"] = 30
         hf_pipeline._preprocess_params["ignore_warning"] = True
         # set decoder to english by default
         # TODO: replace when transformers 4.26.0 is release with
-        hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(language="english", task="transcribe")
+        hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(
+            language="english", task="transcribe"
+        )
         """"
         hf_pipeline.tokenizer.language = "english"
         hf_pipeline.tokenizer.task = "transcribe"
         hf_pipeline.model.config.forced_decoder_ids = [
          (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:])
        ]"""
-        
+
     return hf_pipeline
diff --git a/tox.ini b/tox.ini
index fe0b0b02..b9d2aeee 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@
 deps = -r requirements.txt
 install_command =
     pip install -U pip
-    pip install -e . ".[test, quality]"
+    pip install -e .
 setenv =
     PYTHONPATH=.
@@ -18,12 +18,10 @@ commands = ruff src
 basepython = python
 commands = ruff src --fix
 
-# TODO: Add separate sections for different test cases
-
 [testenv:unit-torch]
 install_command =
     pip install -e .
-    pip install -e ".[torch, st, diffusers]"
+    pip install -e ".[test,dev,torch,st]"
 allowlist_externals = pytest
 commands =
     pytest -s -v \

From 6397b4cc9f99ff515d9fafb52c7ddf37a9627030 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 20:11:18 +0000
Subject: [PATCH 069/173] nvidia & cache

---
 .github/workflows/unit-test.yaml | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 9d397e9f..3e2ba4db 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -21,12 +21,22 @@ jobs:
     env:
       AWS_REGION: us-east-1
     steps:
+      - name: Use Apt lists cache
+        uses: actions/cache@v4.0.0
+        with:
+          path: /var/lib/apt/lists
+          key: ${{ runner.os }}-apt-lists
+      - name: Use Apt packages cache
+        uses: actions/cache@v4.0.0
+        with:
+          path: /var/cache/apt
+          key: ${{ runner.os }}-apt-packages
       - name: Install CUDA
         run: |
           wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
           sudo dpkg -i cuda-keyring_1.1-1_all.deb
           sudo apt-get update
-          sudo apt-get -y install cuda-toolkit-12-3
+          sudo apt-get -y install cuda-toolkit-12-3 nvidia-kernel-open-545 cuda-drivers-545
       - name: nvidia-smi
         run: nvidia-smi
       - uses: actions/checkout@v4.1.1

From 78d79dae92fcc2a5d0137b1c33f2c34aa385dd35 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 20:16:45 +0000
Subject: [PATCH 070/173] cuda drivers

---
 .github/workflows/unit-test.yaml | 2 +-
 tox.ini                          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 3e2ba4db..a7a33482 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -36,7 +36,7 @@ jobs:
           wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
           sudo dpkg -i cuda-keyring_1.1-1_all.deb
           sudo apt-get update
-          sudo apt-get -y install cuda-toolkit-12-3 nvidia-kernel-open-545 cuda-drivers-545
+          sudo apt-get -y install cuda-toolkit-12-3 cuda-drivers
       - name: nvidia-smi
         run: nvidia-smi
       - uses: actions/checkout@v4.1.1
diff --git a/tox.ini b/tox.ini
index b9d2aeee..bb6ce07a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,7 +27,7 @@ commands =
     pytest -s -v \
     {tty:--color=yes} \
     tests/unit/ {posargs} \
-    --log-cli-level=DEBUG \
+    --log-cli-level=ERROR \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'

From c80d1aa0b8e63d2ba03c97085d58c0a3a0923ce4 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 20 Feb 2024 22:03:11 +0000
Subject: [PATCH 071/173] whisper tiny pass

---
 .gitignore                                 |  2 +
 src/huggingface_inference_toolkit/utils.py | 46 ++++++++++++++++------
 tests/unit/test_utils.py                   | 34 +++++++++++++---
 tox.ini                                    | 15 ++++---
 4 files changed, 75 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 78b208e2..1cee519e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,12 @@
 # please consider a global .gitignore https://help.github.com/articles/ignoring-files
 .gitignore
 .egg-info
+.ruff_cache
 .vagrant*
 .hcl
 .terraform.lock.hcl
 .terraform
+pip-unpack-*
 __pycache__
 bin
 docker/docker
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 6e2f9dfd..85683a50 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -140,6 +140,7 @@
 
     if framework is None:
         framework = _get_framework()
+    logging.info(f"Framework: {framework}")
 
     if isinstance(target_dir, str):
         target_dir = Path(target_dir)
@@ -149,22 +150,24 @@
         target_dir.mkdir(parents=True)
 
     # check if safetensors weights are available
-    if framework == "pytorch":
-        files = HfApi().model_info(repository_id).siblings
-        if any(f.rfilename.endswith("safetensors") for f in files):
-            framework = "safetensors"
+    #if framework == "pytorch":
+        #files = HfApi().model_info(repository_id).siblings
+        #if any(f.rfilename.endswith("safetensors") for f in files):
+            #framework = "safetensors"
 
     # create regex to only include the framework specific weights
     ignore_regex = create_artifact_filter(framework)
+    logging.info(f"ignore_regex: {ignore_regex}")
+    logging.info(f"Framework after filtering: {framework}")
     logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
 
     # Download the repository to the workdir and filter out non-framework specific weights
     snapshot_download(
-        repository_id,
-        revision=revision,
-        local_dir=str(target_dir),
-        local_dir_use_symlinks=False,
-        ignore_patterns=ignore_regex,
+        repo_id = repository_id,
+        revision = revision,
+        local_dir = str(target_dir),
+        local_dir_use_symlinks = False,
+        ignore_patterns = ignore_regex,
     )
 
     return target_dir
@@ -223,7 +226,12 @@ def get_device():
         return -1
 
 
-def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline:
+def get_pipeline(
+    task: str,
+    model_dir: Path,
+    framework = "pytorch",
+    **kwargs,
+) -> Pipeline:
     """
     create pipeline class for a specific task based on local saved model
     """
@@ -244,6 +252,12 @@
         "zero-shot-image-classification",
     }:
         kwargs["feature_extractor"] = model_dir
+        hf_pipeline = pipeline(
+            task=task,
+            model=model_dir,
+            device=device,
+            **kwargs
+        )
     elif task in {"image-to-text"}:
         pass
     else:
@@ -265,12 +279,20 @@
         logging.info(f"Model: {model_dir}")
         logging.info(f"Device: {device}")
         logging.info(f"Args: {kwargs}")
-        hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)
+        hf_pipeline = pipeline(
+            task=task,
+            model=model_dir,
+            device=device,
+            **kwargs
+        )
 
     # wrapp specific pipeline to support better ux
     if task == "conversational":
         hf_pipeline = wrap_conversation_pipeline(hf_pipeline)
-    elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration):
+    elif task == "automatic-speech-recognition" and isinstance(
+        hf_pipeline.model,
+        WhisperForConditionalGeneration
+    ):
         # set chunk length to 30s for whisper to enable long audio files
         hf_pipeline._preprocess_params["chunk_length_s"] = 30
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index b60ad038..0ca1d93f 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -17,6 +17,8 @@
     wrap_conversation_pipeline,
 )
 
+import logging
+
 MODEL = "lysandre/tiny-bert-random"
 TASK = "text-classification"
 TASK_MODEL = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
@@ -112,7 +114,11 @@ def test_get_pipeline():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch")
-        pipe = get_pipeline(TASK, storage_dir.as_posix())
+        pipe = get_pipeline(
+            task = TASK,
+            model_dir = storage_dir.as_posix(),
+            framework = "pytorch"
+        )
         res = pipe("Life is good, Life is bad")
         assert "score" in res[0]
 
@@ -120,9 +126,27 @@
 @require_torch
 def test_whisper_long_audio():
     with tempfile.TemporaryDirectory() as tmpdirname:
-        storage_dir = _load_repository_from_hf("openai/whisper-tiny", tmpdirname, framework="pytorch")
-        pipe = get_pipeline("automatic-speech-recognition", storage_dir.as_posix())
-        res = pipe(os.path.join(os.getcwd(), "tests/resources/audio", "long_sample.mp3"))
+        storage_dir = _load_repository_from_hf(
+            repository_id = "openai/whisper-tiny",
+            target_dir = tmpdirname,
+            framework = "pytorch",
+            revision = "be0ba7c2f24f0127b27863a23a08002af4c2c279"
+        )
+        logging.info(f"Temp dir: {tmpdirname}")
+        logging.info(f"POSIX Path: {storage_dir.as_posix()}")
+        logging.info(f"Contents: {os.listdir(tmpdirname)}")
+        pipe = get_pipeline(
+            task = "automatic-speech-recognition",
+            model_dir = storage_dir.as_posix(),
+            framework = "safetensors"
+        )
+        res = pipe(
+            os.path.join(
+                os.getcwd(),
+                "tests/resources/audio",
+                "long_sample.mp3"
+            )
+        )
 
     assert len(res["text"]) > 700
 
@@ -149,7 +173,7 @@ def test_wrap_conversation_pipeline():
 @require_torch
 def test_wrapped_pipeline():
     with tempfile.TemporaryDirectory() as tmpdirname:
-        storage_dir = _load_repository_from_hf("microsoft/DialoGPT-small", tmpdirname, framework="pytorch")
+        storage_dir = _load_repository_from_hf("hf-internal-testing/tiny-random-blenderbot", tmpdirname, framework="pytorch")
         conv_pipe = get_pipeline("conversational", storage_dir.as_posix())
         data = {
             "past_user_inputs": ["Which movie is the best ?"],
diff --git a/tox.ini b/tox.ini
index bb6ce07a..eb74557c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -20,14 +20,19 @@ commands = ruff src --fix
 
 [testenv:unit-torch]
 install_command =
-    pip install -e .
-    pip install -e ".[test,dev,torch,st]"
-allowlist_externals = pytest
+    pip install -e ".[test,torch,st]"
+allowlist_externals =
+    pytest
 commands =
     pytest -s -v \
     {tty:--color=yes} \
-    tests/unit/ {posargs} \
-    --log-cli-level=ERROR \
+    tests/unit/test_const.py \
+    tests/unit/test_handler.py \
+    tests/unit/test_sentence_transformers.py \
+    tests/unit/test_serializer.py \
+    tests/unit/test_utils.py \
+    {posargs} \
+    --log-cli-level=DEBUG \
     --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
#hf_pipeline._preprocess_params["ignore_warning"] = True # set decoder to english by default # TODO: replace when transformers 4.26.0 is release with hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids( - language="english", task="transcribe" + language="english", + task="transcribe" ) """" hf_pipeline.tokenizer.language = "english" diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 0ca1d93f..166f618e 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -160,29 +160,49 @@ def test_wrap_conversation_pipeline(): framework="pt", ) conv_pipe = wrap_conversation_pipeline(init_pipeline) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" + } + ] res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res + assert "content" in res.messages[-1] @require_torch def test_wrapped_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf("hf-internal-testing/tiny-random-blenderbot", tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + repository_id = "microsoft/DialoGPT-small", + target_dir = tmpdirname, + framework="pytorch" + ) conv_pipe = get_pipeline("conversational", storage_dir.as_posix()) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" + } + ] res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res + assert "content" in res.messages[-1] def test_local_custom_pipeline(): diff --git a/tox.ini b/tox.ini index eb74557c..e7483b58 100644 --- a/tox.ini +++ b/tox.ini @@ -1,17 +1,24 @@ [tox] -envlist = py39 +envlist = 311 skipsdist = true +allowlist_externals = + pytest [testenv] -deps = -r requirements.txt -install_command = - pip install -U pip - pip install -e . -setenv = - PYTHONPATH=. +deps = + uv + pytest +allowlist_externals = + pytest + uv +commands_pre = + uv pip install -e ".[test]" +commands = pytest --version +setenv = + PYTHONPATH = . [testenv:lint] -basepython = python +basepython = python commands = ruff src [testenv:fix] @@ -19,10 +26,13 @@ basepython = python commands = ruff src --fix [testenv:unit-torch] -install_command = - pip install -e ".[test,torch,st]" +install_command = + uv pip install -e ".[torch,st]" allowlist_externals = pytest + uv + source + rm commands = pytest -s -v \ {tty:--color=yes} \ @@ -32,8 +42,12 @@ commands = tests/unit/test_serializer.py \ tests/unit/test_utils.py \ {posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' +setenv = + PYTHONPATH=. 
+ TORCH_USE_CUDA_DSA=true + [testenv:unit-torch-slow] install_command = pip install -e ".[torch, st, diffusers]" From 73ba40bf3d55d85209ec995550a386115ec4cfb7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 10:49:02 +0000 Subject: [PATCH 073/173] pass --- dockerfiles/pytorch/gpu/Dockerfile | 41 ++++++++++---------- src/huggingface_inference_toolkit/handler.py | 6 ++- tests/unit/test_handler.py | 6 ++- tox.ini | 16 ++++---- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 90c070cc..cd86be08 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -8,26 +8,27 @@ ENV TORCH_USE_CUDA_DSA=1 WORKDIR /app -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - build-essential \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - cmake \ - libprotobuf-dev \ - protobuf-compiler \ - python3 \ - python3-pip \ - python3.10-venv \ - # audio - libsndfile1-dev \ - ffmpeg \ +RUN apt-get update && \ + apt-get install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \ + apt-get install -y \ + build-essential \ + bzip2 \ + curl \ + git \ + git-lfs \ + tar \ + gcc \ + g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3.11 \ + python3-pip \ + python3.11-venv \ + libsndfile1-dev \ + ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index c7f9fccb..2810111e 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -14,7 +14,11 @@ class HuggingFaceHandler: """ def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): - self.pipeline = get_pipeline(model_dir=model_dir, task=task, framework=framework) + self.pipeline = get_pipeline( + model_dir=model_dir, + task=task, + framework=framework + ) def __call__(self, data): """ diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 3addba8f..1afbfb93 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -127,9 +127,11 @@ def test_tf_sentence_transformers_pipeline(): # TODO should fail! because TF is not supported yet with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="tensorflow" ) with pytest.raises(Exception) as exc_info: h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") - assert "Use `from_tf=True` to load this model from those weights." 
in str(exc_info.value) + assert "Unknown task sentence-embeddings" in str(exc_info.value) diff --git a/tox.ini b/tox.ini index e7483b58..28712535 100644 --- a/tox.ini +++ b/tox.ini @@ -31,8 +31,6 @@ install_command = allowlist_externals = pytest uv - source - rm commands = pytest -s -v \ {tty:--color=yes} \ @@ -50,8 +48,10 @@ setenv = [testenv:unit-torch-slow] -install_command = pip install -e ".[torch, st, diffusers]" -allowlist_externals = pytest +install_command = uv pip install -e ".[torch, st, diffusers]" +allowlist_externals = + pytest + uv commands = pytest -s -v \ {tty:--color=yes} \ @@ -62,8 +62,10 @@ setenv = RUN_SLOW=True [testenv:unit-tensorflow] -install_command = pip install -e ".[tensorflow]" -allowlist_externals = pytest +install_command = uv pip install -e ".[tensorflow, st]" +allowlist_externals = + pytest + uv commands = pytest -s -v \ {tty:--color=yes} \ @@ -72,7 +74,7 @@ commands = --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' [testenv:unit-tensorflow-slow] -install_command = pip install -e ".[tensorflow]" +install_command = pip install -e ".[tensorflow, st]" allowlist_externals = pytest commands = pytest -s -v \ From 29809bf1c59f7a9f40183b721c58d0b39ee943a0 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 11:06:18 +0000 Subject: [PATCH 074/173] tf pass --- src/huggingface_inference_toolkit/handler.py | 5 +++- .../sentence_transformers_utils.py | 20 ++++++++++--- src/huggingface_inference_toolkit/utils.py | 29 ++++++++++--------- tests/unit/test_handler.py | 7 +++-- tox.ini | 2 +- 5 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 2810111e..7743577d 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -39,7 +39,10 @@ def __call__(self, data): return prediction -def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None): +def get_inference_handler_either_custom_or_default_handler( + model_dir: Path, + task: Optional[str] = None +): """ get inference handler either custom or default Handler """ diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index 2a3c0055..f95f9e7a 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -1,4 +1,5 @@ import importlib.util +import logging _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None @@ -47,7 +48,18 @@ def __call__(self, inputs): } -def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **kwargs): - device = "cuda" if device == 0 else "cpu" - pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device) - return pipeline +def get_sentence_transformers_pipeline( + task=None, + model_dir=None, + device=-1, + **kwargs +): + try: + device = "cuda" if device == 0 else "cpu" + pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device) + return pipeline + except KeyError: + framework = kwargs['framework'] + message = f"Task {task} is not supported for framework {framework}" + logging.error(framework) + raise ValueError(message) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 066f24e9..77561342 100644 --- 
a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -3,6 +3,7 @@ import sys from pathlib import Path from typing import Optional, Union +import re from huggingface_hub import HfApi, login, snapshot_download from transformers import WhisperForConditionalGeneration, pipeline @@ -130,11 +131,13 @@ def _load_repository_from_hf( """ Load a model from huggingface hub. """ + if hf_hub_token is not None: login(token=hf_hub_token) if framework is None: framework = _get_framework() + logging.info(f"Framework: {framework}") if isinstance(target_dir, str): @@ -144,12 +147,6 @@ def _load_repository_from_hf( if not target_dir.exists(): target_dir.mkdir(parents=True) - # check if safetensors weights are available - #if framework == "pytorch": - #files = HfApi().model_info(repository_id).siblings - #if any(f.rfilename.endswith("safetensors") for f in files): - #framework = "safetensors" - # create regex to only include the framework specific weights ignore_regex = create_artifact_filter(framework) logging.info(f"ignore_regex: {ignore_regex}") @@ -266,9 +263,19 @@ def get_pipeline( "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_sentence_transformers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_diffusers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) else: logging.info(f"Task: {task}") logging.info(f"Model: {model_dir}") @@ -297,12 +304,6 @@ def get_pipeline( language="english", task="transcribe" ) - """" - hf_pipeline.tokenizer.language = "english" - hf_pipeline.tokenizer.task = "transcribe" - hf_pipeline.model.config.forced_decoder_ids = [ - (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:]) - ]""" return hf_pipeline diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 1afbfb93..d1a0a561 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -132,6 +132,7 @@ def test_tf_sentence_transformers_pipeline(): framework="tensorflow" ) with pytest.raises(Exception) as exc_info: - h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") - - assert "Unknown task sentence-embeddings" in str(exc_info.value) + h = get_inference_handler_either_custom_or_default_handler( + str(storage_dir), + task="sentence-embeddings" + ) diff --git a/tox.ini b/tox.ini index 28712535..e77f6908 100644 --- a/tox.ini +++ b/tox.ini @@ -70,7 +70,7 @@ commands = pytest -s -v \ {tty:--color=yes} \ tests/unit/ {posargs} \ - --log-cli-level=ERROR \ + --log-cli-level=DEBUG \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' [testenv:unit-tensorflow-slow] From e4976a329e2cc27e95fed3feae9c398317b1ed66 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:16:54 +0000 Subject: [PATCH 075/173] run unit tests inside docker --- .dockerignore | 7 +++++++ dockerfiles/pytorch/gpu/Dockerfile | 3 +++ tests/integ/test_tensorflow_remote.py | 6 +----- tox.ini | 22 ++++++++++++++++++---- 4 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..61053631 --- /dev/null +++ b/.dockerignore 
@@ -0,0 +1,7 @@ +.github +.pytest_cache +.ruff_cache +.tox +.venv +.gitignore +makefile \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index cd86be08..f87ceed3 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -73,6 +73,9 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py +#unit tests +COPY . /tmp/hf-inference-test + # copy entrypoint and change permissions COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh diff --git a/tests/integ/test_tensorflow_remote.py b/tests/integ/test_tensorflow_remote.py index a0c32342..3ee660b6 100644 --- a/tests/integ/test_tensorflow_remote.py +++ b/tests/integ/test_tensorflow_remote.py @@ -17,13 +17,9 @@ class TestTensorflowRemote: - @tenacity.retry( - retry = tenacity.retry_if_exception(docker.errors.APIError), - stop = tenacity.stop_after_attempt(3) - ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["gpu"] ) @pytest.mark.parametrize( "task", diff --git a/tox.ini b/tox.ini index e77f6908..a31d2a62 100644 --- a/tox.ini +++ b/tox.ini @@ -44,8 +44,20 @@ commands = --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = PYTHONPATH=. - TORCH_USE_CUDA_DSA=true +[testenv:unit-torch-docker] +install_command = + uv pip install docker +allowlist_externals = + pytest + uv + docker +commands = + docker run -it \ + --gpus all \ + --entrypoint /bin/sh \ + integration-test-pytorch:gpu \ + -c "pip install tox uv && cd /tmp/hf-inference-test && tox -e unit-torch" [testenv:unit-torch-slow] install_command = uv pip install -e ".[torch, st, diffusers]" @@ -112,22 +124,24 @@ setenv = RUN_SLOW=True [testenv:tf-integration-remote] -install_command = pip install -e ".[tensorflow]" +install_command = uv pip install -e ".[tensorflow]" allowlist_externals = pytest + uv commands = pytest \ {tty:--color=yes} \ tests/integ/test_tensorflow_remote.py {posargs} \ - --log-cli-level=INFO \ + --log-cli-level=DEBUG \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True [testenv:tf-integration-local] -install_command = pip install -e ".[tensorflow]" +install_command = uv pip install -e ".[tensorflow, st]" allowlist_externals = pytest + uv commands = pytest \ {tty:--color=yes} \ From edf8b98b1a45cedd6ddc114e077f14f21fc733c9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:26:09 +0000 Subject: [PATCH 076/173] tox --- .github/workflows/unit-test.yaml | 53 +++++++------------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index a7a33482..f5fca0eb 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -21,47 +21,16 @@ jobs: env: AWS_REGION: us-east-1 steps: - - name: Use Apt lists cache - uses: actions/cache@v4.0.0 - with: - path: /var/lib/apt/lists - key: ${{ runner.os }}-apt-lists - - name: Use Apt packages cache - uses: actions/cache@v4.0.0 - with: - path: /var/cache/apt - key: ${{ runner.os }}-apt-packages - - name: Install CUDA - run: | - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb - sudo dpkg -i cuda-keyring_1.1-1_all.deb - sudo apt-get update - sudo apt-get -y install cuda-toolkit-12-3 cuda-drivers - - name: nvidia-smi 
- run: nvidia-smi - uses: actions/checkout@v4.1.1 - - name: Set up Python 3.9.18 - uses: actions/setup-python@v5 - with: - python-version: 3.9.18 - - name: Install test dependencies - run: pip install -U pip -r requirements-test.txt - - name: Install ffmpeg - run: | - sudo apt-get update -y && sudo apt-get install -y ffmpeg - - name: Run unit tests for Pytorch - run: tox -e unit-torch -- -n 4 - tensorflow-unit-test: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9.18 - uses: actions/setup-python@v2 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + uses: docker/build-push-action@v5 with: - python-version: 3.9.18 - - name: Install Tox & Dependencies - run: pip install tox ".[test]" - - name: Run unit tests for Tensorflow - run: tox -e unit-tensorflow -- -n 4 \ No newline at end of file + push: false + context: dockerfiles/pytorch/gpu + tags: integration-test-pytorch:gpu + - name: Install tox + run: pip install tox + - name: Run unit tests + run: tox -e unit-torch-docker \ No newline at end of file From 45d5154733478bd3484d1eac84b3c2574794dca7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:28:24 +0000 Subject: [PATCH 077/173] dockerfile --- .github/workflows/unit-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index f5fca0eb..a33982f5 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -28,8 +28,9 @@ jobs: uses: docker/build-push-action@v5 with: push: false - context: dockerfiles/pytorch/gpu + context: . tags: integration-test-pytorch:gpu + file: dockerfiles/pytorch/gpu/Dockerfile - name: Install tox run: pip install tox - name: Run unit tests From ef8d5cfecaa540512627f3edabe60635eb993058 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:36:19 +0000 Subject: [PATCH 078/173] uv --- .github/workflows/unit-test.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index a33982f5..09bbdcb1 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -31,7 +31,11 @@ jobs: context: . 
tags: integration-test-pytorch:gpu file: dockerfiles/pytorch/gpu/Dockerfile - - name: Install tox - run: pip install tox + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install tox & uv + run: pip install uv tox - name: Run unit tests run: tox -e unit-torch-docker \ No newline at end of file From 8674cf01b1af6518c6f6b62e9a1a8eda207c9d87 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:44:44 +0000 Subject: [PATCH 079/173] docker images --- .github/workflows/unit-test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 09bbdcb1..ec53f710 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -35,6 +35,8 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.11 + - name: List images + run: docker images - name: Install tox & uv run: pip install uv tox - name: Run unit tests From 59882263a5ec489f6978891918cd60d87e52c59b Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:45:50 +0000 Subject: [PATCH 080/173] cache --- .github/workflows/unit-test.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index ec53f710..34dfe1c3 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -27,10 +27,12 @@ jobs: - name: Docker Build uses: docker/build-push-action@v5 with: - push: false + push: true context: . tags: integration-test-pytorch:gpu file: dockerfiles/pytorch/gpu/Dockerfile + cache-from: type=gha + cache-to: type=gha,mode=max - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From f517ef22055aa7835d97afda1faa63ffa72db4f2 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 14:46:07 +0000 Subject: [PATCH 081/173] push --- .github/workflows/unit-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 34dfe1c3..ba46cd23 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -27,7 +27,7 @@ jobs: - name: Docker Build uses: docker/build-push-action@v5 with: - push: true + push: false context: . tags: integration-test-pytorch:gpu file: dockerfiles/pytorch/gpu/Dockerfile From eb2ac683c232880604f2c626a8092f42d578022f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:05:12 +0000 Subject: [PATCH 082/173] local registry --- .github/workflows/unit-test.yaml | 13 +++++++++++-- tox.ini | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index ba46cd23..eaee9005 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -18,18 +18,27 @@ concurrency: jobs: pytorch-unit-test: runs-on: [single-gpu, nvidia-gpu, t4, ci] + services: + registry: + image: registry:2 + ports: + - 1234:1234 env: AWS_REGION: us-east-1 steps: - uses: actions/checkout@v4.1.1 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 + with: + driver-opts: network=host - name: Docker Build uses: docker/build-push-action@v5 with: - push: false + push: true context: . 
- tags: integration-test-pytorch:gpu + tags: localhost:1234/integration-test-pytorch:gpu file: dockerfiles/pytorch/gpu/Dockerfile cache-from: type=gha cache-to: type=gha,mode=max diff --git a/tox.ini b/tox.ini index a31d2a62..196a544a 100644 --- a/tox.ini +++ b/tox.ini @@ -56,7 +56,7 @@ commands = docker run -it \ --gpus all \ --entrypoint /bin/sh \ - integration-test-pytorch:gpu \ + localhost:1234/integration-test-pytorch:gpu \ -c "pip install tox uv && cd /tmp/hf-inference-test && tox -e unit-torch" [testenv:unit-torch-slow] From f0aff350ad23254fd0c013ddf056ab7e9d55b1f0 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:12:27 +0000 Subject: [PATCH 083/173] make build --- .github/workflows/unit-test.yaml | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index eaee9005..b3f2a536 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -18,36 +18,20 @@ concurrency: jobs: pytorch-unit-test: runs-on: [single-gpu, nvidia-gpu, t4, ci] - services: - registry: - image: registry:2 - ports: - - 1234:1234 env: AWS_REGION: us-east-1 steps: - uses: actions/checkout@v4.1.1 - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 - with: - driver-opts: network=host - name: Docker Build - uses: docker/build-push-action@v5 - with: - push: true - context: . - tags: localhost:1234/integration-test-pytorch:gpu - file: dockerfiles/pytorch/gpu/Dockerfile - cache-from: type=gha - cache-to: type=gha,mode=max + run: make inference-pytorch-gpu + - name: List images + run: docker images - name: Set up Python 3.11 uses: actions/setup-python@v2 with: python-version: 3.11 - - name: List images - run: docker images - name: Install tox & uv run: pip install uv tox - name: Run unit tests From dc9f4e49433ea2fea1ec77c90e8efbb438583cfe Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:19:05 +0000 Subject: [PATCH 084/173] container name --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 196a544a..a31d2a62 100644 --- a/tox.ini +++ b/tox.ini @@ -56,7 +56,7 @@ commands = docker run -it \ --gpus all \ --entrypoint /bin/sh \ - localhost:1234/integration-test-pytorch:gpu \ + integration-test-pytorch:gpu \ -c "pip install tox uv && cd /tmp/hf-inference-test && tox -e unit-torch" [testenv:unit-torch-slow] From 264d6ddc656b47ebf3619948c82ef5dd95e76935 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:25:40 +0000 Subject: [PATCH 085/173] dry run --- .github/workflows/unit-test.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index b3f2a536..e8aaea15 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -28,6 +28,13 @@ jobs: run: make inference-pytorch-gpu - name: List images run: docker images + - name: Dry run + run: | + docker run -it \ + --gpus all \ + --entrypoint /bin/sh \ + integration-test-pytorch:gpu \ + -c "Hello World!" 
- name: Set up Python 3.11 uses: actions/setup-python@v2 with: From 2787c23574b24fcaab008195137d0baff77b7c91 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:34:49 +0000 Subject: [PATCH 086/173] remove -it --- .github/workflows/unit-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index e8aaea15..60b41063 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -30,7 +30,7 @@ jobs: run: docker images - name: Dry run run: | - docker run -it \ + docker run \ --gpus all \ --entrypoint /bin/sh \ integration-test-pytorch:gpu \ From 7efa257edcf0525a473490a9e0d87304d9b0db0d Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 21 Feb 2024 15:43:07 +0000 Subject: [PATCH 087/173] echo --- .github/workflows/unit-test.yaml | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 60b41063..9706d2c0 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -34,7 +34,7 @@ jobs: --gpus all \ --entrypoint /bin/sh \ integration-test-pytorch:gpu \ - -c "Hello World!" + -c "echo Hello World!" - name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/tox.ini b/tox.ini index a31d2a62..94ee1298 100644 --- a/tox.ini +++ b/tox.ini @@ -53,7 +53,7 @@ allowlist_externals = uv docker commands = - docker run -it \ + docker run \ --gpus all \ --entrypoint /bin/sh \ integration-test-pytorch:gpu \ From 478e4a055b522c09248db42ad8182c001630c340 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 09:20:22 +0000 Subject: [PATCH 088/173] integration --- .github/workflows/gpu-integ-test.yaml | 104 +++++++++----------------- .github/workflows/unit-test.yaml | 9 --- dockerfiles/tensorflow/gpu/Dockerfile | 8 ++ 3 files changed, 45 insertions(+), 76 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 920c38df..13a1998a 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -1,84 +1,54 @@ -name: GPU - Run Integration Tests +name: Run Unit-Tests on: - #push: - # branches: - # - main - #pull_request: + push: + branches: + - main + pull_request: workflow_dispatch: +env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true - jobs: - pytorch-integration-test-local: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t integration-test-pytorch:gpu -f dockerfiles/pytorch/gpu/Dockerfile . - - name: "Run Integration Tests: Torch Local" - run: tox -e torch-integration-local -- -n 4 - pytorch-integration-test-remote: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t integration-test-pytorch:gpu -f dockerfiles/pytorch/gpu/Dockerfile . 
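+      # The make target introduced above is assumed to wrap the docker build
+      # these removed steps ran by hand, i.e. roughly:
+      #   docker build -t integration-test-pytorch:gpu \
+      #     -f dockerfiles/pytorch/gpu/Dockerfile .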
- - name: "Run Integration Tests: Torch Remote" - run: tox -e torch-integration-remote -- -n 4 - tensorflow-integration-test-local: + pytorch-integration-local: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Build Docker - run: docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu . - - name: Install Tox - run: pip install tox - - name: "Run Integration Tests: TF Local" - run: tox -e tensorflow-integration-local -- -n 4 - tensorflow-integration-test-remote: + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-gpu + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install tox & uv + run: pip install uv tox + - name: Run unit tests + run: tox -e torch-integration-local -- -n 4 + pytorch-integration-remote: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Build Docker - run: docker build -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu . - - name: Install Tox - run: pip install tox - - name: "Run Integration Tests: TF Remote" - run: tox -e tensorflow-integration-remote -- -n 4 - \ No newline at end of file + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-gpu + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install tox & uv + run: pip install uv tox + - name: Run unit tests + run: tox -e torch-integration-remote -- -n 4 \ No newline at end of file diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 9706d2c0..8f7389ad 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -26,15 +26,6 @@ jobs: uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build run: make inference-pytorch-gpu - - name: List images - run: docker images - - name: Dry run - run: | - docker run \ - --gpus all \ - --entrypoint /bin/sh \ - integration-test-pytorch:gpu \ - -c "echo Hello World!" - name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index 462f7a83..02018371 100644 --- a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -34,6 +34,11 @@ RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin WORKDIR /app +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + source .venv/bin/activate && \ + ls -all + # install base python dependencies COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml RUN micromamba install -y -n base -f environment.yaml \ @@ -44,6 +49,9 @@ RUN micromamba install -y -n base -f environment.yaml \ COPY requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +# copy tests +COPY . 
/tmp/hf-inference-test + # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py From c92d27fc6c7802171539fd730f81894fd28299b2 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 09:41:45 +0000 Subject: [PATCH 089/173] debug --- .github/workflows/gpu-integ-test.yaml | 4 ++++ .github/workflows/unit-test.yaml | 2 ++ tox.ini | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 13a1998a..e4f042e7 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -26,6 +26,8 @@ jobs: uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build run: make inference-pytorch-gpu + - name: List images + run: docker images - name: Set up Python 3.11 uses: actions/setup-python@v2 with: @@ -44,6 +46,8 @@ jobs: uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build run: make inference-pytorch-gpu + - name: List images + run: docker images - name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 8f7389ad..b3f2a536 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -26,6 +26,8 @@ jobs: uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build run: make inference-pytorch-gpu + - name: List images + run: docker images - name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/tox.ini b/tox.ini index 94ee1298..bc3a75ab 100644 --- a/tox.ini +++ b/tox.ini @@ -105,7 +105,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_remote.py {posargs} \ - --log-cli-level=INFO \ + --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True @@ -118,7 +118,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_local.py {posargs} \ - --log-cli-level=INFO \ + --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True From 2bd0851389be53d222e8608d75082474e7561447 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 14:24:03 +0000 Subject: [PATCH 090/173] conversational --- .github/workflows/gpu-integ-test.yaml | 10 +- src/huggingface_inference_toolkit/utils.py | 7 +- tests/integ/config.py | 31 +++-- tests/integ/conftest.py | 5 +- tests/integ/helpers.py | 12 +- ...rch_local.py => test_pytorch_local_cpu.py} | 2 +- tests/integ/test_pytorch_local_gpu.py | 125 ++++++++++++++++++ ...h_remote.py => test_pytorch_remote_cpu.py} | 5 +- tests/integ/test_pytorch_remote_gpu.py | 62 +++++++++ tests/integ/utils.py | 4 +- tests/unit/test_utils.py | 7 +- tox.ini | 30 ++++- 12 files changed, 269 insertions(+), 31 deletions(-) rename tests/integ/{test_pytorch_local.py => test_pytorch_local_cpu.py} (99%) create mode 100644 tests/integ/test_pytorch_local_gpu.py rename tests/integ/{test_pytorch_remote.py => test_pytorch_remote_cpu.py} (94%) create mode 100644 tests/integ/test_pytorch_remote_gpu.py diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index e4f042e7..20591998 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -1,4 +1,4 @@ -name: Run Unit-Tests +name: Run GPU Integration Tests on: push: @@ -34,8 +34,8 @@ jobs: python-version: 3.11 - name: Install tox & uv run: pip 
install uv tox - - name: Run unit tests - run: tox -e torch-integration-local -- -n 4 + - name: Run local integration tests + run: tox -e torch-integration-local-gpu -- -n 4 pytorch-integration-remote: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: @@ -54,5 +54,5 @@ jobs: python-version: 3.11 - name: Install tox & uv run: pip install uv tox - - name: Run unit tests - run: tox -e torch-integration-remote -- -n 4 \ No newline at end of file + - name: Run remote integration tests + run: tox -e torch-integration-remote-gpu -- -n 4 \ No newline at end of file diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 77561342..7499a097 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -21,7 +21,7 @@ ) logger = logging.getLogger(__name__) -logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) +#logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) if is_tf_available(): import tensorflow as tf @@ -78,10 +78,9 @@ def wrapped_pipeline(inputs, *args, **kwargs): logging.info(f"Inputs: {inputs}") logging.info(f"Args: {args}") logging.info(f"KWArgs: {kwargs}") - converted_input = Conversation(messages = inputs) - prediction = pipeline(converted_input, *args, **kwargs) + prediction = pipeline(inputs, *args, **kwargs) logging.info(f"Prediction: {prediction}") - return prediction + return list(prediction) return wrapped_pipeline diff --git a/tests/integ/config.py b/tests/integ/config.py index 421fb7d6..eb161741 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -14,7 +14,8 @@ validate_text_to_image, validate_translation, validate_zero_shot_classification, - validate_custom + validate_custom, + validate_conversational ) @@ -152,13 +153,20 @@ }, } }, - "conversational": { - "inputs": { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", + "conversational": {"inputs": [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" 
} - }, + ]}, "sentence-similarity": { "inputs": {"source_sentence": "Lets create an embedding", "sentences": ["Lets create an embedding"]} }, @@ -210,7 +218,12 @@ "object-detection": [{"score": 0.9143241047859192, "label": "cat", "box": {}}], "image-segmentation": [{"score": 0.9143241047859192, "label": "cat", "mask": {}}], "table-question-answering": {"answer": "36542"}, - "conversational": {"generated_text": "", "conversation": {}}, + "conversational": [ + {'role': 'user', 'content': 'Which movie is the best ?'}, + {'role': 'assistant', 'content': "It's Die Hard for sure."}, + {'role': 'user', 'content': 'Can you explain why?'}, + {'role': 'assistant', 'content': "It's a great movie."}, + ], "sentence-similarity": {"similarities": ""}, "sentence-embeddings": {"embeddings": ""}, "sentence-ranking": {"scores": ""}, @@ -237,7 +250,7 @@ "object-detection": validate_object_detection, "image-segmentation": validate_object_detection, "table-question-answering": validate_zero_shot_classification, - "conversational": validate_zero_shot_classification, + "conversational": validate_conversational, "sentence-similarity": validate_zero_shot_classification, "sentence-embeddings": validate_zero_shot_classification, "sentence-ranking": validate_zero_shot_classification, diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 120109a7..71a98ff4 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -21,7 +21,7 @@ @tenacity.retry( retry = tenacity.retry_if_exception(docker.errors.APIError), - stop = tenacity.stop_after_attempt(3) + stop = tenacity.stop_after_attempt(10) ) @pytest.fixture(scope = "function") def remote_container( @@ -30,7 +30,8 @@ def remote_container( framework ): time.sleep(random.randint(1, 5)) - client = docker.DockerClient(base_url='unix://var/run/docker.sock') + #client = docker.DockerClient(base_url='unix://var/run/docker.sock') + client = docker.from_env() container_name = f"integration-test-{framework}-{task}-{device}" container_image = f"integration-test-{framework}:{device}" port = random.randint(5000, 7000) diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index 3083b5e6..c854dcd2 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -94,12 +94,22 @@ def verify_task( ).json() elif task == "text-to-image": prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content + else: prediction = requests.post(f"{BASE_URL}", json=input).json() + logging.info(f"Input: {input}") logging.info(f"Prediction: {prediction}") logging.info(f"Snapshot: {task2output[task]}") - assert task2validation[task](result=prediction, snapshot=task2output[task]) + + if task == "conversational": + for message in prediction: + assert "error" not in message["content"].lower() + else: + assert task2validation[task]( + result=prediction, + snapshot=task2output[task] + ) except Exception as exception: logging.error(f"Base URL: {BASE_URL}") logging.error(f"Task: {task}") diff --git a/tests/integ/test_pytorch_local.py b/tests/integ/test_pytorch_local_cpu.py similarity index 99% rename from tests/integ/test_pytorch_local.py rename to tests/integ/test_pytorch_local_cpu.py index c48bf29d..4339d197 100644 --- a/tests/integ/test_pytorch_local.py +++ b/tests/integ/test_pytorch_local_cpu.py @@ -45,7 +45,7 @@ class TestPytorchLocal: ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["cpu"] ) @pytest.mark.parametrize( "framework", diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py new file mode 
100644 index 00000000..d82d5bab --- /dev/null +++ b/tests/integ/test_pytorch_local_gpu.py @@ -0,0 +1,125 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest + + +class TestPytorchLocal: + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "repository_id", + [""] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.parametrize( + "device", + ["gpu", "cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + self, + local_container, + repository_id, + device, + task + ) -> None: + + verify_task(task = task, port = local_container[1]) diff --git a/tests/integ/test_pytorch_remote.py b/tests/integ/test_pytorch_remote_cpu.py similarity index 94% rename from tests/integ/test_pytorch_remote.py rename to tests/integ/test_pytorch_remote_cpu.py index 33a26a4a..14001dda 100644 --- a/tests/integ/test_pytorch_remote.py +++ b/tests/integ/test_pytorch_remote_cpu.py @@ -19,11 +19,12 @@ class TestPytorchRemote: @tenacity.retry( retry = tenacity.retry_if_exception(docker.errors.APIError), - stop = tenacity.stop_after_attempt(3) + stop = tenacity.stop_after_attempt(5), + reraise = True ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["cpu"] ) @pytest.mark.parametrize( "task", diff --git a/tests/integ/test_pytorch_remote_gpu.py b/tests/integ/test_pytorch_remote_gpu.py new file mode 100644 index 00000000..ec79f4a5 --- /dev/null +++ b/tests/integ/test_pytorch_remote_gpu.py @@ -0,0 +1,62 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity 
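+# tenacity retries the docker-backed test below on transient API errors; a
+# minimal sketch of the same pattern (illustrative names, not used here):
+#
+#   @tenacity.retry(
+#       retry=tenacity.retry_if_exception_type(docker.errors.APIError),
+#       stop=tenacity.stop_after_attempt(5),
+#       reraise=True,
+#   )
+#   def run_with_retry(client, image, **kwargs):
+#       return client.containers.run(image, detach=True, **kwargs)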
+import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(5), + reraise = True + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/utils.py b/tests/integ/utils.py index 042aa233..2b826cdb 100644 --- a/tests/integ/utils.py +++ b/tests/integ/utils.py @@ -7,9 +7,11 @@ def validate_classification(result=None, snapshot=None): for idx, _ in enumerate(result): assert result[idx].keys() == snapshot[idx].keys() - # assert result[idx]["score"] >= snapshot[idx]["score"] return True +def validate_conversational(result=None, snapshot=None): + assert len(result) >= len(snapshot) + def validate_zero_shot_classification(result=None, snapshot=None): logging.info(f"Result: {result}") diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 166f618e..6e37814d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -175,7 +175,9 @@ def test_wrap_conversation_pipeline(): } ] res = conv_pipe(data) - assert "content" in res.messages[-1] + logging.info(f"Response: {res}") + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] @require_torch @@ -202,7 +204,8 @@ def test_wrapped_pipeline(): } ] res = conv_pipe(data) - assert "content" in res.messages[-1] + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] def test_local_custom_pipeline(): diff --git a/tox.ini b/tox.ini index bc3a75ab..0cc8b2eb 100644 --- a/tox.ini +++ b/tox.ini @@ -97,27 +97,49 @@ commands = setenv = RUN_SLOW=True -[testenv:torch-integration-remote] +[testenv:torch-integration-remote-gpu] install_command = pip install -e ".[torch]" allowlist_externals = pytest commands = pytest \ {tty:--color=yes} \ - tests/integ/test_pytorch_remote.py {posargs} \ + tests/integ/test_pytorch_remote_gpu.py {posargs} \ --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True -[testenv:torch-integration-local] +[testenv:torch-integration-remote-cpu] install_command = pip install -e ".[torch]" allowlist_externals = pytest commands = pytest \ {tty:--color=yes} \ - tests/integ/test_pytorch_local.py {posargs} \ + tests/integ/test_pytorch_remote_cpu.py {posargs} \ + --log-cli-level=ERROR \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' + +[testenv:torch-integration-local-cpu] +install_command = pip install -e ".[torch]" +allowlist_externals = + pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/integ/test_pytorch_local_cpu.py {posargs} \ + --log-cli-level=ERROR \ + --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' + +[testenv:torch-integration-local-gpu] 
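+# typically invoked via CI as: tox -e torch-integration-local-gpu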
+install_command = pip install -e ".[torch]" +allowlist_externals = + pytest +commands = + pytest \ + {tty:--color=yes} \ + tests/integ/test_pytorch_local_gpu.py {posargs} \ --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = From 51b2bc63aa61236b1dfe48a660fa56db285e8e8a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 14:45:56 +0000 Subject: [PATCH 091/173] debug --- tests/integ/test_pytorch_local_gpu.py | 4 ++-- tox.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index d82d5bab..c13965cf 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -74,7 +74,7 @@ def test_pt_container_local_model( ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["gpu"] ) @pytest.mark.parametrize( "framework", @@ -103,7 +103,7 @@ def test_pt_container_custom_handler( ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["gpu"] ) @pytest.mark.parametrize( "framework", diff --git a/tox.ini b/tox.ini index 0cc8b2eb..bfeb0e7a 100644 --- a/tox.ini +++ b/tox.ini @@ -140,7 +140,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_local_gpu.py {posargs} \ - --log-cli-level=ERROR \ + --log-cli-level=DEBUG \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True From 9e39ba20d523126a902ed88c37411b1d8a491e5b Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 15:02:10 +0000 Subject: [PATCH 092/173] device --- tests/integ/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 71a98ff4..f55a5984 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -105,7 +105,7 @@ def local_container( device_request = [ docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) - ] if IS_GPU else [] + ] if device == "gpu" else [] with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py From 199099cf1e36c4d66a0e7ae573a61df2a6cf3e23 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 15:19:01 +0000 Subject: [PATCH 093/173] from_env --- tests/integ/conftest.py | 5 +++-- tests/integ/test_pytorch_local_gpu.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index f55a5984..b4511a76 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -68,7 +68,8 @@ def remote_container( @tenacity.retry( retry = tenacity.retry_if_exception(docker.errors.APIError), - stop = tenacity.stop_after_attempt(3) + stop = tenacity.stop_after_attempt(10), + reraise = True ) @pytest.fixture(scope = "function") def local_container( @@ -94,7 +95,7 @@ def local_container( else: try: logging.info(f"Starting container with Model = {model}") - client = docker.DockerClient(base_url='unix://var/run/docker.sock') + client = docker.from_env() container_name = f"integration-test-{framework}-{id}-{device}" container_image = f"integration-test-{framework}:{device}" diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index c13965cf..88aff756 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -93,7 +93,10 @@ def test_pt_container_custom_handler( repository_id ) -> None: - verify_task(task = task, port = local_container[1]) + 
verify_task( + task = task, + port = local_container[1], + ) @require_torch From e037c1a983fe445fb72d581a6e77f082b8616fdb Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 15:29:14 +0000 Subject: [PATCH 094/173] debug level --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index bfeb0e7a..0cc8b2eb 100644 --- a/tox.ini +++ b/tox.ini @@ -140,7 +140,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_local_gpu.py {posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True From 4387c807a202f32aeb667f413c672be722c833a3 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 16:00:21 +0000 Subject: [PATCH 095/173] socket --- .github/workflows/gpu-integ-test.yaml | 2 +- tests/integ/conftest.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 20591998..4b14cbbd 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -35,7 +35,7 @@ jobs: - name: Install tox & uv run: pip install uv tox - name: Run local integration tests - run: tox -e torch-integration-local-gpu -- -n 4 + run: tox -e torch-integration-local-gpu pytorch-integration-remote: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index b4511a76..afd486b9 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -15,6 +15,7 @@ _run_slow_tests ) import uuid +import socket IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" @@ -37,6 +38,12 @@ def remote_container( port = random.randint(5000, 7000) model = task2model[task][framework] + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + logging.debug(f"Image: {container_image}") logging.debug(f"Port: {port}") @@ -67,7 +74,6 @@ def remote_container( @tenacity.retry( - retry = tenacity.retry_if_exception(docker.errors.APIError), stop = tenacity.stop_after_attempt(10), reraise = True ) @@ -101,6 +107,12 @@ def local_container( port = random.randint(5000, 7000) + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + logging.debug(f"Image: {container_image}") logging.debug(f"Port: {port}") From d3b66f3f297a1fed867641e42c54a01f0549be1c Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 16:20:23 +0000 Subject: [PATCH 096/173] exception when starting container --- tests/integ/conftest.py | 124 +++++++++++++------------- tests/integ/test_pytorch_local_gpu.py | 2 +- tox.ini | 2 +- 3 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index afd486b9..3ca2f33d 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -84,66 +84,66 @@ def local_container( repository_id, framework ): - time.sleep(random.randint(1, 5)) - - id = uuid.uuid4() - if not (task == "custom"): - model = task2model[task][framework] - id = task - else: - model = repository_id - - logging.info(f"Starting container with 
model: {model}") - - if not model: - logging.info(f"No model supported for {framework}") - yield None - else: - try: - logging.info(f"Starting container with Model = {model}") - client = docker.from_env() - container_name = f"integration-test-{framework}-{id}-{device}" - container_image = f"integration-test-{framework}:{device}" - - port = random.randint(5000, 7000) - - #check if port is already open - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - while sock.connect_ex(("localhost", port)) == 0: - logging.debug(f"Port {port} is already being used; getting a new one...") - port = random.randint(5000, 9000) - - logging.debug(f"Image: {container_image}") - logging.debug(f"Port: {port}") - - device_request = [ - docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) - ] if device == "gpu" else [] - - with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf( - repository_id = model, - target_dir = tmpdirname, - framework = framework - ) - logging.info(f"Temp dir name: {tmpdirname}") - yield client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, - volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, - detach=True, - # GPU - device_requests=device_request, - ), port - - #Teardown - previous = client.containers.get(container_name) - previous.stop() - previous.remove() - except Exception as exception: - logging.error(f"Error starting container: {str(exception)}") - raise exception + try: + time.sleep(random.randint(1, 5)) + id = uuid.uuid4() + if not (task == "custom"): + model = task2model[task][framework] + id = task + else: + model = repository_id + + logging.info(f"Starting container with model: {model}") + + if not model: + message = f"No model supported for {framework}" + logging.error(message) + raise ValueError(message) + + logging.info(f"Starting container with Model = {model}") + client = docker.from_env() + container_name = f"integration-test-{framework}-{id}-{device}" + container_image = f"integration-test-{framework}:{device}" + + port = random.randint(5000, 7000) + + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + with tempfile.TemporaryDirectory() as tmpdirname: + # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf( + repository_id = model, + target_dir = tmpdirname, + framework = framework + ) + logging.info(f"Temp dir name: {tmpdirname}") + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, + volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + except Exception as exception: + logging.error(f"Error starting container: {str(exception)}") + raise exception diff 
--git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index 88aff756..15c28335 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -64,7 +64,7 @@ def test_pt_container_local_model( device ) -> None: - verify_task(task = task, port = local_container[1]) + verify_task(task = task, port = local_container[1]) @require_torch diff --git a/tox.ini b/tox.ini index 0cc8b2eb..bfeb0e7a 100644 --- a/tox.ini +++ b/tox.ini @@ -140,7 +140,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_local_gpu.py {posargs} \ - --log-cli-level=ERROR \ + --log-cli-level=DEBUG \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True From c658ad619d37500eea4724150ea8c27953ee04ea Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 16:51:08 +0000 Subject: [PATCH 097/173] error --- tests/integ/helpers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index c854dcd2..f1f22f1f 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -52,6 +52,8 @@ def wait_for_container_to_be_ready( ): retries = 0 + error = None + while retries < max_retries: time.sleep(time_between_retries) try: @@ -62,8 +64,12 @@ def wait_for_container_to_be_ready( else: raise ConnectionError(f"Error: {response.status_code}") except Exception as exception: + error = exception logging.warning(f"Container at {base_url} not ready, trying again...") retries += 1 + + logging.error(f"Unable to start container: {str(error)}") + raise error def verify_task( #container: DockerClient, From f9e7daad549c3bd8b4d3e2085903841f95e31904 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:01:58 +0000 Subject: [PATCH 098/173] permissions --- .github/workflows/gpu-integ-test.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 4b14cbbd..eda859c1 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -32,6 +32,19 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.11 + - name: Check permissions + run: | + import os + def check_directory_permissions(directory_path): + permissions = os.stat(directory_path).st_mode + print(f"Permissions of the directory: {directory_path}") + print(f"Read permission: {'Yes' if permissions & 0o400 else 'No'}") + print(f"Write permission: {'Yes' if permissions & 0o200 else 'No'}") + print(f"Execute permission: {'Yes' if permissions & 0o100 else 'No'}") + + directory_path = "/tmp" + check_directory_permissions(directory_path) + shell: python - name: Install tox & uv run: pip install uv tox - name: Run local integration tests From bd8302ff7e774595312ce321e3052dd9d0bddf79 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:03:45 +0000 Subject: [PATCH 099/173] order --- .github/workflows/gpu-integ-test.yaml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index eda859c1..125879a4 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -21,13 +21,6 @@ jobs: env: AWS_REGION: us-east-1 steps: - - uses: actions/checkout@v4.1.1 - - name: Docker Setup Buildx - uses: docker/setup-buildx-action@v3.0.0 - - name: Docker Build - run: make inference-pytorch-gpu - - name: List images - run: docker 
images - name: Set up Python 3.11 uses: actions/setup-python@v2 with: @@ -45,6 +38,17 @@ jobs: directory_path = "/tmp" check_directory_permissions(directory_path) shell: python + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-gpu + - name: List images + run: docker images + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 - name: Install tox & uv run: pip install uv tox - name: Run local integration tests From d2bc1b5895379a4f334880f46dd4567eb36e2e5f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:07:45 +0000 Subject: [PATCH 100/173] isolate --- tests/integ/test_pytorch_local_gpu.py | 40 +++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index 15c28335..3c568bc1 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -21,26 +21,26 @@ class TestPytorchLocal: "task", [ "text-classification", - "zero-shot-classification", - "ner", - "question-answering", - "fill-mask", - "summarization", - "translation_xx_to_yy", - "text2text-generation", - "text-generation", - "feature-extraction", - "image-classification", - "automatic-speech-recognition", - "audio-classification", - "object-detection", - "image-segmentation", - "table-question-answering", - "conversational", - "sentence-similarity", - "sentence-embeddings", - "sentence-ranking", - "text-to-image", + #"zero-shot-classification", + #"ner", + #"question-answering", + #"fill-mask", + #"summarization", + #"translation_xx_to_yy", + #"text2text-generation", + #"text-generation", + #"feature-extraction", + #"image-classification", + #"automatic-speech-recognition", + #"audio-classification", + #"object-detection", + #"image-segmentation", + #"table-question-answering", + #"conversational", + #"sentence-similarity", + #"sentence-embeddings", + #"sentence-ranking", + #"text-to-image", ], ) @pytest.mark.parametrize( From 4544b98dbd31dd490347f15a239d88ed4262c915 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:24:42 +0000 Subject: [PATCH 101/173] dry run --- .github/workflows/gpu-integ-test.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 125879a4..e34394e1 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -45,6 +45,10 @@ jobs: run: make inference-pytorch-gpu - name: List images run: docker images + - name: Container dry run + run: docker run -e HF_MODEL_ID="distilbert/distilbert-base-uncased" -e HF_TASK="text-classification" -d integration-test-pytorch:gpu + - name: Stop container + run: make stop-all - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From bc6c5deaa51d13cec057816e6fd5e117d9aebb04 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:28:52 +0000 Subject: [PATCH 102/173] dry run --- .github/workflows/gpu-integ-test.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index e34394e1..5726a83f 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -45,8 +45,17 @@ jobs: run: make inference-pytorch-gpu - name: List images run: docker images + - name: Install hub + run: pip 
install -U "huggingface_hub[cli]" + - name: Download dummy model + run: huggingface-cli download distilbert/distilbert-base-uncased --local-dir /tmp/distilbert - name: Container dry run - run: docker run -e HF_MODEL_ID="distilbert/distilbert-base-uncased" -e HF_TASK="text-classification" -d integration-test-pytorch:gpu + run: | + docker run + -v /tmp/distilbert:/tmp/distilbert + -e HF_MODEL_DIR="tmp/distilbert" + -e HF_TASK="text-classification" + -d integration-test-pytorch:gpu - name: Stop container run: make stop-all - name: Set up Python 3.11 From fbfc7f8e8a290285b5e7be40cf50f92e04f6c93a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:37:14 +0000 Subject: [PATCH 103/173] fix dry run params --- .github/workflows/gpu-integ-test.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 5726a83f..43a41fa8 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -48,14 +48,14 @@ jobs: - name: Install hub run: pip install -U "huggingface_hub[cli]" - name: Download dummy model - run: huggingface-cli download distilbert/distilbert-base-uncased --local-dir /tmp/distilbert + run: huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert - name: Container dry run run: | - docker run - -v /tmp/distilbert:/tmp/distilbert - -e HF_MODEL_DIR="tmp/distilbert" - -e HF_TASK="text-classification" - -d integration-test-pytorch:gpu + docker run --gpus all \ + -v /tmp/distilbert:/opt/huggingface/model \ + -e HF_MODEL_DIR=/opt/huggingface/model \ + -e HF_TASK=text-classification \ + integration-test-pytorch:gpu - name: Stop container run: make stop-all - name: Set up Python 3.11 From 0db051885479faa81f906b4eb59920d2d31b26ad Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:44:09 +0000 Subject: [PATCH 104/173] quotes --- .github/workflows/gpu-integ-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 43a41fa8..03002299 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -53,8 +53,8 @@ jobs: run: | docker run --gpus all \ -v /tmp/distilbert:/opt/huggingface/model \ - -e HF_MODEL_DIR=/opt/huggingface/model \ - -e HF_TASK=text-classification \ + -e HF_MODEL_DIR="/opt/huggingface/model" \ + -e HF_TASK="text-classification" \ integration-test-pytorch:gpu - name: Stop container run: make stop-all From be92d7cb40407d47a479da41b28d6afecf119666 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 17:58:31 +0000 Subject: [PATCH 105/173] check path --- .github/workflows/gpu-integ-test.yaml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 03002299..92caedc5 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -49,12 +49,22 @@ jobs: run: pip install -U "huggingface_hub[cli]" - name: Download dummy model run: huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert + - name: Test model path + run: | + docker run + --gpus all + -v /tmp/distilbert:/opt/huggingface/model + --entrypoint /bin/sh + integration-test-pytorch:gpu + -c "ls /opt/huggingface/model" + - name: Stop container + run: make stop-all - name: Container dry run 
run: | - docker run --gpus all \ - -v /tmp/distilbert:/opt/huggingface/model \ - -e HF_MODEL_DIR="/opt/huggingface/model" \ - -e HF_TASK="text-classification" \ + docker run --gpus all + -v /tmp/distilbert:/opt/huggingface/model + -e HF_MODEL_DIR=/opt/huggingface/model + -e HF_TASK=text-classification integration-test-pytorch:gpu - name: Stop container run: make stop-all From d58ec57ff2620874346605c100888516d5e514a9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 18:05:23 +0000 Subject: [PATCH 106/173] backslash --- .github/workflows/gpu-integ-test.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 92caedc5..6a403cb5 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -51,12 +51,12 @@ jobs: run: huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert - name: Test model path run: | - docker run - --gpus all - -v /tmp/distilbert:/opt/huggingface/model - --entrypoint /bin/sh - integration-test-pytorch:gpu - -c "ls /opt/huggingface/model" + docker run \ + --gpus all \ + -v /tmp/distilbert:/opt/huggingface/model \ + --entrypoint /bin/sh \ + integration-test-pytorch:gpu \ + -c "ls /opt/huggingface/model" - name: Stop container run: make stop-all - name: Container dry run From 3049fede95226a43f2692cf9de296235cc33a1ab Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 18:13:32 +0000 Subject: [PATCH 107/173] change path --- .github/workflows/gpu-integ-test.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 6a403cb5..362ce19e 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -48,7 +48,9 @@ jobs: - name: Install hub run: pip install -U "huggingface_hub[cli]" - name: Download dummy model - run: huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert + run: | + huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert && \ + ls /tmp/distilbert - name: Test model path run: | docker run \ @@ -61,10 +63,10 @@ jobs: run: make stop-all - name: Container dry run run: | - docker run --gpus all - -v /tmp/distilbert:/opt/huggingface/model - -e HF_MODEL_DIR=/opt/huggingface/model - -e HF_TASK=text-classification + docker run --gpus all \ + -v /tmp/distilbert:/opt/huggingface/model \ + -e HF_MODEL_DIR=/opt/huggingface/model \ + -e HF_TASK=text-classification \ integration-test-pytorch:gpu - name: Stop container run: make stop-all From c8945bc586e44ae0414958400089cc4453287511 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Thu, 22 Feb 2024 18:25:47 +0000 Subject: [PATCH 108/173] host path --- .github/workflows/gpu-integ-test.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 362ce19e..e9ee2779 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -55,17 +55,17 @@ jobs: run: | docker run \ --gpus all \ - -v /tmp/distilbert:/opt/huggingface/model \ + -v /tmp/distilbert:/tmp/distilbert \ --entrypoint /bin/sh \ integration-test-pytorch:gpu \ - -c "ls /opt/huggingface/model" + -c "ls /tmp/distilbert" - name: Stop container run: make stop-all - name: Container dry run run: | docker run 
--gpus all \ - -v /tmp/distilbert:/opt/huggingface/model \ - -e HF_MODEL_DIR=/opt/huggingface/model \ + -v /tmp/distilbert:/tmp/distilbert \ + -e HF_MODEL_DIR=/tmp/distilbert \ -e HF_TASK=text-classification \ integration-test-pytorch:gpu - name: Stop container From 825b93319c17f251243755a398982c71a4e8358c Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 08:51:08 +0000 Subject: [PATCH 109/173] look into cache --- .github/workflows/gpu-integ-test.yaml | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index e9ee2779..80e29f23 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -25,19 +25,12 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.11 - - name: Check permissions + - name: Install hub + run: pip install -U "huggingface_hub[cli]" + - name: Download dummy model run: | - import os - def check_directory_permissions(directory_path): - permissions = os.stat(directory_path).st_mode - print(f"Permissions of the directory: {directory_path}") - print(f"Read permission: {'Yes' if permissions & 0o400 else 'No'}") - print(f"Write permission: {'Yes' if permissions & 0o200 else 'No'}") - print(f"Execute permission: {'Yes' if permissions & 0o100 else 'No'}") - - directory_path = "/tmp" - check_directory_permissions(directory_path) - shell: python + huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /mnt/hf_cache/distilbert && \ + ls /mnt/hf_cache - uses: actions/checkout@v4.1.1 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 @@ -49,8 +42,8 @@ jobs: run: pip install -U "huggingface_hub[cli]" - name: Download dummy model run: | - huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert && \ - ls /tmp/distilbert + huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /mnt/hf_cache/distilbert && \ + ls /mnt/hf_cache - name: Test model path run: | docker run \ From 741d4d090fcca2e4ca9c32473775818a6a84c810 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 08:52:45 +0000 Subject: [PATCH 110/173] path --- .github/workflows/gpu-integ-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 80e29f23..1ef089dc 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -43,7 +43,7 @@ jobs: - name: Download dummy model run: | huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /mnt/hf_cache/distilbert && \ - ls /mnt/hf_cache + ls /mnt/hf_cache/distilbert - name: Test model path run: | docker run \ From c5c4ed595c6fd260fe29ae8c9494f7eb4d913b59 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 08:54:42 +0000 Subject: [PATCH 111/173] cache --- .github/workflows/gpu-integ-test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 1ef089dc..4ea61370 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -21,6 +21,8 @@ jobs: env: AWS_REGION: us-east-1 steps: + - name: Look at cache + run: ls /mnt/hf_cache/hub - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From d828f61239243c60a87a60d311c1b77dee4325e9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 
09:25:43 +0000 Subject: [PATCH 112/173] env vars for cache --- .github/workflows/gpu-integ-test.yaml | 35 ++------------------------- tests/integ/config.py | 10 +++----- tests/integ/conftest.py | 9 ++++--- tests/integ/test_pytorch_local_gpu.py | 1 - 4 files changed, 12 insertions(+), 43 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 4ea61370..b9f51bca 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -20,19 +20,13 @@ jobs: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 + HF_HOME: /mnt/hf_cache/ + HF_HUB_CACHE: /mnt/hf_cache/hub steps: - - name: Look at cache - run: ls /mnt/hf_cache/hub - name: Set up Python 3.11 uses: actions/setup-python@v2 with: python-version: 3.11 - - name: Install hub - run: pip install -U "huggingface_hub[cli]" - - name: Download dummy model - run: | - huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /mnt/hf_cache/distilbert && \ - ls /mnt/hf_cache - uses: actions/checkout@v4.1.1 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 @@ -40,31 +34,6 @@ jobs: run: make inference-pytorch-gpu - name: List images run: docker images - - name: Install hub - run: pip install -U "huggingface_hub[cli]" - - name: Download dummy model - run: | - huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /mnt/hf_cache/distilbert && \ - ls /mnt/hf_cache/distilbert - - name: Test model path - run: | - docker run \ - --gpus all \ - -v /tmp/distilbert:/tmp/distilbert \ - --entrypoint /bin/sh \ - integration-test-pytorch:gpu \ - -c "ls /tmp/distilbert" - - name: Stop container - run: make stop-all - - name: Container dry run - run: | - docker run --gpus all \ - -v /tmp/distilbert:/tmp/distilbert \ - -e HF_MODEL_DIR=/tmp/distilbert \ - -e HF_TASK=text-classification \ - integration-test-pytorch:gpu - - name: Stop container - run: make stop-all - name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/tests/integ/config.py b/tests/integ/config.py index eb161741..8d2227d8 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -65,11 +65,11 @@ "tensorflow": "hf-internal-testing/tiny-random-vit", }, "automatic-speech-recognition": { - "pytorch": "hf-internal-testing/tiny-random-wav2vec2", + "pytorch": "hf-internal-testing/tiny-random-Wav2Vec2Model", "tensorflow": None, }, "audio-classification": { - "pytorch": "hf-internal-testing/tiny-random-wavlm", + "pytorch": "hf-internal-testing/tiny-random-WavLMModel", "tensorflow": None, }, "object-detection": { @@ -77,11 +77,11 @@ "tensorflow": None, }, "image-segmentation": { - "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", + "pytorch": "hf-internal-testing/tiny-random-BeitForSemanticSegmentation", "tensorflow": None, }, "table-question-answering": { - "pytorch": "philschmid/tapex-tiny", + "pytorch": "microsoft/tapex-large-finetuned-tabfact", "tensorflow": None, }, "zero-shot-image-classification": { @@ -91,8 +91,6 @@ "conversational": { "pytorch": "microsoft/DialoGPT-small", "tensorflow": "microsoft/DialoGPT-small", - #"pytorch": "hf-internal-testing/tiny-random-blenderbot", - #"tensorflow": "hf-internal-testing/tiny-random-blenderbot", }, "sentence-similarity": { "pytorch": "sentence-transformers/all-MiniLM-L6-v2", diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 3ca2f33d..57e018ba 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -121,19 +121,22 @@ def local_container( 
] if device == "gpu" else [] with tempfile.TemporaryDirectory() as tmpdirname: - # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py + storage_dir = _load_repository_from_hf( repository_id = model, target_dir = tmpdirname, framework = framework ) + logging.info(f"Temp dir name: {tmpdirname}") yield client.containers.run( container_image, name=container_name, ports={"5000": port}, - environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task}, - volumes={tmpdirname: {"bind": "/opt/huggingface/model", "mode": "ro"}}, + environment={ + "HF_MODEL_DIR": storage_dir, + "HF_TASK": task + }, detach=True, # GPU device_requests=device_request, diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index 3c568bc1..b62d7ef2 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -13,7 +13,6 @@ ) import pytest - class TestPytorchLocal: @require_torch From 1e592b0846e1f7e7472833a976ed8c2aaf70ad8f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:06:29 +0000 Subject: [PATCH 113/173] dry run --- .github/workflows/gpu-integ-test.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index b9f51bca..bb5d8599 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -23,6 +23,12 @@ jobs: HF_HOME: /mnt/hf_cache/ HF_HUB_CACHE: /mnt/hf_cache/hub steps: + - name: Cache dry run + run: | + docker run \ + --entrypoint /bin/sh \ + busybox \ + -c "ls /mnt && ls /mnt/hf_cache" - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From c51df3ae885db79f5d07efc39e9dede0c6b505f9 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:08:32 +0000 Subject: [PATCH 114/173] add volume --- .github/workflows/gpu-integ-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index bb5d8599..ce1dfb61 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -27,6 +27,7 @@ jobs: run: | docker run \ --entrypoint /bin/sh \ + -v /mnt/hf_cache:/mnt/hf_cache \ busybox \ -c "ls /mnt && ls /mnt/hf_cache" - name: Set up Python 3.11 From 68574776d0bb46c03b302409c2fe111bbc4ceb9a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:19:50 +0000 Subject: [PATCH 115/173] path --- tests/integ/conftest.py | 58 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 57e018ba..a35ac40e 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -120,32 +120,38 @@ def local_container( docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) ] if device == "gpu" else [] - with tempfile.TemporaryDirectory() as tmpdirname: - - storage_dir = _load_repository_from_hf( - repository_id = model, - target_dir = tmpdirname, - framework = framework - ) - - logging.info(f"Temp dir name: {tmpdirname}") - yield client.containers.run( - container_image, - name=container_name, - ports={"5000": port}, - environment={ - "HF_MODEL_DIR": storage_dir, - "HF_TASK": task - }, - detach=True, - # GPU - device_requests=device_request, - ), port - - #Teardown - previous = client.containers.get(container_name) - previous.stop() - previous.remove() + object_id = model.replace("/", "--") + model_dir = f"/mnt/hf_cache/hub/{object_id}" + + storage_dir 
= _load_repository_from_hf( + repository_id = model, + target_dir = model_dir, + framework = framework + ) + + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_DIR": storage_dir, + "HF_TASK": task + }, + volumes = { + model_dir: { + "bind": "/opt/huggingface/model", + "mode": "ro" + } + }, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() except Exception as exception: logging.error(f"Error starting container: {str(exception)}") raise exception From 0aa64e0d39845fd07e71b3fdfceef59024f52969 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:33:11 +0000 Subject: [PATCH 116/173] cache dry run --- .github/workflows/gpu-integ-test.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index ce1dfb61..f294d5e3 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -23,17 +23,22 @@ jobs: HF_HOME: /mnt/hf_cache/ HF_HUB_CACHE: /mnt/hf_cache/hub steps: + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Download sample artifact + run: | + huggingface-cli download \ + distilbert/distilbert-base-uncased \ + --local-dir /mnt/hf_cache/hub/model--distilbert--distilbert-base-uncased - name: Cache dry run run: | docker run \ --entrypoint /bin/sh \ - -v /mnt/hf_cache:/mnt/hf_cache \ + -v /mnt/hf_cache/hub/model--distilbert--distilbert-base-uncased:/opt/huggingface/model \ busybox \ - -c "ls /mnt && ls /mnt/hf_cache" - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 + -c "ls /opt/huggingface/model" - uses: actions/checkout@v4.1.1 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 From cce368b5305610b6bf0d850d573d36da33bf5c55 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:34:35 +0000 Subject: [PATCH 117/173] install cli --- .github/workflows/gpu-integ-test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index f294d5e3..05d0e44d 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -27,6 +27,8 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.11 + - name: Install Hub CLI + run: pip install huggingface-hub[cli] - name: Download sample artifact run: | huggingface-cli download \ From aa94250f8d365d8a37c39059de042de8cd5b4194 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Fri, 23 Feb 2024 12:40:41 +0000 Subject: [PATCH 118/173] model dir --- tests/integ/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index a35ac40e..36bc7113 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -134,7 +134,7 @@ def local_container( name=container_name, ports={"5000": port}, environment={ - "HF_MODEL_DIR": storage_dir, + "HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task }, volumes = { From 919ac710d780372394d01570f26ed7e6e468f60e Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 24 Feb 2024 15:16:08 +0000 Subject: [PATCH 119/173] config --- tests/integ/config.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integ/config.py b/tests/integ/config.py 
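The conftest iterations above converge on one pattern: probe for a free host port, stage the model artifacts on disk, then hand both to the container. A minimal standalone sketch of that pattern, assuming the `docker` SDK and a staging helper like the suite's `_load_repository_from_hf` (the function names and port range are illustrative):

```python
import socket

import docker


def find_free_port(lower=5000, upper=9000):
    # connect_ex() returns 0 only when something is already listening,
    # mirroring the while-loop guard added in patch 095.
    for candidate in range(lower, upper):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if sock.connect_ex(("localhost", candidate)) != 0:
                return candidate
    raise RuntimeError("no free port between 5000 and 9000")


def run_model_container(client: docker.DockerClient, image: str, model_dir: str, task: str):
    # Bind-mount the staged model read-only, as the fixture above does.
    port = find_free_port()
    container = client.containers.run(
        image,
        ports={"5000": port},
        environment={"HF_MODEL_DIR": "/opt/huggingface/model", "HF_TASK": task},
        volumes={model_dir: {"bind": "/opt/huggingface/model", "mode": "ro"}},
        detach=True,
    )
    return container, port
```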
index 8d2227d8..7a33ec92 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -77,11 +77,9 @@ "tensorflow": None, }, "image-segmentation": { - "pytorch": "hf-internal-testing/tiny-random-BeitForSemanticSegmentation", "tensorflow": None, }, "table-question-answering": { - "pytorch": "microsoft/tapex-large-finetuned-tabfact", "tensorflow": None, }, "zero-shot-image-classification": { @@ -108,6 +106,14 @@ "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch", "tensorflow": None, }, + "table-question-answering": { + "pytorch": "philschmid/tapex-tiny", + "tensorflow": None, + }, + "image-segmentation": { + "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", + "tensorflow": None, + }, } From 80bac498963f24477477f3f9c47354cfcb7bb5fd Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 24 Feb 2024 15:37:04 +0000 Subject: [PATCH 120/173] -n 10 --- .github/workflows/build-container.yaml | 14 ++--- .github/workflows/gpu-integ-test.yaml | 18 +----- .github/workflows/integ-test.yaml | 85 +++++++++++++++----------- .github/workflows/quality.yaml | 8 +-- pyproject.toml | 5 +- tests/integ/test_pytorch_local_cpu.py | 12 ++-- tests/integ/test_pytorch_local_gpu.py | 40 ++++++------ tox.ini | 2 +- 8 files changed, 92 insertions(+), 92 deletions(-) diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml index 24ffdab5..031207c0 100644 --- a/.github/workflows/build-container.yaml +++ b/.github/workflows/build-container.yaml @@ -1,13 +1,13 @@ name: "Build applications images" on: - #push: - # branches: - # - main - # paths: - # - "src/**" - # - "dockerfiles/**" - # - "scripts/**" + push: + branches: + - main + paths: + - "src/**" + - "dockerfiles/**" + - "scripts/**" workflow_dispatch: concurrency: diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 05d0e44d..57869a0f 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -27,20 +27,6 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.11 - - name: Install Hub CLI - run: pip install huggingface-hub[cli] - - name: Download sample artifact - run: | - huggingface-cli download \ - distilbert/distilbert-base-uncased \ - --local-dir /mnt/hf_cache/hub/model--distilbert--distilbert-base-uncased - - name: Cache dry run - run: | - docker run \ - --entrypoint /bin/sh \ - -v /mnt/hf_cache/hub/model--distilbert--distilbert-base-uncased:/opt/huggingface/model \ - busybox \ - -c "ls /opt/huggingface/model" - uses: actions/checkout@v4.1.1 - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 @@ -55,7 +41,7 @@ jobs: - name: Install tox & uv run: pip install uv tox - name: Run local integration tests - run: tox -e torch-integration-local-gpu + run: tox -e torch-integration-local-gpu -- -n 10 pytorch-integration-remote: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: @@ -75,4 +61,4 @@ jobs: - name: Install tox & uv run: pip install uv tox - name: Run remote integration tests - run: tox -e torch-integration-remote-gpu -- -n 4 \ No newline at end of file + run: tox -e torch-integration-remote-gpu -- -n 10 \ No newline at end of file diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml index 97546f5b..4f6ebf16 100644 --- a/.github/workflows/integ-test.yaml +++ b/.github/workflows/integ-test.yaml @@ -1,51 +1,64 @@ -name: CPU - Run Integration Tests +name: Run CPU Integration Tests on: - #push: - # branches: - # - main - #pull_request: + push: + branches: + - main + pull_request: 
workflow_dispatch: +env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true - jobs: - pytorch-integration-test: - runs-on: ubuntu-latest + pytorch-integration-local: + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 + HF_HOME: /mnt/hf_cache/ + HF_HUB_CACHE: /mnt/hf_cache/hub steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-cpu + - name: List images + run: docker images + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . - - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test - tensorflow-integration-test: - runs-on: ubuntu-latest + python-version: 3.11 + - name: Install tox & uv + run: pip install uv tox + - name: Run local integration tests + run: tox -e torch-integration-local-cpu -- -n 10 + pytorch-integration-remote: + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-cpu + - name: List images + run: docker images + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . - - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test \ No newline at end of file + python-version: 3.11 + - name: Install tox & uv + run: pip install uv tox + - name: Run remote integration tests + run: tox -e torch-integration-remote-cpu -- -n 10 \ No newline at end of file diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index b393d203..6c7e6c57 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -1,10 +1,10 @@ name: Quality Check on: - #push: - # branches: - # - main - #pull_request: + push: + branches: + - main + pull_request: workflow_dispatch: concurrency: diff --git a/pyproject.toml b/pyproject.toml index 14cf8939..56184a96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ lint.select = [ "B", # flake8-bugbear ] lint.ignore = [ - "E501", # line too long, handled by black "B008", # do not perform function calls in argument defaults "C901", # too complex ] @@ -23,8 +22,8 @@ line-length = 119 # Allow unused variables when underscore-prefixed. lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.9. -target-version = "py39" +# Assume Python 3.11. 
+target-version = "py311" lint.per-file-ignores = {"__init__.py" = ["F401"]} diff --git a/tests/integ/test_pytorch_local_cpu.py b/tests/integ/test_pytorch_local_cpu.py index 4339d197..17e651e9 100644 --- a/tests/integ/test_pytorch_local_cpu.py +++ b/tests/integ/test_pytorch_local_cpu.py @@ -13,7 +13,6 @@ ) import pytest - class TestPytorchLocal: @require_torch @@ -64,7 +63,7 @@ def test_pt_container_local_model( device ) -> None: - verify_task(task = task, port = local_container[1]) + verify_task(task = task, port = local_container[1]) @require_torch @@ -74,7 +73,7 @@ def test_pt_container_local_model( ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["cpu"] ) @pytest.mark.parametrize( "framework", @@ -93,7 +92,10 @@ def test_pt_container_custom_handler( repository_id ) -> None: - verify_task(task = task, port = local_container[1]) + verify_task( + task = task, + port = local_container[1], + ) @require_torch @@ -103,7 +105,7 @@ def test_pt_container_custom_handler( ) @pytest.mark.parametrize( "device", - ["gpu", "cpu"] + ["cpu"] ) @pytest.mark.parametrize( "framework", diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py index b62d7ef2..15ffebde 100644 --- a/tests/integ/test_pytorch_local_gpu.py +++ b/tests/integ/test_pytorch_local_gpu.py @@ -20,26 +20,26 @@ class TestPytorchLocal: "task", [ "text-classification", - #"zero-shot-classification", - #"ner", - #"question-answering", - #"fill-mask", - #"summarization", - #"translation_xx_to_yy", - #"text2text-generation", - #"text-generation", - #"feature-extraction", - #"image-classification", - #"automatic-speech-recognition", - #"audio-classification", - #"object-detection", - #"image-segmentation", - #"table-question-answering", - #"conversational", - #"sentence-similarity", - #"sentence-embeddings", - #"sentence-ranking", - #"text-to-image", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", ], ) @pytest.mark.parametrize( diff --git a/tox.ini b/tox.ini index bfeb0e7a..0cc8b2eb 100644 --- a/tox.ini +++ b/tox.ini @@ -140,7 +140,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_local_gpu.py {posargs} \ - --log-cli-level=DEBUG \ + --log-cli-level=ERROR \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True From bf8c42954806c5a25e22572f89010aea118029db Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 24 Feb 2024 17:17:11 +0000 Subject: [PATCH 121/173] pass cpu --- dockerfiles/pytorch/cpu/environment.yaml | 12 +++--- src/huggingface_inference_toolkit/handler.py | 5 ++- .../sentence_transformers_utils.py | 5 +-- .../serialization/base.py | 18 ++++++--- src/huggingface_inference_toolkit/utils.py | 37 ++++++++++--------- .../webservice_robyn.py | 5 ++- .../webservice_starlette.py | 5 ++- tests/integ/conftest.py | 2 +- tests/integ/helpers.py | 2 +- tests/unit/test_utils.py | 1 + tox.ini | 6 +-- 11 files changed, 57 insertions(+), 41 deletions(-) diff --git a/dockerfiles/pytorch/cpu/environment.yaml b/dockerfiles/pytorch/cpu/environment.yaml index 4bd1b693..58c4bb80 100644 --- a/dockerfiles/pytorch/cpu/environment.yaml +++ 
b/dockerfiles/pytorch/cpu/environment.yaml @@ -2,12 +2,12 @@ name: base channels: - conda-forge dependencies: -- python=3.9.13 -- pytorch::pytorch=1.13.1=py3.9_cpu_0 +- python=3.11 +- pytorch::pytorch=2.2.0=py3.11_cpu_0 - pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 + - transformers[sklearn,sentencepiece,audio,vision]==4.37.2 - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 + - torchvision==0.17.1 + - diffusers==0.26.3 + - accelerate==0.27.2 - safetensors \ No newline at end of file diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 7743577d..08368326 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -10,7 +10,8 @@ class HuggingFaceHandler: """ - A Default Hugging Face Inference Handler which works with all transformers pipelines, Sentence Transformers and Optimum. + A Default Hugging Face Inference Handler which works with all + transformers pipelines, Sentence Transformers and Optimum. """ def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): @@ -40,7 +41,7 @@ def __call__(self, data): def get_inference_handler_either_custom_or_default_handler( - model_dir: Path, + model_dir: Path, task: Optional[str] = None ): """ diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index f95f9e7a..951c8502 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -52,7 +52,7 @@ def get_sentence_transformers_pipeline( task=None, model_dir=None, device=-1, - **kwargs + **kwargs ): try: device = "cuda" if device == 0 else "cpu" @@ -61,5 +61,4 @@ def get_sentence_transformers_pipeline( except KeyError: framework = kwargs['framework'] message = f"Task {task} is not supported for framework {framework}" - logging.error(framework) - raise ValueError(message) + logging.error(message) diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py index eb965b64..dc7d6839 100644 --- a/src/huggingface_inference_toolkit/serialization/base.py +++ b/src/huggingface_inference_toolkit/serialization/base.py @@ -42,15 +42,21 @@ def get_deserializer(content_type): if content_type in content_type_mapping: return content_type_mapping[content_type] else: - raise Exception( - f'Content type "{content_type}" not supported. Supported content types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Content type "{content_type}" not supported. + Supported content types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) @staticmethod def get_serializer(accept): if accept in content_type_mapping: return content_type_mapping[accept] else: - raise Exception( - f'Accept type "{accept}" not supported. Supported accept types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Accept type "{accept}" not supported. 
+ Supported accept types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 7499a097..b64760d6 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -3,12 +3,11 @@ import sys from pathlib import Path from typing import Optional, Union -import re -from huggingface_hub import HfApi, login, snapshot_download +from huggingface_hub import login, snapshot_download from transformers import WhisperForConditionalGeneration, pipeline from transformers.file_utils import is_tf_available, is_torch_available -from transformers.pipelines import Conversation, Pipeline +from transformers.pipelines import Pipeline from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME from huggingface_inference_toolkit.diffusers_utils import ( @@ -20,8 +19,10 @@ is_sentence_transformers_available, ) -logger = logging.getLogger(__name__) -#logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(message)s", + level=logging.INFO +) if is_tf_available(): import tensorflow as tf @@ -81,7 +82,7 @@ def wrapped_pipeline(inputs, *args, **kwargs): prediction = pipeline(inputs, *args, **kwargs) logging.info(f"Prediction: {prediction}") return list(prediction) - + return wrapped_pipeline @@ -93,7 +94,7 @@ def _is_gpu_available(): if is_tf_available(): return True if len(tf.config.list_physical_devices("GPU")) > 0 else False elif is_torch_available(): - logger.info(f"CUDA: {torch.cuda.is_available()}") + logging.info(f"CUDA: {torch.cuda.is_available()}") return torch.cuda.is_available() else: raise RuntimeError( @@ -136,7 +137,7 @@ def _load_repository_from_hf( if framework is None: framework = _get_framework() - + logging.info(f"Framework: {framework}") if isinstance(target_dir, str): @@ -150,7 +151,7 @@ def _load_repository_from_hf( ignore_regex = create_artifact_filter(framework) logging.info(f"ignore_regex: {ignore_regex}") logging.info(f"Framework after filtering: {framework}") - logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") + logging.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") # Download the repository to the workdir and filter out non-framework specific weights snapshot_download( @@ -172,7 +173,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): custom_module = Path(model_dir).joinpath(HF_DEFAULT_PIPELINE_NAME) legacy_module = Path(model_dir).joinpath("pipeline.py") if custom_module.is_file(): - logger.info(f"Found custom pipeline at {custom_module}") + logging.info(f"Found custom pipeline at {custom_module}") spec = importlib.util.spec_from_file_location(HF_MODULE_NAME, custom_module) if spec: # add the whole directory to path for submodlues @@ -185,8 +186,10 @@ def check_and_register_custom_pipeline_from_directory(model_dir): custom_pipeline = handler.EndpointHandler(model_dir) elif legacy_module.is_file(): - logger.warning( - "You are using a legacy custom pipeline. Please update to the new format. See documentation for more information." + logging.warning( + """You are using a legacy custom pipeline. + Please update to the new format. 
+ See documentation for more information.""" ) spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: @@ -199,7 +202,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): # init custom handler with model_dir custom_pipeline = pipeline.PreTrainedPipeline(model_dir) else: - logger.info(f"No custom pipeline found at {custom_module}") + logging.info(f"No custom pipeline found at {custom_module}") custom_pipeline = None return custom_pipeline @@ -209,7 +212,7 @@ def get_device(): The get device function will return the device for the DL Framework. """ gpu = _is_gpu_available() - logger.info(f"GPU Available: {gpu}") + logging.info(f"GPU Available: {gpu}") if gpu: return 0 @@ -227,7 +230,7 @@ def get_pipeline( create pipeline class for a specific task based on local saved model """ device = get_device() - logger.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") + logging.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") if task is None: raise EnvironmentError( @@ -255,7 +258,7 @@ def get_pipeline( kwargs["tokenizer"] = model_dir if is_optimum_available(): - logger.info("Optimum is not implement yet using default pipeline.") + logging.info("Optimum is not implemented yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) elif is_sentence_transformers_available() and task in [ "sentence-similarity", @@ -287,7 +290,7 @@ def get_pipeline( **kwargs ) - # wrapp specific pipeline to support better ux + # wrap specific pipeline to support better ux if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) elif task == "automatic-speech-recognition" and isinstance( diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py index a1c437af..5aeaf605 100644 --- a/src/huggingface_inference_toolkit/webservice_robyn.py +++ b/src/huggingface_inference_toolkit/webservice_robyn.py @@ -21,7 +21,10 @@ # if empty_directory_or_not_hf_remote_id is None or task is None: # raise ValueError( -# f"Can't initialize model. Please set correct model id and task. provided values are model_id:{model_id_or_path} and task:{task}" +# f"""Can't initialize model. +# Please set correct model id and task. +# Provided values are model_id: +# {model_id_or_path} and task:{task}""" # ) # logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}") diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 64935925..8bc68b2e 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -49,7 +49,10 @@ async def some_startup_task(): ) else: raise ValueError( - f"Can't initialize model. Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. Provided values are HF_MODEL_DIR:{HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}" + f"""Can't initialize model. + Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. 
+ Provided values are: + HF_MODEL_DIR: {HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}""" ) logger.info(f"Initializing model from directory:{HF_MODEL_DIR}") diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 36bc7113..a0b3201a 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -35,7 +35,7 @@ def remote_container( client = docker.from_env() container_name = f"integration-test-{framework}-{task}-{device}" container_image = f"integration-test-{framework}:{device}" - port = random.randint(5000, 7000) + port = random.randint(5000, 9000) model = task2model[task][framework] #check if port is already open diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py index f1f22f1f..0dae2598 100644 --- a/tests/integ/helpers.py +++ b/tests/integ/helpers.py @@ -110,7 +110,7 @@ def verify_task( if task == "conversational": for message in prediction: - assert "error" not in message["content"].lower() + assert "error" not in message.keys() else: assert task2validation[task]( result=prediction, diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6e37814d..856824a1 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -204,6 +204,7 @@ def test_wrapped_pipeline(): } ] res = conv_pipe(data) + logging.info(f"Response: {res}") assert res[-1]["role"] == "assistant" assert "error" not in res[-1]["content"] diff --git a/tox.ini b/tox.ini index 0cc8b2eb..f75a203b 100644 --- a/tox.ini +++ b/tox.ini @@ -67,8 +67,8 @@ allowlist_externals = commands = pytest -s -v \ {tty:--color=yes} \ - tests/unit/ {posargs} \ - --log-cli-level=ERROR \ + tests/unit/{posargs} \ + --log-cli-level=DEBUG \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' setenv = RUN_SLOW=True @@ -118,7 +118,7 @@ commands = pytest \ {tty:--color=yes} \ tests/integ/test_pytorch_remote_cpu.py {posargs} \ - --log-cli-level=ERROR \ + --log-cli-level=INFO \ --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' [testenv:torch-integration-local-cpu] From c46e85becfb2ef36c931c5f97075a8c383b4e06f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 24 Feb 2024 17:51:36 +0000 Subject: [PATCH 122/173] dry run local cpu --- .github/workflows/integ-test.yaml | 5 +++++ tests/integ/conftest.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml index 4f6ebf16..7eb18c3f 100644 --- a/.github/workflows/integ-test.yaml +++ b/.github/workflows/integ-test.yaml @@ -34,6 +34,11 @@ jobs: run: make inference-pytorch-cpu - name: List images run: docker images + - name: Dry run + run: docker run \ + --entrypoint /bin/sh \ + integration-test-pytorch:cpu \ + -c "echo Hello world!" 
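The "Dry run" step above only proves that the image's entrypoint comes up; the tests themselves gate on an HTTP readiness probe with a bounded retry budget. A condensed sketch of that probe, mirroring `tests/integ/helpers.py` (the exact route is elided in the hunk shown earlier, so `/health` here is an assumption):

```python
import logging
import time

import requests


def wait_for_container_to_be_ready(base_url, time_between_retries=3, max_retries=30):
    error = None
    for _ in range(max_retries):
        time.sleep(time_between_retries)
        try:
            response = requests.get(f"{base_url}/health")  # route is an assumption
            if response.status_code == 200:
                return True
            raise ConnectionError(f"Error: {response.status_code}")
        except Exception as exception:
            error = exception
            logging.warning(f"Container at {base_url} not ready, trying again...")
    # As in patch 097: surface the last error instead of timing out silently.
    raise error
```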
- name: Set up Python 3.11 uses: actions/setup-python@v2 with: diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index a0b3201a..6899820b 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -105,7 +105,7 @@ def local_container( container_name = f"integration-test-{framework}-{id}-{device}" container_image = f"integration-test-{framework}:{device}" - port = random.randint(5000, 7000) + port = random.randint(5000, 9000) #check if port is already open sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) From b26522ad9eae130d104878a348ccb2147133ce11 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Sat, 24 Feb 2024 17:59:29 +0000 Subject: [PATCH 123/173] format --- .github/workflows/integ-test.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml index 7eb18c3f..68af49c0 100644 --- a/.github/workflows/integ-test.yaml +++ b/.github/workflows/integ-test.yaml @@ -35,10 +35,11 @@ jobs: - name: List images run: docker images - name: Dry run - run: docker run \ - --entrypoint /bin/sh \ - integration-test-pytorch:cpu \ - -c "echo Hello world!" + run: | + docker run \ + --entrypoint /bin/sh \ + integration-test-pytorch:cpu \ + -c "echo Hello world!" - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From a707458afdab8b302a577bc4a00044431135a06e Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 08:56:22 +0000 Subject: [PATCH 124/173] review --- dockerfiles/pytorch/gpu/Dockerfile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index f87ceed3..9bbc8748 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -59,16 +59,18 @@ RUN apt-get update -y && apt-get upgrade -y && \ python3-pip \ python3.10-venv \ curl \ - ffmpeg - -# install dependencies -COPY --from=builder /app . - + ffmpeg \ + && apt-get clean autoremove --yes \ + && rm -rf /var/lib/{apt,dpkg,cache,log} + RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ source $HOME/.cargo/env && \ source .venv/bin/activate && \ ls -all +# install dependencies +COPY --from=builder /app . 
+ # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py From 76739034864cc85f753952b2a97d0bb274cc852e Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 08:57:07 +0000 Subject: [PATCH 125/173] .vscode --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1cee519e..1de238e7 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ Vagrantfile __pycache__/ *.py[cod] *$py.class +.vscode # C extensions *.so From fac74d581f6983f1ecfcbf2ff7595a82f399b841 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 09:03:51 +0000 Subject: [PATCH 126/173] venv --- dockerfiles/pytorch/gpu/Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index 9bbc8748..e3290b4a 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -62,11 +62,9 @@ RUN apt-get update -y && apt-get upgrade -y && \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} - + RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ - source $HOME/.cargo/env && \ - source .venv/bin/activate && \ - ls -all + source $HOME/.cargo/env # install dependencies COPY --from=builder /app . From 455c38ee08daf1bbb6991ab4b96959869c894836 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 09:20:57 +0000 Subject: [PATCH 127/173] -n 4 --- .github/workflows/gpu-integ-test.yaml | 4 ++-- README.md | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 57869a0f..925f3a91 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -41,7 +41,7 @@ jobs: - name: Install tox & uv run: pip install uv tox - name: Run local integration tests - run: tox -e torch-integration-local-gpu -- -n 10 + run: tox -e torch-integration-local-gpu -- -n 4 pytorch-integration-remote: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: @@ -61,4 +61,4 @@ jobs: - name: Install tox & uv run: pip install uv tox - name: Run remote integration tests - run: tox -e torch-integration-remote-gpu -- -n 10 \ No newline at end of file + run: tox -e torch-integration-remote-gpu -- -n 4 \ No newline at end of file diff --git a/README.md b/README.md index fb469b1a..5e48fff8 100644 --- a/README.md +++ b/README.md @@ -24,14 +24,14 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK _cpu images_ ```bash -docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . -docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . +make inference-pytorch-cpu +make inference-tensorflow-cpu ``` _gpu images_ ```bash -docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . -docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . +make inference-pytorch-gpu +make inference-tensorflow-gpu ``` 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. 
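The `docker run` invocations revised in the README below have a direct Python equivalent, which is essentially what the `local_container`/`remote_container` fixtures do under the hood; a sketch assuming the `docker` SDK (the model id and task are examples only):

```python
import docker

client = docker.from_env()
container = client.containers.run(
    "integration-test-pytorch:gpu",
    environment={
        "HF_MODEL_ID": "distilbert-base-uncased-distilled-squad",
        "HF_TASK": "question-answering",
    },
    ports={"5000": 5000},
    # Request all visible GPUs, as the fixtures do when device == "gpu".
    device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])],
    detach=True,
)
print(container.logs(tail=20).decode())
```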
From 027a781c55ef85d871aa275f55e8498b0e98be4c Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 09:23:47 +0000 Subject: [PATCH 128/173] readme.md --- README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5e48fff8..f3f812fd 100644 --- a/README.md +++ b/README.md @@ -25,24 +25,22 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK _cpu images_ ```bash make inference-pytorch-cpu -make inference-tensorflow-cpu ``` _gpu images_ ```bash make inference-pytorch-gpu -make inference-tensorflow-gpu ``` 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. ```bash -docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository starlette-transformers:cpu +docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering integration-test-pytorch:cpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository integration-test-pytorch:cpu ``` From b3c9905c3377acb559f8d144dff81ac67ccc9b1f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 10:01:53 +0000 Subject: [PATCH 129/173] contributing --- README.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f3f812fd..9bfe7db1 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,58 @@ Below you ll find a list of supported and tested transformers and sentence trans --- ## 🤝 Contributing -TBD. 
+### Development + +* Recommended Python version: 3.11 +* We recommend `pyenv` for easily switching between different Python versions +* `hf-inference-toolkit` relies on `tox` for unit and integration testing + +#### Unit Testing + +* Install `tox` +* From a Linux terminal, run: +```bash +tox -e unit-torch +# Or +tox -e unit-tensorflow +``` +* You can increase the degree of test parallelism by passing `-n`: +```bash +tox -e unit-torch -- -n 4 +``` + +#### Integration Testing + +* There are two types of integration tests: **local** and **remote** +* **Local** tests simulate a scenario where users bring their own model which was previously downloaded and stored externally +* **Remote** tests simulate a scenario where models are download on the fly, as part of container startup + +##### Local Integration Testing + +* Build the relevant docker image +* To run local integration tests, before running `tox`, we need to create a mount point which will store model artifacts. Example: + +```bash +sudo mount --bind /home/ubuntu/.cache/huggingface/ /mnt/hf_cache/ +``` + +* Make sure that permissions are sufficient for the mount point you created +* Then, run: +```bash +tox -e torch-integration-local-gpu +# Or +tox -e torch-integration-local-cpu +``` + +##### Remote Integration Testing + +* Build the relevant docker image +* From a Linux terminal, run: +```bash +tox -e torch-integration-remote-gpu +#Or +tox -e torch-integration-remote-cpu +``` --- ## 📜 License From d9455efff85ef5fa888169b85e583031b47e0087 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 10:04:14 +0000 Subject: [PATCH 130/173] paths ignore --- .github/workflows/gpu-integ-test.yaml | 2 ++ .github/workflows/integ-test.yaml | 2 ++ .github/workflows/quality.yaml | 2 ++ .github/workflows/unit-test.yaml | 2 ++ 4 files changed, 8 insertions(+) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 925f3a91..aeee668f 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -2,6 +2,8 @@ name: Run GPU Integration Tests on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml index 68af49c0..b766bb87 100644 --- a/.github/workflows/integ-test.yaml +++ b/.github/workflows/integ-test.yaml @@ -2,6 +2,8 @@ name: Run CPU Integration Tests on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 6c7e6c57..842c79e7 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -2,6 +2,8 @@ name: Quality Check on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index b3f2a536..f8adeabb 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -2,6 +2,8 @@ name: Run Unit-Tests on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: From 68268c1daabbb486d17f4d2600180723d9ef3f04 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Mon, 26 Feb 2024 14:24:36 +0000 Subject: [PATCH 131/173] py version --- dockerfiles/pytorch/gpu/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile index e3290b4a..4cdc52db 100644 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ b/dockerfiles/pytorch/gpu/Dockerfile @@ -55,9 
From d9455efff85ef5fa888169b85e583031b47e0087 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 10:04:14 +0000
Subject: [PATCH 130/173] paths ignore

---
 .github/workflows/gpu-integ-test.yaml | 2 ++
 .github/workflows/integ-test.yaml     | 2 ++
 .github/workflows/quality.yaml        | 2 ++
 .github/workflows/unit-test.yaml      | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 925f3a91..aeee668f 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -2,6 +2,8 @@ name: Run GPU Integration Tests
 
 on:
   push:
+    paths-ignore:
+      - 'README.md'
     branches:
       - main
   pull_request:
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index 68af49c0..b766bb87 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -2,6 +2,8 @@ name: Run CPU Integration Tests
 
 on:
   push:
+    paths-ignore:
+      - 'README.md'
     branches:
       - main
   pull_request:
diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index 6c7e6c57..842c79e7 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -2,6 +2,8 @@ name: Quality Check
 
 on:
   push:
+    paths-ignore:
+      - 'README.md'
     branches:
       - main
   pull_request:
diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index b3f2a536..f8adeabb 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -2,6 +2,8 @@ name: Run Unit-Tests
 
 on:
   push:
+    paths-ignore:
+      - 'README.md'
     branches:
       - main
   pull_request:
From 68268c1daabbb486d17f4d2600180723d9ef3f04 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 14:24:36 +0000
Subject: [PATCH 131/173] py version

---
 dockerfiles/pytorch/gpu/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile
index e3290b4a..4cdc52db 100644
--- a/dockerfiles/pytorch/gpu/Dockerfile
+++ b/dockerfiles/pytorch/gpu/Dockerfile
@@ -55,9 +55,9 @@ ENV TORCH_USE_CUDA_DSA=1
 RUN apt-get update -y && apt-get upgrade -y && \
     apt-get install -y \
-    python3 \
+    python3.11 \
     python3-pip \
-    python3.10-venv \
+    python3.11-venv \
     curl \
     ffmpeg \
     && apt-get clean autoremove --yes \
From 0149d03996335890224f954c2619e3527a94e373 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 14:52:18 +0000
Subject: [PATCH 132/173] comments

---
 .github/workflows/gpu-integ-test.yaml |  8 ----
 .github/workflows/integ-test.yaml     | 14 ------
 makefile                              |  2 +-
 tests/integ/config.py                 | 10 +----
 tests/integ/test_tensorflow_local.py  | 61 ---------------------------
 tests/integ/test_tensorflow_remote.py | 52 -----------------------
 6 files changed, 3 insertions(+), 144 deletions(-)
 delete mode 100644 tests/integ/test_tensorflow_local.py
 delete mode 100644 tests/integ/test_tensorflow_remote.py

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index aeee668f..1a935e88 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -25,17 +25,11 @@ jobs:
       HF_HOME: /mnt/hf_cache/
       HF_HUB_CACHE: /mnt/hf_cache/hub
     steps:
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.11
       - uses: actions/checkout@v4.1.1
       - name: Docker Setup Buildx
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
         run: make inference-pytorch-gpu
-      - name: List images
-        run: docker images
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
@@ -54,8 +48,6 @@ jobs:
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
         run: make inference-pytorch-gpu
-      - name: List images
-        run: docker images
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index b766bb87..17b5d87b 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -25,23 +25,11 @@ jobs:
       HF_HOME: /mnt/hf_cache/
       HF_HUB_CACHE: /mnt/hf_cache/hub
     steps:
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.11
       - uses: actions/checkout@v4.1.1
       - name: Docker Setup Buildx
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
         run: make inference-pytorch-cpu
-      - name: List images
-        run: docker images
-      - name: Dry run
-        run: |
-          docker run \
-          --entrypoint /bin/sh \
-          integration-test-pytorch:cpu \
-          -c "echo Hello world!"
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
@@ -60,8 +48,6 @@ jobs:
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
         run: make inference-pytorch-cpu
-      - name: List images
-        run: docker images
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
diff --git a/makefile b/makefile
index 09da51ce..a3007b25 100644
--- a/makefile
+++ b/makefile
@@ -21,7 +21,7 @@ style:
 	ruff $(check_dirs) --fix
 
 inference-pytorch-gpu:
-	docker build -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
+	docker build --no-cache -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
 
 inference-pytorch-cpu:
 	docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
diff --git a/tests/integ/config.py b/tests/integ/config.py
index 7a33ec92..aca2ebf8 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -76,19 +76,13 @@
         "pytorch": "hustvl/yolos-tiny",
         "tensorflow": None,
     },
-    "image-segmentation": {
-        "tensorflow": None,
-    },
-    "table-question-answering": {
-        "tensorflow": None,
-    },
     "zero-shot-image-classification": {
         "pytorch": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
         "tensorflow": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
     },
     "conversational": {
-        "pytorch": "microsoft/DialoGPT-small",
-        "tensorflow": "microsoft/DialoGPT-small",
+        "pytorch": "hf-internal-testing/tiny-random-blenderbot-small",
+        "tensorflow": None,
     },
     "sentence-similarity": {
         "pytorch": "sentence-transformers/all-MiniLM-L6-v2",
diff --git a/tests/integ/test_tensorflow_local.py b/tests/integ/test_tensorflow_local.py
deleted file mode 100644
index 45d37526..00000000
--- a/tests/integ/test_tensorflow_local.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import tempfile
-from tests.integ.helpers import verify_task
-from tests.integ.config import (
-    task2input,
-    task2model,
-    task2output,
-    task2validation
-)
-from transformers.testing_utils import (
-    require_tf,
-    slow,
-    _run_slow_tests
-)
-import pytest
-
-
-class TestTensorflowLocal:
-
-    @pytest.mark.parametrize(
-        "task",
-        [
-            "text-classification",
-            "zero-shot-classification",
-            "ner",
-            "question-answering",
-            "fill-mask",
-            "summarization",
-            "translation_xx_to_yy",
-            "text2text-generation",
-            "text-generation",
-            "feature-extraction",
-            "image-classification",
-            "conversational",
-        ],
-    )
-    @pytest.mark.parametrize(
-        "device",
-        ["gpu", "cpu"]
-    )
-    @pytest.mark.parametrize(
-        "framework",
-        ["tensorflow"]
-    )
-    @pytest.mark.parametrize(
-        "repository_id",
-        [""]
-    )
-    @pytest.mark.usefixtures('local_container')
-    def test_tf_container_local_model(
-        self,
-        local_container,
-        task,
-        framework,
-        device
-    ) -> None:
-
-        verify_task(
-            task = task,
-            port = local_container[1],
-            framework = framework
-        )
diff --git a/tests/integ/test_tensorflow_remote.py b/tests/integ/test_tensorflow_remote.py
deleted file mode 100644
index 3ee660b6..00000000
--- a/tests/integ/test_tensorflow_remote.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import tempfile
-from tests.integ.helpers import verify_task
-from tests.integ.config import (
-    task2input,
-    task2model,
-    task2output,
-    task2validation
-)
-from transformers.testing_utils import (
-    require_torch,
-    slow,
-    _run_slow_tests
-)
-import pytest
-import tenacity
-import docker
-
-class TestTensorflowRemote:
-
-    @pytest.mark.parametrize(
-        "device",
-        ["gpu"]
-    )
-    @pytest.mark.parametrize(
-        "task",
-        [
-            "text-classification",
-            "zero-shot-classification",
-            "ner",
-            "question-answering",
-            "fill-mask",
-            "summarization",
-            "translation_xx_to_yy",
-            "text2text-generation",
-            "text-generation",
-            "feature-extraction",
-            "image-classification",
-            "conversational",
-        ]
-    )
-    @pytest.mark.parametrize(
-        "framework",
-        ["tensorflow"]
-    )
-    @pytest.mark.usefixtures('remote_container')
-    def test_inference_remote(self, remote_container, task, framework, device):
-
-        verify_task(
-            task = task,
-            port = remote_container[1],
-            framework = framework
-        )
From 557bd1b48a8f895b20191d387275ddaf4dbd3aa6 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 14:52:49 +0000
Subject: [PATCH 133/173] comments

---
 .github/workflows/unit-test.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index f8adeabb..f70e32aa 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -28,8 +28,6 @@ jobs:
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
         run: make inference-pytorch-gpu
-      - name: List images
-        run: docker images
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
From 6e34590d917dad81a428315428cf8e87eb648eb2 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 15:50:55 +0000
Subject: [PATCH 134/173] dialog model

---
 .github/workflows/integ-test.yaml |  4 ++--
 makefile                          | 22 ++--------------------
 setup.py                          |  2 +-
 tests/integ/config.py             |  3 ++-
 tox.ini                           | 30 +-----------------------------
 5 files changed, 8 insertions(+), 53 deletions(-)

diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index 17b5d87b..c8fbf0b6 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -37,7 +37,7 @@ jobs:
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run local integration tests
-        run: tox -e torch-integration-local-cpu -- -n 10
+        run: tox -e torch-integration-local-cpu -- -n 4
   pytorch-integration-remote:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
     env:
@@ -55,4 +55,4 @@ jobs:
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run remote integration tests
-        run: tox -e torch-integration-remote-cpu -- -n 10
\ No newline at end of file
+        run: tox -e torch-integration-remote-cpu -- -n 4
\ No newline at end of file
diff --git a/makefile b/makefile
index a3007b25..4451cbc6 100644
--- a/makefile
+++ b/makefile
@@ -24,25 +24,7 @@ inference-pytorch-gpu:
 	docker build --no-cache -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
 
 inference-pytorch-cpu:
-	docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
-
-inference-tensorflow-gpu:
-	docker build --no-cache -f dockerfiles/tensorflow/gpu/Dockerfile -t integration-test-tensorflow:gpu .
-
-inference-tensorflow-cpu:
-	docker build -f dockerfiles/tensorflow/cpu/Dockerfile -t integration-test-tensorflow:cpu .
+	docker build --no-cache -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
 
 stop-all:
-	docker stop $$(docker ps -a -q) && docker container prune --force
-
-run-tensorflow-remote-gpu:
-	docker run -e HF_TASK=text-classification -e HF_MODEL_ID=distilbert/distilbert-base-uncased integration-test-tensorflow:gpu
-
-run-tensorflow-local-gpu:
-	rm -rf /tmp/distilbert && \
-	huggingface-cli download hf-internal-testing/tiny-random-distilbert --local-dir /tmp/distilbert && \
-	docker run --gpus all \
-	-v /tmp/distilbert:/opt/huggingface/model \
-	-e HF_MODEL_DIR=/opt/huggingface/model \
-	-e HF_TASK=text-classification \
-	integration-test-tensorflow:gpu
\ No newline at end of file
+	docker stop $$(docker ps -a -q) && docker container prune --force
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 9dc9876e..e1aff242 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ install_requires = [
     # transformers
-    "transformers[sklearn,sentencepiece]==4.37.2",
+    "transformers[sklearn,sentencepiece]==4.38.1",
     "huggingface_hub>=0.20.3",
     "orjson",
diff --git a/tests/integ/config.py b/tests/integ/config.py
index aca2ebf8..b1d4d605 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -81,7 +81,8 @@
         "tensorflow": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
     },
     "conversational": {
-        "pytorch": "hf-internal-testing/tiny-random-blenderbot-small",
+        #"pytorch": "hf-internal-testing/tiny-random-blenderbot-small",
+        "pytorch": "microsoft/DialoGPT-small",
         "tensorflow": None,
     },
     "sentence-similarity": {
diff --git a/tox.ini b/tox.ini
index f75a203b..b1e0bb87 100644
--- a/tox.ini
+++ b/tox.ini
@@ -67,7 +67,7 @@ allowlist_externals =
 commands = 
     pytest -s -v \
         {tty:--color=yes} \
-        tests/unit/{posargs} \
+        tests/unit/ {posargs} \
         --log-cli-level=DEBUG \
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv =
@@ -142,33 +142,5 @@ commands =
         tests/integ/test_pytorch_local_gpu.py {posargs} \
         --log-cli-level=ERROR \
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
-setenv = 
-    RUN_SLOW=True
-
-[testenv:tf-integration-remote]
-install_command = uv pip install -e ".[tensorflow]"
-allowlist_externals =
-    pytest
-    uv
-commands = 
-    pytest \
-        {tty:--color=yes} \
-        tests/integ/test_tensorflow_remote.py {posargs} \
-        --log-cli-level=DEBUG \
-        --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
-setenv = 
-    RUN_SLOW=True
-
-[testenv:tf-integration-local]
-install_command = uv pip install -e ".[tensorflow, st]"
-allowlist_externals =
-    pytest
-    uv
-commands = 
-    pytest \
-        {tty:--color=yes} \
-        tests/integ/test_tensorflow_local.py {posargs} \
-        --log-cli-level=INFO \
-        --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv = 
     RUN_SLOW=True
\ No newline at end of file
From e8e896f3783ad62bc61af7f63aed643766cd7d2c Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 17:18:57 +0000
Subject: [PATCH 135/173] dockerfile

---
 dockerfiles/pytorch/cpu/Dockerfile       | 72 +++++++++++++-----------
 dockerfiles/pytorch/cpu/environment.yaml | 13 -----
 dockerfiles/pytorch/gpu/Dockerfile       | 47 +++-------------
 dockerfiles/pytorch/gpu/requirements.txt |  9 ---
 makefile                                 |  4 +-
 requirements-test.txt                    | 13 -----
 scripts/entrypoint.sh                    |  3 +
 setup.py                                 | 13 +++--
 8 files changed, 61 insertions(+), 113 deletions(-)
 delete mode 100644 dockerfiles/pytorch/cpu/environment.yaml
 delete mode 100644 dockerfiles/pytorch/gpu/requirements.txt
 delete mode 100644 requirements-test.txt

diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile
index 53faf0ef..52db8e30 100644
--- a/dockerfiles/pytorch/cpu/Dockerfile
+++ b/dockerfiles/pytorch/cpu/Dockerfile
@@ -1,53 +1,57 @@
 FROM ubuntu:22.04
+SHELL ["/bin/bash", "-c"]
 
 LABEL maintainer="Hugging Face"
 
 ENV DEBIAN_FRONTEND=noninteractive
+ENV TORCH_USE_CUDA_DSA=1
 
-RUN apt-get update \
-    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
-    && apt-get install -y \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    # audio
-    libsndfile1-dev \
-    ffmpeg \
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
+    apt-get install -y \
+    build-essential \
+    bzip2 \
+    curl \
+    git \
+    git-lfs \
+    tar \
+    gcc \
+    g++ \
+    cmake \
+    libprotobuf-dev \
+    protobuf-compiler \
+    python3-venv \
+    python3-dev \
+    python3.11 \
+    libsndfile1-dev \
+    ffmpeg \
     && apt-get clean autoremove --yes \
     && rm -rf /var/lib/{apt,dpkg,cache,log}
 
-# install micromamba
-ENV MAMBA_ROOT_PREFIX=/opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
-    && touch /root/.bashrc \
-    && ./bin/micromamba shell init -s bash -p /opt/conda \
-    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
-
-WORKDIR /app
-
-# install base python dependencies
-COPY dockerfiles/pytorch/cpu/environment.yaml /app/environment.yaml
-RUN micromamba install -y -n base -f environment.yaml \
-    && rm environment.yaml \
-    && micromamba clean --all --yes
+# install dependencies
+COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt
+COPY requirements.txt requirements-toolkit.txt
 
-# install huggingface inference toolkit
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+# install wheel and setuptools
+RUN python3 -m venv .venv && \
+    source .venv/bin/activate && \
+    pip install wheel && \
+    pip install --no-cache-dir -r requirements-docker.txt && \
+    pip install --no-cache-dir -r requirements-toolkit.txt
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
 COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
 
+#unit tests
+COPY . /tmp/hf-inference-test
+
 # copy entrypoint and change permissions
 COPY scripts/entrypoint.sh entrypoint.sh
 RUN chmod +x entrypoint.sh
 
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
+ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/pytorch/cpu/environment.yaml b/dockerfiles/pytorch/cpu/environment.yaml
deleted file mode 100644
index 58c4bb80..00000000
--- a/dockerfiles/pytorch/cpu/environment.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: base
-channels:
-- conda-forge
-dependencies:
-- python=3.11
-- pytorch::pytorch=2.2.0=py3.11_cpu_0
-- pip:
-  - transformers[sklearn,sentencepiece,audio,vision]==4.37.2
-  - sentence_transformers==2.2.2
-  - torchvision==0.17.1
-  - diffusers==0.26.3
-  - accelerate==0.27.2
-  - safetensors
\ No newline at end of file
diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile
index 4cdc52db..6911ce83 100644
--- a/dockerfiles/pytorch/gpu/Dockerfile
+++ b/dockerfiles/pytorch/gpu/Dockerfile
@@ -1,10 +1,11 @@
-FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder
+ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04
+
+FROM $BASE_IMAGE
 SHELL ["/bin/bash", "-c"]
 
 LABEL maintainer="Hugging Face"
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV TORCH_USE_CUDA_DSA=1
 
 WORKDIR /app
 
@@ -24,56 +25,26 @@ RUN apt-get update && \
     cmake \
     libprotobuf-dev \
     protobuf-compiler \
+    python3-venv \
+    python3-dev \
     python3.11 \
-    python3-pip \
-    python3.11-venv \
     libsndfile1-dev \
     ffmpeg \
     && apt-get clean autoremove --yes \
     && rm -rf /var/lib/{apt,dpkg,cache,log}
 
-# install dependencies
-COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt
-COPY requirements.txt requirements-toolkit.txt
+COPY . .
 
 # install wheel and setuptools
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
-    source $HOME/.cargo/env && \
-    uv venv && \
+RUN python3 -m venv .venv && \
     source .venv/bin/activate && \
-    uv pip install --no-cache-dir -r requirements-docker.txt && \
-    uv pip install --no-cache-dir -r requirements-toolkit.txt
-
-### Runner
-
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as runner
-SHELL ["/bin/bash", "-c"]
-
-WORKDIR /app
-
-ENV TORCH_USE_CUDA_DSA=1
-
-RUN apt-get update -y && apt-get upgrade -y && \
-    apt-get install -y \
-    python3.11 \
-    python3-pip \
-    python3.11-venv \
-    curl \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
-    source $HOME/.cargo/env
-
-# install dependencies
-COPY --from=builder /app .
+    pip install -e ".[torch, st, diffusers]"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
 COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
 
-#unit tests
+#unit tests - tmp dir gets removed in entrypoint.sh
 COPY . /tmp/hf-inference-test
 
 # copy entrypoint and change permissions
diff --git a/dockerfiles/pytorch/gpu/requirements.txt b/dockerfiles/pytorch/gpu/requirements.txt
deleted file mode 100644
index b6ca030e..00000000
--- a/dockerfiles/pytorch/gpu/requirements.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-cmake==3.28.3
-wheel==0.42.0
-setuptools==69.1.0
-torch==2.2.0
-torchvision
-transformers[sklearn,sentencepiece,audio,vision]==4.37.2
-sentence_transformers==2.3.1
-diffusers==0.26.1
-accelerate==0.26.1
\ No newline at end of file
diff --git a/makefile b/makefile
index 4451cbc6..4a6ab54e 100644
--- a/makefile
+++ b/makefile
@@ -21,10 +21,10 @@ style:
 	ruff $(check_dirs) --fix
 
 inference-pytorch-gpu:
-	docker build --no-cache -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
+	docker build -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
 
 inference-pytorch-cpu:
-	docker build --no-cache -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
+	docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
 
 stop-all:
 	docker stop $$(docker ps -a -q) && docker container prune --force
\ No newline at end of file
diff --git a/requirements-test.txt b/requirements-test.txt
deleted file mode 100644
index fe7f709c..00000000
--- a/requirements-test.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-pytest-xdist
-parameterized
-psutil
-datasets
-pytest-sugar
-mock==2.0.0
-docker
-requests
-tenacity
-termcolor
-execnet
-pluggy
-py
\ No newline at end of file
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index 8544a63c..afd248f3 100644
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -1,5 +1,8 @@
 # /bin/bash
 
+#cleanup tempdir
+rm -rf /tmp/hf-inference-test
+
 # check if HF_MODEL_DIR is set and if not skip installing custom dependencies
 if [[ ! -z "${HF_MODEL_DIR}" ]]; then
     # check if requirements.txt exists and if so install dependencies
diff --git a/setup.py b/setup.py
index e1aff242..ed149a0f 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,9 @@
 # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg
 install_requires = [
-    # transformers
+    "wheel==0.42.0",
+    "setuptools==69.1.0",
+    "cmake==3.28.3",
     "transformers[sklearn,sentencepiece]==4.38.1",
     "huggingface_hub>=0.20.3",
     "orjson",
@@ -22,14 +24,17 @@
     "librosa",
     "pyctcdecode>=0.3.0",
     "phonemizer",
-    "ffmpeg"
+    "ffmpeg",
+    "starlette",
+    "uvicorn",
+    "pandas"
 ]
 
 extras = {}
 
-extras["st"] = ["sentence_transformers==2.2.1"]
+extras["st"] = ["sentence_transformers==2.3.1"]
 extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"]
-extras["torch"] = ["torch==2.2.0", "torchaudio"]
+extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"]
 extras["tensorflow"] = ["tensorflow"]
 extras["test"] = [
     "pytest==7.2.1",
From 2afeaad4ddd01c89f338d3d68523ce7216ed4d9c Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 18:02:54 +0000
Subject: [PATCH 136/173] dockerfile

---
 .dockerignore                            |  3 +-
 dockerfiles/pytorch/{gpu => }/Dockerfile |  2 +-
 dockerfiles/pytorch/cpu/Dockerfile       | 57 -------------------
 makefile                                 |  4 +-
 scripts/entrypoint.sh                    |  2 +-
 setup.py                                 |  4 +-
 .../sentence_transformers_utils.py       | 11 +---
 7 files changed, 11 insertions(+), 72 deletions(-)
 rename dockerfiles/pytorch/{gpu => }/Dockerfile (95%)
 delete mode 100644 dockerfiles/pytorch/cpu/Dockerfile

diff --git a/.dockerignore b/.dockerignore
index 61053631..93505b42 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -4,4 +4,5 @@
 .tox
 .venv
 .gitignore
-makefile
\ No newline at end of file
+makefile
+__pycache__
\ No newline at end of file
diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/Dockerfile
similarity index 95%
rename from dockerfiles/pytorch/gpu/Dockerfile
rename to dockerfiles/pytorch/Dockerfile
index 6911ce83..ad327869 100644
--- a/dockerfiles/pytorch/gpu/Dockerfile
+++ b/dockerfiles/pytorch/Dockerfile
@@ -38,7 +38,7 @@ COPY . .
 # install wheel and setuptools
 RUN python3 -m venv .venv && \
     source .venv/bin/activate && \
-    pip install -e ".[torch, st, diffusers]"
+    pip install --no-cache-dir -e ".[torch, st, diffusers]"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile
deleted file mode 100644
index 52db8e30..00000000
--- a/dockerfiles/pytorch/cpu/Dockerfile
+++ /dev/null
@@ -1,57 +0,0 @@
-FROM ubuntu:22.04
-SHELL ["/bin/bash", "-c"]
-
-LABEL maintainer="Hugging Face"
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TORCH_USE_CUDA_DSA=1
-
-WORKDIR /app
-
-RUN apt-get update && \
-    apt-get install software-properties-common -y && \
-    add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
-    apt-get install -y \
-    build-essential \
-    bzip2 \
-    curl \
-    git \
-    git-lfs \
-    tar \
-    gcc \
-    g++ \
-    cmake \
-    libprotobuf-dev \
-    protobuf-compiler \
-    python3-venv \
-    python3-dev \
-    python3.11 \
-    libsndfile1-dev \
-    ffmpeg \
-    && apt-get clean autoremove --yes \
-    && rm -rf /var/lib/{apt,dpkg,cache,log}
-
-# install dependencies
-COPY dockerfiles/pytorch/gpu/requirements.txt requirements-docker.txt
-COPY requirements.txt requirements-toolkit.txt
-
-# install wheel and setuptools
-RUN python3 -m venv .venv && \
-    source .venv/bin/activate && \
-    pip install wheel && \
-    pip install --no-cache-dir -r requirements-docker.txt && \
-    pip install --no-cache-dir -r requirements-toolkit.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-#unit tests
-COPY . /tmp/hf-inference-test
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"]
\ No newline at end of file
diff --git a/makefile b/makefile
index 4a6ab54e..ab1961a7 100644
--- a/makefile
+++ b/makefile
@@ -21,10 +21,10 @@ style:
 	ruff $(check_dirs) --fix
 
 inference-pytorch-gpu:
-	docker build -f dockerfiles/pytorch/gpu/Dockerfile -t integration-test-pytorch:gpu .
+	docker build -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu .
 
 inference-pytorch-cpu:
-	docker build -f dockerfiles/pytorch/cpu/Dockerfile -t integration-test-pytorch:cpu .
+	docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .
 
 stop-all:
 	docker stop $$(docker ps -a -q) && docker container prune --force
\ No newline at end of file
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index afd248f3..60f96f2b 100644
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -1,7 +1,7 @@
 # /bin/bash
 
 #cleanup tempdir
-rm -rf /tmp/hf-inference-test
+rm -rf /tmp/hf-inference-test && rm -rf /app/tests
 
 # check if HF_MODEL_DIR is set and if not skip installing custom dependencies
 if [[ ! -z "${HF_MODEL_DIR}" ]]; then
diff --git a/setup.py b/setup.py
index ed149a0f..768ce70c 100644
--- a/setup.py
+++ b/setup.py
@@ -17,8 +17,8 @@
     "wheel==0.42.0",
     "setuptools==69.1.0",
     "cmake==3.28.3",
-    "transformers[sklearn,sentencepiece]==4.38.1",
-    "huggingface_hub>=0.20.3",
+    "transformers[sklearn,sentencepiece, audio, vision]==4.38.1",
+    "huggingface_hub==0.20.3",
     "orjson",
diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
index 951c8502..dd9af4d1 100644
--- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py
+++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -54,11 +54,6 @@ def get_sentence_transformers_pipeline(
     device=-1,
     **kwargs
 ):
-    try:
-        device = "cuda" if device == 0 else "cpu"
-        pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device)
-        return pipeline
-    except KeyError:
-        framework = kwargs['framework']
-        message = f"Task {task} is not supported for framework {framework}"
-        logging.error(message)
+    device = "cuda" if device == 0 else "cpu"
+    pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device)
+    return pipeline
From 073f358b6ef889fe2d1a2d5dbcb5f152a21dbbc2 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Mon, 26 Feb 2024 18:13:11 +0000
Subject: [PATCH 137/173] tox

---
 .github/workflows/quality.yaml     | 4 ++--
 .../sentence_transformers_utils.py | 1 -
 tox.ini                            | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index 842c79e7..09929fde 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -18,10 +18,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.9
+      - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
-          python-version: 3.9
+          python-version: 3.11
       - name: Install Python dependencies
         run: pip install -e .[quality]
       - name: Run Quality check
diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
index dd9af4d1..72bb2ee2 100644
--- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py
+++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -1,5 +1,4 @@
 import importlib.util
-import logging
 
 _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None
 
diff --git a/tox.ini b/tox.ini
index b1e0bb87..e06d6855 100644
--- a/tox.ini
+++ b/tox.ini
@@ -57,7 +57,7 @@ commands =
         --gpus all \
         --entrypoint /bin/sh \
         integration-test-pytorch:gpu \
-        -c "pip install tox uv && cd /tmp/hf-inference-test && tox -e unit-torch"
+        -c "python3 -m pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch"
From a77ed507f097d4587f22bbd0c5ebf654c529e6b0 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 08:54:28 +0000
Subject: [PATCH 138/173] unit tests

---
 dockerfiles/pytorch/Dockerfile |  2 +-
 requirements.txt               |  5 -----
 tox.ini                        | 17 ++++++-----------
 3 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile
index ad327869..b2ca0f04 100644
--- a/dockerfiles/pytorch/Dockerfile
+++ b/dockerfiles/pytorch/Dockerfile
@@ -38,7 +38,7 @@ COPY . .
 # install wheel and setuptools
 RUN python3 -m venv .venv && \
     source .venv/bin/activate && \
-    pip install --no-cache-dir -e ".[torch, st, diffusers]"
+    pip install --no-cache-dir -U pip -e ".[torch, st, diffusers]"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/requirements.txt b/requirements.txt
index 0437bb78..e69de29b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +0,0 @@
-orjson
-starlette
-uvicorn
-pandas
-huggingface_hub>=0.20.3
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index e06d6855..665738ed 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,13 +6,11 @@ allowlist_externals =
 
 [testenv]
 deps =
-    uv
     pytest
 allowlist_externals =
     pytest
-    uv
 commands_pre = 
-    uv pip install -e ".[test]"
+    pip install -e ".[test]"
 commands = pytest --version
 setenv =
     PYTHONPATH = .
@@ -47,23 +45,21 @@ setenv =
 [testenv:unit-torch-docker]
 install_command = 
-    uv pip install docker
+    pip install docker
 allowlist_externals =
     pytest
-    uv
     docker
 commands = 
     docker run \
         --gpus all \
-        --entrypoint /bin/sh \
+        --entrypoint /bin/bash \
         integration-test-pytorch:gpu \
-        -c "python3 -m pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch"
+        -c "source .venv/bin/activate && pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch"
 
 [testenv:unit-torch-slow]
-install_command = uv pip install -e ".[torch, st, diffusers]"
+install_command = pip install -e ".[torch, st, diffusers]"
 allowlist_externals =
     pytest
-    uv
 commands = 
     pytest -s -v \
         {tty:--color=yes} \
@@ -74,10 +70,9 @@ setenv =
     RUN_SLOW=True
 
 [testenv:unit-tensorflow]
-install_command = uv pip install -e ".[tensorflow, st]"
+install_command = pip install -e ".[tensorflow, st]"
 allowlist_externals =
     pytest
-    uv
 commands = 
     pytest -s -v \
         {tty:--color=yes} \
From ab1f3f2083ba05225f774b6e2937b08896f69d2f Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 09:41:36 +0000
Subject: [PATCH 139/173] pip

---
 .gitignore | 1 +
 tox.ini    | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1de238e7..ab572a27 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
 .vscode
+.make
 
 # C extensions
 *.so
diff --git a/tox.ini b/tox.ini
index 665738ed..1a3a5db8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,10 +7,12 @@ allowlist_externals =
 
 [testenv]
 deps =
     pytest
+    uv
 allowlist_externals =
     pytest
+    uv
 commands_pre = 
-    pip install -e ".[test]"
+    uv pip install -e ".[test]"
 commands = pytest --version
 setenv =
     PYTHONPATH = .
@@ -27,8 +29,8 @@ commands = ruff src --fix
 install_command = 
     uv pip install -e ".[torch,st]"
 allowlist_externals =
-    pytest
     uv
+    pytest
 commands = 
     pytest -s -v \
         {tty:--color=yes} \
@@ -45,10 +47,11 @@ setenv =
 [testenv:unit-torch-docker]
 install_command = 
-    pip install docker
+    uv pip install docker
 allowlist_externals =
     pytest
     docker
+    uv
 commands = 
     docker run \
From c353728f118786e4dee37dcd7ffb89e32a20ddf2 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 09:44:03 +0000
Subject: [PATCH 140/173] readme

---
 README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9bfe7db1..52ef20f1 100644
--- a/README.md
+++ b/README.md
@@ -186,7 +186,16 @@ Below you ll find a list of supported and tested transformers and sentence trans
 
 * Recommended Python version: 3.11
 * We recommend `pyenv` for easily switching between different Python versions
-* `hf-inference-toolkit` relies on `tox` for unit and integration testing
+* There are two options for unit and integration tests:
+  * `Make` - see `makefile`
+  * `tox` - see `tox.ini`
+
+#### Testing with Make
+
+* Unit Testing: `make unit-test`
+* Integration Testing: `make integ-test`
+
+#### Testing with Tox
 
 #### Unit Testing
From a2c34421136508373b9d6bfac7309c544bc48e37 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 10:32:18 +0000
Subject: [PATCH 141/173] unit

---
 tox.ini | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index 1a3a5db8..09b9c761 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,9 +27,8 @@ commands = ruff src --fix
 
 [testenv:unit-torch]
 install_command = 
-    uv pip install -e ".[torch,st]"
+    pip install -e ".[torch,st]"
 allowlist_externals =
-    uv
     pytest
 commands = 
     pytest -s -v \
From 167018c46e31297367bb664b69ffc9fb2cc572bd Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 10:45:36 +0000
Subject: [PATCH 142/173] cache

---
 .github/workflows/gpu-integ-test.yaml | 1 +
 .github/workflows/integ-test.yaml     | 1 +
 .github/workflows/unit-test.yaml      | 1 +
 3 files changed, 3 insertions(+)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 1a935e88..59e07256 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -34,6 +34,7 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
+          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run local integration tests
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index c8fbf0b6..7bde09e5 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -34,6 +34,7 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
+          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run local integration tests
diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index f70e32aa..7d1f4b1e 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -32,6 +32,7 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
+          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run unit tests
From 0ac2960af494ae1900a0ae96cceba80fc1b82e51 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 13:41:51 +0000
Subject: [PATCH 143/173] hub cache

---
 .github/workflows/build-container.yaml | 5 +++--
 .github/workflows/gpu-integ-test.yaml  | 3 ++-
 tests/integ/conftest.py                | 5 +++--
 tox.ini                                | 2 +-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml
index 031207c0..fe12fbf6 100644
--- a/.github/workflows/build-container.yaml
+++ b/.github/workflows/build-container.yaml
@@ -19,7 +19,8 @@ jobs:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
       image: inference-pytorch-cpu
-      dockerfile: dockerfiles/pytorch/cpu/Dockerfile
+      dockerfile: dockerfiles/pytorch/Dockerfile
+      build_args: "BASE_IMAGE=ubuntu:22.04"
     secrets:
       TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
       REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
@@ -28,7 +29,7 @@ jobs:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
       image: inference-pytorch-gpu
-      dockerfile: dockerfiles/pytorch/gpu/Dockerfile
+      dockerfile: dockerfiles/pytorch/Dockerfile
     secrets:
       TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
       REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 59e07256..fd7504fe 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -29,7 +29,8 @@ jobs:
       - name: Docker Setup Buildx
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
-        run: make inference-pytorch-gpu
+        run: |
+
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py
index 6899820b..ec282ea8 100644
--- a/tests/integ/conftest.py
+++ b/tests/integ/conftest.py
@@ -5,7 +5,6 @@
 from tests.integ.config import task2model
 import tenacity
 import time
-import tempfile
 from huggingface_inference_toolkit.utils import (
     _is_gpu_available,
     _load_repository_from_hf
@@ -16,7 +15,9 @@
 )
 import uuid
 import socket
+import os
 
+HF_HUB_CACHE = os.environ.get("HF_HUB_CACHE", "/home/ubuntu/.cache/huggingface/hub")
 IS_GPU = _run_slow_tests
 DEVICE = "gpu" if IS_GPU else "cpu"
 
@@ -121,7 +122,7 @@ def local_container(
     ] if device == "gpu" else []
 
     object_id = model.replace("/", "--")
-    model_dir = f"/mnt/hf_cache/hub/{object_id}"
+    model_dir = f"{HF_HUB_CACHE}/{object_id}"
 
     storage_dir = _load_repository_from_hf(
         repository_id = model,
diff --git a/tox.ini b/tox.ini
index 09b9c761..a02e0f59 100644
--- a/tox.ini
+++ b/tox.ini
@@ -56,7 +56,7 @@ commands =
         --gpus all \
         --entrypoint /bin/bash \
         integration-test-pytorch:gpu \
-        -c "source .venv/bin/activate && pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch"
+        -c "source .venv/bin/activate && pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch-slow -- -n 10"
From 7b922a4e940f9a3c2012b67a4ccd3d771f50f367 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 13:54:50 +0000
Subject: [PATCH 144/173] remove cache

---
 .github/workflows/gpu-integ-test.yaml | 4 +---
 .github/workflows/integ-test.yaml     | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index fd7504fe..1a935e88 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -29,13 +29,11 @@ jobs:
       - name: Docker Setup Buildx
         uses: docker/setup-buildx-action@v3.0.0
       - name: Docker Build
-        run: |
-
+        run: make inference-pytorch-gpu
       - name: Set up Python 3.11
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
-          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run local integration tests
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index 7bde09e5..c8fbf0b6 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -34,7 +34,6 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
-          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run local integration tests
From 59db10428402f6688390bbf4dfd3cf79d529cfd8 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 13:55:15 +0000
Subject: [PATCH 145/173] unit cache

---
 .github/workflows/unit-test.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index 7d1f4b1e..f70e32aa 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -32,7 +32,6 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: 3.11
-          cache: 'pip'
       - name: Install tox & uv
         run: pip install uv tox
       - name: Run unit tests
From 9d294d53e04e502412d1a685aba420bff8eecb5d Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 14:09:55 +0000
Subject: [PATCH 146/173] cache

---
 .github/workflows/gpu-integ-test.yaml | 6 ++++++
 tox.ini                               | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 1a935e88..60ef63d2 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -25,6 +25,12 @@ jobs:
       HF_HOME: /mnt/hf_cache/
       HF_HUB_CACHE: /mnt/hf_cache/hub
     steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
       - uses: actions/checkout@v4.1.1
       - name: Docker Setup Buildx
         uses: docker/setup-buildx-action@v3.0.0
diff --git a/tox.ini b/tox.ini
index a02e0f59..ce358362 100644
--- a/tox.ini
+++ b/tox.ini
@@ -128,7 +128,8 @@ commands =
         tests/integ/test_pytorch_local_cpu.py {posargs} \
         --log-cli-level=ERROR \
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
-
+setenv = 
+    HF_HUB_CACHE=$HF_HUB_CACHE
 [testenv:torch-integration-local-gpu]
 install_command = pip install -e ".[torch]"
 allowlist_externals =
@@ -140,4 +141,5 @@ commands =
         --log-cli-level=ERROR \
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv = 
-    RUN_SLOW=True
\ No newline at end of file
+    RUN_SLOW=True
+    HF_HUB_CACHE=$HF_HUB_CACHE
\ No newline at end of file
From 88787e4edb719ce5a2c73b5bde9e7f70e9c3ed53 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 14:18:59 +0000
Subject: [PATCH 147/173] passenv

---
 tox.ini | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tox.ini b/tox.ini
index ce358362..fc6787e7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -128,8 +128,9 @@ commands =
         tests/integ/test_pytorch_local_cpu.py {posargs} \
         --log-cli-level=ERROR \
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
-setenv = 
-    HF_HUB_CACHE=$HF_HUB_CACHE
+passenv =
+    HF_HUB_CACHE
 
 [testenv:torch-integration-local-gpu]
 install_command = pip install -e ".[torch]"
 allowlist_externals =
@@ -142,4 +143,5 @@ commands =
         --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s'
 setenv = 
     RUN_SLOW=True
-    HF_HUB_CACHE=$HF_HUB_CACHE
\ No newline at end of file
+passenv = 
+    HF_HUB_CACHE
\ No newline at end of file
From b1ee387a967fd82695e94b9935987047b66f1a12 Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 14:45:07 +0000
Subject: [PATCH 148/173] cleanup

---
 src/huggingface_inference_toolkit/utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index b64760d6..61051cf2 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -19,10 +19,9 @@
     is_sentence_transformers_available,
 )
 
-logging.basicConfig(
-    format="%(asctime)s | %(levelname)s | %(message)s",
-    level=logging.INFO
-)
+logger = logging.getLogger(__name__)
+logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
+
 
 if is_tf_available():
     import tensorflow as tf
From f3051ec67ccbae88336f217104f3105ecf6cbefa Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 15:15:44 +0000
Subject: [PATCH 149/173] comments

---
 dockerfiles/pytorch/Dockerfile             |  4 +--
 src/huggingface_inference_toolkit/utils.py | 32 ++++++++--------
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile
index b2ca0f04..97127b4e 100644
--- a/dockerfiles/pytorch/Dockerfile
+++ b/dockerfiles/pytorch/Dockerfile
@@ -9,6 +9,8 @@ ENV DEBIAN_FRONTEND=noninteractive
 
 WORKDIR /app
 
+COPY . .
+
 RUN apt-get update && \
     apt-get install software-properties-common -y && \
     add-apt-repository ppa:deadsnakes/ppa && \
@@ -33,8 +35,6 @@ RUN apt-get update && \
     && apt-get clean autoremove --yes \
     && rm -rf /var/lib/{apt,dpkg,cache,log}
 
-COPY . .
-
 # install wheel and setuptools
 RUN python3 -m venv .venv && \
     source .venv/bin/activate && \
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 61051cf2..c91f4764 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from huggingface_hub import login, snapshot_download
+from huggingface_hub import login, snapshot_download, HfApi
 from transformers import WhisperForConditionalGeneration, pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
 from transformers.pipelines import Pipeline
@@ -93,7 +93,6 @@ def _is_gpu_available():
     if is_tf_available():
         return True if len(tf.config.list_physical_devices("GPU")) > 0 else False
     elif is_torch_available():
-        logging.info(f"CUDA: {torch.cuda.is_available()}")
         return torch.cuda.is_available()
     else:
         raise RuntimeError(
@@ -137,8 +136,6 @@ def _load_repository_from_hf(
     if framework is None:
         framework = _get_framework()
 
-    logging.info(f"Framework: {framework}")
-
     if isinstance(target_dir, str):
         target_dir = Path(target_dir)
 
@@ -146,13 +143,18 @@ def _load_repository_from_hf(
     if not target_dir.exists():
         target_dir.mkdir(parents=True)
 
+    # check if safetensors weights are available
+    if framework == "pytorch":
+        files = HfApi().model_info(repository_id).siblings
+        if any(f.rfilename.endswith("safetensors") for f in files):
+            framework = "safetensors"
+
     # create regex to only include the framework specific weights
     ignore_regex = create_artifact_filter(framework)
-    logging.info(f"ignore_regex: {ignore_regex}")
-    logging.info(f"Framework after filtering: {framework}")
     logging.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
 
-    # Download the repository to the workdir and filter out non-framework specific weights
+    # Download the repository to the workdir and filter out non-framework
+    # specific weights
     snapshot_download(
         repo_id = repository_id,
         revision = revision,
@@ -235,7 +237,8 @@ def get_pipeline(
         raise EnvironmentError(
             "The task for this model is not set: Please set one: https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined"
         )
-    # define tokenizer or feature extractor as kwargs to load it the pipeline correctly
+    # define tokenizer or feature extractor as kwargs to load it the pipeline
+    # correctly
     if task in {
         "automatic-speech-recognition",
         "image-segmentation",
@@ -245,12 +248,6 @@ def get_pipeline(
         "zero-shot-image-classification",
     }:
         kwargs["feature_extractor"] = model_dir
-        hf_pipeline = pipeline(
-            task=task,
-            model=model_dir,
-            device=device,
-            **kwargs
-        )
     elif task in {"image-to-text"}:
         pass
     else:
@@ -278,10 +275,6 @@ def get_pipeline(
             **kwargs
         )
     else:
-        logging.info(f"Task: {task}")
-        logging.info(f"Model: {model_dir}")
-        logging.info(f"Device: {device}")
-        logging.info(f"Args: {kwargs}")
         hf_pipeline = pipeline(
             task=task,
             model=model_dir,
@@ -298,9 +291,6 @@ def get_pipeline(
     ):
         # set chunk length to 30s for whisper to enable long audio files
         hf_pipeline._preprocess_params["chunk_length_s"] = 30
-        #hf_pipeline._preprocess_params["ignore_warning"] = True
-        # set decoder to english by default
-        # TODO: replace when transformers 4.26.0 is release with
         hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids(
             language="english", task="transcribe"
         )
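The safetensors preference introduced above can be exercised on its own with the same `huggingface_hub` calls the diff uses. A rough sketch; the wrapper function and its name are illustrative, not part of the toolkit:

```python
from huggingface_hub import HfApi

def preferred_framework(repository_id: str, framework: str = "pytorch") -> str:
    """Prefer safetensors weights when the Hub repository ships them,
    mirroring the check added to _load_repository_from_hf above."""
    if framework == "pytorch":
        files = HfApi().model_info(repository_id).siblings
        if any(f.rfilename.endswith("safetensors") for f in files):
            return "safetensors"
    return framework
```

Downloading only the preferred weight format keeps the snapshot small: the ignore regex built from the returned framework filters out the redundant weight files.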
From 606e410bde3d33e31ab01e0a291d0c9b6e761b4e Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 15:39:50 +0000
Subject: [PATCH 150/173] fix

---
 src/huggingface_inference_toolkit/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index c91f4764..bd267c8e 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from huggingface_hub import login, snapshot_download, HfApi
+from huggingface_hub import HfApi, login, snapshot_download
 from transformers import WhisperForConditionalGeneration, pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
 from transformers.pipelines import Pipeline
@@ -153,7 +153,7 @@ def _load_repository_from_hf(
     ignore_regex = create_artifact_filter(framework)
     logging.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
 
-    # Download the repository to the workdir and filter out non-framework 
+    # Download the repository to the workdir and filter out non-framework
     # specific weights
     snapshot_download(
         repo_id = repository_id,
From ef159959c7fcbbc6589e80a9953b862cf8d529bf Mon Sep 17 00:00:00 2001
From: Rafael Pierre
Date: Tue, 27 Feb 2024 21:26:49 +0000
Subject: [PATCH 151/173] remove tox

---
 .github/workflows/gpu-integ-test.yaml             | 50 ++----------
 .github/workflows/integ-test.yaml                 | 12 ++--
 .github/workflows/quality.yaml                    | 12 ++--
 .github/workflows/templates/integration_test.yaml | 60 +++++++++++++++
 .github/workflows/unit-test.yaml                  | 12 ++--
 5 files changed, 83 insertions(+), 63 deletions(-)
 create mode 100644 .github/workflows/templates/integration_test.yaml

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 60ef63d2..bc4d4f14 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -9,10 +9,6 @@ on:
   pull_request:
   workflow_dispatch:
 
-env:
-  ACTIONS_RUNNER_DEBUG: true
-  ACTIONS_STEP_DEBUG: true
-
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
@@ -20,45 +16,9 @@ concurrency:
 jobs:
   pytorch-integration-local:
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
-    env:
-      AWS_REGION: us-east-1
-      HF_HOME: /mnt/hf_cache/
-      HF_HUB_CACHE: /mnt/hf_cache/hub
     steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-      - uses: actions/checkout@v4.1.1
-      - name: Docker Setup Buildx
-        uses: docker/setup-buildx-action@v3.0.0
-      - name: Docker Build
-        run: make inference-pytorch-gpu
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.11
-      - name: Install tox & uv
-        run: pip install uv tox
-      - name: Run local integration tests
-        run: tox -e torch-integration-local-gpu -- -n 4
-  pytorch-integration-remote:
-    runs-on: [single-gpu, nvidia-gpu, t4, ci]
-    env:
-      AWS_REGION: us-east-1
-    steps:
-      - uses: actions/checkout@v4.1.1
-      - name: Docker Setup Buildx
-        uses: docker/setup-buildx-action@v3.0.0
-      - name: Docker Build
-        run: make inference-pytorch-gpu
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.11
-      - name: Install tox & uv
-        run: pip install uv tox
-      - name: Run remote integration tests
-        run: tox -e torch-integration-remote-gpu -- -n 4
\ No newline at end of file
+      - name: Integration Tests - GPU Local
+        uses: ./.github/workflows/templates/integration_test.yaml
+        with:
+          test_path: "tests/integ/test_pytorch_local_gpu.py"
+          build_img_cmd: "make inference-pytorch-gpu"
\ No newline at end of file
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index c8fbf0b6..d0c3e48b 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -1,12 +1,12 @@
 name: Run CPU Integration Tests
 
 on:
-  push:
-    paths-ignore:
-      - 'README.md'
-    branches:
-      - main
-  pull_request:
+  #push:
+  #  paths-ignore:
+  #    - 'README.md'
+  #  branches:
+  #    - main
+  #pull_request:
   workflow_dispatch:
 
 env:
diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml
index 09929fde..fb34bfeb 100644
--- a/.github/workflows/quality.yaml
+++ b/.github/workflows/quality.yaml
@@ -1,12 +1,12 @@
 name: Quality Check
 
 on:
-  push:
-    paths-ignore:
-      - 'README.md'
-    branches:
-      - main
-  pull_request:
+  #push:
+  #  paths-ignore:
+  #    - 'README.md'
+  #  branches:
+  #    - main
+  #pull_request:
   workflow_dispatch:
 
 concurrency:
diff --git a/.github/workflows/templates/integration_test.yaml b/.github/workflows/templates/integration_test.yaml
new file mode 100644
index 00000000..aced26a3
--- /dev/null
+++ b/.github/workflows/templates/integration_test.yaml
@@ -0,0 +1,60 @@
+on:
+  workflow_call:
+    inputs:
+      region:
+        type: string
+        required: false
+        default: "us-east-1"
+      hf_home:
+        required: false
+        type: string
+        default: "/mnt/hf_cache/""
+      hf_hub_cache:
+        required: false
+        type: string
+        default: "/mnt/hf_cache/hub"
+      run_slow:
+        required: false
+        type: string
+        default: "True"
+      test_path:
+        type: string
+        required: true
+      test_parallelism:
+        type: string
+        required: false
+        default: "4"
+      build_img_cmd:
+        type: string
+        required: false
+        default: "make inference-pytorch-gpu"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  pytorch-integration-local:
+    env:
+      AWS_REGION: ${{ inputs.region }}
+      HF_HOME: ${{ inputs.hf_home }}
+      HF_HUB_CACHE: ${{ inputs.hf_hub_cache }}
+      RUN_SLOW: ${{ inputs.run_slow }}
+    steps:
+      - uses: actions/checkout@v4.1.1
+      - name: Docker Setup Buildx
+        uses: docker/setup-buildx-action@v3.0.0
+      - name: Docker Build
+        run: ${{ inputs.build_image_cmd }}
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.11
+      - name: Install dependencies
+        run: pip install -e ".[torch, test]"
+      - name: Run local integration tests
+        run: |
+          python -m pytest \
+          ${{ inputs.test_path }} -n ${{ inputs.test_parallelism }} \
+          --log-cli-level=${{ inputs.log_level }} \
+          --log-format=${{ inputs.log_format }}
\ No newline at end of file
diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
index f70e32aa..a3432399 100644
--- a/.github/workflows/unit-test.yaml
+++ b/.github/workflows/unit-test.yaml
@@ -1,12 +1,12 @@
 name: Run Unit-Tests
 
 on:
-  push:
-    paths-ignore:
-      - 'README.md'
-    branches:
-      - main
-  pull_request:
+  #push:
+  #  paths-ignore:
+  #    - 'README.md'
+  #  branches:
+  #    - main
+  #pull_request:
   workflow_dispatch:
 
 env:
}}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + pytorch-integration-local: + env: + AWS_REGION: ${{ inputs.region }} + HF_HOME: ${{ inputs.hf_home }} + HF_HUB_CACHE: ${{ inputs.hf_hub_cache }} + RUN_SLOW: ${{ inputs.run_slow }} + steps: + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: ${{ inputs.build_image_cmd }} + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install dependencies + run: pip install -e ".[torch, test]" + - name: Run local integration tests + run: | + python -m pytest \ + ${{ inputs.test_path }} -n ${{ inputs.test_parallelism }} \ + --log-cli-level=${{ inputs.log_level }} \ + --log-format=${{ inputs.log_format }} \ No newline at end of file diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index f70e32aa..a3432399 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -1,12 +1,12 @@ name: Run Unit-Tests on: - push: - paths-ignore: - - 'README.md' - branches: - - main - pull_request: + #push: + # paths-ignore: + # - 'README.md' + # branches: + # - main + #pull_request: workflow_dispatch: env: From a6f07810bf87340e3c0a2a87925f2ff0c5d46878 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 27 Feb 2024 21:29:56 +0000 Subject: [PATCH 152/173] fix --- .github/workflows/gpu-integ-test.yaml | 13 +++++-------- .github/workflows/templates/integration_test.yaml | 3 ++- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index bc4d4f14..de615626 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -8,17 +8,14 @@ on: - main pull_request: workflow_dispatch: - + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: pytorch-integration-local: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - steps: - - name: Integration Tests - GPU Local - uses: ./.github/workflows/templates/integration_test.yaml - with: - test_path: "tests/integ/test_pytorch_local_gpu.py" - build_img_cmd: "make inference-pytorch-gpu" \ No newline at end of file + uses: ./.github/workflows/templates/integration_test.yaml + with: + test_path: "tests/integ/test_pytorch_local_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" \ No newline at end of file diff --git a/.github/workflows/templates/integration_test.yaml b/.github/workflows/templates/integration_test.yaml index aced26a3..3dfc352b 100644 --- a/.github/workflows/templates/integration_test.yaml +++ b/.github/workflows/templates/integration_test.yaml @@ -8,7 +8,7 @@ on: hf_home: required: false type: string - default: "/mnt/hf_cache/"" + default: "/mnt/hf_cache/" hf_hub_cache: required: false type: string @@ -35,6 +35,7 @@ concurrency: jobs: pytorch-integration-local: + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: ${{ inputs.region }} HF_HOME: ${{ inputs.hf_home }} From 68a87c1d3da38ecde2b9426ef34c747dbdf13399 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 27 Feb 2024 21:30:38 +0000 Subject: [PATCH 153/173] path --- .github/workflows/gpu-integ-test.yaml | 4 ++-- .github/workflows/{templates => }/integration_test.yaml | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{templates => }/integration_test.yaml (100%) diff --git a/.github/workflows/gpu-integ-test.yaml 
b/.github/workflows/gpu-integ-test.yaml index de615626..6e67db69 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -8,14 +8,14 @@ on: - main pull_request: workflow_dispatch: - + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: pytorch-integration-local: - uses: ./.github/workflows/templates/integration_test.yaml + uses: ./.github/workflows/integration_test.yaml with: test_path: "tests/integ/test_pytorch_local_gpu.py" build_img_cmd: "make inference-pytorch-gpu" \ No newline at end of file diff --git a/.github/workflows/templates/integration_test.yaml b/.github/workflows/integration_test.yaml similarity index 100% rename from .github/workflows/templates/integration_test.yaml rename to .github/workflows/integration_test.yaml From 040d581a4856e8aecc92b7484ce120030c23a902 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Tue, 27 Feb 2024 21:32:15 +0000 Subject: [PATCH 154/173] concurrency --- .github/workflows/integration_test.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index 3dfc352b..fe49a89a 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -29,10 +29,6 @@ on: required: false default: "make inference-pytorch-gpu" -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - jobs: pytorch-integration-local: runs-on: [single-gpu, nvidia-gpu, t4, ci] From 605c7f364351b1c4110f3d718802e058aa1e7e96 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 08:47:00 +0000 Subject: [PATCH 155/173] fix --- .github/workflows/integration_test.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index fe49a89a..11314655 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -28,6 +28,14 @@ on: type: string required: false default: "make inference-pytorch-gpu" + log_level: + type: string + required: false + default: "ERROR" + log_format: + type: string + required: false + default: "%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s" jobs: pytorch-integration-local: @@ -42,7 +50,7 @@ jobs: - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build - run: ${{ inputs.build_image_cmd }} + run: ${{ inputs.build_img_cmd }} - name: Set up Python 3.11 uses: actions/setup-python@v2 with: From c7a3cd0809032361e16633a934b0a093e6d562d5 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 08:59:44 +0000 Subject: [PATCH 156/173] runs on; --- .github/workflows/gpu-integ-test.yaml | 27 ++++++++++++++++++++++--- .github/workflows/integration_test.yaml | 12 +++++++---- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index 6e67db69..cd194033 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -1,4 +1,4 @@ -name: Run GPU Integration Tests +name: Run Integration Tests on: push: @@ -14,8 +14,29 @@ concurrency: cancel-in-progress: true jobs: - pytorch-integration-local: + pytorch-integration-local-gpu: + name: Local Integration Tests - GPU uses: ./.github/workflows/integration_test.yaml with: test_path: "tests/integ/test_pytorch_local_gpu.py" - build_img_cmd: "make 
inference-pytorch-gpu" \ No newline at end of file + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-gpu: + name: Remote Integration Tests - GPU + uses: ./.github/workflows/integration_test.yaml + with: + test_path: "tests/integ/test_pytorch_remote_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-cpu: + name: Remote Integration Tests - CPU + uses: ./.github/workflows/integration_test.yaml + with: + test_path: "tests/integ/test_pytorch_remote_gpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" + pytorch-integration-local-cpu: + name: Local Integration Tests - CPU + uses: ./.github/workflows/integration_test.yaml + with: + test_path: "tests/integ/test_pytorch_local_cpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" \ No newline at end of file diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration_test.yaml index 11314655..d262ed8c 100644 --- a/.github/workflows/integration_test.yaml +++ b/.github/workflows/integration_test.yaml @@ -36,10 +36,14 @@ on: type: string required: false default: "%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s" + runs_on: + type: string + required: false + default: "['single-gpu', 'nvidia-gpu', 't4', 'ci']" jobs: - pytorch-integration-local: - runs-on: [single-gpu, nvidia-gpu, t4, ci] + pytorch-integration-tests: + runs-on: ${{ fromJson(inputs.runs_on) }} env: AWS_REGION: ${{ inputs.region }} HF_HOME: ${{ inputs.hf_home }} @@ -61,5 +65,5 @@ jobs: run: | python -m pytest \ ${{ inputs.test_path }} -n ${{ inputs.test_parallelism }} \ - --log-cli-level=${{ inputs.log_level }} \ - --log-format=${{ inputs.log_format }} \ No newline at end of file + --log-cli-level='${{ inputs.log_level }}' \ + --log-format='${{ inputs.log_format }}' \ No newline at end of file From 0cccf692f6a8dea3b94f4e8a11d040bae700e21b Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 09:08:59 +0000 Subject: [PATCH 157/173] cpu --- .github/workflows/gpu-integ-test.yaml | 2 +- .gitignore | 1 + tox.ini | 147 -------------------------- 3 files changed, 2 insertions(+), 148 deletions(-) delete mode 100644 tox.ini diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml index cd194033..4c151a99 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/gpu-integ-test.yaml @@ -30,7 +30,7 @@ jobs: name: Remote Integration Tests - CPU uses: ./.github/workflows/integration_test.yaml with: - test_path: "tests/integ/test_pytorch_remote_gpu.py" + test_path: "tests/integ/test_pytorch_remote_cpu.py" build_img_cmd: "make inference-pytorch-cpu" runs_on: "['ci']" pytorch-integration-local-cpu: diff --git a/.gitignore b/.gitignore index ab572a27..788b0ea1 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ __pycache__/ *$py.class .vscode .make +tox.ini # C extensions *.so diff --git a/tox.ini b/tox.ini deleted file mode 100644 index fc6787e7..00000000 --- a/tox.ini +++ /dev/null @@ -1,147 +0,0 @@ -[tox] -envlist = 311 -skipsdist = true -allowlist_externals = - pytest - -[testenv] -deps = - pytest - uv -allowlist_externals = - pytest - uv -commands_pre = - uv pip install -e ".[test]" -commands = pytest --version -setenv = - PYTHONPATH = . 
- -[testenv:lint] -basepython = python -commands = ruff src - -[testenv:fix] -basepython = python -commands = ruff src --fix - -[testenv:unit-torch] -install_command = - pip install -e ".[torch,st]" -allowlist_externals = - pytest -commands = - pytest -s -v \ - {tty:--color=yes} \ - tests/unit/test_const.py \ - tests/unit/test_handler.py \ - tests/unit/test_sentence_transformers.py \ - tests/unit/test_serializer.py \ - tests/unit/test_utils.py \ - {posargs} \ - --log-cli-level=INFO \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -setenv = - PYTHONPATH=. - -[testenv:unit-torch-docker] -install_command = - uv pip install docker -allowlist_externals = - pytest - docker - uv -commands = - docker run \ - --gpus all \ - --entrypoint /bin/bash \ - integration-test-pytorch:gpu \ - -c "source .venv/bin/activate && pip install tox && cd /tmp/hf-inference-test && tox -e unit-torch-slow -- -n 10" - -[testenv:unit-torch-slow] -install_command = pip install -e ".[torch, st, diffusers]" -allowlist_externals = - pytest -commands = - pytest -s -v \ - {tty:--color=yes} \ - tests/unit/ {posargs} \ - --log-cli-level=DEBUG \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -setenv = - RUN_SLOW=True - -[testenv:unit-tensorflow] -install_command = pip install -e ".[tensorflow, st]" -allowlist_externals = - pytest -commands = - pytest -s -v \ - {tty:--color=yes} \ - tests/unit/ {posargs} \ - --log-cli-level=DEBUG \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' - -[testenv:unit-tensorflow-slow] -install_command = pip install -e ".[tensorflow, st]" -allowlist_externals = pytest -commands = - pytest -s -v \ - {tty:--color=yes} \ - tests/unit/ {posargs} \ - --log-cli-level=ERROR \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -setenv = - RUN_SLOW=True - -[testenv:torch-integration-remote-gpu] -install_command = pip install -e ".[torch]" -allowlist_externals = - pytest -commands = - pytest \ - {tty:--color=yes} \ - tests/integ/test_pytorch_remote_gpu.py {posargs} \ - --log-cli-level=ERROR \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -setenv = - RUN_SLOW=True - -[testenv:torch-integration-remote-cpu] -install_command = pip install -e ".[torch]" -allowlist_externals = - pytest -commands = - pytest \ - {tty:--color=yes} \ - tests/integ/test_pytorch_remote_cpu.py {posargs} \ - --log-cli-level=INFO \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' - -[testenv:torch-integration-local-cpu] -install_command = pip install -e ".[torch]" -allowlist_externals = - pytest -commands = - pytest \ - {tty:--color=yes} \ - tests/integ/test_pytorch_local_cpu.py {posargs} \ - --log-cli-level=ERROR \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -passenv = - HF_HUB_CACHE - -[testenv:torch-integration-local-gpu] -install_command = pip install -e ".[torch]" -allowlist_externals = - pytest -commands = - pytest \ - {tty:--color=yes} \ - tests/integ/test_pytorch_local_gpu.py {posargs} \ - --log-cli-level=ERROR \ - --log-format='%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s' -setenv = - RUN_SLOW=True -passenv = - HF_HUB_CACHE \ No newline at end of file From 8d8e68a140cc65113929d0f347c88db00f124eed Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 09:51:21 +0000 Subject: [PATCH 158/173] unit tests --- .dockerignore | 4 +- .github/workflows/integ-test.yaml | 58 ------------------- ...test.yaml => 
integration-test-action.yaml} | 0 ...-integ-test.yaml => integration-test.yaml} | 8 +-- .github/workflows/quality.yaml | 12 ++-- .github/workflows/unit-test.yaml | 34 ++++++----- dockerfiles/pytorch/Dockerfile | 7 +-- 7 files changed, 36 insertions(+), 87 deletions(-) delete mode 100644 .github/workflows/integ-test.yaml rename .github/workflows/{integration_test.yaml => integration-test-action.yaml} (100%) rename .github/workflows/{gpu-integ-test.yaml => integration-test.yaml} (81%) diff --git a/.dockerignore b/.dockerignore index 93505b42..2cb0b490 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,4 +5,6 @@ .venv .gitignore makefile -__pycache__ \ No newline at end of file +__pycache__ +tests +.vscode diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml deleted file mode 100644 index d0c3e48b..00000000 --- a/.github/workflows/integ-test.yaml +++ /dev/null @@ -1,58 +0,0 @@ -name: Run CPU Integration Tests - -on: - #push: - # paths-ignore: - # - 'README.md' - # branches: - # - main - #pull_request: - workflow_dispatch: - -env: - ACTIONS_RUNNER_DEBUG: true - ACTIONS_STEP_DEBUG: true - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - pytorch-integration-local: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - HF_HOME: /mnt/hf_cache/ - HF_HUB_CACHE: /mnt/hf_cache/hub - steps: - - uses: actions/checkout@v4.1.1 - - name: Docker Setup Buildx - uses: docker/setup-buildx-action@v3.0.0 - - name: Docker Build - run: make inference-pytorch-cpu - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Install tox & uv - run: pip install uv tox - - name: Run local integration tests - run: tox -e torch-integration-local-cpu -- -n 4 - pytorch-integration-remote: - runs-on: [single-gpu, nvidia-gpu, t4, ci] - env: - AWS_REGION: us-east-1 - steps: - - uses: actions/checkout@v4.1.1 - - name: Docker Setup Buildx - uses: docker/setup-buildx-action@v3.0.0 - - name: Docker Build - run: make inference-pytorch-cpu - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Install tox & uv - run: pip install uv tox - - name: Run remote integration tests - run: tox -e torch-integration-remote-cpu -- -n 4 \ No newline at end of file diff --git a/.github/workflows/integration_test.yaml b/.github/workflows/integration-test-action.yaml similarity index 100% rename from .github/workflows/integration_test.yaml rename to .github/workflows/integration-test-action.yaml diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/integration-test.yaml similarity index 81% rename from .github/workflows/gpu-integ-test.yaml rename to .github/workflows/integration-test.yaml index 4c151a99..5f5e03e3 100644 --- a/.github/workflows/gpu-integ-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -16,26 +16,26 @@ concurrency: jobs: pytorch-integration-local-gpu: name: Local Integration Tests - GPU - uses: ./.github/workflows/integration_test.yaml + uses: ./.github/workflows/integration-test-action.yaml with: test_path: "tests/integ/test_pytorch_local_gpu.py" build_img_cmd: "make inference-pytorch-gpu" pytorch-integration-remote-gpu: name: Remote Integration Tests - GPU - uses: ./.github/workflows/integration_test.yaml + uses: ./.github/workflows/integration-test-action.yaml with: test_path: "tests/integ/test_pytorch_remote_gpu.py" build_img_cmd: "make inference-pytorch-gpu" 
pytorch-integration-remote-cpu: name: Remote Integration Tests - CPU - uses: ./.github/workflows/integration_test.yaml + uses: ./.github/workflows/integration-test-action.yaml with: test_path: "tests/integ/test_pytorch_remote_cpu.py" build_img_cmd: "make inference-pytorch-cpu" runs_on: "['ci']" pytorch-integration-local-cpu: name: Local Integration Tests - CPU - uses: ./.github/workflows/integration_test.yaml + uses: ./.github/workflows/integration-test-action.yaml with: test_path: "tests/integ/test_pytorch_local_cpu.py" build_img_cmd: "make inference-pytorch-cpu" diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index fb34bfeb..09929fde 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -1,12 +1,12 @@ name: Quality Check on: - #push: - # paths-ignore: - # - 'README.md' - # branches: - # - main - #pull_request: + push: + paths-ignore: + - 'README.md' + branches: + - main + pull_request: workflow_dispatch: concurrency: diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index a3432399..1173dc7a 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -1,12 +1,12 @@ name: Run Unit-Tests on: - #push: - # paths-ignore: - # - 'README.md' - # branches: - # - main - #pull_request: + push: + paths-ignore: + - 'README.md' + branches: + - main + pull_request: workflow_dispatch: env: @@ -22,17 +22,25 @@ jobs: runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 + CACHE_TEST_DIR: /mnt/hf_cache/hf-inference-toolkit-tests steps: - uses: actions/checkout@v4.1.1 + - name: Copy unit tests to cache mount + run: | + rm -rf ${{ env.CACHE_TEST_DIR }} && \ + mkdir ${{ env.CACHE_TEST_DIR }} && \ + cp tests/unit ${{ env.CACHE_TEST_DIR }} - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build run: make inference-pytorch-gpu - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Install tox & uv - run: pip install uv tox - name: Run unit tests - run: tox -e unit-torch-docker \ No newline at end of file + env: + RUN_SLOW: True + run: | + docker run \ + -v ./tests:${{ env.CACHE_TEST_DIR }} \ + --entrypoint /bin/bash \ + integration-test-pytorch:gpu \ + -c "source .venv/bin/activate && pip install '.[test]' && pytest ${{ env.CACHE_TEST_DIR }}" + \ No newline at end of file diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile index 97127b4e..28a188d2 100644 --- a/dockerfiles/pytorch/Dockerfile +++ b/dockerfiles/pytorch/Dockerfile @@ -9,8 +9,6 @@ ENV DEBIAN_FRONTEND=noninteractive WORKDIR /app -COPY . . - RUN apt-get update && \ apt-get install software-properties-common -y && \ add-apt-repository ppa:deadsnakes/ppa && \ @@ -35,6 +33,8 @@ RUN apt-get update && \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} +COPY . . + # install wheel and setuptools RUN python3 -m venv .venv && \ source .venv/bin/activate && \ @@ -44,9 +44,6 @@ RUN python3 -m venv .venv && \ COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py -#unit tests - tmp dir gets removed in entrypoint.sh -COPY . 
/tmp/hf-inference-test - # copy entrypoint and change permissions COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh From 3996bd46ad9e6e7b46cd68340554d0632b018b62 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 09:52:44 +0000 Subject: [PATCH 159/173] -r --- .github/workflows/unit-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 1173dc7a..3601450c 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -29,7 +29,7 @@ jobs: run: | rm -rf ${{ env.CACHE_TEST_DIR }} && \ mkdir ${{ env.CACHE_TEST_DIR }} && \ - cp tests/unit ${{ env.CACHE_TEST_DIR }} + cp -r tests/unit ${{ env.CACHE_TEST_DIR }} - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build From 7251139d71dd8e27fc3aa968d0a7730f37ff056a Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 09:53:28 +0000 Subject: [PATCH 160/173] ignore --- .github/workflows/integration-test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 5f5e03e3..7aa1aa2f 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -4,6 +4,8 @@ on: push: paths-ignore: - 'README.md' + - '.github/workflows/unit-test.yaml' + - '.github/workflows/quality.yaml' branches: - main pull_request: From 819cd33ced8c100c1af04a8481cb3a2f90116d25 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 10:12:39 +0000 Subject: [PATCH 161/173] path --- .github/workflows/unit-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 3601450c..551453ad 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -29,7 +29,7 @@ jobs: run: | rm -rf ${{ env.CACHE_TEST_DIR }} && \ mkdir ${{ env.CACHE_TEST_DIR }} && \ - cp -r tests/unit ${{ env.CACHE_TEST_DIR }} + cp -r tests ${{ env.CACHE_TEST_DIR }} - name: Docker Setup Buildx uses: docker/setup-buildx-action@v3.0.0 - name: Docker Build @@ -42,5 +42,5 @@ jobs: -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ - -c "source .venv/bin/activate && pip install '.[test]' && pytest ${{ env.CACHE_TEST_DIR }}" + -c "source .venv/bin/activate && pip install '.[test]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" \ No newline at end of file From b11741f6277fa60117fe6764ec65c9ad8bada36d Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 11:12:34 +0000 Subject: [PATCH 162/173] cache --- .github/workflows/unit-test.yaml | 4 ++-- makefile | 6 +++--- tests/unit/conftest.py | 7 +++++++ tests/unit/test_serializer.py | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 tests/unit/conftest.py diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 551453ad..cf18e6e0 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -23,6 +23,7 @@ jobs: env: AWS_REGION: us-east-1 CACHE_TEST_DIR: /mnt/hf_cache/hf-inference-toolkit-tests + RUN_SLOW: True steps: - uses: actions/checkout@v4.1.1 - name: Copy unit tests to cache mount @@ -35,10 +36,9 @@ jobs: - name: Docker Build run: make inference-pytorch-gpu - name: Run unit tests - env: - RUN_SLOW: True run: | docker run \ + -e RUN_SLOW='${{ env.RUN_SLOW }}' -v ./tests:${{ 
env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ diff --git a/makefile b/makefile index ab1961a7..84cfe0c0 100644 --- a/makefile +++ b/makefile @@ -5,10 +5,10 @@ check_dirs := src # run tests unit-test: - python3 -m pytest -s -v ./tests/unit + python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='DEBUG' -integ-test: - python3 -m pytest -s -v ./tests/integ/ +integ-test: d + python3 -m pytest -s -v tests/integ/ # Check that source code meets quality standards diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 00000000..ddba0442 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,7 @@ +import os +import logging +import pytest + +@pytest.fixture(scope = "session") +def cache_test_dir(): + yield os.environ.get("CACHE_TEST_DIR", "./tests") \ No newline at end of file diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py index 98e528e5..8488347d 100644 --- a/tests/unit/test_serializer.py +++ b/tests/unit/test_serializer.py @@ -3,9 +3,13 @@ import numpy as np import pytest import os -from huggingface_inference_toolkit.serialization import Jsoner, Audioer, Imager +from huggingface_inference_toolkit.serialization import ( + Jsoner, + Audioer, + Imager +) from PIL import Image - +import logging def test_json_serialization(): t = {"res": np.array([2.0]), "text": "I like you.", "float": 1.2} @@ -30,9 +34,10 @@ def test_json_deserialization(): raw_content = b'{\n\t"inputs": "i like you"\n}' assert {"inputs": "i like you"} == Jsoner.deserialize(raw_content) +@pytest.mark.usefixtures('cache_test_dir') +def test_image_deserialization(cache_test_dir): -def test_image_deserialization(): - image_files_path = os.path.join(os.getcwd(), "tests/resources/image") + image_files_path = f"{cache_test_dir}/resources/image" for image_file in os.listdir(image_files_path): image_bytes = open(os.path.join(image_files_path, image_file), "rb").read() @@ -41,9 +46,10 @@ def test_image_deserialization(): assert isinstance(decoded_data, dict) assert isinstance(decoded_data["inputs"], Image.Image) +@pytest.mark.usefixtures('cache_test_dir') +def test_audio_deserialization(cache_test_dir): -def test_audio_deserialization(): - audio_files_path = os.path.join(os.getcwd(), "tests/resources/audio") + audio_files_path = f"{cache_test_dir}/resources/audio" for audio_file in os.listdir(audio_files_path): audio_bytes = open(os.path.join(audio_files_path, audio_file), "rb").read() From e8cab4be2fb823aace69e5285bc4ca1e1a345bd7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 11:24:13 +0000 Subject: [PATCH 163/173] backslash --- .github/workflows/unit-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index cf18e6e0..bcbbeca0 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -38,7 +38,7 @@ jobs: - name: Run unit tests run: | docker run \ - -e RUN_SLOW='${{ env.RUN_SLOW }}' + -e RUN_SLOW='${{ env.RUN_SLOW }}' \ -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ From 35f92bc87ac021933a4af22086228ff82dd6a7c2 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 11:25:06 +0000 Subject: [PATCH 164/173] st, diffusers --- .github/workflows/unit-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index bcbbeca0..c50a1da1 100644 --- 
a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -42,5 +42,5 @@ jobs: -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ - -c "source .venv/bin/activate && pip install '.[test]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" + -c "source .venv/bin/activate && pip install '.[test, st, diffusers]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" \ No newline at end of file From 00503c378b4b225affe014d77059f76cc3c978f0 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 11:45:19 +0000 Subject: [PATCH 165/173] cache test dir --- .github/workflows/unit-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index c50a1da1..f1e294f8 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -39,6 +39,7 @@ jobs: run: | docker run \ -e RUN_SLOW='${{ env.RUN_SLOW }}' \ + -e CACHE_TEST_DIR='${{ env.CACHE_TEST_DIR }}' \ -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ From b34d99101356fe8a795c9cca3783f0af18f4235f Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 11:59:14 +0000 Subject: [PATCH 166/173] gpus --- .github/workflows/unit-test.yaml | 1 + tests/unit/test_utils.py | 10 ++-------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index f1e294f8..9e59915d 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -39,6 +39,7 @@ jobs: run: | docker run \ -e RUN_SLOW='${{ env.RUN_SLOW }}' \ + --gpus all \ -e CACHE_TEST_DIR='${{ env.CACHE_TEST_DIR }}' \ -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 856824a1..c5ee1028 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -124,7 +124,7 @@ def test_get_pipeline(): @require_torch -def test_whisper_long_audio(): +def test_whisper_long_audio(cache_test_dir): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( repository_id = "openai/whisper-tiny", @@ -140,13 +140,7 @@ def test_whisper_long_audio(): model_dir = storage_dir.as_posix(), framework = "safetensors" ) - res = pipe( - os.path.join( - os.getcwd(), - "tests/resources/audio", - "long_sample.mp3" - ) - ) + res = pipe(f"{cache_test_dir}/resources/audio/long_sample.mp3") assert len(res["text"]) > 700 From d8a60d1beb45386c3b1e61bee51c89f4c93725e4 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 12:07:56 +0000 Subject: [PATCH 167/173] custom pipeline path --- tests/unit/test_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index c5ee1028..53c9cada 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -203,8 +203,8 @@ def test_wrapped_pipeline(): assert "error" not in res[-1]["content"] -def test_local_custom_pipeline(): - model_dir = os.path.join(os.getcwd(), "tests/resources/custom_handler") +def test_local_custom_pipeline(cache_test_dir): + model_dir = f"{cache_test_dir}/resources/custom_handler" pipeline = check_and_register_custom_pipeline_from_directory(model_dir) payload = "test" assert pipeline.path == model_dir @@ -214,7 +214,9 @@ def test_local_custom_pipeline(): def test_remote_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = 
_load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = check_and_register_custom_pipeline_from_directory(str(storage_dir)) payload = "test" @@ -225,7 +227,9 @@ def test_remote_custom_pipeline(): def test_get_inference_handler_either_custom_or_default_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = get_inference_handler_either_custom_or_default_handler(str(storage_dir)) payload = "test" From 5b55a66c23902e6a793dd8f86e33570f7fd18d0d Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 15:29:36 +0000 Subject: [PATCH 168/173] fix --- .../workflows/integration-test-action.yaml | 4 +- .github/workflows/unit-test.yaml | 2 +- .gitignore | 4 +- README.md | 49 ------------------- dockerfiles/pytorch/Dockerfile | 8 ++- pyproject.toml | 1 + scripts/entrypoint.sh | 3 -- setup.py | 5 +- src/huggingface_inference_toolkit/utils.py | 22 ++++----- 9 files changed, 25 insertions(+), 73 deletions(-) diff --git a/.github/workflows/integration-test-action.yaml b/.github/workflows/integration-test-action.yaml index d262ed8c..2e3479fc 100644 --- a/.github/workflows/integration-test-action.yaml +++ b/.github/workflows/integration-test-action.yaml @@ -39,7 +39,7 @@ on: runs_on: type: string required: false - default: "['single-gpu', 'nvidia-gpu', 't4', 'ci']" + default: '["single-gpu", "nvidia-gpu", "t4", "ci"]' jobs: pytorch-integration-tests: @@ -60,7 +60,7 @@ jobs: with: python-version: 3.11 - name: Install dependencies - run: pip install -e ".[torch, test]" + run: pip install ".[torch, test]" - name: Run local integration tests run: | python -m pytest \ diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 9e59915d..a15cca96 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -44,5 +44,5 @@ jobs: -v ./tests:${{ env.CACHE_TEST_DIR }} \ --entrypoint /bin/bash \ integration-test-pytorch:gpu \ - -c "source .venv/bin/activate && pip install '.[test, st, diffusers]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" + -c "pip install '.[test, st, diffusers]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 788b0ea1..bb0c387b 100644 --- a/.gitignore +++ b/.gitignore @@ -177,4 +177,6 @@ cython_debug/ .sagemaker model tests/tmp -tmp/ \ No newline at end of file +tmp/ +act.sh +.act \ No newline at end of file diff --git a/README.md b/README.md index 52ef20f1..92346440 100644 --- a/README.md +++ b/README.md @@ -195,55 +195,6 @@ Below you ll find a list of supported and tested transformers and sentence trans * Unit Testing: `make unit-test` * Integration testing: `make integ-test` -#### Testing with Tox - -#### Unit Testing - -* Install `tox` -* From a Linux terminal, run: -```bash -tox -e unit-torch -# Or -tox -e unit-tensorflow -``` -* You can increase the degree of test parallelism by passing `-n`: -```bash -tox -e unit-torch -- -n 4 -``` - -#### Integration Testing - -* There are two types of integration tests: **local** and **remote** -* **Local** tests simulate a scenario where users bring their own model which was previously downloaded and stored externally -* **Remote** tests simulate a 
scenario where models are download on the fly, as part of container startup - -##### Local Integration Testing - -* Build the relevant docker image -* To run local integration tests, before running `tox`, we need to create a mount point which will store model artifacts. Example: - -```bash -sudo mount --bind /home/ubuntu/.cache/huggingface/ /mnt/hf_cache/ -``` - -* Make sure that permissions are sufficient for the mount point you created -* Then, run: -```bash -tox -e torch-integration-local-gpu -# Or -tox -e torch-integration-local-cpu -``` - -##### Remote Integration Testing - -* Build the relevant docker image -* From a Linux terminal, run: -```bash -tox -e torch-integration-remote-gpu -#Or -tox -e torch-integration-remote-cpu -``` - --- ## 📜 License diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile index 28a188d2..ebb39247 100644 --- a/dockerfiles/pytorch/Dockerfile +++ b/dockerfiles/pytorch/Dockerfile @@ -25,8 +25,8 @@ RUN apt-get update && \ cmake \ libprotobuf-dev \ protobuf-compiler \ - python3-venv \ python3-dev \ + python3-pip \ python3.11 \ libsndfile1-dev \ ffmpeg \ @@ -36,9 +36,7 @@ RUN apt-get update && \ COPY . . # install wheel and setuptools -RUN python3 -m venv .venv && \ - source .venv/bin/activate && \ - pip install --no-cache-dir -U pip -e ".[torch, st, diffusers]" +RUN pip install --no-cache-dir -U pip ".[torch, st, diffusers]" # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit @@ -48,4 +46,4 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh -ENTRYPOINT ["bash", "-c", "source .venv/bin/activate && ./entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 56184a96..a692967f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ lint.select = [ "B", # flake8-bugbear ] lint.ignore = [ + "E501", # Line length (handled by ruff-format) "B008", # do not perform function calls in argument defaults "C901", # too complex ] diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 60f96f2b..8544a63c 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,8 +1,5 @@ # /bin/bash -#cleanup tempdir -rm -rf /tmp/hf-inference-test && rm -rf /app/tests - # check if HF_MODEL_DIR is set and if not skip installing custom dependencies if [[ ! 
-z "${HF_MODEL_DIR}" ]]; then # check if requirements.txt exists and if so install dependencies diff --git a/setup.py b/setup.py index 768ce70c..bdd64fba 100644 --- a/setup.py +++ b/setup.py @@ -20,11 +20,14 @@ "transformers[sklearn,sentencepiece, audio, vision]==4.38.1", "huggingface_hub==0.20.3", "orjson", + # vision "Pillow", "librosa", + # speech + torchaudio "pyctcdecode>=0.3.0", "phonemizer", "ffmpeg", + # web api "starlette", "uvicorn", "pandas" @@ -32,7 +35,7 @@ extras = {} -extras["st"] = ["sentence_transformers==2.3.1"] +extras["st"] = ["sentence_transformers==2.4.0"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] extras["tensorflow"] = ["tensorflow"] diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index bd267c8e..6c7f707e 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -75,11 +75,11 @@ def wrap_conversation_pipeline(pipeline): """ def wrapped_pipeline(inputs, *args, **kwargs): - logging.info(f"Inputs: {inputs}") - logging.info(f"Args: {args}") - logging.info(f"KWArgs: {kwargs}") + logger.info(f"Inputs: {inputs}") + logger.info(f"Args: {args}") + logger.info(f"KWArgs: {kwargs}") prediction = pipeline(inputs, *args, **kwargs) - logging.info(f"Prediction: {prediction}") + logger.info(f"Prediction: {prediction}") return list(prediction) @@ -151,7 +151,7 @@ def _load_repository_from_hf( # create regex to only include the framework specific weights ignore_regex = create_artifact_filter(framework) - logging.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") + logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") # Download the repository to the workdir and filter out non-framework # specific weights @@ -174,7 +174,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): custom_module = Path(model_dir).joinpath(HF_DEFAULT_PIPELINE_NAME) legacy_module = Path(model_dir).joinpath("pipeline.py") if custom_module.is_file(): - logging.info(f"Found custom pipeline at {custom_module}") + logger.info(f"Found custom pipeline at {custom_module}") spec = importlib.util.spec_from_file_location(HF_MODULE_NAME, custom_module) if spec: # add the whole directory to path for submodlues @@ -187,7 +187,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): custom_pipeline = handler.EndpointHandler(model_dir) elif legacy_module.is_file(): - logging.warning( + logger.warning( """You are using a legacy custom pipeline. Please update to the new format. See documentation for more information.""" @@ -203,7 +203,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): # init custom handler with model_dir custom_pipeline = pipeline.PreTrainedPipeline(model_dir) else: - logging.info(f"No custom pipeline found at {custom_module}") + logger.info(f"No custom pipeline found at {custom_module}") custom_pipeline = None return custom_pipeline @@ -213,7 +213,7 @@ def get_device(): The get device function will return the device for the DL Framework. 
""" gpu = _is_gpu_available() - logging.info(f"GPU Available: {gpu}") + logger.info(f"GPU Available: {gpu}") if gpu: return 0 @@ -231,7 +231,7 @@ def get_pipeline( create pipeline class for a specific task based on local saved model """ device = get_device() - logging.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") + logger.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") if task is None: raise EnvironmentError( @@ -254,7 +254,7 @@ def get_pipeline( kwargs["tokenizer"] = model_dir if is_optimum_available(): - logging.info("Optimum is not implemented yet using default pipeline.") + logger.info("Optimum is not implemented yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) elif is_sentence_transformers_available() and task in [ "sentence-similarity", From 088a2d892014127b55c4370a6c5083d1f2886894 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 16:16:27 +0000 Subject: [PATCH 169/173] payload --- makefile | 2 +- src/huggingface_inference_toolkit/utils.py | 2 +- tests/unit/test_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index 84cfe0c0..80acc2e1 100644 --- a/makefile +++ b/makefile @@ -7,7 +7,7 @@ check_dirs := src unit-test: python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='DEBUG' -integ-test: d +integ-test: python3 -m pytest -s -v tests/integ/ # Check that source code meets quality standards diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 6c7f707e..43948b82 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -82,7 +82,6 @@ def wrapped_pipeline(inputs, *args, **kwargs): logger.info(f"Prediction: {prediction}") return list(prediction) - return wrapped_pipeline @@ -285,6 +284,7 @@ def get_pipeline( # wrap specific pipeline to support better ux if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) + elif task == "automatic-speech-recognition" and isinstance( hf_pipeline.model, WhisperForConditionalGeneration diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 53c9cada..79cff93d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -197,7 +197,7 @@ def test_wrapped_pipeline(): "content": "Can you explain why?" 
} ] - res = conv_pipe(data) + res = conv_pipe(data, max_new_tokens = 100) logging.info(f"Response: {res}") assert res[-1]["role"] == "assistant" assert "error" not in res[-1]["content"] From c628acb85a9733661dc2ad2823b3b4dbe4ad3285 Mon Sep 17 00:00:00 2001 From: Rafael Pierre Date: Wed, 28 Feb 2024 16:33:06 +0000 Subject: [PATCH 170/173] final comments --- makefile | 2 +- src/huggingface_inference_toolkit/utils.py | 1 - tests/unit/test_serializer.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/makefile b/makefile index 80acc2e1..a9490428 100644 --- a/makefile +++ b/makefile @@ -5,7 +5,7 @@ check_dirs := src # run tests unit-test: - python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='DEBUG' + RUN_SLOW=True python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='ERROR' integ-test: python3 -m pytest -s -v tests/integ/ diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index 43948b82..1570317b 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -212,7 +212,6 @@ def get_device(): The get device function will return the device for the DL Framework. """ gpu = _is_gpu_available() - logger.info(f"GPU Available: {gpu}") if gpu: return 0 diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py index 8488347d..07dfd5c1 100644 --- a/tests/unit/test_serializer.py +++ b/tests/unit/test_serializer.py @@ -9,7 +9,6 @@ Imager ) from PIL import Image -import logging def test_json_serialization(): t = {"res": np.array([2.0]), "text": "I like you.", "float": 1.2} From 50bea980146888c1671d3babd9f36ceb1ea7711c Mon Sep 17 00:00:00 2001 From: Rafael Pierre <159796999+rafaelpierrehf@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:40:12 +0100 Subject: [PATCH 171/173] Update dockerfiles/pytorch/Dockerfile Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com> --- dockerfiles/pytorch/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile index ebb39247..a0c7b0c4 100644 --- a/dockerfiles/pytorch/Dockerfile +++ b/dockerfiles/pytorch/Dockerfile @@ -32,7 +32,7 @@ RUN apt-get update && \ ffmpeg \ && apt-get clean autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log} - +# Copying only necessary files as filtered by .dockerignore COPY . . 
# install wheel and setuptools From 0b93a74bae1376cfa35081047762a899b11041ce Mon Sep 17 00:00:00 2001 From: Rafael Pierre <159796999+rafaelpierrehf@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:40:38 +0100 Subject: [PATCH 172/173] Update README.md Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com> --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 92346440..f2f66b40 100644 --- a/README.md +++ b/README.md @@ -188,7 +188,6 @@ Below you ll find a list of supported and tested transformers and sentence trans * We recommend `pyenv` for easily switching between different Python versions * There are two options for unit and integration tests: * `Make` - see `makefile` - * `tox` - see `tox.ini` #### Testing with Make From 0096a3ee4b18cad193ea0a8416f772bea2fe07a7 Mon Sep 17 00:00:00 2001 From: Rafael Pierre <159796999+rafaelpierrehf@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:41:00 +0100 Subject: [PATCH 173/173] Update dockerfiles/pytorch/Dockerfile Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com> --- dockerfiles/pytorch/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile index a0c7b0c4..8e4c4d35 100644 --- a/dockerfiles/pytorch/Dockerfile +++ b/dockerfiles/pytorch/Dockerfile @@ -43,7 +43,6 @@ COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py # copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh +COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file
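
(For reference: the reusable workflow that these patches converge on, `.github/workflows/integration-test-action.yaml`, can be called from any other workflow in the repository via `workflow_call`. The sketch below is hypothetical and not part of the patch series; the nightly trigger, job name, and overridden input values are illustrative assumptions. Note that `runs_on` is passed as a JSON-array string, because the reusable workflow resolves it with `fromJson(inputs.runs_on)`.)

```yaml
# Hypothetical caller workflow (illustrative only; not part of this patch series).
name: Nightly Integration Tests

on:
  schedule:
    - cron: "0 3 * * *"  # assumed schedule, purely for illustration

jobs:
  pytorch-nightly-local-gpu:
    # Reuse the shared integration-test workflow introduced in these patches.
    uses: ./.github/workflows/integration-test-action.yaml
    with:
      test_path: "tests/integ/test_pytorch_local_gpu.py"
      build_img_cmd: "make inference-pytorch-gpu"
      test_parallelism: "2"  # all inputs are typed as strings in the workflow definition
      log_level: "INFO"
      runs_on: '["single-gpu", "nvidia-gpu", "t4", "ci"]'  # JSON array string, parsed via fromJson()
```

(Inputs left unset fall back to the defaults declared under `workflow_call`, e.g. `region: "us-east-1"` and `hf_home: "/mnt/hf_cache/"`, which is why the four jobs in `integration-test.yaml` only need to override `test_path`, `build_img_cmd`, and, for the CPU variants, `runs_on`.)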