diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml
index 8b1cbf69..031207c0 100644
--- a/.github/workflows/build-container.yaml
+++ b/.github/workflows/build-container.yaml
@@ -18,40 +18,36 @@ jobs:
   starlette-pytorch-cpu:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
-      context: ./
-      repository: huggingface
-      image: hf-endpoints-inference-pytorch-cpu
-      dockerfile: dockerfiles/starlette/pytorch/Dockerfile.cpu
+      image: inference-pytorch-cpu
+      dockerfile: dockerfiles/pytorch/cpu/Dockerfile
     secrets:
-      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
-      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
+      REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
+      REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
   starlette-pytorch-gpu:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
-      context: ./
-      repository: huggingface
-      image: hf-endpoints-inference-pytorch-gpu
-      dockerfile: dockerfiles/starlette/pytorch/Dockerfile.gpu
+      image: inference-pytorch-gpu
+      dockerfile: dockerfiles/pytorch/gpu/Dockerfile
     secrets:
-      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
-      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
+      REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
+      REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
   starlette-tensorflow-cpu:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
-      context: ./
-      repository: huggingface
-      image: hf-endpoints-inference-tensorflow-cpu
-      dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.cpu
+      image: inference-tensorflow-cpu
+      dockerfile: dockerfiles/tensorflow/cpu/Dockerfile
     secrets:
-      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
-      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
+      REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
+      REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
   starlette-tensorflow-gpu:
     uses: ./.github/workflows/docker-build-action.yaml
    with:
-      context: ./
-      repository: huggingface
-      image: hf-endpoints-inference-tensorflow-gpu
-      dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.gpu
+      image: inference-tensorflow-gpu
+      dockerfile: dockerfiles/tensorflow/gpu/Dockerfile
     secrets:
-      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
-      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
+      REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
+      REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
diff --git a/.github/workflows/docker-build-action.yaml b/.github/workflows/docker-build-action.yaml
index 9c3af40a..eae64905 100644
--- a/.github/workflows/docker-build-action.yaml
+++ b/.github/workflows/docker-build-action.yaml
@@ -3,17 +3,15 @@ on:
     inputs:
       context:
         type: string
-        required: true
+        required: false
+        default: "./"
       repository:
         type: string
-        required: true
+        required: false
+        default: "registry.internal.huggingface.tech/hf-endpoints"
       image:
         type: string
         required: true
-      platforms:
-        type: string
-        required: false
-        default: linux/amd64
       build_args:
         type: string
         required: false
@@ -23,9 +21,11 @@
         required: false
         default: "Dockerfile"
     secrets:
-      DOCKER_USERNAME:
+      REGISTRY_USERNAME:
+        required: true
+      REGISTRY_PASSWORD:
         required: true
-      DOCKER_PASSWORD:
+      TAILSCALE_AUTHKEY:
         required: true
 
 jobs:
@@ -35,17 +35,23 @@
       - name: Check out
        uses: actions/checkout@v3
 
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+      - name: Tailscale
+        uses: tailscale/github-action@v1
+        with:
+          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
+          version: 1.38.2
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2.0.0
+        with:
+          install: true
 
       - name: Login to container registry
         uses: docker/login-action@v2.0.0
         with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
+          registry: ${{ inputs.repository }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
 
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4
@@ -57,4 +63,4 @@
           context: ${{ inputs.context }}
           build-args: ${{ inputs.build_args }}
           file: ${{ inputs.context }}/${{ inputs.dockerfile }}
-          tags: ${{ inputs.repository }}/${{ inputs.image }}:${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest,
+          tags: ${{ inputs.repository }}/${{ inputs.image }}:${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest
\ No newline at end of file
diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index 42c9e169..ede153ea 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -64,7 +64,7 @@ jobs:
       - name: Install Python dependencies
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
-        run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
+        run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
       - name: Run Integration Tests
         run: RUN_SLOW=True make integ-test
   tensorflow-integration-test:
@@ -86,7 +86,7 @@ jobs:
       - name: Install Python dependencies
         run: pip install -e .[test,dev,tensorflow]
       - name: Build Docker
-        run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
+        run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile .
       - name: Run Integration Tests
         run: RUN_SLOW=True make integ-test
 
diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml
index e77d4a32..f6f6bba0 100644
--- a/.github/workflows/integ-test.yaml
+++ b/.github/workflows/integ-test.yaml
@@ -24,7 +24,7 @@ jobs:
       - name: Install Python dependencies
         run: pip install -e .[test,dev,torch]
       - name: Build Docker
-        run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu .
+        run: docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile .
       - name: Run Integration Tests
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -42,7 +42,7 @@ jobs:
       - name: Install Python dependencies
         run: pip install -e .[test,dev,tensorflow]
       - name: Build Docker
-        run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu .
+        run: docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile .
       - name: Run Integration Tests
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
diff --git a/README.md b/README.md
index 63c2b568..52b664a8 100644
--- a/README.md
+++ b/README.md
@@ -24,14 +24,14 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK
 _cpu images_
 
 ```bash
-docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu .
-docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu .
+docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile .
+docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile .
 ```
 
 _gpu images_
 ```bash
-docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
-docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
+docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile .
+docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile .
 ```
 
 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored.
@@ -39,6 +39,7 @@ docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/D
 
 ```bash
 docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu
+docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository starlette-transformers:cpu
 ```
 
 
@@ -56,6 +57,7 @@ curl --request POST \
 }'
 ```
 
+
 ---
 
 ## 🛠️ Environment variables
diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile
new file mode 100644
index 00000000..551bfa9a
--- /dev/null
+++ b/dockerfiles/pytorch/cpu/Dockerfile
@@ -0,0 +1,52 @@
+FROM ubuntu:22.04
+
+LABEL maintainer="Hugging Face"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
+    && apt-get install -y \
+    bzip2 \
+    curl \
+    git \
+    git-lfs \
+    tar \
+    gcc \
+    g++ \
+    # audio
+    libsndfile1-dev \
+    ffmpeg \
+    && apt-get clean autoremove --yes \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}
+
+# install micromamba
+ENV MAMBA_ROOT_PREFIX=/opt/conda
+ENV PATH=/opt/conda/bin:$PATH
+RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
+
+WORKDIR /app
+
+# install base python dependencies
+COPY dockerfiles/pytorch/cpu/environment.yaml /app/environment.yaml
+RUN micromamba install -y -n base -f environment.yaml \
+    && rm environment.yaml \
+    && micromamba clean --all --yes
+
+# install huggingface inference toolkit
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+
+# copy application
+COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
+COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
+
+# copy entrypoint and change permissions
+COPY scripts/entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+# run app
+ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
diff --git a/dockerfiles/pytorch/cpu/environment.yaml b/dockerfiles/pytorch/cpu/environment.yaml
new file mode 100644
index 00000000..4c3efe47
--- /dev/null
+++ b/dockerfiles/pytorch/cpu/environment.yaml
@@ -0,0 +1,12 @@
+name: base
+channels:
+- conda-forge
+dependencies:
+- python=3.9.13
+- pytorch::pytorch=1.13.1=py3.9_cpu_0
+- pip:
+  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
+  - sentence_transformers==2.2.2
+  - torchvision==0.14.1
+  - diffusers==0.14.0
+  - accelerate==0.17.1
\ No newline at end of file
diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile
new file mode 100644
index 00000000..3db0d4b0
--- /dev/null
+++ b/dockerfiles/pytorch/gpu/Dockerfile
@@ -0,0 +1,54 @@
+FROM nvidia/cuda:11.7.0-devel-ubuntu22.04
+
+LABEL maintainer="Hugging Face"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
+    && apt-get install -y \
+    bzip2 \
+    curl \
+    git \
+    git-lfs \
+    tar \
+    gcc \
+    g++ \
+    # audio
+    libsndfile1-dev \
+    ffmpeg \
+    && apt-get clean autoremove --yes \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}
+
+# install micromamba
+ENV MAMBA_ROOT_PREFIX=/opt/conda
+ENV PATH=/opt/conda/bin:$PATH
+ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
+
+RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
+
+WORKDIR /app
+
+# install base python dependencies
+COPY dockerfiles/pytorch/gpu/environment.yaml /app/environment.yaml
+RUN micromamba install -y -n base -f environment.yaml \
+    && rm environment.yaml \
+    && micromamba clean --all --yes
+
+# install huggingface inference toolkit
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+
+# copy application
+COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
+COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
+
+# copy entrypoint and change permissions
+COPY scripts/entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+# run app
+ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml
new file mode 100644
index 00000000..e65a10bb
--- /dev/null
+++ b/dockerfiles/pytorch/gpu/environment.yaml
@@ -0,0 +1,13 @@
+name: base
+channels:
+- conda-forge
+dependencies:
+- python=3.9.13
+- nvidia::cudatoolkit=11.7
+- pytorch::pytorch=1.13.1=py3.9_cuda11.7*
+- pip:
+  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
+  - sentence_transformers==2.2.2
+  - torchvision==0.14.1
+  - diffusers==0.14.0
+  - accelerate==0.17.1
\ No newline at end of file
diff --git a/dockerfiles/robyn/Dockerfile.cpu b/dockerfiles/robyn/Dockerfile.cpu
deleted file mode 100644
index f7a884ce..00000000
--- a/dockerfiles/robyn/Dockerfile.cpu
+++ /dev/null
@@ -1,12 +0,0 @@
-FROM public.ecr.aws/t6m7g5n4/transformers-inference:4.20.1-pt1.11-cpu
-
-# install robyn framework
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_robyn.py webservice_robyn.py
-
-# run app
-CMD ["webservice_robyn.py"]
\ No newline at end of file
diff --git a/dockerfiles/robyn/Dockerfile.gpu b/dockerfiles/robyn/Dockerfile.gpu
deleted file mode 100644
index d0170a1f..00000000
--- a/dockerfiles/robyn/Dockerfile.gpu
+++ /dev/null
@@ -1,12 +0,0 @@
-FROM public.ecr.aws/t6m7g5n4/transformers-inference:4.20.1-pt1.11-cuda11.5
-
-# install robyn framework
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_robyn.py webservice_robyn.py
-
-# run app
-CMD ["webservice_robyn.py"]
\ No newline at end of file
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.cpu b/dockerfiles/starlette/pytorch/Dockerfile.cpu
deleted file mode 100644
index 7007c201..00000000
--- a/dockerfiles/starlette/pytorch/Dockerfile.cpu
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM huggingface/transformers-inference:4.24.0-pt1.13-cpu
-
-# install starlette framework
-COPY starlette_requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# Think about a better solution -> base contaienr has pt 1.13. thats why need below 0.14
-RUN pip install --no-cache-dir sentence_transformers=="2.2.2" torchvision=="0.14.0" diffusers=="0.9.0" accelerate=="0.14.0"
-
-# Add upgrade due to issue in base container upgrade https://github.com/mamba-org/mamba/issues/2170
-RUN pip install transformers==4.25.1 --no-cache-dir --upgrade
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
-
-
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu
deleted file mode 100644
index 6e585775..00000000
--- a/dockerfiles/starlette/pytorch/Dockerfile.gpu
+++ /dev/null
@@ -1,22 +0,0 @@
-FROM huggingface/transformers-inference:4.24.0-pt1.13-cuda11.6
-
-# install starlette framework
-COPY starlette_requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# Think about a better solution -> base contaienr has pt 1.13. thats why need below 0.14
-RUN pip install --no-cache-dir sentence_transformers=="2.2.2" torchvision=="0.14.0" diffusers=="0.9.0" accelerate=="0.14.0" --extra-index-url https://download.pytorch.org/whl/cu116
-
-# Add upgrade due to issue in base container upgrade https://github.com/mamba-org/mamba/issues/2170
-RUN pip install transformers==4.25.1 --no-cache-dir --upgrade
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/starlette/tensorflow/Dockerfile.cpu b/dockerfiles/starlette/tensorflow/Dockerfile.cpu
deleted file mode 100644
index e8061022..00000000
--- a/dockerfiles/starlette/tensorflow/Dockerfile.cpu
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM huggingface/transformers-inference:4.24.0-tf2.9-cpu
-
-# install starlette framework
-COPY starlette_requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/starlette/tensorflow/Dockerfile.gpu b/dockerfiles/starlette/tensorflow/Dockerfile.gpu
deleted file mode 100644
index 5ae9abd0..00000000
--- a/dockerfiles/starlette/tensorflow/Dockerfile.gpu
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM huggingface/transformers-inference:4.24.0-tf2.9-cuda11.2
-
-# install starlette framework
-COPY starlette_requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
-
-# copy application
-COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
-COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
-
-# copy entrypoint and change permissions
-COPY scripts/entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-# run app
-ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile
new file mode 100644
index 00000000..b0443845
--- /dev/null
+++ b/dockerfiles/tensorflow/cpu/Dockerfile
@@ -0,0 +1,52 @@
+FROM ubuntu:22.04
+
+LABEL maintainer="Hugging Face"
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
+    && apt-get install -y \
+    bzip2 \
+    curl \
+    git \
+    git-lfs \
+    tar \
+    gcc \
+    g++ \
+    # audio
+    libsndfile1-dev \
+    ffmpeg \
+    && apt-get clean autoremove --yes \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}
+
+# install micromamba
+ENV MAMBA_ROOT_PREFIX=/opt/conda
+ENV PATH=/opt/conda/bin:$PATH
+RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
+
+WORKDIR /app
+
+# install base python dependencies
+COPY dockerfiles/tensorflow/cpu/environment.yaml /app/environment.yaml
+RUN micromamba install -y -n base -f environment.yaml \
+    && rm environment.yaml \
+    && micromamba clean --all --yes
+
+# install huggingface inference toolkit
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+
+# copy application
+COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
+COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
+
+# copy entrypoint and change permissions
+COPY scripts/entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+# run app
+ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
diff --git a/dockerfiles/tensorflow/cpu/environment.yaml b/dockerfiles/tensorflow/cpu/environment.yaml
new file mode 100644
index 00000000..a370380c
--- /dev/null
+++ b/dockerfiles/tensorflow/cpu/environment.yaml
@@ -0,0 +1,8 @@
+name: base
+channels:
+- conda-forge
+dependencies:
+- python=3.9.13
+- tensorflow=2.9.1=*cpu*py39*
+- pip:
+  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile
new file mode 100644
index 00000000..785bcc79
--- /dev/null
+++ b/dockerfiles/tensorflow/gpu/Dockerfile
@@ -0,0 +1,55 @@
+FROM nvidia/cuda:11.2.2-base-ubuntu20.04
+
+LABEL maintainer="Hugging Face"
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV CONDA_OVERRIDE_CUDA="11.2"
+
+RUN apt-get update \
+    && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \
+    && apt-get install -y \
+    bzip2 \
+    curl \
+    git \
+    git-lfs \
+    tar \
+    gcc \
+    g++ \
+    # audio
+    libsndfile1-dev \
+    ffmpeg \
+    && apt-get clean autoremove --yes \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}
+
+# install micromamba
+ENV MAMBA_ROOT_PREFIX=/opt/conda
+ENV PATH=/opt/conda/bin:$PATH
+ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
+
+RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+    && touch /root/.bashrc \
+    && ./bin/micromamba shell init -s bash -p /opt/conda \
+    && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
+
+WORKDIR /app
+
+# install base python dependencies
+COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml
+RUN micromamba install -y -n base -f environment.yaml \
+    && rm environment.yaml \
+    && micromamba clean --all --yes
+
+# install huggingface inference toolkit
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
+
+# copy application
+COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
+COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
+
+# copy entrypoint and change permissions
+COPY scripts/entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+
+# run app
+ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml
new file mode 100644
index 00000000..1d886795
--- /dev/null
+++ b/dockerfiles/tensorflow/gpu/environment.yaml
@@ -0,0 +1,9 @@
+name: base
+channels:
+- conda-forge
+dependencies:
+- python=3.9.13
+- nvidia::cudatoolkit=11.7
+- tensorflow=2.9.1=*cuda112*py39*
+- pip:
+  - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
\ No newline at end of file
diff --git a/makefile b/makefile
index fec9bc02..49855723 100644
--- a/makefile
+++ b/makefile
@@ -13,12 +13,9 @@ integ-test:
 
 # Check that source code meets quality standards
 quality:
-	black --check --line-length 119 --target-version py36 $(check_dirs)
-	isort --check-only $(check_dirs)
-	flake8 src
+	ruff $(check_dirs)
 
 # Format source code automatically
-style:
-	black --line-length 119 --target-version py36 $(check_dirs)
-	isort $(check_dirs)
\ No newline at end of file
+style:
+	ruff $(check_dirs) --fix
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 211fdad8..96ef9084 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,34 @@
-[tool.black]
+[tool.mypy]
+ignore_missing_imports = true
+no_implicit_optional = true
+scripts_are_modules = true
+
+[tool.ruff]
+select = [
+    "E",  # pycodestyle errors
+    "W",  # pycodestyle warnings
+    "F",  # pyflakes
+    "I",  # isort
+    "C",  # flake8-comprehensions
+    "B",  # flake8-bugbear
+]
+ignore = [
+    "E501",  # line too long, handled by black
+    "B008",  # do not perform function calls in argument defaults
+    "C901",  # too complex
+]
+# Same as Black.
 line-length = 119
-target-version = ['py39']
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+# Assume Python 3.9.
+target-version = "py39" + +[tool.ruff.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.isort] +profile = "black" +known_third_party = ["transforemrs", "starlette", "huggingface_hub"] diff --git a/requirements.txt b/requirements.txt index 63c5474a..8a178f8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ orjson -robyn \ No newline at end of file +starlette +uvicorn +pandas +huggingface_hub>=0.13.2 \ No newline at end of file diff --git a/setup.py b/setup.py index c11fa81d..92132915 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ install_requires = [ # transformers "transformers[sklearn,sentencepiece]>=4.25.1", + "huggingface_hub>=0.13.3", # api stuff "orjson", # "robyn", @@ -54,6 +55,7 @@ "black", "isort", "flake8", + "ruff", ] setup( diff --git a/src/huggingface_inference_toolkit/async_utils.py b/src/huggingface_inference_toolkit/async_utils.py index 8b73beba..5b6af3fd 100644 --- a/src/huggingface_inference_toolkit/async_utils.py +++ b/src/huggingface_inference_toolkit/async_utils.py @@ -5,7 +5,6 @@ from anyio import Semaphore from typing_extensions import ParamSpec - # To not have too many threads running (which could happen on too many concurrent # requests, we limit it with a semaphore. MAX_CONCURRENT_THREADS = 1 diff --git a/src/huggingface_inference_toolkit/const.py b/src/huggingface_inference_toolkit/const.py index d7dc3688..993fea26 100644 --- a/src/huggingface_inference_toolkit/const.py +++ b/src/huggingface_inference_toolkit/const.py @@ -1,7 +1,6 @@ import os from pathlib import Path - HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model") HF_MODEL_ID = os.environ.get("HF_MODEL_ID", None) HF_TASK = os.environ.get("HF_TASK", None) diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 2bb27d81..36cdcb22 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -2,7 +2,6 @@ import json import os - _diffusers = importlib.util.find_spec("diffusers") is not None @@ -12,7 +11,6 @@ def is_diffusers_available(): if is_diffusers_available(): import torch - from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index e3fbb2e2..993e4967 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -1,16 +1,14 @@ import logging -from abc import ABC from pathlib import Path from typing import Optional, Union from huggingface_inference_toolkit.utils import check_and_register_custom_pipeline_from_directory, get_pipeline - logger = logging.getLogger(__name__) logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) -class HuggingFaceHandler(ABC): +class HuggingFaceHandler: """ A Default Hugging Face Inference Handler which works with all transformers pipelines, Sentence Transformers and Optimum. 
""" diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index a3f79ded..2a3c0055 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -1,6 +1,5 @@ import importlib.util - _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py index d3b936d9..eb965b64 100644 --- a/src/huggingface_inference_toolkit/serialization/base.py +++ b/src/huggingface_inference_toolkit/serialization/base.py @@ -2,7 +2,6 @@ from huggingface_inference_toolkit.serialization.image_utils import Imager from huggingface_inference_toolkit.serialization.json_utils import Jsoner - content_type_mapping = { "application/json": Jsoner, "text/csv": None, diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index e39c22af..a77dd655 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -1,13 +1,10 @@ import importlib.util import logging -import os import sys from pathlib import Path from typing import Optional, Union -from huggingface_hub import HfApi, login -from huggingface_hub.file_download import cached_download, hf_hub_url -from huggingface_hub.utils import filter_repo_objects +from huggingface_hub import login, snapshot_download from transformers import WhisperForConditionalGeneration, pipeline from transformers.file_utils import is_tf_available, is_torch_available from transformers.pipelines import Conversation, Pipeline @@ -23,7 +20,6 @@ is_sentence_transformers_available, ) - logger = logging.getLogger(__name__) logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) @@ -51,6 +47,11 @@ def is_optimum_available(): "flax": "flax*", "rust": "rust*", "onnx": "*onnx", + "safetensors": "*safetensors", + "coreml": "*mlmodel", + "tflite": "*tflite", + "savedmodel": "*tar.gz", + "ckpt": "*ckpt", } @@ -58,7 +59,18 @@ def create_artifact_filter(framework): """ Returns a list of regex pattern based on the DL Framework. 
     """
-    ignore_regex_list = ["pytorch*", "tf*", "flax*", "rust*", "*onnx"]
+    ignore_regex_list = [
+        "pytorch*",
+        "tf*",
+        "flax*",
+        "rust*",
+        "*onnx",
+        "*safetensors",
+        "*mlmodel",
+        "*tflite",
+        "*tar.gz",
+        "*ckpt",
+    ]
     pattern = framework2weight.get(framework, None)
     if pattern in ignore_regex_list:
         ignore_regex_list.remove(pattern)
@@ -147,46 +159,17 @@ def _load_repository_from_hf(
 
     # create regex to only include the framework specific weights
     ignore_regex = create_artifact_filter(framework)
+    logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
 
-    # get image artifact files
-    _api = HfApi()
-    repo_info = _api.repo_info(
-        repo_id=repository_id,
-        repo_type="model",
+    # Download the repository to the workdir and filter out non-framework specific weights
+    snapshot_download(
+        repository_id,
         revision=revision,
-    )
-    # apply regex to filter out non-framework specific weights if args.framework is set
-    filtered_repo_files = filter_repo_objects(
-        items=[f.rfilename for f in repo_info.siblings],
+        local_dir=str(target_dir),
+        local_dir_use_symlinks=False,
         ignore_patterns=ignore_regex,
     )
 
-    # iterate over all files and download them
-    for repo_file in filtered_repo_files:
-        url = hf_hub_url(repo_id=repository_id, filename=repo_file, revision=revision)
-
-        # define values if repo has nested strucutre
-        if isinstance(repo_file, str):
-            repo_file = Path(repo_file)
-
-        repo_file_is_dir = repo_file.parent != Path(".")
-        real_target_dir = target_dir / repo_file.parent if repo_file_is_dir else target_dir
-        real_file_name = str(repo_file.name) if repo_file_is_dir else repo_file
-
-        # download files
-        path = cached_download(
-            url,
-            cache_dir=real_target_dir.as_posix(),
-            force_filename=real_file_name,
-            use_auth_token=hf_hub_token,
-        )
-        if os.path.exists(path + ".lock"):
-            os.remove(path + ".lock")
-
-    # create requirements.txt if not exists
-    if not (target_dir / "requirements.txt").exists():
-        target_dir.joinpath("requirements.txt").touch()
-
     return target_dir
 
 
diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py
index aa677a93..a1c437af 100644
--- a/src/huggingface_inference_toolkit/webservice_robyn.py
+++ b/src/huggingface_inference_toolkit/webservice_robyn.py
@@ -6,7 +6,6 @@
 from huggingface_inference_toolkit.serialization.base import ContentType
 from huggingface_inference_toolkit.serialization.json_utils import Jsoner
 
-
 logger = logging.getLogger(__name__)
 logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
 
diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py
index 9c3df0ee..a8581992 100644
--- a/src/huggingface_inference_toolkit/webservice_starlette.py
+++ b/src/huggingface_inference_toolkit/webservice_starlette.py
@@ -3,6 +3,10 @@
 from time import perf_counter
 
 import orjson
+from starlette.applications import Starlette
+from starlette.responses import PlainTextResponse, Response
+from starlette.routing import Route
+
 from huggingface_inference_toolkit.async_utils import async_handler_call
 from huggingface_inference_toolkit.const import (
     HF_FRAMEWORK,
@@ -16,9 +20,6 @@
 from huggingface_inference_toolkit.serialization.base import ContentType
 from huggingface_inference_toolkit.serialization.json_utils import Jsoner
 from huggingface_inference_toolkit.utils import _load_repository_from_hf
-from starlette.applications import Starlette
-from starlette.responses import PlainTextResponse, Response
-from starlette.routing import Route
 
 
 def config_logging(level=logging.INFO):
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
deleted file mode 100644
index 6139a106..00000000
--- a/starlette_requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-orjson
-starlette
-uvicorn
-pandas
-huggingface_hub>=0.11.0
\ No newline at end of file
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 9ac0603c..9d5052ee 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -44,8 +44,6 @@ def test_load_tensorflow_repository_from_hf():
     # folder contains all config files and pytorch_model.bin
     folder_contents = os.listdir(storage_folder)
     assert "pytorch_model.bin" not in folder_contents
-    # custom requirements.txt for custom handler
-    assert "requirements.txt" in folder_contents
     # filter framework
     assert "tf_model.h5" in folder_contents
     # revision doesn't have tokenizer
@@ -62,8 +60,6 @@ def test_load_onnx_repository_from_hf():
     # folder contains all config files and pytorch_model.bin
     folder_contents = os.listdir(storage_folder)
     assert "pytorch_model.bin" not in folder_contents
-    # custom requirements.txt for custom handler
-    assert "requirements.txt" in folder_contents
     # filter framework
     assert "tf_model.h5" not in folder_contents
     # onnx model
@@ -85,8 +81,6 @@ def test_load_pytorch_repository_from_hf():
     # folder contains all config files and pytorch_model.bin
     folder_contents = os.listdir(storage_folder)
     assert "pytorch_model.bin" in folder_contents
-    # custom requirements.txt for custom handler
-    assert "requirements.txt" in folder_contents
     # filter framework
     assert "tf_model.h5" not in folder_contents
     # revision doesn't have tokenizer
@@ -126,7 +120,6 @@ def test_get_pipeline():
 @require_torch
 def test_whisper_long_audio():
     with tempfile.TemporaryDirectory() as tmpdirname:
-
         storage_dir = _load_repository_from_hf("openai/whisper-tiny", tmpdirname, framework="pytorch")
         pipe = get_pipeline("automatic-speech-recognition", storage_dir.as_posix())
         res = pipe(os.path.join(os.getcwd(), "tests/resources/audio", "long_sample.mp3"))