1 change: 1 addition & 0 deletions .github/workflows/docker-build-action.yaml
@@ -63,6 +63,7 @@ jobs:
push: true
context: ${{ inputs.context }}
build-args: ${{ inputs.build_args }}
target: base
file: ${{ inputs.context }}/${{ inputs.dockerfile }}
tags: ${{ inputs.repository }}/${{ inputs.image }}:sha-${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest

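The new `target: base` input pins the CI image to the `base` stage of the PyTorch Dockerfile, which this PR turns into a multi-stage build, so the published `latest`/`sha-*` tags do not pick up the Vertex-only layer added at the end of that Dockerfile. A rough local equivalent of what the CI step builds (tag name is illustrative):

```bash
docker build --target base -f dockerfiles/pytorch/Dockerfile -t huggingface-inference-toolkit:base .
```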
54 changes: 53 additions & 1 deletion README.md
@@ -20,7 +20,7 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK
### Container


1. build the preferred container for either CPU or GPU for PyTorch or TensorFlow.
1. build the preferred container for either CPU or GPU for PyTorch.

_cpu images_
```bash
@@ -58,6 +58,57 @@ curl --request POST \
}'
```

### Vertex AI Support

The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit.

#### Local run with HF_MODEL_ID and HF_TASK

Start the Hugging Face Inference Toolkit with the following environment variables.

```bash
mkdir tmp2/
AIP_MODE=PREDICTION AIP_PORT=8080 AIP_PREDICT_ROUTE=/pred AIP_HEALTH_ROUTE=/h HF_MODEL_DIR=tmp2 HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 8080
```
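Once the server is running, the health route configured above via `AIP_HEALTH_ROUTE` offers a quick readiness check; it should return a 2xx response once the model has loaded:

```bash
curl http://localhost:8080/h
```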

Send request

```bash
curl --request POST \
--url http://localhost:8080/pred \
--header 'Content-Type: application/json' \
--data '{
"instances": ["I love this product", "I hate this product"],
"parameters": { "top_k": 2 }
}'
```
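For reference, the Vertex handler (see `handler.py` further down) wraps each instance's pipeline output under a top-level `predictions` key, so with `top_k: 2` the response should look roughly like the following (labels and scores are illustrative):

```bash
# Illustrative response shape:
# {"predictions": [[{"label": "POSITIVE", "score": 0.99}, {"label": "NEGATIVE", "score": 0.01}],
#                  [{"label": "NEGATIVE", "score": 0.98}, {"label": "POSITIVE", "score": 0.02}]]}
```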

#### Container run with HF_MODEL_ID and HF_TASK

1. Build the preferred container for either CPU or GPU for PyTorch.

```bash
docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch:gpu .
```

2. Run the container, providing either environment variables pointing to the Hub model you want to use, or mounting a volume with your locally stored model into the container (see the volume-mount sketch after this list).

```bash
docker run -ti -p 8080:8080 -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h -e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english -e HF_TASK=text-classification vertex-test-pytorch:gpu
```

3. Send request

```bash
curl --request POST \
--url http://localhost:8080/pred \
--header 'Content-Type: application/json' \
--data '{
"instances": ["I love this product", "I hate this product"],
"parameters": { "top_k": 2 }
}'
```
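If you prefer the volume-mount option from step 2 over pulling from the Hub, here is a sketch of the same run with a locally stored model; the container-side path is illustrative and only has to match the `HF_MODEL_DIR` you pass:

```bash
docker run -ti -p 8080:8080 \
  -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h \
  -e HF_MODEL_DIR=/opt/huggingface/model -e HF_TASK=text-classification \
  -v $(pwd)/tmp2:/opt/huggingface/model \
  vertex-test-pytorch:gpu
```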


---

@@ -176,6 +227,7 @@ Below you'll find a list of supported and tested transformers and sentence trans
## ⚙ Supported Frontend

- [x] Starlette (HF Endpoints)
- [x] Starlette (Vertex AI)
- [ ] Starlette (Azure ML)
- [ ] Starlette (SageMaker)

10 changes: 8 additions & 2 deletions dockerfiles/pytorch/Dockerfile
@@ -1,6 +1,6 @@
ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04

FROM $BASE_IMAGE
FROM $BASE_IMAGE as base
SHELL ["/bin/bash", "-c"]

LABEL maintainer="Hugging Face"
@@ -45,4 +45,10 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle
# copy entrypoint and change permissions
COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh

ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]
ENTRYPOINT ["bash", "-c", "./entrypoint.sh"]


FROM base AS vertex

# Install packages required for Vertex AI
RUN pip install --no-cache-dir google-cloud-storage
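Because the `vertex` stage is the last one in the Dockerfile, a plain `docker build` (as used by the new makefile targets) yields the Vertex AI variant, while the CI workflow above stops at `base`. To request a stage explicitly, the standard `--target` flag works (tag name is illustrative):

```bash
docker build --target vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch:gpu .
```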
53 changes: 0 additions & 53 deletions dockerfiles/tensorflow/cpu/Dockerfile

This file was deleted.

8 changes: 0 additions & 8 deletions dockerfiles/tensorflow/cpu/environment.yaml

This file was deleted.

59 changes: 0 additions & 59 deletions dockerfiles/tensorflow/gpu/Dockerfile

This file was deleted.

9 changes: 0 additions & 9 deletions dockerfiles/tensorflow/gpu/environment.yaml

This file was deleted.

6 changes: 6 additions & 0 deletions makefile
@@ -26,5 +26,11 @@ inference-pytorch-gpu:
inference-pytorch-cpu:
docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .

vertex-pytorch-gpu:
docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu .

vertex-pytorch-cpu:
docker build -t vertex --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu .

stop-all:
docker stop $$(docker ps -a -q) && docker container prune --force
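The new targets mirror the existing `inference-pytorch-*` ones and can be invoked the usual way (assuming Docker and `make` are available):

```bash
make vertex-pytorch-gpu   # CUDA base image
make vertex-pytorch-cpu   # ubuntu:22.04 base image
```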
Empty file removed requirements.txt
Empty file.
18 changes: 13 additions & 5 deletions scripts/entrypoint.sh
@@ -1,13 +1,21 @@
# /bin/bash
#!/bin/bash

# check if HF_MODEL_DIR is set and if not skip installing custom dependencies
# Define the default port
PORT=5000

# Check if AIP_MODE is set and adjust the port for Vertex AI
if [[ ! -z "${AIP_MODE}" ]]; then
PORT=${AIP_HTTP_PORT}
fi

# Check if HF_MODEL_DIR is set and if not skip installing custom dependencies
if [[ ! -z "${HF_MODEL_DIR}" ]]; then
# check if requirements.txt exists and if so install dependencies
# Check if requirements.txt exists and if so install dependencies
if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then
echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
pip install -r ${HF_MODEL_DIR}/requirements.txt --no-cache-dir;
fi
fi

# start the server
uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000
# Start the server
uvicorn webservice_starlette:app --host 0.0.0.0 --port ${PORT}
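A minimal sketch of how the port selection behaves when the entrypoint is run directly (paths and values are illustrative):

```bash
# Without AIP_MODE the server keeps the default port
./entrypoint.sh                                          # listens on 5000

# With AIP_MODE set, as on Vertex AI, the port comes from AIP_HTTP_PORT
AIP_MODE=PREDICTION AIP_HTTP_PORT=8080 ./entrypoint.sh   # listens on 8080
```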
17 changes: 5 additions & 12 deletions setup.py
@@ -1,24 +1,18 @@
from __future__ import absolute_import
from datetime import date
from setuptools import find_packages, setup

# We don't declare our dependency on transformers here because we build with
# different packages for different variants

VERSION = "0.3.0"

VERSION = "0.4.0"

# Ubuntu packages
# libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev
# ffmpeg: ffmpeg is required for audio processing. On Ubuntu it can be installed as follows: apt install ffmpeg
# libavcodec-extra: libavcodec-extra includes additional codecs for ffmpeg

install_requires = [
"wheel==0.42.0",
"setuptools==69.1.0",
"cmake==3.28.3",
"transformers[sklearn,sentencepiece, audio, vision]==4.38.2",
"huggingface_hub==0.20.3",
"transformers[sklearn,sentencepiece, audio,vision]==4.41.1",
"orjson",
# vision
"Pillow",
@@ -31,15 +25,14 @@
"starlette",
"uvicorn",
"pandas",
"peft==0.9.0"
"peft==0.11.1"
]

extras = {}

extras["st"] = ["sentence_transformers==2.4.0"]
extras["st"] = ["sentence_transformers==2.7.0"]
extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"]
extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"]
extras["tensorflow"] = ["tensorflow"]
extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"]
extras["test"] = [
"pytest==7.2.1",
"pytest-xdist",
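For local development against these pins, the package and its optional extras can be installed together; a sketch using the extras declared above:

```bash
pip install -e ".[torch,st,test]"
```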
40 changes: 39 additions & 1 deletion src/huggingface_inference_toolkit/handler.py
@@ -1,4 +1,5 @@
import logging
import os
from pathlib import Path
from typing import Optional, Union

@@ -40,15 +41,52 @@ def __call__(self, data):
return prediction


class VertexAIHandler(HuggingFaceHandler):
    """
    A default Vertex AI Hugging Face Inference Handler which abstracts the
    Vertex AI specific logic for inference.
    """
    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
        super().__init__(model_dir, task, framework)

    def __call__(self, data):
        """
        Handles an inference request with input data and makes a prediction.
        Args:
            :data: (obj): the raw request body data.
        :return: prediction output
        """
        if "instances" not in data:
            raise ValueError("The request body must contain a key 'instances' with a list of instances.")
        parameters = data.pop("parameters", None)

        predictions = []
        # iterate over all instances and make predictions
        for inputs in data["instances"]:
            payload = {"inputs": inputs, "parameters": parameters}
            predictions.append(super().__call__(payload))

        # return predictions
        return {"predictions": predictions}

def get_inference_handler_either_custom_or_default_handler(
    model_dir: Path,
    task: Optional[str] = None
):
    """
    get inference handler either custom or default Handler
    Returns the appropriate inference handler based on the given model directory and task.

    Args:
        model_dir (Path): The directory path where the model is stored.
        task (Optional[str]): The task for which the inference handler is required. Defaults to None.

    Returns:
        InferenceHandler: The appropriate inference handler based on the given model directory and task.
    """
    custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir)
    if custom_pipeline:
        return custom_pipeline
    elif os.environ.get("AIP_MODE", None) == "PREDICTION":
        return VertexAIHandler(model_dir=model_dir, task=task)
    else:
        return HuggingFaceHandler(model_dir=model_dir, task=task)