From 9390c55e37db496710e8ce5f36f95f5d768cafcc Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 12:18:51 +0000 Subject: [PATCH 01/11] wip vertex ai --- README.md | 29 +++++++++- dockerfiles/inference-endpoints/Dockerfile | 48 ++++++++++++++++ requirements.txt | 0 scripts/entrypoint.sh | 18 ++++-- src/huggingface_inference_toolkit/handler.py | 40 ++++++++++++- .../vertex_ai_utils.py | 46 +++++++++++++++ .../webservice_robyn.py | 57 ------------------- .../webservice_starlette.py | 49 ++++++++++++---- 8 files changed, 211 insertions(+), 76 deletions(-) create mode 100644 dockerfiles/inference-endpoints/Dockerfile delete mode 100644 requirements.txt create mode 100644 src/huggingface_inference_toolkit/vertex_ai_utils.py delete mode 100644 src/huggingface_inference_toolkit/webservice_robyn.py diff --git a/README.md b/README.md index f2f66b40..7137f225 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK ### Container -1. build the preferred container for either CPU or GPU for PyTorch or TensorFlow. +1. build the preferred container for either CPU or GPU for PyTorch o. _cpu images_ ```bash @@ -58,6 +58,32 @@ curl --request POST \ }' ``` +### Vertex AI Support + +The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Enviornment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit. + +#### Local run with HF_MODEL_ID and HF_TASK + +Start Hugging Face Inference Toolkit with the following environment variables. + +```bash +mkdir tmp2/ +AIP_MODE=PREDICTION AIP_PORT=8080 AIP_PREDICT_ROUTE=/pred AIP_HEALTH_ROUTE=/h HF_MODEL_DIR=tmp2 HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 8080 +``` + +Send request. The API schema is the same as from the [inference API](https://huggingface.co/docs/api-inference/detailed_parameters) + +```bash +curl --request POST \ + --url http://localhost:8080/pred \ + --header 'Content-Type: application/json' \ + --data '{ + "instances": ["I love this product", "I hate this product"], + "parameters": { "top_k": 2 } +}' +``` + + --- @@ -176,6 +202,7 @@ Below you ll find a list of supported and tested transformers and sentence trans ## ⚙ Supported Frontend - [x] Starlette (HF Endpoints) +- [ ] Starlette (Vertex AI) - [ ] Starlette (Azure ML) - [ ] Starlette (SageMaker) diff --git a/dockerfiles/inference-endpoints/Dockerfile b/dockerfiles/inference-endpoints/Dockerfile new file mode 100644 index 00000000..8e4c4d35 --- /dev/null +++ b/dockerfiles/inference-endpoints/Dockerfile @@ -0,0 +1,48 @@ +ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04 + +FROM $BASE_IMAGE +SHELL ["/bin/bash", "-c"] + +LABEL maintainer="Hugging Face" + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app + +RUN apt-get update && \ + apt-get install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \ + apt-get install -y \ + build-essential \ + bzip2 \ + curl \ + git \ + git-lfs \ + tar \ + gcc \ + g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3-dev \ + python3-pip \ + python3.11 \ + libsndfile1-dev \ + ffmpeg \ + && apt-get clean autoremove --yes \ + && rm -rf /var/lib/{apt,dpkg,cache,log} +# Copying only necessary files as filtered by .dockerignore +COPY . . + +# install wheel and setuptools +RUN pip install --no-cache-dir -U pip ".[torch, st, diffusers]" + +# copy application +COPY src/huggingface_inference_toolkit huggingface_inference_toolkit +COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py + +# copy entrypoint and change permissions +COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh + +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 8544a63c..3e62536b 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -1,13 +1,21 @@ -# /bin/bash +#!/bin/bash -# check if HF_MODEL_DIR is set and if not skip installing custom dependencies +# Define the default port +PORT=5000 + +# Check if AIP_MODE is set and adjust the port for Vertex AI +if [[ ! -z "${AIP_MODE}" ]]; then + PORT=${AIP_HTTP_PORT} +fi + +# Check if HF_MODEL_DIR is set and if not skip installing custom dependencies if [[ ! -z "${HF_MODEL_DIR}" ]]; then - # check if requirements.txt exists and if so install dependencies + # Check if requirements.txt exists and if so install dependencies if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt" pip install -r ${HF_MODEL_DIR}/requirements.txt --no-cache-dir; fi fi -# start the server -uvicorn webservice_starlette:app --host 0.0.0.0 --port 5000 \ No newline at end of file +# Start the server +uvicorn webservice_starlette:app --host 0.0.0.0 --port ${PORT} diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 08368326..c8bd4952 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -1,4 +1,5 @@ import logging +import os from pathlib import Path from typing import Optional, Union @@ -40,15 +41,52 @@ def __call__(self, data): return prediction +class VertexAIHandler(HuggingFaceHandler): + """ + A Default Vertex AI Hugging Face Inference Handler which abstracts the + Vertex AI specific logic for inference. + """ + def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): + super().__init__(model_dir, task, framework) + + def __call__(self, data): + """ + Handles an inference request with input data and makes a prediction. + Args: + :data: (obj): the raw request body data. + :return: prediction output + """ + if "instances" not in data: + raise ValueError("The request body must contain a key 'instances' with a list of instances.") + parameters = data.pop("parameters", None) + + predictions = [] + # iterate over all instances and make predictions + for inputs in data["instances"]: + payload = {"inputs": inputs, "parameters": parameters} + predictions.append(super().__call__(payload)) + + # reutrn predictions + return {"predictions": predictions} + def get_inference_handler_either_custom_or_default_handler( model_dir: Path, task: Optional[str] = None ): """ - get inference handler either custom or default Handler + Returns the appropriate inference handler based on the given model directory and task. + + Args: + model_dir (Path): The directory path where the model is stored. + task (Optional[str]): The task for which the inference handler is required. Defaults to None. + + Returns: + InferenceHandler: The appropriate inference handler based on the given model directory and task. """ custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir) if custom_pipeline: return custom_pipeline + elif os.environ.get("AIP_MODE", None) == "PREDICTION": + return VertexAIHandler(model_dir=model_dir, task=task) else: return HuggingFaceHandler(model_dir=model_dir, task=task) diff --git a/src/huggingface_inference_toolkit/vertex_ai_utils.py b/src/huggingface_inference_toolkit/vertex_ai_utils.py new file mode 100644 index 00000000..cf2bedad --- /dev/null +++ b/src/huggingface_inference_toolkit/vertex_ai_utils.py @@ -0,0 +1,46 @@ +import logging +from pathlib import Path +import re +from typing import Union + + +logger = logging.getLogger(__name__) +logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) + +from google.cloud import storage + +_logger = logging.getLogger(__name__) + + +GCS_URI_PREFIX = "gs://" + + +# copied from https://github.com/googleapis/python-aiplatform/blob/94d838d8cfe1599bc2d706e66080c05108821986/google/cloud/aiplatform/utils/prediction_utils.py#L121 +def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path]="/tmp"): + """ + Load files from GCS path to target_dir + """ + _logger.info(f"Loading model artifacts from {artifact_uri} to {target_dir}") + target_dir = Path(target_dir) + + if artifact_uri.startswith(GCS_URI_PREFIX): + matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri) + bucket_name, prefix = matches.groups() + + gcs_client = storage.Client() + blobs = gcs_client.list_blobs(bucket_name, prefix=prefix) + for blob in blobs: + name_without_prefix = blob.name[len(prefix) :] + name_without_prefix = ( + name_without_prefix[1:] + if name_without_prefix.startswith("/") + else name_without_prefix + ) + file_split = name_without_prefix.split("/") + directory = target_dir.join(file_split[0:-1]) + directory.mkdir(parents=True, exist_ok=True) + if name_without_prefix and not name_without_prefix.endswith("/"): + blob.download_to_filename(name_without_prefix) + + return str(target_dir.absolute()) + diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py deleted file mode 100644 index 5aeaf605..00000000 --- a/src/huggingface_inference_toolkit/webservice_robyn.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -import os - -from robyn import Robyn - -from huggingface_inference_toolkit.serialization.base import ContentType -from huggingface_inference_toolkit.serialization.json_utils import Jsoner - -logger = logging.getLogger(__name__) -logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) - - -app = Robyn(__file__) - -HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model") -HF_TASK = os.environ.get("HF_TASK", None) - -# @app.startup_handler -# async def startup_event(): -# global inference_handler - -# if empty_directory_or_not_hf_remote_id is None or task is None: -# raise ValueError( -# f"""Can't initialize model. -# Please set correct model id and task. -# Provided values are model_id: -# {model_id_or_path} and task:{task}""" -# ) - -# logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}") -# # create inference handler -# inference_handler = HuggingFaceHandler(HF_MODEL_ID) -# logger.info(f"Model initialized successfully on device: {inference_handler.model.device}") -# return inference_handler - - -@app.get("/health") -async def health(): - return "OK" - - -@app.post("/predict") -async def predict(request): - try: - logger.info(request) - content_type = request.headers.get("Content-Type", None) - body = ContentType.get_deserializer(content_type).deserialize(request["body"]) - logger.info(body) - - # pred = inference_handler(body["inputs"]) - return Jsoner.serialize(body) - except Exception as e: - logger.error(e) - return Jsoner.serialize({"error": str(e)}) - - -app.start(port=5000) diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 8bc68b2e..b749237b 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -1,4 +1,5 @@ import logging +import os from pathlib import Path from time import perf_counter @@ -20,6 +21,7 @@ from huggingface_inference_toolkit.serialization.base import ContentType from huggingface_inference_toolkit.serialization.json_utils import Jsoner from huggingface_inference_toolkit.utils import _load_repository_from_hf, convert_params_to_int_or_bool +from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs def config_logging(level=logging.INFO): @@ -35,10 +37,11 @@ def config_logging(level=logging.INFO): logger = logging.getLogger(__name__) -async def some_startup_task(): +async def prepare_model_artifacts(): global inference_handler # 1. check if model artifacts available in HF_MODEL_DIR if len(list(Path(HF_MODEL_DIR).glob("**/*"))) <= 0: + # 2. if not available, try to load from HF_MODEL_ID if HF_MODEL_ID is not None: _load_repository_from_hf( repository_id=HF_MODEL_ID, @@ -47,6 +50,11 @@ async def some_startup_task(): revision=HF_REVISION, hf_hub_token=HF_HUB_TOKEN, ) + # 3. check if in Vertex AI environment and load from GCS + # If artifactUri not on Model Creation not set returns an empty string + elif len(os.environ.get("AIP_STORAGE_URI", '')) > 0: + _load_repository_from_gcs(os.environ["AIP_STORAGE_URI"], target_dir=HF_MODEL_DIR) + # 4. if not available, raise error else: raise ValueError( f"""Can't initialize model. @@ -72,7 +80,7 @@ async def predict(request): # try to deserialize payload deserialized_body = ContentType.get_deserializer(content_type).deserialize(await request.body()) # checks if input schema is correct - if "inputs" not in deserialized_body: + if "inputs" not in deserialized_body and "instances" not in deserialized_body: raise ValueError(f"Body needs to provide a inputs key, recieved: {orjson.dumps(deserialized_body)}") # check for query parameter and add them to the body @@ -97,14 +105,31 @@ async def predict(request): logger.error(e) return Response(Jsoner.serialize({"error": str(e)}), status_code=400, media_type="application/json") - -app = Starlette( - debug=True, - routes=[ - Route("/", health, methods=["GET"]), - Route("/health", health, methods=["GET"]), - Route("/", predict, methods=["POST"]), - Route("/predict", predict, methods=["POST"]), - ], - on_startup=[some_startup_task], +# Create app based on which cloud environment is used +if os.getenv("AIP_MODE", None) == "PREDICTION": + logger.info("Running in Vertex AI environment") + # extract routes from environment variables + _predict_route = os.getenv("AIP_PREDICT_ROUTE", None) + _health_route = os.getenv("AIP_HEALTH_ROUTE", None) + if _predict_route is None or _health_route is None: + raise ValueError("AIP_PREDICT_ROUTE and AIP_HEALTH_ROUTE need to be set in Vertex AI environment") + + app = Starlette( + debug=False, + routes=[ + Route(_health_route, health, methods=["GET"]), + Route(_predict_route, predict, methods=["POST"]), + ], + on_startup=[prepare_model_artifacts], + ) +else: + app = Starlette( + debug=False, + routes=[ + Route("/", health, methods=["GET"]), + Route("/health", health, methods=["GET"]), + Route("/", predict, methods=["POST"]), + Route("/predict", predict, methods=["POST"]), + ], + on_startup=[prepare_model_artifacts], ) From 0b91c65bf8aeca73d49ad587d269cbf51ca8c4bf Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 13:25:21 +0000 Subject: [PATCH 02/11] updated versions --- README.md | 27 +++++++++- dockerfiles/inference-endpoints/Dockerfile | 48 ----------------- dockerfiles/pytorch/Dockerfile | 10 +++- dockerfiles/tensorflow/cpu/Dockerfile | 53 ------------------ dockerfiles/tensorflow/cpu/environment.yaml | 8 --- dockerfiles/tensorflow/gpu/Dockerfile | 59 --------------------- dockerfiles/tensorflow/gpu/environment.yaml | 9 ---- makefile | 6 +++ setup.py | 17 ++---- 9 files changed, 45 insertions(+), 192 deletions(-) delete mode 100644 dockerfiles/inference-endpoints/Dockerfile delete mode 100644 dockerfiles/tensorflow/cpu/Dockerfile delete mode 100644 dockerfiles/tensorflow/cpu/environment.yaml delete mode 100644 dockerfiles/tensorflow/gpu/Dockerfile delete mode 100644 dockerfiles/tensorflow/gpu/environment.yaml diff --git a/README.md b/README.md index 7137f225..62f49723 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ mkdir tmp2/ AIP_MODE=PREDICTION AIP_PORT=8080 AIP_PREDICT_ROUTE=/pred AIP_HEALTH_ROUTE=/h HF_MODEL_DIR=tmp2 HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 8080 ``` -Send request. The API schema is the same as from the [inference API](https://huggingface.co/docs/api-inference/detailed_parameters) +Send request ```bash curl --request POST \ @@ -83,6 +83,31 @@ curl --request POST \ }' ``` +#### Container run with HF_MODEL_ID and HF_TASK + +1. build the preferred container for either CPU or GPU for PyTorch o. + +```bash +docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch:gpu . +``` + +2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. + +```bash +docker run -ti -p 8080:8080 -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h -e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english -e HF_TASK=text-classification vertex-test-pytorch:gpu +``` + +1. Send request + +```bash +curl --request POST \ + --url http://localhost:8080/pred \ + --header 'Content-Type: application/json' \ + --data '{ + "instances": ["I love this product", "I hate this product"], + "parameters": { "top_k": 2 } +}' +``` --- diff --git a/dockerfiles/inference-endpoints/Dockerfile b/dockerfiles/inference-endpoints/Dockerfile deleted file mode 100644 index 8e4c4d35..00000000 --- a/dockerfiles/inference-endpoints/Dockerfile +++ /dev/null @@ -1,48 +0,0 @@ -ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04 - -FROM $BASE_IMAGE -SHELL ["/bin/bash", "-c"] - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -WORKDIR /app - -RUN apt-get update && \ - apt-get install software-properties-common -y && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \ - apt-get install -y \ - build-essential \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - cmake \ - libprotobuf-dev \ - protobuf-compiler \ - python3-dev \ - python3-pip \ - python3.11 \ - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} -# Copying only necessary files as filtered by .dockerignore -COPY . . - -# install wheel and setuptools -RUN pip install --no-cache-dir -U pip ".[torch, st, diffusers]" - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh - -ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile index 8e4c4d35..c554ce59 100644 --- a/dockerfiles/pytorch/Dockerfile +++ b/dockerfiles/pytorch/Dockerfile @@ -1,6 +1,6 @@ ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04 -FROM $BASE_IMAGE +FROM $BASE_IMAGE as base SHELL ["/bin/bash", "-c"] LABEL maintainer="Hugging Face" @@ -45,4 +45,10 @@ COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starle # copy entrypoint and change permissions COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh -ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] + + +from base as vertex + +# Install Vertex AI requiremented packages +RUN pip install --no-cache-dir google-cloud-storage diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile deleted file mode 100644 index d16010bb..00000000 --- a/dockerfiles/tensorflow/cpu/Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -FROM ubuntu:22.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - cmake \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/tensorflow/cpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/cpu/environment.yaml b/dockerfiles/tensorflow/cpu/environment.yaml deleted file mode 100644 index a370380c..00000000 --- a/dockerfiles/tensorflow/cpu/environment.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- tensorflow=2.9.1=*cpu*py39* -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.27.2 \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile deleted file mode 100644 index c42a33c0..00000000 --- a/dockerfiles/tensorflow/gpu/Dockerfile +++ /dev/null @@ -1,59 +0,0 @@ -FROM nvidia/cuda:11.2.2-base-ubuntu20.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive -ENV CONDA_OVERRIDE_CUDA="11.2" - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - cmake \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" - -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy tests -COPY . /tmp/hf-inference-test - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/environment.yaml b/dockerfiles/tensorflow/gpu/environment.yaml deleted file mode 100644 index 1d886795..00000000 --- a/dockerfiles/tensorflow/gpu/environment.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- nvidia::cudatoolkit=11.7 -- tensorflow=2.9.1=*cuda112*py39* -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.27.2 \ No newline at end of file diff --git a/makefile b/makefile index a9490428..3502d83e 100644 --- a/makefile +++ b/makefile @@ -26,5 +26,11 @@ inference-pytorch-gpu: inference-pytorch-cpu: docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu . +vertex-pytorch-gpu: + docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu . + +vertex-pytorch-cpu: + docker build -t vertex --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu . + stop-all: docker stop $$(docker ps -a -q) && docker container prune --force \ No newline at end of file diff --git a/setup.py b/setup.py index 5e99df02..192cd5c5 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -from datetime import date from setuptools import find_packages, setup # We don't declare our dependency on transformers here because we build with @@ -7,18 +6,13 @@ VERSION = "0.3.0" - # Ubuntu packages # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev # ffmpeg: ffmpeg is required for audio processing. On Ubuntu it can be installed as follows: apt install ffmpeg # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg install_requires = [ - "wheel==0.42.0", - "setuptools==69.1.0", - "cmake==3.28.3", - "transformers[sklearn,sentencepiece, audio, vision]==4.38.2", - "huggingface_hub==0.20.3", + "transformers[sklearn,sentencepiece, audio,vision]==4.41.1", "orjson", # vision "Pillow", @@ -31,15 +25,14 @@ "starlette", "uvicorn", "pandas", - "peft==0.9.0" + "peft==0.11.1" ] extras = {} -extras["st"] = ["sentence_transformers==2.4.0"] -extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] -extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] -extras["tensorflow"] = ["tensorflow"] +extras["st"] = ["sentence_transformers==2.7.0"] +extras["diffusers"] = ["diffusers==0.28.0", "accelerate==0.30.1"] +extras["torch"] = ["torch==2.3.0", "torchvision", "torchaudio"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", From e78be25d10d8dd776b977d85b0c7042f0f0be83e Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 13:26:05 +0000 Subject: [PATCH 03/11] 0.4.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 192cd5c5..035d29fc 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.3.0" +VERSION = "0.4.0" # Ubuntu packages # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev From 0c79f58446e477449d98c67846dc5cd11322aade Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 13:26:44 +0000 Subject: [PATCH 04/11] x --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 62f49723..6b2182b6 100644 --- a/README.md +++ b/README.md @@ -227,7 +227,7 @@ Below you ll find a list of supported and tested transformers and sentence trans ## ⚙ Supported Frontend - [x] Starlette (HF Endpoints) -- [ ] Starlette (Vertex AI) +- [x] Starlette (Vertex AI) - [ ] Starlette (Azure ML) - [ ] Starlette (SageMaker) From a2317c829f3233df64dc83cb5d82412a86a46e40 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 13:35:12 +0000 Subject: [PATCH 05/11] style --- README.md | 2 +- src/huggingface_inference_toolkit/handler.py | 12 ++++++------ src/huggingface_inference_toolkit/vertex_ai_utils.py | 8 ++++---- .../webservice_starlette.py | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 6b2182b6..f8196973 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ docker build -t vertex -f dockerfiles/pytorch/Dockerfile -t vertex-test-pytorch: docker run -ti -p 8080:8080 -e AIP_MODE=PREDICTION -e AIP_HTTP_PORT=8080 -e AIP_PREDICT_ROUTE=/pred -e AIP_HEALTH_ROUTE=/h -e HF_MODEL_ID=distilbert/distilbert-base-uncased-finetuned-sst-2-english -e HF_TASK=text-classification vertex-test-pytorch:gpu ``` -1. Send request +3. Send request ```bash curl --request POST \ diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index c8bd4952..0a8c93b8 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -48,7 +48,7 @@ class VertexAIHandler(HuggingFaceHandler): """ def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): super().__init__(model_dir, task, framework) - + def __call__(self, data): """ Handles an inference request with input data and makes a prediction. @@ -59,13 +59,13 @@ def __call__(self, data): if "instances" not in data: raise ValueError("The request body must contain a key 'instances' with a list of instances.") parameters = data.pop("parameters", None) - + predictions = [] # iterate over all instances and make predictions for inputs in data["instances"]: payload = {"inputs": inputs, "parameters": parameters} predictions.append(super().__call__(payload)) - + # reutrn predictions return {"predictions": predictions} @@ -75,18 +75,18 @@ def get_inference_handler_either_custom_or_default_handler( ): """ Returns the appropriate inference handler based on the given model directory and task. - + Args: model_dir (Path): The directory path where the model is stored. task (Optional[str]): The task for which the inference handler is required. Defaults to None. - + Returns: InferenceHandler: The appropriate inference handler based on the given model directory and task. """ custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir) if custom_pipeline: return custom_pipeline - elif os.environ.get("AIP_MODE", None) == "PREDICTION": + elif os.environ.get("AIP_MODE", None) == "PREDICTION": return VertexAIHandler(model_dir=model_dir, task=task) else: return HuggingFaceHandler(model_dir=model_dir, task=task) diff --git a/src/huggingface_inference_toolkit/vertex_ai_utils.py b/src/huggingface_inference_toolkit/vertex_ai_utils.py index cf2bedad..19dd41e2 100644 --- a/src/huggingface_inference_toolkit/vertex_ai_utils.py +++ b/src/huggingface_inference_toolkit/vertex_ai_utils.py @@ -1,13 +1,12 @@ import logging -from pathlib import Path import re +from pathlib import Path from typing import Union - logger = logging.getLogger(__name__) logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) -from google.cloud import storage + _logger = logging.getLogger(__name__) @@ -20,9 +19,10 @@ def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path]="/ """ Load files from GCS path to target_dir """ + from google.cloud import storage _logger.info(f"Loading model artifacts from {artifact_uri} to {target_dir}") target_dir = Path(target_dir) - + if artifact_uri.startswith(GCS_URI_PREFIX): matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri) bucket_name, prefix = matches.groups() diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index b749237b..862560dc 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -52,7 +52,7 @@ async def prepare_model_artifacts(): ) # 3. check if in Vertex AI environment and load from GCS # If artifactUri not on Model Creation not set returns an empty string - elif len(os.environ.get("AIP_STORAGE_URI", '')) > 0: + elif len(os.environ.get("AIP_STORAGE_URI", '')) > 0: _load_repository_from_gcs(os.environ["AIP_STORAGE_URI"], target_dir=HF_MODEL_DIR) # 4. if not available, raise error else: @@ -112,8 +112,8 @@ async def predict(request): _predict_route = os.getenv("AIP_PREDICT_ROUTE", None) _health_route = os.getenv("AIP_HEALTH_ROUTE", None) if _predict_route is None or _health_route is None: - raise ValueError("AIP_PREDICT_ROUTE and AIP_HEALTH_ROUTE need to be set in Vertex AI environment") - + raise ValueError("AIP_PREDICT_ROUTE and AIP_HEALTH_ROUTE need to be set in Vertex AI environment") + app = Starlette( debug=False, routes=[ @@ -121,7 +121,7 @@ async def predict(request): Route(_predict_route, predict, methods=["POST"]), ], on_startup=[prepare_model_artifacts], - ) + ) else: app = Starlette( debug=False, From f993c05e1d356adc7a3a084a5519810fdcc308a5 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 14:00:45 +0000 Subject: [PATCH 06/11] revert diffusersr version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 035d29fc..c7d26da9 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ extras = {} extras["st"] = ["sentence_transformers==2.7.0"] -extras["diffusers"] = ["diffusers==0.28.0", "accelerate==0.30.1"] +extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.30.1"] extras["torch"] = ["torch==2.3.0", "torchvision", "torchaudio"] extras["test"] = [ "pytest==7.2.1", From bde90156f82454a5bcdf2f4cd7d563a47add47b8 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 14:13:12 +0000 Subject: [PATCH 07/11] pt 2.2.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c7d26da9..81628fa1 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ extras["st"] = ["sentence_transformers==2.7.0"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.30.1"] -extras["torch"] = ["torch==2.3.0", "torchvision", "torchaudio"] +extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", From b3a65867af627a9f1d5adac3a278fa338a552922 Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 15:10:05 +0000 Subject: [PATCH 08/11] update model repo to fix tests --- .github/workflows/docker-build-action.yaml | 1 + setup.py | 4 ++-- tests/unit/test_diffusers.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-build-action.yaml b/.github/workflows/docker-build-action.yaml index 62cba961..fe644056 100644 --- a/.github/workflows/docker-build-action.yaml +++ b/.github/workflows/docker-build-action.yaml @@ -63,6 +63,7 @@ jobs: push: true context: ${{ inputs.context }} build-args: ${{ inputs.build_args }} + target: base file: ${{ inputs.context }}/${{ inputs.dockerfile }} tags: ${{ inputs.repository }}/${{ inputs.image }}:sha-${{ env.GITHUB_SHA_SHORT }},${{ inputs.repository }}/${{ inputs.image }}:latest diff --git a/setup.py b/setup.py index 81628fa1..fde51c5e 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ extras = {} extras["st"] = ["sentence_transformers==2.7.0"] -extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.30.1"] -extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"] +extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] +extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py index 0f2890a8..4384cd4e 100644 --- a/tests/unit/test_diffusers.py +++ b/tests/unit/test_diffusers.py @@ -15,7 +15,7 @@ def test_get_diffusers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", + "echarlaix/tiny-random-stable-diffusion-xl", tmpdirname, framework="pytorch" ) @@ -28,7 +28,7 @@ def test_get_diffusers_pipeline(): def test_pipe_on_gpu(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", + "echarlaix/tiny-random-stable-diffusion-xl", tmpdirname, framework="pytorch" ) @@ -44,7 +44,7 @@ def test_pipe_on_gpu(): def test_text_to_image_task(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", + "echarlaix/tiny-random-stable-diffusion-xl", tmpdirname, framework="pytorch" ) From 0cdeee73d32dc0f9dd7db30b8f40f0c3f04dafea Mon Sep 17 00:00:00 2001 From: philschmid Date: Mon, 27 May 2024 15:19:40 +0000 Subject: [PATCH 09/11] update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fde51c5e..deffb557 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ extras["st"] = ["sentence_transformers==2.7.0"] extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] -extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] +extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", From 66afdb01a526b659624f6443675eca55c8ac71c7 Mon Sep 17 00:00:00 2001 From: Philipp Schmid <32632186+philschmid@users.noreply.github.com> Date: Tue, 28 May 2024 10:23:55 +0200 Subject: [PATCH 10/11] Update README.md Co-authored-by: oOraph <13552058+oOraph@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8196973..c95f3619 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ curl --request POST \ ### Vertex AI Support -The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Enviornment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit. +The Hugging Face Inference Toolkit is also supported on Vertex AI, based on [Custom container requirements for prediction](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements). [Environment variables set by Vertex AI](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables) are automatically detected and used by the toolkit. #### Local run with HF_MODEL_ID and HF_TASK From e339f9a8c7236fdafa747564a65706e2cceafe2a Mon Sep 17 00:00:00 2001 From: Philipp Schmid <32632186+philschmid@users.noreply.github.com> Date: Tue, 28 May 2024 10:24:01 +0200 Subject: [PATCH 11/11] Update README.md Co-authored-by: oOraph <13552058+oOraph@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c95f3619..f3056a89 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK ### Container -1. build the preferred container for either CPU or GPU for PyTorch o. +1. build the preferred container for either CPU or GPU for PyTorch. _cpu images_ ```bash