diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index bcc3b986..703e4e28 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -69,7 +69,7 @@ jobs:
   tensorflow-integration-test:
     needs:
       - start-runner
-      # - pytorch-integration-test
+      - pytorch-integration-test
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    env:
       AWS_REGION: us-east-1
diff --git a/README.md b/README.md
index 75080871..63c2b568 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,7 @@ docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/D
 docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu
 ```
 
+
 3. Send request. The API schema is the same as from the [inference API](https://huggingface.co/docs/api-inference/detailed_parameters)
 
 ```bash
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.cpu b/dockerfiles/starlette/pytorch/Dockerfile.cpu
index fe34fc77..04d0a1db 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.cpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.cpu
@@ -4,8 +4,8 @@ FROM huggingface/transformers-inference:4.24.0-pt1.13-cpu
 COPY starlette_requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
 
-# Think about a better solution -> base contaienr has pt 1.11. thats why need below 0.13
-RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"
+# TODO: find a better solution -> the base container ships pt 1.13, so torchvision must stay below 0.15
+RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0" diffusers=="0.8.1" accelerate=="0.14.0"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu
index 45bc7f52..33e02056 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.gpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.gpu
@@ -4,12 +4,13 @@ FROM huggingface/transformers-inference:4.24.0-pt1.13-cuda11.6
 COPY starlette_requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
 
-# Think about a better solution -> base contaienr has pt 1.11. thats why need below 0.13
-RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"
+# TODO: find a better solution -> the base container ships pt 1.13, so torchvision must stay below 0.15
+RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0" diffusers=="0.8.1" accelerate=="0.14.0"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
 COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
 
 # run app
-ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]
\ No newline at end of file
+ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]
+
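Note on the Dockerfile pins: torch 1.13 pairs with torchvision 0.14.x, and a mismatch only surfaces at import time. A quick check along these lines could be run inside the built image (a sketch, not part of this patch; the version prefixes are assumptions based on the `pt1.13` base-image tag):

```python
# Hypothetical sanity check for the built image, not part of this patch:
# confirm the preinstalled torch and the pinned torchvision are a matched pair.
import torch
import torchvision

print(f"torch={torch.__version__} torchvision={torchvision.__version__}")
assert torch.__version__.startswith("1.13"), "unexpected torch version in base image"
assert torchvision.__version__.startswith("0.14"), "torchvision pin does not match torch 1.13"
```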
diff --git a/setup.py b/setup.py
index 9200e5e8..b81e84dd 100644
--- a/setup.py
+++ b/setup.py
@@ -31,12 +31,13 @@
 extras = {}
 
 extras["st"] = ["sentence_transformers"]
+extras["diffusers"] = ["diffusers==0.8.1", "accelerate==0.14.0"]
 
 # Hugging Face specific dependencies
 # framework specific dependencies
 extras["torch"] = ["torch>=1.8.0", "torchaudio"]
-extras["tensorflow"] = ["tensorflow>=2.4.0"]
+extras["tensorflow"] = ["tensorflow==2.9.0"]
 
 # test and quality
 extras["test"] = [
     "pytest",
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
new file mode 100644
index 00000000..3db96940
--- /dev/null
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -0,0 +1,56 @@
+import importlib.util
+import json
+import os
+
+
+_diffusers = importlib.util.find_spec("diffusers") is not None
+
+
+def is_diffusers_available():
+    return _diffusers
+
+
+if is_diffusers_available():
+    import torch
+
+    from diffusers import StableDiffusionPipeline
+
+
+def check_supported_pipeline(model_dir):
+    try:
+        with open(os.path.join(model_dir, "model_index.json")) as json_file:
+            data = json.load(json_file)
+            if data["_class_name"] == "StableDiffusionPipeline":
+                return True
+            else:
+                return False
+    except Exception:
+        return False
+
+
+class DiffusersPipelineImageToText:
+    def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
+        self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
+        self.pipeline.to(device)
+
+    def __call__(self, prompt, **kwargs):
+        if kwargs:
+            out = self.pipeline(prompt, **kwargs)
+        else:
+            out = self.pipeline(prompt)
+
+        # TODO: return more than 1 image if requested
+        return out.images[0]
+
+
+DIFFUSERS_TASKS = {
+    "text-to-image": DiffusersPipelineImageToText,
+}
+
+
+def get_diffusers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
+    """Get a pipeline for Diffusers models."""
+    device = "cuda" if device == 0 else "cpu"
+    pipeline = DIFFUSERS_TASKS[task](model_dir=model_dir, device=device)
+    return pipeline
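For reviewers, a minimal sketch of how the new module's pieces compose (the `model_dir` path is a placeholder; per `get_diffusers_pipeline`, `device=0` maps to `"cuda"` and anything else to `"cpu"`):

```python
# Usage sketch for diffusers_utils; "/tmp/model" is a placeholder for a local
# snapshot of a Stable Diffusion repository containing model_index.json.
from huggingface_inference_toolkit.diffusers_utils import (
    check_supported_pipeline,
    get_diffusers_pipeline,
)

model_dir = "/tmp/model"  # placeholder path
if check_supported_pipeline(model_dir):  # True only for StableDiffusionPipeline repos
    pipe = get_diffusers_pipeline(task="text-to-image", model_dir=model_dir, device=0)
    image = pipe("a man on a horse jumps over a broken down airplane.")
    image.save("sample.png")  # __call__ returns a single PIL.Image.Image
```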
diff --git a/src/huggingface_inference_toolkit/serialization/audio_utils.py b/src/huggingface_inference_toolkit/serialization/audio_utils.py
index 682f1a8a..bad4105b 100644
--- a/src/huggingface_inference_toolkit/serialization/audio_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/audio_utils.py
@@ -4,5 +4,5 @@ def deserialize(body):
         return {"inputs": bytes(body)}
 
     @staticmethod
-    def serialize(body):
+    def serialize(body, accept=None):
         raise NotImplementedError("Audio serialization not implemented")
diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py
index d55c5b99..d3b936d9 100644
--- a/src/huggingface_inference_toolkit/serialization/base.py
+++ b/src/huggingface_inference_toolkit/serialization/base.py
@@ -46,3 +46,12 @@ def get_deserializer(content_type):
             raise Exception(
                 f'Content type "{content_type}" not supported. Supported content types are: {", ".join(list(content_type_mapping.keys()))}'
             )
+
+    @staticmethod
+    def get_serializer(accept):
+        if accept in content_type_mapping:
+            return content_type_mapping[accept]
+        else:
+            raise Exception(
+                f'Accept type "{accept}" not supported. Supported accept types are: {", ".join(list(content_type_mapping.keys()))}'
+            )
diff --git a/src/huggingface_inference_toolkit/serialization/image_utils.py b/src/huggingface_inference_toolkit/serialization/image_utils.py
index ecec38c9..43dbbe15 100644
--- a/src/huggingface_inference_toolkit/serialization/image_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/image_utils.py
@@ -10,5 +10,11 @@ def deserialize(body):
         return {"inputs": image}
 
     @staticmethod
-    def serialize(body):
-        raise NotImplementedError("Image serialization not implemented")
+    def serialize(image, accept=None):
+        if isinstance(image, Image.Image):
+            img_byte_arr = BytesIO()
+            image.save(img_byte_arr, format=accept.split("/")[-1].upper())
+            img_byte_arr = img_byte_arr.getvalue()
+            return img_byte_arr
+        else:
+            raise ValueError(f"Can only serialize PIL.Image.Image, got {type(image)}")
diff --git a/src/huggingface_inference_toolkit/serialization/json_utils.py b/src/huggingface_inference_toolkit/serialization/json_utils.py
index fb15e457..a6387000 100644
--- a/src/huggingface_inference_toolkit/serialization/json_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/json_utils.py
@@ -20,7 +20,7 @@ def deserialize(body):
         return orjson.loads(body)
 
     @staticmethod
-    def serialize(body):
+    def serialize(body, accept=None):
         return orjson.dumps(body, option=orjson.OPT_SERIALIZE_NUMPY, default=default)
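The serialization changes above give every serializer the same `(body, accept)` signature so the webservice can dispatch on the request's `Accept` header. A sketch of the round trip, assuming `content_type_mapping` in `base.py` registers `image/png` to the image serializer:

```python
# Sketch of the new content negotiation; mirrors what the predict route below
# does with the Accept header. Assumes "image/png" is in content_type_mapping.
from io import BytesIO

from PIL import Image

from huggingface_inference_toolkit.serialization.base import ContentType

image = Image.new("RGB", (64, 64), color="blue")
serializer = ContentType.get_serializer("image/png")  # raises for unknown accept types
png_bytes = serializer.serialize(image, accept="image/png")
assert Image.open(BytesIO(png_bytes)).size == (64, 64)
```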
""" + if hf_hub_token is not None: + login(token=hf_hub_token) + if framework is None: framework = _get_framework() @@ -146,7 +154,6 @@ def _load_repository_from_hf( repo_id=repository_id, repo_type="model", revision=revision, - token=hf_hub_token, ) # apply regex to filter out non-framework specific weights if args.framework is set filtered_repo_files = filter_repo_objects( @@ -267,6 +274,8 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: "sentence-ranking", ]: hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + elif is_diffusers_available() and check_supported_pipeline(model_dir) and task == "text-to-image": + hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) else: hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 76ca5478..9c3df0ee 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -81,8 +81,14 @@ async def predict(request): # log request time # TODO: repalce with middleware logger.info(f"POST {request.url.path} | Duration: {(perf_counter()-start_time) *1000:.2f} ms") + + # response extracts content from request + accept = request.headers.get("accept", None) + if accept is None or accept == "*/*": + accept = "application/json" # deserialized and resonds with json - return Response(Jsoner.serialize(pred), media_type="application/json") + serialized_response_body = ContentType.get_serializer(accept).serialize(pred, accept) + return Response(serialized_response_body, media_type=accept) except Exception as e: logger.error(e) return Response(Jsoner.serialize({"error": str(e)}), status_code=400, media_type="application/json") @@ -98,22 +104,3 @@ async def predict(request): ], on_startup=[some_startup_task], ) - - -# for pegasus it was async -# 1.2rps at 20 with 17s latency -# 1rps at 1 user with 930ms latency - -# for pegasus it was sync -# 1.2rps at 20 with 17s latency -# 1rps at 1 user with 980ms latency -# health is blocking with 17s latency - - -# for tiny it was async -# 107.7rps at 500 with 4.7s latency -# 8.5rps at 1 user with 120ms latency - -# for tiny it was sync -# 109rps at 500 with 4.6s latency -# 8.5rps at 1 user with 120ms latency diff --git a/starlette_requirements.txt b/starlette_requirements.txt index e07d24d8..6139a106 100644 --- a/starlette_requirements.txt +++ b/starlette_requirements.txt @@ -2,4 +2,4 @@ orjson starlette uvicorn pandas -huggingface_hub>=0.9.0 \ No newline at end of file +huggingface_hub>=0.11.0 \ No newline at end of file diff --git a/tests/integ/config.py b/tests/integ/config.py index d427fc1f..467afde2 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -11,6 +11,7 @@ validate_summarization, validate_text2text_generation, validate_text_generation, + validate_text_to_image, validate_translation, validate_zero_shot_classification, ) @@ -101,6 +102,10 @@ "pytorch": "cross-encoder/ms-marco-MiniLM-L-6-v2", "tensorflow": None, }, + "text-to-image": { + "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch", + "tensorflow": None, + }, } @@ -156,6 +161,7 @@ }, "sentence-embeddings": {"inputs": "Lets create an embedding"}, "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]}, + "text-to-image": {"inputs": "a man on a horse jumps over a broken 
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
index e07d24d8..6139a106 100644
--- a/starlette_requirements.txt
+++ b/starlette_requirements.txt
@@ -2,4 +2,4 @@ orjson
 starlette
 uvicorn
 pandas
-huggingface_hub>=0.9.0
\ No newline at end of file
+huggingface_hub>=0.11.0
\ No newline at end of file
diff --git a/tests/integ/config.py b/tests/integ/config.py
index d427fc1f..467afde2 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -11,6 +11,7 @@
     validate_summarization,
     validate_text2text_generation,
     validate_text_generation,
+    validate_text_to_image,
     validate_translation,
     validate_zero_shot_classification,
 )
@@ -101,6 +102,10 @@
         "pytorch": "cross-encoder/ms-marco-MiniLM-L-6-v2",
         "tensorflow": None,
     },
+    "text-to-image": {
+        "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch",
+        "tensorflow": None,
+    },
 }
 
@@ -156,6 +161,7 @@
     },
     "sentence-embeddings": {"inputs": "Lets create an embedding"},
     "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]},
+    "text-to-image": {"inputs": "a man on a horse jumps over a broken down airplane."},
 }
 
 task2output = {
@@ -204,6 +210,7 @@
     "sentence-similarity": {"similarities": ""},
     "sentence-embeddings": {"embeddings": ""},
     "sentence-ranking": {"scores": ""},
+    "text-to-image": bytes,
 }
 
@@ -229,4 +236,5 @@
     "sentence-similarity": validate_zero_shot_classification,
     "sentence-embeddings": validate_zero_shot_classification,
     "sentence-ranking": validate_zero_shot_classification,
+    "text-to-image": validate_text_to_image,
 }
diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index fe30952f..6c343c6a 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -58,6 +58,8 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework:
         prediction = requests.post(
             f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"}
         ).json()
+    elif task == "text-to-image":
+        prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content
     else:
         prediction = requests.post(f"{BASE_URL}", json=input).json()
     assert task2validation[task](result=prediction, snapshot=task2output[task]) is True
@@ -90,6 +92,8 @@
         "sentence-similarity",
         "sentence-embeddings",
         "sentence-ranking",
+        # diffusers
+        "text-to-image",
     ],
 )
 def test_pt_container_remote_model(task) -> None:
@@ -111,6 +115,7 @@ def test_pt_container_remote_model(task) -> None:
         device_requests=device_request,
     )
     # time.sleep(5)
+    verify_task(container, task, port)
     container.stop()
     container.remove()
@@ -143,6 +148,8 @@
         "sentence-similarity",
         "sentence-embeddings",
         "sentence-ranking",
+        # diffusers
+        "text-to-image",
     ],
 )
 def test_pt_container_local_model(task) -> None:
diff --git a/tests/integ/utils.py b/tests/integ/utils.py
index 80c71ccf..813ba751 100644
--- a/tests/integ/utils.py
+++ b/tests/integ/utils.py
@@ -79,3 +79,8 @@ def validate_automatic_speech_recognition(result=None, snapshot=None):
 def validate_object_detection(result=None, snapshot=None):
     assert result[0].keys() == snapshot[0].keys()
     return True
+
+
+def validate_text_to_image(result=None, snapshot=None):
+    assert isinstance(result, snapshot)
+    return True
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
new file mode 100644
index 00000000..f0de073b
--- /dev/null
+++ b/tests/unit/test_diffusers.py
@@ -0,0 +1,40 @@
+import os
+import tempfile
+
+from PIL import Image
+from transformers.testing_utils import require_torch, slow
+
+from huggingface_inference_toolkit.handler import get_inference_handler_either_custom_or_default_handler
+from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, DiffusersPipelineImageToText
+from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline
+
+
+@require_torch
+def test_get_diffusers_pipeline():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        assert isinstance(pipe, DiffusersPipelineImageToText)
+
+
+@slow
+@require_torch
+def test_pipe_on_gpu():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        # the wrapper holds the underlying diffusers pipeline, which carries the device
+        assert pipe.pipeline.device.type == "cuda"
+
+
+@require_torch
+def test_text_to_image_task():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        res = pipe("a man on a horse jumps over a broken down airplane.")
+        assert isinstance(res, Image.Image)
diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py
index 5c9b9886..98e528e5 100644
--- a/tests/unit/test_serializer.py
+++ b/tests/unit/test_serializer.py
@@ -21,6 +21,11 @@ def test_json_image_serialization():
         Jsoner.serialize(t)
 
 
+def test_image_serialization():
+    image = Image.new("RGB", (60, 30), color="red")
+    Imager.serialize(image, accept="image/png")
+
+
 def test_json_deserialization():
     raw_content = b'{\n\t"inputs": "i like you"\n}'
     assert {"inputs": "i like you"} == Jsoner.deserialize(raw_content)
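One gap left by the new unit tests: the error branch of `get_serializer` is uncovered. A possible companion test (not included in this PR):

```python
# Possible companion test, not part of this PR: an unsupported accept type
# should raise, matching get_deserializer's behaviour in base.py.
import pytest

from huggingface_inference_toolkit.serialization.base import ContentType


def test_get_serializer_unknown_accept():
    with pytest.raises(Exception, match="not supported"):
        ContentType.get_serializer("application/x-unknown")
```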