From 90b54faa98d741adf61765df6c4f782f59324192 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 15:47:40 +0000
Subject: [PATCH 01/11] add stable diffusion models

---
 dockerfiles/starlette/pytorch/Dockerfile.gpu |  3 +-
 setup.py                                     |  1 +
 .../diffusers_utils.py                       | 55 +++++++++++++++++++
 .../serialization/audio_utils.py             |  2 +-
 .../serialization/base.py                    |  9 +++
 .../serialization/image_utils.py             | 10 +++-
 .../serialization/json_utils.py              |  2 +-
 src/huggingface_inference_toolkit/utils.py   |  7 +++
 .../webservice_starlette.py                  | 27 +++------
 starlette_requirements.txt                   |  4 +-
 tests/integ/config.py                        |  8 +++
 tests/integ/test_container.py                |  8 +++
 tests/integ/utils.py                         |  6 ++
 tests/unit/test_diffusers.py                 | 40 ++++++++++++++
 tests/unit/test_serializer.py                |  5 ++
 15 files changed, 161 insertions(+), 26 deletions(-)
 create mode 100644 src/huggingface_inference_toolkit/diffusers_utils.py
 create mode 100644 tests/unit/test_diffusers.py

diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu
index 45bc7f52..b75be46f 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.gpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.gpu
@@ -12,4 +12,5 @@ COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
 COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py
 
 # run app
-ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]
\ No newline at end of file
+ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]
+
diff --git a/setup.py b/setup.py
index 9200e5e8..0f4fecd0 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ extras = {}
 
 extras["st"] = ["sentence_transformers"]
+extras["diffusers"] = ["diffusers==0.8.1", "accelerate==0.14.0"]
 
 
 # Hugging Face specific dependencies
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
new file mode 100644
index 00000000..3258a459
--- /dev/null
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -0,0 +1,55 @@
+import importlib.util
+import json
+import os
+
+_diffusers = importlib.util.find_spec("diffusers") is not None
+
+
+def is_diffusers_available():
+    return _diffusers
+
+
+if is_diffusers_available():
+    from diffusers import StableDiffusionPipeline
+
+
+def check_supported_pipeline(model_dir):
+    try:
+        with open(os.path.join(model_dir), "model_index.json") as json_file:
+            data = json.load(json_file)
+            if data["_class_name"] == "StableDiffusionPipeline":
+                return True
+            else:
+                return False
+    except:
+        return False
+
+
+class DiffusersPipelineImageToText:
+    def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
+        # try:
+        #     pipeline = DIFFUSERS_TASKS[task].from_pretrained(model_dir, torch_dtype=torch.float16,)
+        # except:
+        self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir)
+        self.pipeline.to(device)
+
+    def __call__(self, prompt, **kwargs):
+
+        if kwargs:
+            out = self.pipeline(prompt, **kwargs)
+        else:
+            out = self.pipeline(prompt)
+
+        # TODO: only return 1 image currently
+        return out.images[0]
+
+
+DIFFUSERS_TASKS = {
+    "text-to-image": DiffusersPipelineImageToText,
+}
+
+
+def get_diffusers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
+    device = "cuda" if device == 0 else "cpu"
+    pipeline = DIFFUSERS_TASKS[task](model_dir=model_dir, device=device)
+    return pipeline
diff --git a/src/huggingface_inference_toolkit/serialization/audio_utils.py b/src/huggingface_inference_toolkit/serialization/audio_utils.py
index 682f1a8a..bad4105b 100644
--- a/src/huggingface_inference_toolkit/serialization/audio_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/audio_utils.py
@@ -4,5 +4,5 @@ def deserialize(body):
         return {"inputs": bytes(body)}
 
     @staticmethod
-    def serialize(body):
+    def serialize(body, accept=None):
         raise NotImplementedError("Audio serialization not implemented")
diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py
index d55c5b99..d3b936d9 100644
--- a/src/huggingface_inference_toolkit/serialization/base.py
+++ b/src/huggingface_inference_toolkit/serialization/base.py
@@ -46,3 +46,12 @@ def get_deserializer(content_type):
             raise Exception(
                 f'Content type "{content_type}" not supported. Supported content types are: {", ".join(list(content_type_mapping.keys()))}'
             )
+
+    @staticmethod
+    def get_serializer(accept):
+        if accept in content_type_mapping:
+            return content_type_mapping[accept]
+        else:
+            raise Exception(
+                f'Accept type "{accept}" not supported. Supported accept types are: {", ".join(list(content_type_mapping.keys()))}'
+            )
diff --git a/src/huggingface_inference_toolkit/serialization/image_utils.py b/src/huggingface_inference_toolkit/serialization/image_utils.py
index ecec38c9..43dbbe15 100644
--- a/src/huggingface_inference_toolkit/serialization/image_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/image_utils.py
@@ -10,5 +10,11 @@ def deserialize(body):
         return {"inputs": image}
 
     @staticmethod
-    def serialize(body):
-        raise NotImplementedError("Image serialization not implemented")
+    def serialize(image, accept=None):
+        if isinstance(image, Image.Image):
+            img_byte_arr = BytesIO()
+            image.save(img_byte_arr, format=accept.split("/")[-1].upper())
+            img_byte_arr = img_byte_arr.getvalue()
+            return img_byte_arr
+        else:
+            raise ValueError(f"Can only serialize PIL.Image.Image, got {type(image)}")
diff --git a/src/huggingface_inference_toolkit/serialization/json_utils.py b/src/huggingface_inference_toolkit/serialization/json_utils.py
index fb15e457..a6387000 100644
--- a/src/huggingface_inference_toolkit/serialization/json_utils.py
+++ b/src/huggingface_inference_toolkit/serialization/json_utils.py
@@ -20,7 +20,7 @@ def deserialize(body):
         return orjson.loads(body)
 
     @staticmethod
-    def serialize(body):
+    def serialize(body, accept=None):
         return orjson.dumps(body, option=orjson.OPT_SERIALIZE_NUMPY, default=default)
 
 
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 18dc4aa3..7aa4bebf 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -13,6 +13,11 @@ from transformers.pipelines import Conversation, Pipeline
 
 from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME
+from huggingface_inference_toolkit.diffusers_utils import (
+    get_diffusers_pipeline,
+    is_diffusers_available,
+    check_supported_pipeline,
+)
 from huggingface_inference_toolkit.sentence_transformers_utils import (
     get_sentence_transformers_pipeline,
     is_sentence_transformers_available,
 )
@@ -267,6 +272,8 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline:
         "sentence-ranking",
     ]:
         hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
+    elif is_diffusers_available() and check_supported_pipeline(model_dir) and task == "text-to-image":
+        hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
     else:
         hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)
 
diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py
index 76ca5478..9c3df0ee 100644
--- a/src/huggingface_inference_toolkit/webservice_starlette.py
+++ b/src/huggingface_inference_toolkit/webservice_starlette.py
@@ -81,8 +81,14 @@ async def predict(request):
         # log request time
         # TODO: replace with middleware
         logger.info(f"POST {request.url.path} | Duration: {(perf_counter()-start_time) *1000:.2f} ms")
+
+        # read the accept header to determine the response content type
+        accept = request.headers.get("accept", None)
+        if accept is None or accept == "*/*":
+            accept = "application/json"
         # deserialized and responds with json
-        return Response(Jsoner.serialize(pred), media_type="application/json")
+        serialized_response_body = ContentType.get_serializer(accept).serialize(pred, accept)
+        return Response(serialized_response_body, media_type=accept)
     except Exception as e:
         logger.error(e)
         return Response(Jsoner.serialize({"error": str(e)}), status_code=400, media_type="application/json")
@@ -98,22 +104,3 @@ async def predict(request):
     ],
     on_startup=[some_startup_task],
 )
-
-
-# for pegasus it was async
-# 1.2rps at 20 with 17s latency
-# 1rps at 1 user with 930ms latency
-
-# for pegasus it was sync
-# 1.2rps at 20 with 17s latency
-# 1rps at 1 user with 980ms latency
-# health is blocking with 17s latency
-
-
-# for tiny it was async
-# 107.7rps at 500 with 4.7s latency
-# 8.5rps at 1 user with 120ms latency
-
-# for tiny it was sync
-# 109rps at 500 with 4.6s latency
-# 8.5rps at 1 user with 120ms latency
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
index e07d24d8..0483ed85 100644
--- a/starlette_requirements.txt
+++ b/starlette_requirements.txt
@@ -2,4 +2,6 @@ orjson
 starlette
 uvicorn
 pandas
-huggingface_hub>=0.9.0
\ No newline at end of file
+huggingface_hub>=0.9.0
+diffusers==0.8.1
+accelerate==0.14.0
\ No newline at end of file
diff --git a/tests/integ/config.py b/tests/integ/config.py
index d427fc1f..6048e46f 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -11,6 +11,7 @@
     validate_summarization,
     validate_text2text_generation,
     validate_text_generation,
+    validate_text_to_image,
     validate_translation,
     validate_zero_shot_classification,
 )
@@ -101,6 +102,10 @@
         "pytorch": "cross-encoder/ms-marco-MiniLM-L-6-v2",
         "tensorflow": None,
     },
+    "text-to-image": {
+        "pytorch": "diffusers/tiny-stable-diffusion-torch",
+        "tensorflow": None,
+    },
 }
 
 
@@ -156,6 +161,7 @@
     },
     "sentence-embeddings": {"inputs": "Lets create an embedding"},
     "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]},
+    "text-to-image": {"inputs": "a man on a horse jumps over a broken down airplane."},
 }
 
 task2output = {
@@ -204,6 +210,7 @@
     "sentence-similarity": {"similarities": ""},
     "sentence-embeddings": {"embeddings": ""},
     "sentence-ranking": {"scores": ""},
+    "text-to-image": bytes,
 }
 
 
@@ -229,4 +236,5 @@
     "sentence-similarity": validate_zero_shot_classification,
     "sentence-embeddings": validate_zero_shot_classification,
     "sentence-ranking": validate_zero_shot_classification,
+    "text-to-image": validate_text_to_image,
 }
diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index fe30952f..81f5038f 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -11,6 +11,7 @@
 from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests
 
 IS_GPU = _run_slow_tests
+IS_GPU = True
 DEVICE = "gpu" if IS_GPU else "cpu"
 
 client = docker.from_env()
@@ -58,6 +59,8 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework:
         prediction = requests.post(
             f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"}
         ).json()
+    elif task == "text-to-image":
+        prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content
     else:
         prediction = requests.post(f"{BASE_URL}", json=input).json()
     assert task2validation[task](result=prediction, snapshot=task2output[task]) is True
@@ -90,6 +93,8 @@ def verify_task(container: DockerClient, task: str, port: int = 5000, framework:
         "sentence-similarity",
         "sentence-embeddings",
         "sentence-ranking",
+        # diffusers
+        "text-to-image",
     ],
 )
 def test_pt_container_remote_model(task) -> None:
@@ -111,6 +116,7 @@ def test_pt_container_remote_model(task) -> None:
         device_requests=device_request,
     )
     # time.sleep(5)
+
     verify_task(container, task, port)
     container.stop()
     container.remove()
@@ -143,6 +149,8 @@ def test_pt_container_remote_model(task) -> None:
         "sentence-similarity",
         "sentence-embeddings",
         "sentence-ranking",
+        # diffusers
+        "text-to-image",
     ],
 )
 def test_pt_container_local_model(task) -> None:
diff --git a/tests/integ/utils.py b/tests/integ/utils.py
index 80c71ccf..2abbbf16 100644
--- a/tests/integ/utils.py
+++ b/tests/integ/utils.py
@@ -79,3 +79,9 @@ def validate_automatic_speech_recognition(result=None, snapshot=None):
 def validate_object_detection(result=None, snapshot=None):
     assert result[0].keys() == snapshot[0].keys()
     return True
+
+
+def validate_text_to_image(result=None, snapshot=None):
+    assert isinstance(result, snapshot)
+    return True
+ 
\ No newline at end of file
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
new file mode 100644
index 00000000..346f2c59
--- /dev/null
+++ b/tests/unit/test_diffusers.py
@@ -0,0 +1,40 @@
+import os
+import tempfile
+from PIL import Image
+from transformers.testing_utils import require_torch, slow
+
+from huggingface_inference_toolkit.handler import get_inference_handler_either_custom_or_default_handler
+from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, DiffusersPipelineImageToText
+from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline
+
+
+@require_torch
+def test_get_diffusers_pipeline():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        assert isinstance(pipe, DiffusersPipelineImageToText)
+
+
+@slow
+@require_torch
+def test_pipe_on_gpu():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        assert pipe.device.type == "cuda"
+
+
+@require_torch
+def test_text_to_image_task():
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        storage_dir = _load_repository_from_hf(
+            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+        )
+        pipe = get_pipeline("text-to-image", storage_dir.as_posix())
+        res = pipe("Lets create an embedding")
+        assert isinstance(res, Image.Image)
diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py
index 5c9b9886..98e528e5 100644
--- a/tests/unit/test_serializer.py
+++ b/tests/unit/test_serializer.py
@@ -21,6 +21,11 @@ def test_json_image_serialization():
     Jsoner.serialize(t)
 
 
+def test_image_serialization():
+    image = Image.new("RGB", (60, 30), color="red")
+    Imager.serialize(image, accept="image/png")
+
+
 def test_json_deserialization():
     raw_content = b'{\n\t"inputs": "i like you"\n}'
     assert {"inputs": "i like you"} == Jsoner.deserialize(raw_content)

From fe216c2a0cebb8db29d05b72b43831021751c7cb Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 16:41:05 +0000
Subject: [PATCH 02/11] add more checks

---
 README.md                                            |  1 +
 dockerfiles/starlette/pytorch/Dockerfile.gpu         |  2 +-
 .../diffusers_utils.py                               | 18 +++++++++---------
 src/huggingface_inference_toolkit/utils.py           |  6 ++++--
 starlette_requirements.txt                           |  2 +-
 tests/integ/utils.py                                 |  1 -
 6 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 75080871..63c2b568 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,7 @@ docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/D
 docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu
 ```
 
+
 3. Send request. The API schema is the same as from the [inference API](https://huggingface.co/docs/api-inference/detailed_parameters)
 
 ```bash
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu
index b75be46f..a04e9053 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.gpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.gpu
@@ -5,7 +5,7 @@ COPY starlette_requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
 
 # Think about a better solution -> base container has pt 1.11. that's why we need below 0.13
-RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"
+RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 3258a459..09ffa2f0 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -14,15 +14,15 @@ def is_diffusers_available():
 
 
 def check_supported_pipeline(model_dir):
-    try:
-        with open(os.path.join(model_dir), "model_index.json") as json_file:
-            data = json.load(json_file)
-            if data["_class_name"] == "StableDiffusionPipeline":
-                return True
-            else:
-                return False
-    except:
-        return False
+    try:
+        with open(os.path.join(model_dir, "model_index.json")) as json_file:
+            data = json.load(json_file)
+            if data["_class_name"] == "StableDiffusionPipeline":
+                return True
+            else:
+                return False
+    except:
+        return False
 
 
 class DiffusersPipelineImageToText:
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 7aa4bebf..b1b7e6fd 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, login
 from huggingface_hub.file_download import cached_download, hf_hub_url
 from huggingface_hub.utils import filter_repo_objects
 from transformers import pipeline
@@ -132,6 +132,9 @@ def _load_repository_from_hf(
     """
     Load a model from huggingface hub.
     """
+    if hf_hub_token is not None:
+        login(token=hf_hub_token)
+
     if framework is None:
         framework = _get_framework()
 
@@ -151,7 +154,6 @@ def _load_repository_from_hf(
         repo_id=repository_id,
         repo_type="model",
         revision=revision,
-        token=hf_hub_token,
     )
     # apply regex to filter out non-framework specific weights if args.framework is set
     filtered_repo_files = filter_repo_objects(
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
index 0483ed85..b59bd66e 100644
--- a/starlette_requirements.txt
+++ b/starlette_requirements.txt
@@ -2,6 +2,6 @@ orjson
 starlette
 uvicorn
 pandas
-huggingface_hub>=0.9.0
+huggingface_hub>=0.11.0
 diffusers==0.8.1
 accelerate==0.14.0
\ No newline at end of file
diff --git a/tests/integ/utils.py b/tests/integ/utils.py
index 2abbbf16..813ba751 100644
--- a/tests/integ/utils.py
+++ b/tests/integ/utils.py
@@ -84,4 +84,3 @@ def validate_object_detection(result=None, snapshot=None):
 def validate_text_to_image(result=None, snapshot=None):
     assert isinstance(result, snapshot)
     return True
- 
\ No newline at end of file

From fab4f60f011a53c1329b19e233590851a9237653 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 16:49:18 +0000
Subject: [PATCH 03/11] add fp16 loading

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0f4fecd0..b81e84dd 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@
 # Hugging Face specific dependencies
 # framework specific dependencies
 extras["torch"] = ["torch>=1.8.0", "torchaudio"]
-extras["tensorflow"] = ["tensorflow>=2.4.0"]
+extras["tensorflow"] = ["tensorflow==2.9.0"]
 # test and quality
 extras["test"] = [
     "pytest",

From cfc46406c726007c06e4e815184875cbf34c8805 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 16:49:23 +0000
Subject: [PATCH 04/11] added fp16 loading

---
 src/huggingface_inference_toolkit/diffusers_utils.py | 7 +++++--
 src/huggingface_inference_toolkit/utils.py           | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 09ffa2f0..23b72f83 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -2,6 +2,9 @@
 import json
 import os
 
+import torch
+
+
 _diffusers = importlib.util.find_spec("diffusers") is not None
 
@@ -21,7 +24,7 @@ def check_supported_pipeline(model_dir):
             return True
         else:
             return False
-    except:
+    except Exception:
         return False
 
 
@@ -30,7 +33,7 @@ class DiffusersPipelineImageToText:
     def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
         # try:
         #     pipeline = DIFFUSERS_TASKS[task].from_pretrained(model_dir, torch_dtype=torch.float16,)
         # except:
-        self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir)
+        self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
         self.pipeline.to(device)
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index b1b7e6fd..dd9437c6 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -14,9 +14,9 @@ from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME
 from huggingface_inference_toolkit.diffusers_utils import (
+    check_supported_pipeline,
     get_diffusers_pipeline,
     is_diffusers_available,
-    check_supported_pipeline,
 )
 from huggingface_inference_toolkit.sentence_transformers_utils import (
     get_sentence_transformers_pipeline,

From 93625e38ba9ead78b913baf2e6a1280fa69421d0 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 16:51:44 +0000
Subject: [PATCH 05/11] added comments

---
 src/huggingface_inference_toolkit/diffusers_utils.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 23b72f83..637b7d29 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -30,9 +30,6 @@ class DiffusersPipelineImageToText:
     def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
-        # try:
-        #     pipeline = DIFFUSERS_TASKS[task].from_pretrained(model_dir, torch_dtype=torch.float16,)
-        # except:
         self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
         self.pipeline.to(device)
 
     def __call__(self, prompt, **kwargs):
 
@@ -43,7 +40,7 @@ def __call__(self, prompt, **kwargs):
         else:
             out = self.pipeline(prompt)
 
-        # TODO: only return 1 image currently
+        # TODO: return more than 1 image if requested
         return out.images[0]
 
 
@@ -53,6 +50,7 @@ def __call__(self, prompt, **kwargs):
 
 def get_diffusers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
+    """Get a pipeline for Diffusers models."""
     device = "cuda" if device == 0 else "cpu"
     pipeline = DIFFUSERS_TASKS[task](model_dir=model_dir, device=device)
     return pipeline

From 670c356bf6ecc07d62dae2238dcd01d5a8fa161e Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 16:57:59 +0000
Subject: [PATCH 06/11] fix import

---
 src/huggingface_inference_toolkit/diffusers_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 637b7d29..99f7002f 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -2,8 +2,6 @@
 import json
 import os
 
-import torch
-
 
 _diffusers = importlib.util.find_spec("diffusers") is not None
 
@@ -14,6 +12,7 @@ def is_diffusers_available():
 
 if is_diffusers_available():
     from diffusers import StableDiffusionPipeline
+    import torch
 
 
 def check_supported_pipeline(model_dir):

From 9f0ac4066d3165f921bd8ee2b53ecb3a2ff18627 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 17:19:54 +0000
Subject: [PATCH 07/11] fix quality

---
 src/huggingface_inference_toolkit/diffusers_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 99f7002f..3db96940 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -11,9 +11,10 @@ def is_diffusers_available():
 
 
 if is_diffusers_available():
-    from diffusers import StableDiffusionPipeline
     import torch
+
+    from diffusers import StableDiffusionPipeline
 
 
 def check_supported_pipeline(model_dir):

From 2b5e6a7bed01652ab93109f064cef4fc99fed05a Mon Sep 17 00:00:00 2001
From: philschmid
Date: Thu, 24 Nov 2022 17:34:39 +0000
Subject: [PATCH 08/11] remove flag

---
 tests/integ/test_container.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index 81f5038f..6c343c6a 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -11,7 +11,6 @@
 from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests
 
 IS_GPU = _run_slow_tests
-IS_GPU = True
 DEVICE = "gpu" if IS_GPU else "cpu"
 
 client = docker.from_env()

From 1c0d8aa200b215318f01f08a7116db027449a1c6 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Fri, 25 Nov 2022 06:54:41 +0000
Subject: [PATCH 09/11] check fix action order

---
 .github/workflows/gpu-integ-test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml
index bcc3b986..703e4e28 100644
--- a/.github/workflows/gpu-integ-test.yaml
+++ b/.github/workflows/gpu-integ-test.yaml
@@ -69,7 +69,7 @@ jobs:
   tensorflow-integration-test:
     needs:
       - start-runner
-      # - pytorch-integration-test
+      - pytorch-integration-test
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     env:
       AWS_REGION: us-east-1

From f0b0cb5f469e5a9451e212872d736f828259a444 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Fri, 25 Nov 2022 08:12:57 +0000
Subject: [PATCH 10/11] change tiny model

---
 tests/integ/config.py        | 2 +-
 tests/unit/test_diffusers.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/integ/config.py b/tests/integ/config.py
index 6048e46f..467afde2 100644
--- a/tests/integ/config.py
+++ b/tests/integ/config.py
@@ -103,7 +103,7 @@
         "tensorflow": None,
     },
     "text-to-image": {
-        "pytorch": "diffusers/tiny-stable-diffusion-torch",
+        "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch",
         "tensorflow": None,
     },
 }
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
index 346f2c59..f0de073b 100644
--- a/tests/unit/test_diffusers.py
+++ b/tests/unit/test_diffusers.py
@@ -12,7 +12,7 @@
 def test_get_diffusers_pipeline():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
         )
         pipe = get_pipeline("text-to-image", storage_dir.as_posix())
         assert isinstance(pipe, DiffusersPipelineImageToText)
@@ -23,7 +23,7 @@ def test_get_diffusers_pipeline():
 def test_pipe_on_gpu():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
         )
         pipe = get_pipeline("text-to-image", storage_dir.as_posix())
         assert pipe.device.type == "cuda"
@@ -33,7 +33,7 @@ def test_pipe_on_gpu():
     with tempfile.TemporaryDirectory() as tmpdirname:
         storage_dir = _load_repository_from_hf(
-            "diffusers/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
+            "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
         )
         pipe = get_pipeline("text-to-image", storage_dir.as_posix())
         res = pipe("Lets create an embedding")
         assert isinstance(res, Image.Image)

From a94a95694756753660b969b905fb7a2774e692e2 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Fri, 25 Nov 2022 09:41:17 +0000
Subject: [PATCH 11/11] only install diffusers in pytorch

---
 dockerfiles/starlette/pytorch/Dockerfile.cpu | 4 ++--
 dockerfiles/starlette/pytorch/Dockerfile.gpu | 4 ++--
 starlette_requirements.txt                   | 4 +---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/dockerfiles/starlette/pytorch/Dockerfile.cpu b/dockerfiles/starlette/pytorch/Dockerfile.cpu
index fe34fc77..04d0a1db 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.cpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.cpu
@@ -4,8 +4,8 @@ FROM huggingface/transformers-inference:4.24.0-pt1.13-cpu
 COPY starlette_requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
 
-# Think about a better solution -> base container has pt 1.11. that's why we need below 0.13
-RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"
+# Think about a better solution -> base container has pt 1.13. that's why we need below 0.14
+RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0" diffusers=="0.8.1" accelerate=="0.14.0"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/dockerfiles/starlette/pytorch/Dockerfile.gpu b/dockerfiles/starlette/pytorch/Dockerfile.gpu
index a04e9053..33e02056 100644
--- a/dockerfiles/starlette/pytorch/Dockerfile.gpu
+++ b/dockerfiles/starlette/pytorch/Dockerfile.gpu
@@ -4,8 +4,8 @@ FROM huggingface/transformers-inference:4.24.0-pt1.13-cuda11.6
 COPY starlette_requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
 
-# Think about a better solution -> base container has pt 1.11. that's why we need below 0.13
-RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0"
+# Think about a better solution -> base container has pt 1.13. that's why we need below 0.14
+RUN pip install --no-cache-dir sentence_transformers torchvision~="0.14.0" diffusers=="0.8.1" accelerate=="0.14.0"
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
diff --git a/starlette_requirements.txt b/starlette_requirements.txt
index b59bd66e..6139a106 100644
--- a/starlette_requirements.txt
+++ b/starlette_requirements.txt
@@ -2,6 +2,4 @@ orjson
 starlette
 uvicorn
 pandas
-huggingface_hub>=0.11.0
-diffusers==0.8.1
-accelerate==0.14.0
\ No newline at end of file
+huggingface_hub>=0.11.0
\ No newline at end of file
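
Usage note (illustrative, not part of the patch series): a minimal Python sketch of the accept-header flow these patches add, mirroring the request made in tests/integ/test_container.py. It assumes a built container is serving a text-to-image model on localhost:5000 and that the prediction route is mounted at the root path; the output filename is arbitrary.

```python
import requests

# POST the prompt as JSON and request an image back via the Accept header;
# the toolkit's ImageSerializer turns the generated PIL image into raw PNG bytes.
response = requests.post(
    "http://localhost:5000",
    json={"inputs": "a man on a horse jumps over a broken down airplane."},
    headers={"accept": "image/png"},
)

# the response body is the serialized image, not a JSON document
with open("generated_image.png", "wb") as f:
    f.write(response.content)
```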