From 35d4b8f04902bff3867f85a889b3585752a35f0c Mon Sep 17 00:00:00 2001 From: philschmid Date: Thu, 17 Aug 2023 13:00:47 +0000 Subject: [PATCH 1/2] fix sd pipel --- dockerfiles/pytorch/cpu/environment.yaml | 2 +- dockerfiles/pytorch/gpu/environment.yaml | 2 +- src/huggingface_inference_toolkit/diffusers_utils.py | 9 +++------ .../webservice_starlette.py | 1 - 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/dockerfiles/pytorch/cpu/environment.yaml b/dockerfiles/pytorch/cpu/environment.yaml index a8792b9a..4bd1b693 100644 --- a/dockerfiles/pytorch/cpu/environment.yaml +++ b/dockerfiles/pytorch/cpu/environment.yaml @@ -8,6 +8,6 @@ dependencies: - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - sentence_transformers==2.2.2 - torchvision==0.14.1 - - diffusers==0.19.3 + - diffusers==0.20.0 - accelerate==0.21.0 - safetensors \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml index a4de43cb..8c1012f7 100644 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ b/dockerfiles/pytorch/gpu/environment.yaml @@ -9,6 +9,6 @@ dependencies: - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - sentence_transformers==2.2.2 - torchvision==0.14.1 - - diffusers==0.19.3 + - diffusers==0.20.0 - accelerate==0.21.0 - safetensors \ No newline at end of file diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 94fe7172..5b5dac64 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -1,5 +1,6 @@ import importlib.util import logging +from transformers.utils.import_utils import is_torch_bf16_gpu_available logger = logging.getLogger(__name__) logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO) @@ -20,7 +21,7 @@ class IEAutoPipelineForText2Image: def __init__(self, model_dir: str, device: str = None): # needs "cuda" for GPU dtype = torch.float32 if device == "cuda": - dtype = torch.float16 + dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float16 device_map = "auto" if device == "cuda" else None self.pipeline = AutoPipelineForText2Image.from_pretrained(model_dir, torch_dtype=dtype, device_map=device_map) @@ -43,11 +44,7 @@ def __call__( logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1.") # Call pipeline with parameters - if self.pipeline.device.type == "cuda": - with torch.autocast("cuda"): - out = self.pipeline(prompt, num_images_per_prompt=1) - else: - out = self.pipeline(prompt, num_images_per_prompt=1) + out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs) return out.images[0] diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 92f4323e..64935925 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -75,7 +75,6 @@ async def predict(request): # check for query parameter and add them to the body if request.query_params and "parameters" not in deserialized_body: deserialized_body["parameters"] = convert_params_to_int_or_bool(dict(request.query_params)) - print(deserialized_body) # tracks request time start_time = perf_counter() From 1597c1f5a5347a98a1918fe04d1cbeb8e7f00032 Mon Sep 17 00:00:00 2001 From: philschmid Date: Thu, 17 Aug 2023 13:03:47 +0000 Subject: [PATCH 2/2] fix style --- src/huggingface_inference_toolkit/diffusers_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 5b5dac64..7068df9d 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -1,5 +1,6 @@ import importlib.util import logging + from transformers.utils.import_utils import is_torch_bf16_gpu_available logger = logging.getLogger(__name__)