From fac9e161cc7ed046aa4dded71b079ae404ec47a1 Mon Sep 17 00:00:00 2001
From: Philipp Schmid
Date: Wed, 2 Aug 2023 07:19:23 +0000
Subject: [PATCH 1/3] adding SD XL

---
 dockerfiles/pytorch/cpu/environment.yaml |  7 ++-
 dockerfiles/pytorch/gpu/environment.yaml |  7 ++-
 .../diffusers_utils.py                   | 59 +++++++------
 3 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/dockerfiles/pytorch/cpu/environment.yaml b/dockerfiles/pytorch/cpu/environment.yaml
index 4c3efe47..a8792b9a 100644
--- a/dockerfiles/pytorch/cpu/environment.yaml
+++ b/dockerfiles/pytorch/cpu/environment.yaml
@@ -5,8 +5,9 @@ dependencies:
 - python=3.9.13
 - pytorch::pytorch=1.13.1=py3.9_cpu_0
 - pip:
-    - transformers[sklearn,sentencepiece,audio,vision]==4.27.2
+    - transformers[sklearn,sentencepiece,audio,vision]==4.31.0
     - sentence_transformers==2.2.2
     - torchvision==0.14.1
-    - diffusers==0.14.0
-    - accelerate==0.17.1
\ No newline at end of file
+    - diffusers==0.19.3
+    - accelerate==0.21.0
+    - safetensors
\ No newline at end of file
diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml
index 375c75cd..7c16e624 100644
--- a/dockerfiles/pytorch/gpu/environment.yaml
+++ b/dockerfiles/pytorch/gpu/environment.yaml
@@ -6,8 +6,9 @@ dependencies:
 - nvidia::cudatoolkit=11.7
 - pytorch::pytorch=1.13.1=py3.9_cuda11.7*
 - pip:
-    - transformers[sklearn,sentencepiece,audio,vision]==4.31.0
+- transformers[sklearn,sentencepiece,audio,vision]==4.31.0
     - sentence_transformers==2.2.2
     - torchvision==0.14.1
-    - diffusers==0.18.2
-    - accelerate==0.21.0
\ No newline at end of file
+    - diffusers==0.19.3
+    - accelerate==0.21.0
+    - safetensors
\ No newline at end of file
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 36cdcb22..3e4e1c21 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -1,6 +1,5 @@
 import importlib.util
-import json
-import os
+import logging
 
 _diffusers = importlib.util.find_spec("diffusers") is not None
 
@@ -11,60 +10,46 @@ def is_diffusers_available():
 
 if is_diffusers_available():
     import torch
-    from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline
+    from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionPipeline
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
 
-def check_supported_pipeline(model_dir):
-    try:
-        with open(os.path.join(model_dir, "model_index.json")) as json_file:
-            data = json.load(json_file)
-            if data["_class_name"] == "StableDiffusionPipeline":
-                return True
-            else:
-                return False
-    except Exception:
-        return False
 
-class DiffusersPipelineImageToText:
+class IEAutoPipelineForText2Image:
     def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
-        self.pipeline = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16)
+        dtype = torch.float32
+        if device == "cuda":
+            dtype = torch.float16
+        device_map = "auto" if device == "cuda" else None
+
+        self.pipeline = AutoPipelineForText2Image.from_pretrained(model_dir, torch_dtype=dtype, device_map=device_map)
         # try to use DPMSolverMultistepScheduler
-        try:
-            self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
-        except Exception:
-            pass
+        if isinstance(self.pipeline, StableDiffusionPipeline):
+            try:
+                self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
+            except Exception:
+                pass
         self.pipeline.to(device)
 
     def __call__(
         self,
         prompt,
-        num_inference_steps=25,
-        guidance_scale=7.5,
-        num_images_per_prompt=1,
-        height=None,
-        width=None,
-        negative_prompt=None,
+        **kwargs,
     ):
         # TODO: add support for more images (reason: keep the output format correct)
-        num_images_per_prompt = 1
+        if "num_images_per_prompt" in kwargs:
+            kwargs.pop("num_images_per_prompt")
+            logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1.")
 
         # Call pipeline with parameters
-        out = self.pipeline(
-            prompt,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            num_images_per_prompt=num_images_per_prompt,
-            negative_prompt=negative_prompt,
-            height=height,
-            width=width,
-        )
+        out = self.pipeline(prompt, num_images_per_prompt=1)
 
         return out.images[0]
 
 
 DIFFUSERS_TASKS = {
-    "text-to-image": DiffusersPipelineImageToText,
+    "text-to-image": IEAutoPipelineForText2Image,
 }

From b49ddb7012dde2715b0422e6510fa03429d3feb2 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Wed, 2 Aug 2023 08:16:25 +0000
Subject: [PATCH 2/3] updated and tested stuff

---
 README.md                                  |  2 ++
 dockerfiles/pytorch/cpu/Dockerfile         |  2 +-
 dockerfiles/pytorch/gpu/Dockerfile         |  2 +-
 dockerfiles/pytorch/gpu/environment.yaml   |  2 +-
 dockerfiles/tensorflow/cpu/Dockerfile      |  2 +-
 dockerfiles/tensorflow/gpu/Dockerfile      |  2 +-
 .../diffusers_utils.py                     |  7 +++--
 src/huggingface_inference_toolkit/utils.py | 29 +++++++++----------
 8 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index d098104c..fb469b1a 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,8 @@ docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerf
 
 ```bash
 docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu
 docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text starlette-transformers:gpu
+docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image starlette-transformers:gpu
+docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image starlette-transformers:gpu
 docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository starlette-transformers:cpu
 ```
diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile
index 551bfa9a..61e573b4 100644
--- a/dockerfiles/pytorch/cpu/Dockerfile
+++ b/dockerfiles/pytorch/cpu/Dockerfile
@@ -23,7 +23,7 @@ RUN apt-get update \
 # install micromamba
 ENV MAMBA_ROOT_PREFIX=/opt/conda
 ENV PATH=/opt/conda/bin:$PATH
-RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
     && touch /root/.bashrc \
     && ./bin/micromamba shell init -s bash -p /opt/conda \
     && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile
index fc98ab71..1a3941a7 100644
--- a/dockerfiles/pytorch/gpu/Dockerfile
+++ b/dockerfiles/pytorch/gpu/Dockerfile
@@ -25,7 +25,7 @@ ENV MAMBA_ROOT_PREFIX=/opt/conda
 ENV PATH=/opt/conda/bin:$PATH
 ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
 
-RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
     && touch /root/.bashrc \
     && ./bin/micromamba shell init -s bash -p /opt/conda \
     && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml
index 7c16e624..a4de43cb 100644
--- a/dockerfiles/pytorch/gpu/environment.yaml
+++ b/dockerfiles/pytorch/gpu/environment.yaml
@@ -6,7 +6,7 @@ dependencies:
 - nvidia::cudatoolkit=11.7
 - pytorch::pytorch=1.13.1=py3.9_cuda11.7*
 - pip:
-- transformers[sklearn,sentencepiece,audio,vision]==4.31.0
+    - transformers[sklearn,sentencepiece,audio,vision]==4.31.0
     - sentence_transformers==2.2.2
     - torchvision==0.14.1
     - diffusers==0.19.3
diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile
index b0443845..c52abf13 100644
--- a/dockerfiles/tensorflow/cpu/Dockerfile
+++ b/dockerfiles/tensorflow/cpu/Dockerfile
@@ -23,7 +23,7 @@ RUN apt-get update \
 # install micromamba
 ENV MAMBA_ROOT_PREFIX=/opt/conda
 ENV PATH=/opt/conda/bin:$PATH
-RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
     && touch /root/.bashrc \
     && ./bin/micromamba shell init -s bash -p /opt/conda \
     && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile
index 785bcc79..d989111c 100644
--- a/dockerfiles/tensorflow/gpu/Dockerfile
+++ b/dockerfiles/tensorflow/gpu/Dockerfile
@@ -26,7 +26,7 @@ ENV MAMBA_ROOT_PREFIX=/opt/conda
 ENV PATH=/opt/conda/bin:$PATH
 ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
 
-RUN curl -L https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
+RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \
     && touch /root/.bashrc \
     && ./bin/micromamba shell init -s bash -p /opt/conda \
     && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc
diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index 3e4e1c21..b3826ea0 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -43,8 +43,11 @@ def __call__(
             logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1.")
 
         # Call pipeline with parameters
-        out = self.pipeline(prompt, num_images_per_prompt=1)
-
+        if self.pipeline.device.type == "cuda":
+            with torch.autocast("cuda"):
+                out = self.pipeline(prompt, num_images_per_prompt=1)
+        else:
+            out = self.pipeline(prompt, num_images_per_prompt=1)
         return out.images[0]
 
 
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 8456ad7d..3b00d61e 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -4,14 +4,13 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from huggingface_hub import login, snapshot_download
+from huggingface_hub import HfApi, login, snapshot_download
 from transformers import WhisperForConditionalGeneration, pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
 from transformers.pipelines import Conversation, Pipeline
 
 from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME
 from huggingface_inference_toolkit.diffusers_utils import (
-    check_supported_pipeline,
     get_diffusers_pipeline,
     is_diffusers_available,
 )
@@ -46,11 +45,12 @@ def is_optimum_available():
     "pt": "pytorch*",
     "flax": "flax*",
     "rust": "rust*",
-    "onnx": "*onnx",
+    "onnx": "*onnx*",
     "safetensors": "*safetensors",
     "coreml": "*mlmodel",
     "tflite": "*tflite",
     "savedmodel": "*tar.gz",
+    "openvino": "*openvino*",
     "ckpt": "*ckpt",
 }
 
@@ -59,18 +59,8 @@ def create_artifact_filter(framework):
     """
     Returns a list of regex patterns based on the DL framework, which will be used to ignore files when downloading
     """
-    ignore_regex_list = [
-        "pytorch*",
-        "tf*",
-        "flax*",
-        "rust*",
-        "*onnx",
-        "*safetensors",
-        "*mlmodel",
-        "*tflite",
-        "*tar.gz",
-        "*ckpt",
-    ]
+    ignore_regex_list = list(framework2weight.values())
+
     pattern = framework2weight.get(framework, None)
     if pattern in ignore_regex_list:
         ignore_regex_list.remove(pattern)
@@ -157,6 +147,13 @@ def _load_repository_from_hf(
     if not target_dir.exists():
         target_dir.mkdir(parents=True)
 
+    # check if safetensors weights are available
+    if framework == "pytorch":
+        files = HfApi().model_info(repository_id).siblings
+        if any(f.rfilename.endswith("safetensors") for f in files):
+            framework = "safetensors"
+
+    # create regex to only include the framework specific weights
     ignore_regex = create_artifact_filter(framework)
     logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
@@ -259,7 +256,7 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline:
         "sentence-ranking",
     ]:
         hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
-    elif is_diffusers_available() and check_supported_pipeline(model_dir) and task == "text-to-image":
+    elif is_diffusers_available() and task == "text-to-image":
         hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs)
     else:
         hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs)

From 9d53cc2591d8d4d545bbb077b74e4a1907d54a13 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Wed, 2 Aug 2023 08:41:19 +0000
Subject: [PATCH 3/3] fix unit tests

---
 src/huggingface_inference_toolkit/diffusers_utils.py | 6 +++---
 src/huggingface_inference_toolkit/utils.py           | 7 +++----
 tests/unit/test_diffusers.py                         | 6 +++---
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
index b3826ea0..94fe7172 100644
--- a/src/huggingface_inference_toolkit/diffusers_utils.py
+++ b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -1,6 +1,9 @@
 import importlib.util
 import logging
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
+
 _diffusers = importlib.util.find_spec("diffusers") is not None
 
 
@@ -12,9 +15,6 @@ def is_diffusers_available():
     import torch
     from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionPipeline
 
-logger = logging.getLogger(__name__)
-logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
-
 
 class IEAutoPipelineForText2Image:
     def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index 3b00d61e..ffe8d2c3 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -59,7 +59,7 @@ def create_artifact_filter(framework):
     """
     Returns a list of regex patterns based on the DL framework, which will be used to ignore files when downloading
     """
-    ignore_regex_list = list(framework2weight.values())
+    ignore_regex_list = list(set(framework2weight.values()))
 
     pattern = framework2weight.get(framework, None)
     if pattern in ignore_regex_list:
@@ -153,7 +153,6 @@ def _load_repository_from_hf(
         if any(f.rfilename.endswith("safetensors") for f in files):
             framework = "safetensors"
 
-    # create regex to only include the framework specific weights
     ignore_regex = create_artifact_filter(framework)
     logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }")
@@ -284,8 +283,8 @@ def convert_params_to_int_or_bool(params):
     for k, v in params.items():
         if v.isnumeric():
             params[k] = int(v)
-        if v == 'false':
+        if v == "false":
             params[k] = False
-        if v == 'true':
+        if v == "true":
             params[k] = True
     return params
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
index f0de073b..32b10cf0 100644
--- a/tests/unit/test_diffusers.py
+++ b/tests/unit/test_diffusers.py
@@ -3,8 +3,8 @@
 from PIL import Image
 from transformers.testing_utils import require_torch, slow
 
-from huggingface_inference_toolkit.handler import get_inference_handler_either_custom_or_default_handler
-from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, DiffusersPipelineImageToText
+
+from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, IEAutoPipelineForText2Image
 from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline
 
 
@@ -15,7 +15,7 @@ def test_get_diffusers_pipeline():
         "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch"
     )
     pipe = get_pipeline("text-to-image", storage_dir.as_posix())
-    assert isinstance(pipe, DiffusersPipelineImageToText)
+    assert isinstance(pipe, IEAutoPipelineForText2Image)
 
 
 @slow
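
To smoke-test the new `text-to-image` task end to end, start a container with one of the `docker run` commands added to the README in PATCH 2/3, then send a prompt. The sketch below is a minimal client, not part of this patch series: the root endpoint path, the `{"inputs": ...}` payload, and the binary PNG response are assumptions based on the toolkit's Inference-API-compatible request schema; adjust them to your deployment.

```python
# Hypothetical smoke test for a container started with HF_TASK=text-to-image.
# Assumptions: the server listens on localhost:5000, accepts a JSON body of
# the form {"inputs": "<prompt>"}, and returns the generated image as bytes.
import requests

response = requests.post(
    "http://localhost:5000",
    json={"inputs": "a photo of an astronaut riding a horse"},
    headers={"Accept": "image/png"},
)
response.raise_for_status()

# IEAutoPipelineForText2Image forces num_images_per_prompt to 1, so the
# response body should contain exactly one image.
with open("generation.png", "wb") as f:
    f.write(response.content)
```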