From 45d4116eb8eb024e1224fac3e3efcaeb96dc5e2b Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 09:46:59 +0530 Subject: [PATCH 01/11] feat: serialization of the python modules. --- src/diffusers/configuration_utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index a67fa9d41ca5..a1f4b1a2f25a 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -21,6 +21,7 @@ import json import os import re +import sys from collections import OrderedDict from pathlib import PosixPath from typing import Any, Dict, Tuple, Union @@ -31,6 +32,9 @@ from requests import HTTPError from . import __version__ +from .models import _import_structure as model_modules +from .pipelines import _import_structure as pipeline_modules +from .schedulers import _import_structure as scheduler_modules from .utils import ( DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, @@ -46,6 +50,10 @@ _re_configuration_file = re.compile(r"config\.(.*)\.json") +_all_available_pipeline_component_modules = ( + list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values()) +) + class FrozenDict(OrderedDict): def __init__(self, *args, **kwargs): @@ -162,6 +170,21 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool self.to_json_file(output_config_file) logger.info(f"Configuration saved in {output_config_file}") + # Additionally, save the implementation file too. It can happen for a pipeline, for a model, and + # for a scheduler. 
+ if self.__class__.__name__ not in _all_available_pipeline_component_modules: + module_to_save = self.__class__.__module__ + absolute_module_path = sys.modules[module_to_save].__file__ + try: + with open(absolute_module_path, "r") as original_file: + content = original_file.read() + path_to_write = os.path.join(save_directory, f"{module_to_save}.py") + with open(path_to_write, "w") as new_file: + new_file.write(content) + logger.info(f"{module_to_save}.py saved in {save_directory}") + except Exception as e: + logger.error(e) + if push_to_hub: commit_message = kwargs.pop("commit_message", None) private = kwargs.pop("private", False) From 462d443c148a0ef9c79eac51ffd794113c40dae3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 11:50:28 +0530 Subject: [PATCH 02/11] fix: serialization --- src/diffusers/configuration_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index a1f4b1a2f25a..bb210ca12578 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -590,7 +590,11 @@ def to_json_string(self) -> str: String containing all the attributes that make up the configuration instance in JSON format. 
""" config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} - config_dict["_class_name"] = self.__class__.__name__ + cls_name = self.__class__.__name__ + if cls_name not in _all_available_pipeline_component_modules: + config_dict["_class_name"] = [str(self.__class__.__module__), cls_name] + else: + config_dict["_class_name"] = cls_name config_dict["_diffusers_version"] = __version__ def to_json_saveable(value): From cc934eb032a85ef9e3cfe3e05b72ee1ed3ab1c7a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 16:27:58 +0530 Subject: [PATCH 03/11] doc --- .../tutorials/custom_pipelines_components.md | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 docs/source/en/tutorials/custom_pipelines_components.md diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md new file mode 100644 index 000000000000..f0b58958bf0a --- /dev/null +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -0,0 +1,116 @@ + + +Diffusers supports the use [custom pipelines](../using-diffusers/contribute_pipeline) letting the users add any additional features on top of the [`DiffusionPipeline`]. However, it can get cumbersome if you're dealing with a custom pipeline where its components (such as the UNet, VAE, scheduler) are also custom. + +We allow loading of such pipelines by exposing a `trust_remote_code` argument inside [`DiffusionPipeline`]. The advantage of `trust_remote_code` lies in its flexibility. You can have different levels of customizations for a pipeline. Following are a few examples: + +* Only UNet is custom +* UNet and VAE both are custom +* Pipeline is custom +* UNet, VAE, scheduler, and pipeline are custom + +With `trust_remote_code=True`, you can achieve any of the above! + +This tutorial covers how to author your pipeline repository so that it becomes compatible with `trust_remote_code`. 
You'll use a custom UNet, a custom scheduler, and a custom pipeline for this purpose. + +## Pipeline components + +In the interest of brevity, you'll use the custom UNet, scheduler, and pipeline classes that we've already authored: + +```bash +# Custom UNet +wget https://huggingface.co/sayakpaul/custom_pipeline_remote_code/raw/main/unet/my_unet_model.py +# Custom scheduler +wget https://huggingface.co/sayakpaul/custom_pipeline_remote_code/raw/main/scheduler/my_scheduler.py +# Custom pipeline +wget https://huggingface.co/sayakpaul/custom_pipeline_remote_code/raw/main/my_pipeline.py +``` + + + +The above classes are just for reference. We encourage you to experiment with these classes for desired customizations. + + + +Load the individual components, starting with the UNet: + +```python +from my_unet_model import MyUNetModel + +pretrained_id = "hf-internal-testing/tiny-sdxl-custom-all" +unet = MyUNetModel.from_pretrained(pretrained_id, subfolder="unet") +``` + +Then go for the scheduler: + +```python +from my_scheduler import MyScheduler + +scheduler = MyScheduler.from_pretrained(pretrained_id, subfolder="scheduler") +``` + +Finally, the VAE and the text encoders: + +```python +from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer +from diffusers import AutoencoderKL + +text_encoder = CLIPTextModel.from_pretrained(pretrained_id, subfolder="text_encoder") +text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(pretrained_id, subfolder="text_encoder_2") +tokenizer = CLIPTokenizer.from_pretrained(pretrained_id, subfolder="tokenizer") +tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_id, subfolder="tokenizer_2") + +vae = AutoencoderKL.from_pretrained(pretrained_id, subfolder="vae") +``` + +## Pipeline initialization and serialization + +With all the components, you can now initialize the custom pipeline: + +```python +pipeline = MyPipeline( + vae=vae, unet=unet, text_encoder=text_encoder, text_encoder_2=text_encoder_2, + 
tokenizer=tokenizer, tokenizer_2=tokenizer_2, scheduler=scheduler +) +``` + +Now, push the pipeline to the Hub: + +```python +pipeline.push_to_hub("custom_pipeline_remote_code") +``` + +Since the `pipeline` itself is a custom pipeline, its corresponding Python module will also be pushed ([example](https://huggingface.co/sayakpaul/custom_pipeline_remote_code/blob/main/my_pipeline.py)). If the pipeline has any other custom components, they will be pushed as well ([UNet](https://huggingface.co/sayakpaul/custom_pipeline_remote_code/blob/main/unet/my_unet_model.py), [scheduler](https://huggingface.co/sayakpaul/custom_pipeline_remote_code/blob/main/scheduler/my_scheduler.py)). + +If you want to keep the pipeline local, then use the [`PushToHubMixin.save_pretrained`] method. + +## Pipeline loading + +You can load this pipeline from the Hub by specifying `trust_remote_code=True`: + +```python +from diffusers import DiffusionPipeline + +reloaded_pipeline = DiffusionPipeline.from_pretrained("sayakpaul/custom_pipeline_remote_code", trust_remote_code=True) +``` + +And then perform inference: + +```python +prompt = "hey" +num_inference_steps = 2 + +_ = reloaded_pipeline(prompt=prompt, num_inference_steps=num_inference_steps)[0] +``` + +For more complex pipelines, readers are welcome to check out [this comment](https://github.com/huggingface/diffusers/pull/5472#issuecomment-1775034461) on GitHub. 
\ No newline at end of file From 99f91226be278c6d8899b597dc1427a24e56100a Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 16:29:25 +0530 Subject: [PATCH 04/11] add: entry to toc --- docs/source/en/_toctree.yml | 2 ++ docs/source/en/tutorials/custom_pipelines_components.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index cef8f474c00e..f6ddcc3a1df1 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -19,6 +19,8 @@ title: Train a diffusion model - local: tutorials/using_peft_for_inference title: Inference with PEFT + - local: tutorials/custom_pipelines_components + title: Working with fully custom pipelines and components title: Tutorials - sections: - sections: diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md index f0b58958bf0a..fbe275b8bad3 100644 --- a/docs/source/en/tutorials/custom_pipelines_components.md +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -10,6 +10,8 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> +# Working with fully custom pipelines and components + Diffusers supports the use [custom pipelines](../using-diffusers/contribute_pipeline) letting the users add any additional features on top of the [`DiffusionPipeline`]. However, it can get cumbersome if you're dealing with a custom pipeline where its components (such as the UNet, VAE, scheduler) are also custom. We allow loading of such pipelines by exposing a `trust_remote_code` argument inside [`DiffusionPipeline`]. The advantage of `trust_remote_code` lies in its flexibility. You can have different levels of customizations for a pipeline. 
Following are a few examples: From 0ccc58a88d66fb00afeada1fea85f3262671b015 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 16:32:27 +0530 Subject: [PATCH 05/11] indentation --- docs/source/en/tutorials/custom_pipelines_components.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md index fbe275b8bad3..08ba9c73383b 100644 --- a/docs/source/en/tutorials/custom_pipelines_components.md +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -81,8 +81,13 @@ With all the components, you can now initialize the custom pipeline: ```python pipeline = MyPipeline( - vae=vae, unet=unet, text_encoder=text_encoder, text_encoder_2=text_encoder_2, - tokenizer=tokenizer, tokenizer_2=tokenizer_2, scheduler=scheduler + vae=vae, + unet=unet, + text_encoder=text_encoder, + text_encoder_2=text_encoder_2, + tokenizer=tokenizer, + tokenizer_2=tokenizer_2, + scheduler=scheduler, ) ``` From e152eccdb0f6b2ca33a75ddfdeba87602f66035e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 16:33:13 +0530 Subject: [PATCH 06/11] additional args --- docs/source/en/tutorials/custom_pipelines_components.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md index 08ba9c73383b..3eef97160571 100644 --- a/docs/source/en/tutorials/custom_pipelines_components.md +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -108,7 +108,11 @@ You can load this pipeline from the Hub by specifying `trust_remote_code=True`: ```python from diffusers import DiffusionPipeline -reloaded_pipeline = DiffusionPipeline.from_pretrained("sayakpaul/custom_pipeline_remote_code", trust_remote_code=True) +reloaded_pipeline = DiffusionPipeline.from_pretrained( + "sayakpaul/custom_pipeline_remote_code", + 
torch_dtype=torch.float16, + trust_remote_code=True, +).to("cuda") ``` And then perform inference: From 11d0bc3150bff76bb4e20ad7f94e5fe902862f24 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 20:31:30 +0530 Subject: [PATCH 07/11] add warning --- docs/source/en/tutorials/custom_pipelines_components.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md index 3eef97160571..e67aec825d46 100644 --- a/docs/source/en/tutorials/custom_pipelines_components.md +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -25,6 +25,12 @@ With `trust_remote_code=True`, you can achieve perform of the above! This tutorial covers how to author your pipeline repository so that it becomes compatible with `trust_remote_code`. You'll use a custom UNet, a custom scheduler, and a custom pipeline for this purpose. + + +You should use `trust_remote_code=True` _only_ when you fully trust the code and have verified its usage. 
+ + + ## Pipeline components In the interest of brevity, you'll use the custom UNet, scheduler, and pipeline classes that we've already authored: From 9fc135811e9818fc529ce80d30110202f35cbf7f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 20:34:25 +0530 Subject: [PATCH 08/11] more notes --- docs/source/en/tutorials/custom_pipelines_components.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/en/tutorials/custom_pipelines_components.md b/docs/source/en/tutorials/custom_pipelines_components.md index e67aec825d46..8f19dd2abd90 100644 --- a/docs/source/en/tutorials/custom_pipelines_components.md +++ b/docs/source/en/tutorials/custom_pipelines_components.md @@ -81,6 +81,8 @@ tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_id, subfolder="tokenizer_ vae = AutoencoderKL.from_pretrained(pretrained_id, subfolder="vae") ``` +`MyUNetModel`, `MyScheduler`, and `MyPipeline` use blocks that are already supported by Diffusers. If you are using any custom blocks, make sure to put them in the module files themselves. 
+ ## Pipeline initialization and serialization With all the components, you can now initialize the custom pipeline: From 07179bd6c9b05fe3e91d902731c5ef7a4d1ca838 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Fri, 27 Oct 2023 20:35:10 +0530 Subject: [PATCH 09/11] Apply suggestions from code review Co-authored-by: Patrick von Platen --- src/diffusers/configuration_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index bb210ca12578..d26c04bc07cb 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -50,9 +50,9 @@ _re_configuration_file = re.compile(r"config\.(.*)\.json") -_all_available_pipeline_component_modules = ( +_all_available_pipeline_component_modules = sum(( list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values()) -) +), []) class FrozenDict(OrderedDict): @@ -172,7 +172,7 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool # Additionally, save the implementation file too. It can happen for a pipeline, for a model, and # for a scheduler. 
- if self.__class__.__name__ not in _all_available_pipeline_component_modules: + if self.__class__.__name__ not in _all_available_pipelines_schedulers_model_classes: module_to_save = self.__class__.__module__ absolute_module_path = sys.modules[module_to_save].__file__ try: From 09b5046e9684098fe305c8576c074a2abfad59d9 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 20:37:19 +0530 Subject: [PATCH 10/11] fix --- src/diffusers/configuration_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index d26c04bc07cb..2cbbedbadf1d 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -50,9 +50,9 @@ _re_configuration_file = re.compile(r"config\.(.*)\.json") -_all_available_pipeline_component_modules = sum(( - list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values()) -), []) +_all_available_pipelines_schedulers_model_classes = sum( + (list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values())), [] +) class FrozenDict(OrderedDict): @@ -591,7 +591,7 @@ def to_json_string(self) -> str: """ config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} cls_name = self.__class__.__name__ - if cls_name not in _all_available_pipeline_component_modules: + if cls_name not in _all_available_pipelines_schedulers_model_classes: config_dict["_class_name"] = [str(self.__class__.__module__), cls_name] else: config_dict["_class_name"] = cls_name From e0f4a0cd8b82781f97a7716e444ebe3e7e3891bf Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Oct 2023 20:50:57 +0530 Subject: [PATCH 11/11] fix circular import --- src/diffusers/configuration_utils.py | 29 +++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index 
2cbbedbadf1d..b48279135323 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -32,9 +32,6 @@ from requests import HTTPError from . import __version__ -from .models import _import_structure as model_modules -from .pipelines import _import_structure as pipeline_modules -from .schedulers import _import_structure as scheduler_modules from .utils import ( DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, @@ -50,10 +47,6 @@ _re_configuration_file = re.compile(r"config\.(.*)\.json") -_all_available_pipelines_schedulers_model_classes = sum( - (list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values())), [] -) - class FrozenDict(OrderedDict): def __init__(self, *args, **kwargs): @@ -172,6 +165,15 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool # Additionally, save the implementation file too. It can happen for a pipeline, for a model, and # for a scheduler. + + # To avoid circular import problems. + from .models import _import_structure as model_modules + from .pipelines import _import_structure as pipeline_modules + from .schedulers import _import_structure as scheduler_modules + + _all_available_pipelines_schedulers_model_classes = sum( + (list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values())), [] + ) if self.__class__.__name__ not in _all_available_pipelines_schedulers_model_classes: module_to_save = self.__class__.__module__ absolute_module_path = sys.modules[module_to_save].__file__ @@ -591,6 +593,19 @@ def to_json_string(self) -> str: """ config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} cls_name = self.__class__.__name__ + + # Additionally, save the implementation file too. It can happen for a pipeline, for a model, and + # for a scheduler. + + # To avoid circular import problems. 
+ from .models import _import_structure as model_modules + from .pipelines import _import_structure as pipeline_modules + from .schedulers import _import_structure as scheduler_modules + + _all_available_pipelines_schedulers_model_classes = sum( + (list(model_modules.values()) + list(scheduler_modules.values()) + list(pipeline_modules.values())), [] + ) + if cls_name not in _all_available_pipelines_schedulers_model_classes: config_dict["_class_name"] = [str(self.__class__.__module__), cls_name] else: