diff --git a/docs/source/en/api/pipelines/lumina.md b/docs/source/en/api/pipelines/lumina.md
index 1967e85f173a..ce5cf8b103cc 100644
--- a/docs/source/en/api/pipelines/lumina.md
+++ b/docs/source/en/api/pipelines/lumina.md
@@ -58,10 +58,10 @@ Use [`torch.compile`](https://huggingface.co/docs/diffusers/main/en/tutorials/fa
 First, load the pipeline:
 
 ```python
-from diffusers import LuminaText2ImgPipeline
+from diffusers import LuminaPipeline
 import torch
 
-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
 ).to("cuda")
 ```
@@ -86,11 +86,11 @@ image = pipeline(prompt="Upper body of a young woman in a Victorian-era outfit w
 
 Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.
 
-Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaText2ImgPipeline`] for inference with bitsandbytes.
+Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaPipeline`] for inference with bitsandbytes.
 
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaText2ImgPipeline
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaPipeline
 from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel
 
 quant_config = BitsAndBytesConfig(load_in_8bit=True)
@@ -109,7 +109,7 @@ transformer_8bit = Transformer2DModel.from_pretrained(
     torch_dtype=torch.float16,
 )
 
-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Next-SFT-diffusers",
     text_encoder=text_encoder_8bit,
     transformer=transformer_8bit,
@@ -122,9 +122,9 @@
 image = pipeline(prompt).images[0]
 image.save("lumina.png")
 ```
 
-## LuminaText2ImgPipeline
+## LuminaPipeline
 
-[[autodoc]] LuminaText2ImgPipeline
+[[autodoc]] LuminaPipeline
   - all
   - __call__
diff --git a/docs/source/en/api/pipelines/lumina2.md b/docs/source/en/api/pipelines/lumina2.md
index cf04bc17e3ef..57f0e8e2105d 100644
--- a/docs/source/en/api/pipelines/lumina2.md
+++ b/docs/source/en/api/pipelines/lumina2.md
@@ -36,14 +36,14 @@ Single file loading for Lumina Image 2.0 is available for the `Lumina2Transforme
 
 ```python
 import torch
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline
 
 ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"
 transformer = Lumina2Transformer2DModel.from_single_file(
     ckpt_path, torch_dtype=torch.bfloat16
 )
 
-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
 )
 pipe.enable_model_cpu_offload()
@@ -60,7 +60,7 @@ image.save("lumina-single-file.png")
 
 GGUF Quantized checkpoints for the `Lumina2Transformer2DModel` can be loaded via `from_single_file` with the `GGUFQuantizationConfig`
 
 ```python
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline, GGUFQuantizationConfig
 
 ckpt_path = "https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"
 transformer = Lumina2Transformer2DModel.from_single_file(
@@ -69,7 +69,7 @@ transformer = Lumina2Transformer2DModel.from_single_file(
     torch_dtype=torch.bfloat16,
 )
 
-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
 )
 pipe.enable_model_cpu_offload()
@@ -80,8 +80,8 @@ image = pipe(
 image.save("lumina-gguf.png")
 ```
 
-## Lumina2Text2ImgPipeline
+## Lumina2Pipeline
 
-[[autodoc]] Lumina2Text2ImgPipeline
+[[autodoc]] Lumina2Pipeline
   - all
   - __call__
diff --git a/scripts/convert_lumina_to_diffusers.py b/scripts/convert_lumina_to_diffusers.py
index a12625d1376f..c14aad3c6bf2 100644
--- a/scripts/convert_lumina_to_diffusers.py
+++ b/scripts/convert_lumina_to_diffusers.py
@@ -5,7 +5,7 @@
 from safetensors.torch import load_file
 from transformers import AutoModel, AutoTokenizer
 
-from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
+from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaPipeline
 
 
 def main(args):
@@ -115,7 +115,7 @@ def main(args):
     tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
     text_encoder = AutoModel.from_pretrained("google/gemma-2b")
 
-    pipeline = LuminaText2ImgPipeline(
+    pipeline = LuminaPipeline(
         tokenizer=tokenizer, text_encoder=text_encoder, transformer=transformer, vae=vae, scheduler=scheduler
     )
     pipeline.save_pretrained(args.dump_path)
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
index cfb0bd08f818..4f8c3b176e2c 100644
--- a/src/diffusers/__init__.py
+++ b/src/diffusers/__init__.py
@@ -349,7 +349,9 @@
             "LEditsPPPipelineStableDiffusionXL",
             "LTXImageToVideoPipeline",
             "LTXPipeline",
+            "Lumina2Pipeline",
             "Lumina2Text2ImgPipeline",
+            "LuminaPipeline",
             "LuminaText2ImgPipeline",
             "MarigoldDepthPipeline",
             "MarigoldIntrinsicsPipeline",
@@ -859,7 +861,9 @@
             LEditsPPPipelineStableDiffusionXL,
             LTXImageToVideoPipeline,
             LTXPipeline,
+            Lumina2Pipeline,
             Lumina2Text2ImgPipeline,
+            LuminaPipeline,
             LuminaText2ImgPipeline,
             MarigoldDepthPipeline,
             MarigoldIntrinsicsPipeline,
diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
index e99162e7a7fe..271b22d5d31c 100644
--- a/src/diffusers/pipelines/__init__.py
+++ b/src/diffusers/pipelines/__init__.py
@@ -261,8 +261,8 @@
     )
     _import_structure["latte"] = ["LattePipeline"]
     _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
-    _import_structure["lumina"] = ["LuminaText2ImgPipeline"]
-    _import_structure["lumina2"] = ["Lumina2Text2ImgPipeline"]
+    _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
+    _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
     _import_structure["marigold"].extend(
         [
             "MarigoldDepthPipeline",
@@ -611,8 +611,8 @@
             LEditsPPPipelineStableDiffusionXL,
         )
         from .ltx import LTXImageToVideoPipeline, LTXPipeline
-        from .lumina import LuminaText2ImgPipeline
-        from .lumina2 import Lumina2Text2ImgPipeline
+        from .lumina import LuminaPipeline, LuminaText2ImgPipeline
+        from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
         from .marigold import (
             MarigoldDepthPipeline,
             MarigoldIntrinsicsPipeline,
diff --git a/src/diffusers/pipelines/auto_pipeline.py b/src/diffusers/pipelines/auto_pipeline.py
index 4f760ee09add..e2490923dc58 100644
--- a/src/diffusers/pipelines/auto_pipeline.py
+++ b/src/diffusers/pipelines/auto_pipeline.py
@@ -69,8 +69,8 @@
 )
 from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
 from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
-from .lumina import LuminaText2ImgPipeline
-from .lumina2 import Lumina2Text2ImgPipeline
+from .lumina import LuminaPipeline
+from .lumina2 import Lumina2Pipeline
 from .pag import (
     HunyuanDiTPAGPipeline,
     PixArtSigmaPAGPipeline,
@@ -141,8 +141,8 @@
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
-        ("lumina", LuminaText2ImgPipeline),
-        ("lumina2", Lumina2Text2ImgPipeline),
+        ("lumina", LuminaPipeline),
+        ("lumina2", Lumina2Pipeline),
         ("cogview3", CogView3PlusPipeline),
         ("cogview4", CogView4Pipeline),
     ]
diff --git a/src/diffusers/pipelines/lumina/__init__.py b/src/diffusers/pipelines/lumina/__init__.py
index ca1396359721..a19dc7e94641 100644
--- a/src/diffusers/pipelines/lumina/__init__.py
+++ b/src/diffusers/pipelines/lumina/__init__.py
@@ -22,7 +22,7 @@
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
-    _import_structure["pipeline_lumina"] = ["LuminaText2ImgPipeline"]
+    _import_structure["pipeline_lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -32,7 +32,7 @@
     except OptionalDependencyNotAvailable:
         from ...utils.dummy_torch_and_transformers_objects import *
     else:
-        from .pipeline_lumina import LuminaText2ImgPipeline
+        from .pipeline_lumina import LuminaPipeline, LuminaText2ImgPipeline
 
 else:
     import sys
diff --git a/src/diffusers/pipelines/lumina/pipeline_lumina.py b/src/diffusers/pipelines/lumina/pipeline_lumina.py
index b50079532f94..816213f105cb 100644
--- a/src/diffusers/pipelines/lumina/pipeline_lumina.py
+++ b/src/diffusers/pipelines/lumina/pipeline_lumina.py
@@ -30,6 +30,7 @@
 from ...schedulers import FlowMatchEulerDiscreteScheduler
 from ...utils import (
     BACKENDS_MAPPING,
+    deprecate,
     is_bs4_available,
     is_ftfy_available,
     is_torch_xla_available,
@@ -60,11 +61,9 @@
     Examples:
         ```py
         >>> import torch
-        >>> from diffusers import LuminaText2ImgPipeline
+        >>> from diffusers import LuminaPipeline
 
-        >>> pipe = LuminaText2ImgPipeline.from_pretrained(
-        ...     "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
-        ... )
+        >>> pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16)
         >>> # Enable memory optimizations.
         >>> pipe.enable_model_cpu_offload()
 
@@ -134,7 +133,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 
 
-class LuminaText2ImgPipeline(DiffusionPipeline):
+class LuminaPipeline(DiffusionPipeline):
     r"""
     Pipeline for text-to-image generation using Lumina-T2I.
 
@@ -932,3 +931,23 @@ def __call__(
             return (image,)
 
         return ImagePipelineOutput(images=image)
+
+
+class LuminaText2ImgPipeline(LuminaPipeline):
+    def __init__(
+        self,
+        transformer: LuminaNextDiT2DModel,
+        scheduler: FlowMatchEulerDiscreteScheduler,
+        vae: AutoencoderKL,
+        text_encoder: GemmaPreTrainedModel,
+        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
+    ):
+        deprecation_message = "`LuminaText2ImgPipeline` has been renamed to `LuminaPipeline` and will be removed in a future version. Please use `LuminaPipeline` instead."
+        deprecate("diffusers.pipelines.lumina.pipeline_lumina.LuminaText2ImgPipeline", "0.34", deprecation_message)
+        super().__init__(
+            transformer=transformer,
+            scheduler=scheduler,
+            vae=vae,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+        )
diff --git a/src/diffusers/pipelines/lumina2/__init__.py b/src/diffusers/pipelines/lumina2/__init__.py
index 0e51a768a785..b1d6bfeb0d58 100644
--- a/src/diffusers/pipelines/lumina2/__init__.py
+++ b/src/diffusers/pipelines/lumina2/__init__.py
@@ -22,7 +22,7 @@
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
-    _import_structure["pipeline_lumina2"] = ["Lumina2Text2ImgPipeline"]
+    _import_structure["pipeline_lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -32,7 +32,7 @@
     except OptionalDependencyNotAvailable:
         from ...utils.dummy_torch_and_transformers_objects import *
     else:
-        from .pipeline_lumina2 import Lumina2Text2ImgPipeline
+        from .pipeline_lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
 
 else:
     import sys
diff --git a/src/diffusers/pipelines/lumina2/pipeline_lumina2.py b/src/diffusers/pipelines/lumina2/pipeline_lumina2.py
index 514192cb70c7..e0905a2f131f 100644
--- a/src/diffusers/pipelines/lumina2/pipeline_lumina2.py
+++ b/src/diffusers/pipelines/lumina2/pipeline_lumina2.py
@@ -25,6 +25,7 @@
 from ...models.transformers.transformer_lumina2 import Lumina2Transformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
 from ...utils import (
+    deprecate,
     is_torch_xla_available,
     logging,
     replace_example_docstring,
@@ -47,9 +48,9 @@
     Examples:
         ```py
        >>> import torch
-        >>> from diffusers import Lumina2Text2ImgPipeline
+        >>> from diffusers import Lumina2Pipeline
 
-        >>> pipe = Lumina2Text2ImgPipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
+        >>> pipe = Lumina2Pipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
         >>> # Enable memory optimizations.
         >>> pipe.enable_model_cpu_offload()
 
@@ -133,7 +134,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 
 
-class Lumina2Text2ImgPipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
+class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
     r"""
     Pipeline for text-to-image generation using Lumina-T2I.
 
@@ -767,3 +768,23 @@ def __call__(
             return (image,)
 
         return ImagePipelineOutput(images=image)
+
+
+class Lumina2Text2ImgPipeline(Lumina2Pipeline):
+    def __init__(
+        self,
+        transformer: Lumina2Transformer2DModel,
+        scheduler: FlowMatchEulerDiscreteScheduler,
+        vae: AutoencoderKL,
+        text_encoder: Gemma2PreTrainedModel,
+        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
+    ):
+        deprecation_message = "`Lumina2Text2ImgPipeline` has been renamed to `Lumina2Pipeline` and will be removed in a future version. Please use `Lumina2Pipeline` instead."
+        deprecate("diffusers.pipelines.lumina2.pipeline_lumina2.Lumina2Text2ImgPipeline", "0.34", deprecation_message)
+        super().__init__(
+            transformer=transformer,
+            scheduler=scheduler,
+            vae=vae,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+        )
diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py
index 5a2818c2e245..46919e7e85e0 100644
--- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py
+++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py
@@ -1217,6 +1217,21 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch", "transformers"])
 
 
+class Lumina2Pipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
 class Lumina2Text2ImgPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
@@ -1232,6 +1247,21 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["torch", "transformers"])
 
 
+class LuminaPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
 class LuminaText2ImgPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]
 
diff --git a/tests/pipelines/lumina/test_lumina_nextdit.py b/tests/pipelines/lumina/test_lumina_nextdit.py
index 034a0185d338..0c1fe8eb2fcd 100644
--- a/tests/pipelines/lumina/test_lumina_nextdit.py
+++ b/tests/pipelines/lumina/test_lumina_nextdit.py
@@ -5,7 +5,13 @@
 import torch
 from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM
 
-from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
+from diffusers import (
+    AutoencoderKL,
+    FlowMatchEulerDiscreteScheduler,
+    LuminaNextDiT2DModel,
+    LuminaPipeline,
+    LuminaText2ImgPipeline,
+)
 from diffusers.utils.testing_utils import (
     backend_empty_cache,
     numpy_cosine_similarity_distance,
@@ -17,8 +23,8 @@
 from ..test_pipelines_common import PipelineTesterMixin
 
 
-class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
-    pipeline_class = LuminaText2ImgPipeline
+class LuminaPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
+    pipeline_class = LuminaPipeline
     params = frozenset(
         [
             "prompt",
@@ -99,11 +105,17 @@ def get_dummy_inputs(self, device, seed=0):
     def test_xformers_attention_forwardGenerator_pass(self):
         pass
 
+    def test_deprecation_raises_warning(self):
+        with self.assertWarns(FutureWarning) as warning:
+            _ = LuminaText2ImgPipeline(**self.get_dummy_components()).to(torch_device)
+        warning_message = str(warning.warnings[0].message)
+        assert "renamed to `LuminaPipeline`" in warning_message
+
 
 @slow
 @require_torch_accelerator
-class LuminaText2ImgPipelineSlowTests(unittest.TestCase):
-    pipeline_class = LuminaText2ImgPipeline
+class LuminaPipelineSlowTests(unittest.TestCase):
+    pipeline_class = LuminaPipeline
     repo_id = "Alpha-VLLM/Lumina-Next-SFT-diffusers"
 
     def setUp(self):
diff --git a/tests/pipelines/lumina2/test_pipeline_lumina2.py b/tests/pipelines/lumina2/test_pipeline_lumina2.py
index aa0571559b45..33fc870bcd34 100644
--- a/tests/pipelines/lumina2/test_pipeline_lumina2.py
+++ b/tests/pipelines/lumina2/test_pipeline_lumina2.py
@@ -6,15 +6,17 @@
 from diffusers import (
     AutoencoderKL,
     FlowMatchEulerDiscreteScheduler,
+    Lumina2Pipeline,
     Lumina2Text2ImgPipeline,
     Lumina2Transformer2DModel,
 )
+from diffusers.utils.testing_utils import torch_device
 
 from ..test_pipelines_common import PipelineTesterMixin
 
 
-class Lumina2Text2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterMixin):
-    pipeline_class = Lumina2Text2ImgPipeline
+class Lumina2PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
+    pipeline_class = Lumina2Pipeline
     params = frozenset(
         [
             "prompt",
@@ -115,3 +117,9 @@ def get_dummy_inputs(self, device, seed=0):
             "output_type": "np",
         }
         return inputs
+
+    def test_deprecation_raises_warning(self):
+        with self.assertWarns(FutureWarning) as warning:
+            _ = Lumina2Text2ImgPipeline(**self.get_dummy_components()).to(torch_device)
+        warning_message = str(warning.warnings[0].message)
+        assert "renamed to `Lumina2Pipeline`" in warning_message
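
To sanity-check the shim outside the test suite, here is a minimal sketch. It is not part of the patch: it assumes a diffusers build with this change applied and downloads the Lumina checkpoint used throughout the docs above. The new name loads with no deprecation noise, while the legacy name still builds a working pipeline but emits the `FutureWarning` that the new `test_deprecation_raises_warning` tests assert.

```python
# Minimal sketch, assuming a diffusers install that includes this patch.
import warnings

import torch
from diffusers import LuminaPipeline, LuminaText2ImgPipeline

# The renamed pipeline loads without any deprecation warning.
pipe = LuminaPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
)

# The legacy name still constructs a working pipeline, but its __init__
# calls deprecate(...), which emits a FutureWarning pointing at LuminaPipeline.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = LuminaText2ImgPipeline(**pipe.components)

assert isinstance(legacy, LuminaPipeline)  # the shim subclasses the new class
assert any("renamed to `LuminaPipeline`" in str(w.message) for w in caught)
```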
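Existing checkpoints are also unaffected: because each legacy class is kept as a subclass of its renamed counterpart, a Hub repo whose `model_index.json` still records the old `_class_name` resolves to the shim (with the warning), while an updated repo resolves to the new class directly. A hedged sketch; whichever name "Alpha-VLLM/Lumina-Image-2.0" currently records, the assertion holds either way. The `auto_pipeline.py` hunk keeps `AutoPipelineForText2Image` consistent with this by registering the renamed classes under the "lumina" and "lumina2" task entries.

```python
# Sketch: checkpoint loading survives the rename. DiffusionPipeline resolves
# the `_class_name` recorded in model_index.json; after this patch both names
# are importable, and the old one subclasses the new one.
import torch
from diffusers import DiffusionPipeline, Lumina2Pipeline

pipe = DiffusionPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16
)

# Holds whether resolution hit Lumina2Pipeline or the deprecated shim.
assert isinstance(pipe, Lumina2Pipeline)
print(type(pipe).__name__)
```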