docs/source/en/api/pipelines/lumina.md (7 additions, 7 deletions)
@@ -58,10 +58,10 @@ Use [`torch.compile`](https://huggingface.co/docs/diffusers/main/en/tutorials/fa
First, load the pipeline:

```python
-from diffusers import LuminaText2ImgPipeline
+from diffusers import LuminaPipeline
import torch

-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
"Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
).to("cuda")
```
@@ -86,11 +86,11 @@ image = pipeline(prompt="Upper body of a young woman in a Victorian-era outfit w

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have a varying impact on image quality depending on the model.

-Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaText2ImgPipeline`] for inference with bitsandbytes.
+Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaPipeline`] for inference with bitsandbytes.

```py
import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaText2ImgPipeline
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaPipeline
from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel

quant_config = BitsAndBytesConfig(load_in_8bit=True)
@@ -109,7 +109,7 @@ transformer_8bit = Transformer2DModel.from_pretrained(
torch_dtype=torch.float16,
)

-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
"Alpha-VLLM/Lumina-Next-SFT-diffusers",
text_encoder=text_encoder_8bit,
transformer=transformer_8bit,
@@ -122,9 +122,9 @@ image = pipeline(prompt).images[0]
image.save("lumina.png")
```

-## LuminaText2ImgPipeline
+## LuminaPipeline

-[[autodoc]] LuminaText2ImgPipeline
+[[autodoc]] LuminaPipeline
- all
- __call__

docs/source/en/api/pipelines/lumina2.md (6 additions, 6 deletions)
@@ -36,14 +36,14 @@ Single file loading for Lumina Image 2.0 is available for the `Lumina2Transforme

```python
import torch
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline

ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"
transformer = Lumina2Transformer2DModel.from_single_file(
ckpt_path, torch_dtype=torch.bfloat16
)

-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
"Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
@@ -60,7 +60,7 @@ image.save("lumina-single-file.png")
GGUF-quantized checkpoints for the `Lumina2Transformer2DModel` can be loaded via `from_single_file` with the `GGUFQuantizationConfig`.

```python
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline, GGUFQuantizationConfig

ckpt_path = "https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"
transformer = Lumina2Transformer2DModel.from_single_file(
@@ -69,7 +69,7 @@ transformer = Lumina2Transformer2DModel.from_single_file(
torch_dtype=torch.bfloat16,
)

-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
"Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
@@ -80,8 +80,8 @@ image = pipe(
image.save("lumina-gguf.png")
```

-## Lumina2Text2ImgPipeline
+## Lumina2Pipeline

-[[autodoc]] Lumina2Text2ImgPipeline
+[[autodoc]] Lumina2Pipeline
- all
- __call__
scripts/convert_lumina_to_diffusers.py (2 additions, 2 deletions)
@@ -5,7 +5,7 @@
from safetensors.torch import load_file
from transformers import AutoModel, AutoTokenizer

-from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
+from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaPipeline


def main(args):
@@ -115,7 +115,7 @@ def main(args):
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
text_encoder = AutoModel.from_pretrained("google/gemma-2b")

-pipeline = LuminaText2ImgPipeline(
+pipeline = LuminaPipeline(
tokenizer=tokenizer, text_encoder=text_encoder, transformer=transformer, vae=vae, scheduler=scheduler
)
pipeline.save_pretrained(args.dump_path)
src/diffusers/__init__.py (4 additions, 0 deletions)
@@ -349,7 +349,9 @@
"LEditsPPPipelineStableDiffusionXL",
"LTXImageToVideoPipeline",
"LTXPipeline",
+"Lumina2Pipeline",
"Lumina2Text2ImgPipeline",
+"LuminaPipeline",
"LuminaText2ImgPipeline",
"MarigoldDepthPipeline",
"MarigoldIntrinsicsPipeline",
@@ -859,7 +861,9 @@
LEditsPPPipelineStableDiffusionXL,
LTXImageToVideoPipeline,
LTXPipeline,
+Lumina2Pipeline,
Lumina2Text2ImgPipeline,
+LuminaPipeline,
LuminaText2ImgPipeline,
MarigoldDepthPipeline,
MarigoldIntrinsicsPipeline,
src/diffusers/pipelines/__init__.py (4 additions, 4 deletions)
@@ -261,8 +261,8 @@
)
_import_structure["latte"] = ["LattePipeline"]
_import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
-_import_structure["lumina"] = ["LuminaText2ImgPipeline"]
-_import_structure["lumina2"] = ["Lumina2Text2ImgPipeline"]
+_import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
+_import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
_import_structure["marigold"].extend(
[
"MarigoldDepthPipeline",
@@ -611,8 +611,8 @@
LEditsPPPipelineStableDiffusionXL,
)
from .ltx import LTXImageToVideoPipeline, LTXPipeline
-from .lumina import LuminaText2ImgPipeline
-from .lumina2 import Lumina2Text2ImgPipeline
+from .lumina import LuminaPipeline, LuminaText2ImgPipeline
+from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
from .marigold import (
MarigoldDepthPipeline,
MarigoldIntrinsicsPipeline,
src/diffusers/pipelines/auto_pipeline.py (4 additions, 4 deletions)
@@ -69,8 +69,8 @@
)
from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
-from .lumina import LuminaText2ImgPipeline
-from .lumina2 import Lumina2Text2ImgPipeline
+from .lumina import LuminaPipeline
+from .lumina2 import Lumina2Pipeline
from .pag import (
HunyuanDiTPAGPipeline,
PixArtSigmaPAGPipeline,
@@ -141,8 +141,8 @@
("flux", FluxPipeline),
("flux-control", FluxControlPipeline),
("flux-controlnet", FluxControlNetPipeline),
-("lumina", LuminaText2ImgPipeline),
-("lumina2", Lumina2Text2ImgPipeline),
+("lumina", LuminaPipeline),
+("lumina2", Lumina2Pipeline),
("cogview3", CogView3PlusPipeline),
("cogview4", CogView4Pipeline),
]
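With the mapping updated above, `AutoPipelineForText2Image` resolves Lumina checkpoints to the renamed classes. A minimal sketch of the user-facing effect (not part of this diff; the checkpoint IDs are taken from the docs above, and the resolved class is an expectation based on the mapping rather than a verified output):

```python
import torch
from diffusers import AutoPipelineForText2Image

# The text-to-image auto mapping now points "lumina" at LuminaPipeline
# and "lumina2" at Lumina2Pipeline.
pipe = AutoPipelineForText2Image.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
)
print(type(pipe).__name__)  # expected: LuminaPipeline
```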
src/diffusers/pipelines/lumina/__init__.py (2 additions, 2 deletions)
@@ -22,7 +22,7 @@

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
-_import_structure["pipeline_lumina"] = ["LuminaText2ImgPipeline"]
+_import_structure["pipeline_lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
@@ -32,7 +32,7 @@
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
-from .pipeline_lumina import LuminaText2ImgPipeline
+from .pipeline_lumina import LuminaPipeline, LuminaText2ImgPipeline

else:
import sys
src/diffusers/pipelines/lumina/pipeline_lumina.py (24 additions, 5 deletions)
@@ -30,6 +30,7 @@
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
BACKENDS_MAPPING,
+deprecate,
is_bs4_available,
is_ftfy_available,
is_torch_xla_available,
@@ -60,11 +61,9 @@
Examples:
```py
>>> import torch
->>> from diffusers import LuminaText2ImgPipeline
+>>> from diffusers import LuminaPipeline

->>> pipe = LuminaText2ImgPipeline.from_pretrained(
-... "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
-... )
+>>> pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16)
>>> # Enable memory optimizations.
>>> pipe.enable_model_cpu_offload()

@@ -134,7 +133,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps


-class LuminaText2ImgPipeline(DiffusionPipeline):
+class LuminaPipeline(DiffusionPipeline):
r"""
Pipeline for text-to-image generation using Lumina-T2I.

@@ -932,3 +931,23 @@ def __call__(
return (image,)

return ImagePipelineOutput(images=image)


+class LuminaText2ImgPipeline(LuminaPipeline):
+    def __init__(
+        self,
+        transformer: LuminaNextDiT2DModel,
+        scheduler: FlowMatchEulerDiscreteScheduler,
+        vae: AutoencoderKL,
+        text_encoder: GemmaPreTrainedModel,
+        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
+    ):
+        deprecation_message = "`LuminaText2ImgPipeline` has been renamed to `LuminaPipeline` and will be removed in a future version. Please use `LuminaPipeline` instead."
+        deprecate("diffusers.pipelines.lumina.pipeline_lumina.LuminaText2ImgPipeline", "0.34", deprecation_message)
+        super().__init__(
+            transformer=transformer,
+            scheduler=scheduler,
+            vae=vae,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+        )
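The subclass above keeps the old name importable while steering callers to the new one. A minimal migration sketch (not part of this diff; the repo ID comes from the docs above, and the warning is whatever `deprecate(...)` emits):

```python
import torch
from diffusers import LuminaPipeline, LuminaText2ImgPipeline

# Preferred going forward: the renamed pipeline.
pipe = LuminaPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
)

# Still supported: the old name now routes through the deprecation shim, which calls
# `deprecate(...)` before delegating to LuminaPipeline with the same components.
legacy_pipe = LuminaText2ImgPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
)
```

The Lumina2 rename below follows the same pattern.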
src/diffusers/pipelines/lumina2/__init__.py (2 additions, 2 deletions)
@@ -22,7 +22,7 @@

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
-_import_structure["pipeline_lumina2"] = ["Lumina2Text2ImgPipeline"]
+_import_structure["pipeline_lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
@@ -32,7 +32,7 @@
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
-from .pipeline_lumina2 import Lumina2Text2ImgPipeline
+from .pipeline_lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline

else:
import sys
src/diffusers/pipelines/lumina2/pipeline_lumina2.py (24 additions, 3 deletions)
@@ -25,6 +25,7 @@
from ...models.transformers.transformer_lumina2 import Lumina2Transformer2DModel
from ...schedulers import FlowMatchEulerDiscreteScheduler
from ...utils import (
+deprecate,
is_torch_xla_available,
logging,
replace_example_docstring,
@@ -47,9 +48,9 @@
Examples:
```py
>>> import torch
->>> from diffusers import Lumina2Text2ImgPipeline
+>>> from diffusers import Lumina2Pipeline

->>> pipe = Lumina2Text2ImgPipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
+>>> pipe = Lumina2Pipeline.from_pretrained("Alpha-VLLM/Lumina-Image-2.0", torch_dtype=torch.bfloat16)
>>> # Enable memory optimizations.
>>> pipe.enable_model_cpu_offload()

@@ -133,7 +134,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps


-class Lumina2Text2ImgPipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
+class Lumina2Pipeline(DiffusionPipeline, Lumina2LoraLoaderMixin):
r"""
Pipeline for text-to-image generation using Lumina-T2I.

@@ -767,3 +768,23 @@ def __call__(
return (image,)

return ImagePipelineOutput(images=image)


+class Lumina2Text2ImgPipeline(Lumina2Pipeline):
+    def __init__(
+        self,
+        transformer: Lumina2Transformer2DModel,
+        scheduler: FlowMatchEulerDiscreteScheduler,
+        vae: AutoencoderKL,
+        text_encoder: Gemma2PreTrainedModel,
+        tokenizer: Union[GemmaTokenizer, GemmaTokenizerFast],
+    ):
+        deprecation_message = "`Lumina2Text2ImgPipeline` has been renamed to `Lumina2Pipeline` and will be removed in a future version. Please use `Lumina2Pipeline` instead."
+        deprecate("diffusers.pipelines.lumina2.pipeline_lumina2.Lumina2Text2ImgPipeline", "0.34", deprecation_message)
+        super().__init__(
+            transformer=transformer,
+            scheduler=scheduler,
+            vae=vae,
+            text_encoder=text_encoder,
+            tokenizer=tokenizer,
+        )
src/diffusers/utils/dummy_torch_and_transformers_objects.py (30 additions, 0 deletions)
@@ -1217,6 +1217,21 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


+class Lumina2Pipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])


class Lumina2Text2ImgPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

@@ -1232,6 +1247,21 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers"])


+class LuminaPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])


class LuminaText2ImgPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]

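These dummy classes keep `from diffusers import LuminaPipeline` (and `Lumina2Pipeline`) importable when `torch`/`transformers` are missing, deferring the failure to first use. A hypothetical session in such an environment (the exact error text comes from `requires_backends` and is not reproduced verbatim here):

```python
# Assumes diffusers is installed without torch/transformers.
from diffusers import LuminaPipeline  # import succeeds via the dummy class

pipe = LuminaPipeline.from_pretrained("Alpha-VLLM/Lumina-Next-SFT-diffusers")
# Raises ImportError from requires_backends, pointing at the missing backends.
```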