5 changes: 5 additions & 0 deletions src/diffusers/loaders.py
@@ -1387,6 +1387,9 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
An instance of `CLIPTextModel` to use, specifically the
[clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant. If this
parameter is `None`, the function loads a new instance of `CLIPTextModel` by itself if needed.
vae (`AutoencoderKL`, *optional*, defaults to `None`):
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. If
this parameter is `None`, the function loads a new instance of `AutoencoderKL` by itself if needed.
tokenizer ([`~transformers.CLIPTokenizer`], *optional*, defaults to `None`):
An instance of `CLIPTokenizer` to use. If this parameter is `None`, the function loads a new instance
of `CLIPTokenizer` by itself if needed.
@@ -1435,6 +1438,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
load_safety_checker = kwargs.pop("load_safety_checker", True)
prediction_type = kwargs.pop("prediction_type", None)
text_encoder = kwargs.pop("text_encoder", None)
vae = kwargs.pop("vae", None)
controlnet = kwargs.pop("controlnet", None)
tokenizer = kwargs.pop("tokenizer", None)

@@ -1525,6 +1529,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
load_safety_checker=load_safety_checker,
prediction_type=prediction_type,
text_encoder=text_encoder,
vae=vae,
tokenizer=tokenizer,
)

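With this addition, a ready-made VAE can be passed straight through `from_single_file` instead of converting the one embedded in the checkpoint. A minimal usage sketch (the checkpoint path is illustrative; `stabilityai/sd-vae-ft-mse` is a public fine-tuned VAE used here only as an example):

```python
from diffusers import AutoencoderKL, StableDiffusionPipeline

# Reuse a fine-tuned VAE rather than the one converted from the checkpoint.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")

pipe = StableDiffusionPipeline.from_single_file(
    "path/to/model.safetensors",  # illustrative local checkpoint path
    vae=vae,
)
```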
8 changes: 6 additions & 2 deletions src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
@@ -1107,6 +1107,7 @@ def download_from_original_stable_diffusion_ckpt(
pipeline_class: DiffusionPipeline = None,
local_files_only=False,
vae_path=None,
vae=None,
text_encoder=None,
tokenizer=None,
) -> DiffusionPipeline:
@@ -1156,6 +1157,9 @@ def download_from_original_stable_diffusion_ckpt(
The pipeline class to use. Pass `None` to determine automatically.
local_files_only (`bool`, *optional*, defaults to `False`):
Whether or not to only look at local files (i.e., do not try to download the model).
vae (`AutoencoderKL`, *optional*, defaults to `None`):
Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. If
this parameter is `None`, the function loads a new instance of `AutoencoderKL` by itself if needed.
text_encoder (`CLIPTextModel`, *optional*, defaults to `None`):
An instance of [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel)
to use, specifically the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)
@@ -1361,7 +1365,7 @@ def download_from_original_stable_diffusion_ckpt(
unet.load_state_dict(converted_unet_checkpoint)

# Convert the VAE model.
if vae_path is None:
if vae_path is None and vae is None:
vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)

@@ -1385,7 +1389,7 @@ def download_from_original_stable_diffusion_ckpt(
set_module_tensor_to_device(vae, param_name, "cpu", value=param)
else:
vae.load_state_dict(converted_vae_checkpoint)
else:
elif vae is None:
vae = AutoencoderKL.from_pretrained(vae_path)

if model_type == "FrozenOpenCLIPEmbedder":
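Taken together, the branches above give the following precedence when resolving the VAE. This is a simplified sketch of the control flow, and `build_vae_from_checkpoint` is a hypothetical stand-in for the conversion branch, not a helper that exists in the codebase:

```python
from diffusers import AutoencoderKL

def resolve_vae(vae=None, vae_path=None, build_vae_from_checkpoint=None):
    # 1. An explicitly passed `vae` instance wins.
    if vae is not None:
        return vae
    # 2. Otherwise a `vae_path` is loaded via from_pretrained.
    if vae_path is not None:
        return AutoencoderKL.from_pretrained(vae_path)
    # 3. Fall back to converting the VAE weights embedded in the original
    #    checkpoint (the `vae_path is None and vae is None` branch above).
    return build_vae_from_checkpoint()
```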