Describe the bug
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
/usr/local/lib/python3.9/dist-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead.
warnings.warn(
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <cell line: 9>:9 │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:965 in │
│ from_pretrained │
│ │
│ 962 │ │ │ ) │
│ 963 │ │ │
│ 964 │ │ # 5. Instantiate the pipeline │
│ ❱ 965 │ │ model = pipeline_class(**init_kwargs) │
│ 966 │ │ │
│ 967 │ │ if return_cached_folder: │
│ 968 │ │ │ return model, cached_folder │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diff │
│ usion_controlnet.py:149 in __init__ │
│ │
│ 146 │ │ │ │ " checker. If you do not want to use the safety checker, you can pass `' │
│ 147 │ │ │ ) │
│ 148 │ │ │
│ ❱ 149 │ │ self.register_modules( │
│ 150 │ │ │ vae=vae, │
│ 151 │ │ │ text_encoder=text_encoder, │
│ 152 │ │ │ tokenizer=tokenizer, │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:252 in │
│ register_modules │
│ │
│ 249 │ │ │ if module is None: │
│ 250 │ │ │ │ register_dict = {name: (None, None)} │
│ 251 │ │ │ else: │
│ ❱ 252 │ │ │ │ library = module.__module__.split(".")[0] │
│ 253 │ │ │ │ │
│ 254 │ │ │ │ # check if the module is a pipeline module │
│ 255 │ │ │ │ pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__
Reproduction
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
# Two ControlNet checkpoints (OpenPose and Canny), both loaded in float16 and
# collected in a list so they can be handed to the pipeline together.
controlnet = [
ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16),
ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16),
]
# The print(N) calls are progress markers showing how far the script got.
print(1)
# NOTE(review): the traceback in this report ends inside
# from_pretrained -> register_modules, so this call is presumably where the
# script fails (the `controlnet` list is what is passed in) -- confirm.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
)
print(2)
# Replace the default scheduler with UniPC, reusing the existing scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
print(3)
prompt = "a giant standing in a fantasy landscape, best quality"
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
# Fixed seed on a CPU generator.
generator = torch.Generator(device="cpu").manual_seed(1)
print(4)
# Both conditioning inputs are opened as binary file handles and never closed.
# NOTE(review): the pipeline presumably expects decoded images rather than raw
# file objects -- verify against the linked documentation example.
openpose_image=open('pose3.png', 'rb')
canny_image=open('canny.png', 'rb')
images = [openpose_image, canny_image]
print(5)
# Positional arguments are the prompt and the list of conditioning inputs;
# controlnet_conditioning_scale carries one value per ControlNet in the list.
# Only the first returned image is kept.
image = pipe(
prompt,
images,
num_inference_steps=20,
generator=generator,
negative_prompt=negative_prompt,
controlnet_conditioning_scale=[1.0, 0.8],
).images[0]
print(6)
image.save("./multi_controlnet_output.png")
copied from here: https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/controlnet
Logs
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
/usr/local/lib/python3.9/dist-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead.
warnings.warn(
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <cell line: 9>:9 │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:965 in │
│ from_pretrained │
│ │
│ 962 │ │ │ ) │
│ 963 │ │ │
│ 964 │ │ # 5. Instantiate the pipeline │
│ ❱ 965 │ │ model = pipeline_class(**init_kwargs) │
│ 966 │ │ │
│ 967 │ │ if return_cached_folder: │
│ 968 │ │ │ return model, cached_folder │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diff │
│ usion_controlnet.py:149 in __init__ │
│ │
│ 146 │ │ │ │ " checker. If you do not want to use the safety checker, you can pass `' │
│ 147 │ │ │ ) │
│ 148 │ │ │
│ ❱ 149 │ │ self.register_modules( │
│ 150 │ │ │ vae=vae, │
│ 151 │ │ │ text_encoder=text_encoder, │
│ 152 │ │ │ tokenizer=tokenizer, │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:252 in │
│ register_modules │
│ │
│ 249 │ │ │ if module is None: │
│ 250 │ │ │ │ register_dict = {name: (None, None)} │
│ 251 │ │ │ else: │
│ ❱ 252 │ │ │ │ library = module.__module__.split(".")[0] │
│ 253 │ │ │ │ │
│ 254 │ │ │ │ # check if the module is a pipeline module │
│ 255 │ │ │ │ pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__
System Info
colab using the latest diffusers version
Describe the bug
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
/usr/local/lib/python3.9/dist-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead.
warnings.warn(
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <cell line: 9>:9 │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:965 in │
│ from_pretrained │
│ │
│ 962 │ │ │ ) │
│ 963 │ │ │
│ 964 │ │ # 5. Instantiate the pipeline │
│ ❱ 965 │ │ model = pipeline_class(**init_kwargs) │
│ 966 │ │ │
│ 967 │ │ if return_cached_folder: │
│ 968 │ │ │ return model, cached_folder │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diff │
│ usion_controlnet.py:149 in __init__ │
│ │
│ 146 │ │ │ │ " checker. If you do not want to use the safety checker, you can pass `' │
│ 147 │ │ │ ) │
│ 148 │ │ │
│ ❱ 149 │ │ self.register_modules( │
│ 150 │ │ │ vae=vae, │
│ 151 │ │ │ text_encoder=text_encoder, │
│ 152 │ │ │ tokenizer=tokenizer, │
│ │
│ /usr/local/lib/python3.9/dist-packages/diffusers/pipelines/pipeline_utils.py:252 in │
│ register_modules │
│ │
│ 249 │ │ │ if module is None: │
│ 250 │ │ │ │ register_dict = {name: (None, None)} │
│ 251 │ │ │ else: │
│ ❱ 252 │ │ │ │ library = module.__module__.split(".")[0] │
│ 253 │ │ │ │ │
│ 254 │ │ │ │ # check if the module is a pipeline module │
│ 255 │ │ │ │ pipeline_dir = module.__module__.split(".")[-2] if len(module.__module__
Reproduction
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
# Two ControlNet checkpoints (OpenPose and Canny), both loaded in float16 and
# collected in a list so they can be handed to the pipeline together.
controlnet = [
ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16),
ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16),
]
# The print(N) calls are progress markers showing how far the script got.
print(1)
# NOTE(review): the traceback in this report ends inside
# from_pretrained -> register_modules, so this call is presumably where the
# script fails (the `controlnet` list is what is passed in) -- confirm.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
)
print(2)
# Replace the default scheduler with UniPC, reusing the existing scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
print(3)
prompt = "a giant standing in a fantasy landscape, best quality"
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
# Fixed seed on a CPU generator.
generator = torch.Generator(device="cpu").manual_seed(1)
print(4)
# Both conditioning inputs are opened as binary file handles and never closed.
# NOTE(review): the pipeline presumably expects decoded images rather than raw
# file objects -- verify against the linked documentation example.
openpose_image=open('pose3.png', 'rb')
canny_image=open('canny.png', 'rb')
images = [openpose_image, canny_image]
print(5)
# Positional arguments are the prompt and the list of conditioning inputs;
# controlnet_conditioning_scale carries one value per ControlNet in the list.
# Only the first returned image is kept.
image = pipe(
prompt,
images,
num_inference_steps=20,
generator=generator,
negative_prompt=negative_prompt,
controlnet_conditioning_scale=[1.0, 0.8],
).images[0]
print(6)
image.save("./multi_controlnet_output.png")
copied from here: https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/controlnet
Logs
System Info
colab using the latest diffusers version