diff --git a/examples/custom_diffusion/train_custom_diffusion.py b/examples/custom_diffusion/train_custom_diffusion.py
index 3288fe3258ac..8d90998700f4 100644
--- a/examples/custom_diffusion/train_custom_diffusion.py
+++ b/examples/custom_diffusion/train_custom_diffusion.py
@@ -207,7 +207,7 @@ def __init__(
                 with open(concept["class_prompt"], "r") as f:
                     class_prompt = f.read().splitlines()
 
-                class_img_path = [(x, y) for (x, y) in zip(class_images_path, class_prompt)]
+                class_img_path = list(zip(class_images_path, class_prompt))
                 self.class_images_path.extend(class_img_path[:num_class_images])
 
         random.shuffle(self.instance_images_path)
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
index e39b7cde3d36..f176f08d5d8c 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py
@@ -803,7 +803,9 @@ def __call__(
 
                 if gligen_inpaint_image is not None:
                     gligen_inpaint_latent_with_noise = (
-                        self.scheduler.add_noise(gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), t)
+                        self.scheduler.add_noise(
+                            gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), torch.tensor([t])
+                        )
                         .expand(latents.shape[0], -1, -1, -1)
                         .clone()
                     )
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
index 59ba0ab666b3..ba418b4cb3c3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py
@@ -965,7 +965,9 @@ def __call__(
 
                 if gligen_inpaint_image is not None:
                     gligen_inpaint_latent_with_noise = (
-                        self.scheduler.add_noise(gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), t)
+                        self.scheduler.add_noise(
+                            gligen_inpaint_latent, torch.randn_like(gligen_inpaint_latent), torch.tensor([t])
+                        )
                         .expand(latents.shape[0], -1, -1, -1)
                         .clone()
                     )
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen.py
index 19d44e0cd1d9..388ad9672e15 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen.py
@@ -22,6 +22,7 @@
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
+    EulerAncestralDiscreteScheduler,
     StableDiffusionGLIGENPipeline,
     UNet2DConditionModel,
 )
@@ -120,7 +121,7 @@ def get_dummy_inputs(self, device, seed=0):
         }
         return inputs
 
-    def test_gligen(self):
+    def test_stable_diffusion_gligen_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()
         sd_pipe = StableDiffusionGLIGENPipeline(**components)
@@ -136,6 +137,24 @@ def get_dummy_inputs(self, device, seed=0):
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
+    def test_stable_diffusion_gligen_k_euler_ancestral(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionGLIGENPipeline(**components)
+        sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        output = sd_pipe(**inputs)
+        image = output.images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+        expected_slice = np.array([0.425, 0.494, 0.429, 0.469, 0.525, 0.417, 0.533, 0.5, 0.47])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
     def test_attention_slicing_forward_pass(self):
         super().test_attention_slicing_forward_pass(expected_max_diff=3e-3)
 
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen_text_image.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen_text_image.py
index 4e14adc81f42..f8f32643aec1 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen_text_image.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_gligen_text_image.py
@@ -29,6 +29,7 @@
 from diffusers import (
     AutoencoderKL,
     DDIMScheduler,
+    EulerAncestralDiscreteScheduler,
     StableDiffusionGLIGENTextImagePipeline,
     UNet2DConditionModel,
 )
@@ -150,7 +151,7 @@ def get_dummy_inputs(self, device, seed=0):
         }
         return inputs
 
-    def test_gligen(self):
+    def test_stable_diffusion_gligen_text_image_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()
         sd_pipe = StableDiffusionGLIGENTextImagePipeline(**components)
@@ -166,6 +167,24 @@ def get_dummy_inputs(self, device, seed=0):
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
+    def test_stable_diffusion_gligen_k_euler_ancestral(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionGLIGENTextImagePipeline(**components)
+        sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_pipe.scheduler.config)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = sd_pipe(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 64, 64, 3)
+
+        expected_slice = np.array([0.425, 0.494, 0.429, 0.469, 0.525, 0.417, 0.533, 0.5, 0.47])
+
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+
     def test_attention_slicing_forward_pass(self):
         super().test_attention_slicing_forward_pass(expected_max_diff=3e-3)