From 9b599c82f5b3f0b2289b4de3807391e558dc3982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Mon, 24 Jun 2024 03:44:23 -0400
Subject: [PATCH 1/3] initial fix

---
 .../pipeline_stable_diffusion_3_img2img.py             | 2 +-
 .../schedulers/scheduling_flow_match_euler_discrete.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py
index 2555224b8c23..f888fb6c1de3 100644
--- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py
+++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py
@@ -852,7 +852,7 @@ def __call__(
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
         timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device)
-        latent_timestep = timesteps[:1].repeat(batch_size * num_inference_steps)
+        latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
 
         # 5. Prepare latent variables
         if latents is None:
diff --git a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
index 08e9d4f3a95d..02a16bc36f5a 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
@@ -129,7 +129,14 @@ def scale_noise(
         if self.step_index is None:
             self._init_step_index(timestep)
 
-        sigma = self.sigmas[self.step_index]
+        sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)
+        schedule_timesteps = self.timesteps.to(sample.device)
+        step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
+        sigma = sigmas[step_indices].flatten()
+
+        while len(sigma.shape) < len(noise.shape):
+            sigma = sigma.unsqueeze(-1)
+
         sample = sigma * noise + (1.0 - sigma) * sample
 
         return sample

From aef08ed4d3f09f2947262d4f05e9516fe4c2b913 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Wed, 26 Jun 2024 18:32:37 -0400
Subject: [PATCH 2/3] apply suggestion

---
 .../schedulers/scheduling_flow_match_euler_discrete.py | 25 ++++++++++++++++---
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
index 02a16bc36f5a..ba50e16d9a51 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
@@ -129,12 +129,29 @@ def scale_noise(
         if self.step_index is None:
             self._init_step_index(timestep)
 
+        # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)
-        schedule_timesteps = self.timesteps.to(sample.device)
-        step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
-        sigma = sigmas[step_indices].flatten()
+
+        if sample.device.type == "mps" and torch.is_floating_point(timestep):
+            # mps does not support float64
+            schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32)
+            timestep = timestep.to(sample.device, dtype=torch.float32)
+        else:
+            schedule_timesteps = self.timesteps.to(sample.device)
+            timestep = timestep.to(sample.device)
+
+        # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
+        if self.begin_index is None:
+            step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
+        elif self.step_index is not None:
+            # add_noise is called after first denoising step (for inpainting)
+            step_indices = [self.step_index] * timestep.shape[0]
+        else:
+            # add noise is called before first denoising step to create initial latent(img2img)
+            step_indices = [self.begin_index] * timestep.shape[0]
 
-        while len(sigma.shape) < len(noise.shape):
+        sigma = sigmas[step_indices].flatten()
+        while len(sigma.shape) < len(sample.shape):
             sigma = sigma.unsqueeze(-1)
 
         sample = sigma * noise + (1.0 - sigma) * sample

From 1cceb36c36d58b5661a435c458ffb739cbbd953b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Wed, 26 Jun 2024 22:38:35 -0400
Subject: [PATCH 3/3] delete step_index line

---
 .../schedulers/scheduling_flow_match_euler_discrete.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
index ba50e16d9a51..83ce63981abd 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_euler_discrete.py
@@ -126,9 +126,6 @@ def scale_noise(
             `torch.FloatTensor`:
                 A scaled input sample.
         """
-        if self.step_index is None:
-            self._init_step_index(timestep)
-
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)
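
Note (not part of the patch series above): the sketch below mirrors the per-sample sigma lookup that the patched scale_noise performs, written as a standalone function so the batching behaviour is easy to see. The name scale_noise_sketch, the toy 4-step schedule, and the plain begin_index/step_index arguments are illustrative assumptions; the real scheduler keeps that state on self and resolves indices with index_for_timestep.

import torch


def scale_noise_sketch(sample, timestep, noise, sigmas, schedule_timesteps, begin_index=None, step_index=None):
    # Mirror of the patched logic: resolve one sigma per batch element, not a single scalar.
    sigmas = sigmas.to(device=sample.device, dtype=sample.dtype)
    schedule_timesteps = schedule_timesteps.to(sample.device)
    timestep = timestep.to(sample.device)

    if begin_index is None:
        # Training, or a pipeline that does not set a begin index: look each timestep up in the schedule.
        step_indices = [(schedule_timesteps == t).nonzero().item() for t in timestep]
    elif step_index is not None:
        # Called after the first denoising step (the inpainting case in the patch).
        step_indices = [step_index] * timestep.shape[0]
    else:
        # Called before the first denoising step to build the initial latent (the img2img case).
        step_indices = [begin_index] * timestep.shape[0]

    sigma = sigmas[step_indices].flatten()
    while sigma.ndim < sample.ndim:
        sigma = sigma.unsqueeze(-1)
    return sigma * noise + (1.0 - sigma) * sample


# Toy flow-match schedule: sigma goes 1.0 -> 0.25 over 4 steps, timesteps = sigma * 1000.
sigmas = torch.linspace(1.0, 0.0, 5)[:-1]
timesteps = sigmas * 1000
sample = torch.randn(2, 4, 8, 8)           # batch of 2, e.g. num_images_per_prompt=2
noise = torch.randn_like(sample)
latent_timestep = timesteps[:1].repeat(2)  # one timestep per image, as in the pipeline fix
noisy = scale_noise_sketch(sample, latent_timestep, noise, sigmas, timesteps, begin_index=0)
print(noisy.shape)  # torch.Size([2, 4, 8, 8])

With a batch of two images, latent_timestep carries two entries and two sigmas are gathered and broadcast over the latent shape, which is the case the batch_size * num_images_per_prompt change in the pipeline exercises.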