From a1678769e108bd753ef1000b56288df682a8f02d Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 09:53:35 +0100
Subject: [PATCH 1/5] Slight performance improvement to Euler

---
 src/diffusers/schedulers/scheduling_euler_discrete.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py
index 5c39583356ad..e98c4d98843a 100644
--- a/src/diffusers/schedulers/scheduling_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_euler_discrete.py
@@ -638,14 +638,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise

From e42fdb80ff88b23ca426f94fbe0f7292147bf25c Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:24 +0100
Subject: [PATCH 2/5] Slight performance improvement to EDMEuler

---
 src/diffusers/schedulers/scheduling_edm_euler.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_edm_euler.py b/src/diffusers/schedulers/scheduling_edm_euler.py
index 4b823c0d281b..be74405da8df 100644
--- a/src/diffusers/schedulers/scheduling_edm_euler.py
+++ b/src/diffusers/schedulers/scheduling_edm_euler.py
@@ -333,14 +333,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
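The Euler and EDMEuler patches above, and the FlowMatchHeun patch below, all apply the same micro-optimization: randn_tensor is only called once gamma > 0 is known, so the common case of s_churn=0 (where gamma is always 0.0 and the churn noise would never be used) skips the random draw entirely. A minimal standalone sketch of the pattern, assuming diffusers is installed; the apply_churn helper and its arguments are illustrative and not part of the diffusers API:

    import torch

    from diffusers.utils.torch_utils import randn_tensor


    def apply_churn(sample, model_output, sigma, gamma, s_noise=1.0, generator=None):
        # Scale sigma up to sigma_hat; draw Gaussian noise only when churn is
        # actually applied (gamma > 0), mirroring the scheduler patches.
        sigma_hat = sigma * (gamma + 1)
        if gamma > 0:
            noise = randn_tensor(
                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
            )
            eps = noise * s_noise
            sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5
        return sample, sigma_hat


    sample = torch.randn(1, 4, 8, 8)
    model_output = torch.randn_like(sample)
    # gamma == 0 (the default s_churn=0 case): no randn_tensor call is made.
    sample, sigma_hat = apply_churn(sample, model_output, sigma=1.0, gamma=0.0)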
From f029e840a2354f945ee0f7b6e75b5dae56a00df5 Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:34 +0100
Subject: [PATCH 3/5] Slight performance improvement to FlowMatchHeun

---
 .../schedulers/scheduling_flow_match_heun_discrete.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
index d9a3ca2d4b0a..cc7f6b8e9c57 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
@@ -266,14 +266,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         if self.state_in_first_order:

From bcd1fc147247f13385819f0515bb483ff4222555 Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:50 +0100
Subject: [PATCH 4/5] Slight performance improvement to KDPM2Ancestral

---
 .../schedulers/scheduling_k_dpm_2_ancestral_discrete.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
index b1ec244e5a79..a1b864c59431 100644
--- a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
+++ b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
@@ -502,9 +502,6 @@ def step(
             gamma = 0
         sigma_hat = sigma * (gamma + 1)  # Note: sigma_hat == sigma for now

-        device = model_output.device
-        noise = randn_tensor(model_output.shape, dtype=model_output.dtype, device=device, generator=generator)
-
         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
         if self.config.prediction_type == "epsilon":
             sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol
@@ -542,6 +539,9 @@ def step(
             self.sample = None

             prev_sample = sample + derivative * dt
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
             prev_sample = prev_sample + noise * sigma_up

         # upon completion increase step index by one

From c4f87582c6d34346e876af7bbcd1f8013f127c98 Mon Sep 17 00:00:00 2001
From: hlky
Date: Thu, 10 Oct 2024 09:38:11 +0100
Subject: [PATCH 5/5] Update KDPM2AncestralDiscreteSchedulerTest

---
 .../schedulers/test_scheduler_kdpm2_ancestral.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/schedulers/test_scheduler_kdpm2_ancestral.py b/tests/schedulers/test_scheduler_kdpm2_ancestral.py
index f6e8e96e084a..82312629727c 100644
--- a/tests/schedulers/test_scheduler_kdpm2_ancestral.py
+++ b/tests/schedulers/test_scheduler_kdpm2_ancestral.py
@@ -59,8 +59,8 @@ def test_full_loop_no_noise(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 13849.3877) < 1e-2
-        assert abs(result_mean.item() - 18.0331) < 5e-3
+        assert abs(result_sum.item() - 13979.9433) < 1e-2
+        assert abs(result_mean.item() - 18.2030) < 5e-3

     def test_prediction_type(self):
         for prediction_type in ["epsilon", "v_prediction"]:
@@ -92,8 +92,8 @@ def test_full_loop_with_v_prediction(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 328.9970) < 1e-2
-        assert abs(result_mean.item() - 0.4284) < 1e-3
+        assert abs(result_sum.item() - 331.8133) < 1e-2
+        assert abs(result_mean.item() - 0.4320) < 1e-3

     def test_full_loop_device(self):
         if torch_device == "mps":
@@ -119,8 +119,8 @@ def test_full_loop_device(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 13849.3818) < 1e-1
-        assert abs(result_mean.item() - 18.0331) < 1e-3
+        assert abs(result_sum.item() - 13979.9433) < 1e-1
+        assert abs(result_mean.item() - 18.2030) < 1e-3

     def test_full_loop_with_noise(self):
         if torch_device == "mps":
@@ -154,5 +154,5 @@ def test_full_loop_with_noise(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 93087.0312) < 1e-2, f" expected result sum 93087.0312, but get {result_sum}"
-        assert abs(result_mean.item() - 121.2071) < 5e-3, f" expected result mean 121.2071, but get {result_mean}"
+        assert abs(result_sum.item() - 93087.3437) < 1e-2, f" expected result sum 93087.3437, but get {result_sum}"
+        assert abs(result_mean.item() - 121.2074) < 5e-3, f" expected result mean 121.2074, but get {result_mean}"
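The new expected values in KDPM2AncestralDiscreteSchedulerTest are a consequence of the scheduler patch rather than of any change to the test setup: randn_tensor is now called only in the second-order branch of step, so a seeded torch.Generator is advanced on fewer calls and the ancestral noise lands at a different point in the random sequence. A small sketch of that effect, with arbitrary shapes unrelated to the scheduler:

    import torch

    # Old behaviour: noise was drawn on every step() call, including
    # first-order calls where it is never used.
    gen = torch.manual_seed(0)
    _ = torch.randn(4, generator=gen)                      # first-order draw, discarded
    old_second_order_noise = torch.randn(4, generator=gen)

    # New behaviour: only the second-order branch draws noise, so the same
    # seed yields a different value for the noise that is actually used.
    gen = torch.manual_seed(0)
    new_second_order_noise = torch.randn(4, generator=gen)

    print(torch.equal(old_second_order_noise, new_second_order_noise))  # False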