From a1678769e108bd753ef1000b56288df682a8f02d Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 09:53:35 +0100
Subject: [PATCH 1/5] Slight performance improvement to Euler

---
 src/diffusers/schedulers/scheduling_euler_discrete.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py
index 5c39583356ad..e98c4d98843a 100644
--- a/src/diffusers/schedulers/scheduling_euler_discrete.py
+++ b/src/diffusers/schedulers/scheduling_euler_discrete.py
@@ -638,14 +638,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise

From e42fdb80ff88b23ca426f94fbe0f7292147bf25c Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:24 +0100
Subject: [PATCH 2/5] Slight performance improvement to EDMEuler

---
 src/diffusers/schedulers/scheduling_edm_euler.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_edm_euler.py b/src/diffusers/schedulers/scheduling_edm_euler.py
index 4b823c0d281b..be74405da8df 100644
--- a/src/diffusers/schedulers/scheduling_edm_euler.py
+++ b/src/diffusers/schedulers/scheduling_edm_euler.py
@@ -333,14 +333,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
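The Euler and EDMEuler patches above, and the FlowMatchHeun patch below, all apply the same micro-optimization: randn_tensor is only called once gamma > 0 is known, so the common case of s_churn=0 (where gamma is always 0.0 and the churn noise would never be used) skips the random draw entirely. A minimal standalone sketch of the pattern, assuming diffusers is installed; the apply_churn helper and its arguments are illustrative and not part of the diffusers API:

    import torch

    from diffusers.utils.torch_utils import randn_tensor


    def apply_churn(sample, model_output, sigma, gamma, s_noise=1.0, generator=None):
        # Scale sigma up to sigma_hat; draw Gaussian noise only when churn is
        # actually applied (gamma > 0), mirroring the scheduler patches.
        sigma_hat = sigma * (gamma + 1)
        if gamma > 0:
            noise = randn_tensor(
                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
            )
            eps = noise * s_noise
            sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5
        return sample, sigma_hat


    sample = torch.randn(1, 4, 8, 8)
    model_output = torch.randn_like(sample)
    # gamma == 0 (the default s_churn=0 case): no randn_tensor call is made.
    sample, sigma_hat = apply_churn(sample, model_output, sigma=1.0, gamma=0.0)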
From f029e840a2354f945ee0f7b6e75b5dae56a00df5 Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:34 +0100
Subject: [PATCH 3/5] Slight performance improvement to FlowMatchHeun

---
 .../schedulers/scheduling_flow_match_heun_discrete.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py b/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
index d9a3ca2d4b0a..cc7f6b8e9c57 100644
--- a/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
+++ b/src/diffusers/schedulers/scheduling_flow_match_heun_discrete.py
@@ -266,14 +266,13 @@ def step(

         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0

-        noise = randn_tensor(
-            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
-        )
-
-        eps = noise * s_noise
         sigma_hat = sigma * (gamma + 1)

         if gamma > 0:
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
+            eps = noise * s_noise
             sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5

         if self.state_in_first_order:

From bcd1fc147247f13385819f0515bb483ff4222555 Mon Sep 17 00:00:00 2001
From: hlky
Date: Wed, 9 Oct 2024 14:04:50 +0100
Subject: [PATCH 4/5] Slight performance improvement to KDPM2Ancestral

---
 .../schedulers/scheduling_k_dpm_2_ancestral_discrete.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
index b1ec244e5a79..a1b864c59431 100644
--- a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
+++ b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py
@@ -502,9 +502,6 @@ def step(
             gamma = 0
         sigma_hat = sigma * (gamma + 1)  # Note: sigma_hat == sigma for now

-        device = model_output.device
-        noise = randn_tensor(model_output.shape, dtype=model_output.dtype, device=device, generator=generator)
-
         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
         if self.config.prediction_type == "epsilon":
             sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol
@@ -542,6 +539,9 @@ def step(
             self.sample = None

             prev_sample = sample + derivative * dt
+            noise = randn_tensor(
+                model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+            )
             prev_sample = prev_sample + noise * sigma_up

         # upon completion increase step index by one

From c4f87582c6d34346e876af7bbcd1f8013f127c98 Mon Sep 17 00:00:00 2001
From: hlky
Date: Thu, 10 Oct 2024 09:38:11 +0100
Subject: [PATCH 5/5] Update KDPM2AncestralDiscreteSchedulerTest

---
 .../schedulers/test_scheduler_kdpm2_ancestral.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/schedulers/test_scheduler_kdpm2_ancestral.py b/tests/schedulers/test_scheduler_kdpm2_ancestral.py
index f6e8e96e084a..82312629727c 100644
--- a/tests/schedulers/test_scheduler_kdpm2_ancestral.py
+++ b/tests/schedulers/test_scheduler_kdpm2_ancestral.py
@@ -59,8 +59,8 @@ def test_full_loop_no_noise(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 13849.3877) < 1e-2
-        assert abs(result_mean.item() - 18.0331) < 5e-3
+        assert abs(result_sum.item() - 13979.9433) < 1e-2
+        assert abs(result_mean.item() - 18.2030) < 5e-3

     def test_prediction_type(self):
         for prediction_type in ["epsilon", "v_prediction"]:
@@ -92,8 +92,8 @@ def test_full_loop_with_v_prediction(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 328.9970) < 1e-2
-        assert abs(result_mean.item() - 0.4284) < 1e-3
+        assert abs(result_sum.item() - 331.8133) < 1e-2
+        assert abs(result_mean.item() - 0.4320) < 1e-3

     def test_full_loop_device(self):
         if torch_device == "mps":
@@ -119,8 +119,8 @@ def test_full_loop_device(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 13849.3818) < 1e-1
-        assert abs(result_mean.item() - 18.0331) < 1e-3
+        assert abs(result_sum.item() - 13979.9433) < 1e-1
+        assert abs(result_mean.item() - 18.2030) < 1e-3

     def test_full_loop_with_noise(self):
         if torch_device == "mps":
@@ -154,5 +154,5 @@ def test_full_loop_with_noise(self):
         result_sum = torch.sum(torch.abs(sample))
         result_mean = torch.mean(torch.abs(sample))

-        assert abs(result_sum.item() - 93087.0312) < 1e-2, f" expected result sum 93087.0312, but get {result_sum}"
-        assert abs(result_mean.item() - 121.2071) < 5e-3, f" expected result mean 121.2071, but get {result_mean}"
+        assert abs(result_sum.item() - 93087.3437) < 1e-2, f" expected result sum 93087.3437, but get {result_sum}"
+        assert abs(result_mean.item() - 121.2074) < 5e-3, f" expected result mean 121.2074, but get {result_mean}"
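The new expected values in KDPM2AncestralDiscreteSchedulerTest are a consequence of the scheduler patch rather than of any change to the test setup: randn_tensor is now called only in the second-order branch of step, so a seeded torch.Generator is advanced on fewer calls and the ancestral noise lands at a different point in the random sequence. A small sketch of that effect, with arbitrary shapes unrelated to the scheduler:

    import torch

    # Old behaviour: noise was drawn on every step() call, including
    # first-order calls where it is never used.
    gen = torch.manual_seed(0)
    _ = torch.randn(4, generator=gen)                      # first-order draw, discarded
    old_second_order_noise = torch.randn(4, generator=gen)

    # New behaviour: only the second-order branch draws noise, so the same
    # seed yields a different value for the noise that is actually used.
    gen = torch.manual_seed(0)
    new_second_order_noise = torch.randn(4, generator=gen)

    print(torch.equal(old_second_order_noise, new_second_order_noise))  # False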