From d8baec36e92dc4dd1549e54455f3a60949275fe9 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 13 May 2026 17:13:43 +0900
Subject: [PATCH 1/4] refactor ltx2 autoencoder tests to use latest mixins

---
 .../test_models_autoencoder_kl_ltx2_audio.py  | 67 +++++++++----------
 .../test_models_autoencoder_ltx2_video.py     | 66 +++++++++---------
 2 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
index ce93dfb42afe..07a56a3bfcfb 100644
--- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
+++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
@@ -13,24 +13,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
+import pytest
 
 from diffusers import AutoencoderKLLTX2Audio
 
-from ...testing_utils import (
-    floats_tensor,
-    torch_device,
-)
-from ..test_modeling_common import ModelTesterMixin
-from .testing_utils import AutoencoderTesterMixin
+from ...testing_utils import floats_tensor, torch_device
+from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin
+from .testing_utils import NewAutoencoderTesterMixin
 
 
-class AutoencoderKLLTX2AudioTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase):
-    model_class = AutoencoderKLLTX2Audio
-    main_input_name = "sample"
-    base_precision = 1e-2
+class AutoencoderKLLTX2AudioTesterConfig(BaseModelTesterConfig):
+    @property
+    def main_input_name(self):
+        return "sample"
+
+    @property
+    def model_class(self):
+        return AutoencoderKLLTX2Audio
+
+    @property
+    def output_shape(self):
+        return (2, 5, 16)
 
-    def get_autoencoder_kl_ltx_video_config(self):
+    def get_init_dict(self):
         return {
             "in_channels": 2,  # stereo,
             "output_channels": 2,
@@ -50,39 +55,29 @@ def get_autoencoder_kl_ltx_video_config(self):
             "double_z": True,
         }
 
-    @property
-    def dummy_input(self):
+    def get_dummy_inputs(self):
         batch_size = 2
         num_channels = 2
         num_frames = 8
         num_mel_bins = 16
-
         spectrogram = floats_tensor((batch_size, num_channels, num_frames, num_mel_bins)).to(torch_device)
+        return {"sample": spectrogram}
 
-        input_dict = {"sample": spectrogram}
-        return input_dict
 
-    @property
-    def input_shape(self):
-        return (2, 5, 16)
+class TestAutoencoderKLLTX2Audio(AutoencoderKLLTX2AudioTesterConfig, ModelTesterMixin):
+    base_precision = 1e-2
 
-    @property
-    def output_shape(self):
-        return (2, 5, 16)
+    def test_outputs_equivalence(self):
+        pytest.skip("Unsupported test.")
 
-    def prepare_init_args_and_inputs_for_common(self):
-        init_dict = self.get_autoencoder_kl_ltx_video_config()
-        inputs_dict = self.dummy_input
-        return init_dict, inputs_dict
 
-    # Overriding as output shape is not the same as input shape for LTX 2.0 audio VAE
-    def test_output(self):
-        super().test_output(expected_output_shape=(2, 2, 5, 16))
+class TestAutoencoderKLLTX2AudioTraining(AutoencoderKLLTX2AudioTesterConfig, TrainingTesterMixin):
+    """Training tests for AutoencoderKLLTX2Audio."""
+
+
+class TestAutoencoderKLLTX2AudioMemory(AutoencoderKLLTX2AudioTesterConfig, MemoryTesterMixin):
+    """Memory optimization tests for AutoencoderKLLTX2Audio."""
 
-    @unittest.skip("Unsupported test.")
-    def test_outputs_equivalence(self):
-        pass
 
-    @unittest.skip("AutoencoderKLLTX2Audio does not support `norm_num_groups` because it does not use GroupNorm.")
-    def test_forward_with_norm_groups(self):
-        pass
+class TestAutoencoderKLLTX2AudioSlicingTiling(AutoencoderKLLTX2AudioTesterConfig, NewAutoencoderTesterMixin):
+    """Slicing and tiling tests for AutoencoderKLLTX2Audio."""
diff --git a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
index 146241361a82..c5d4e934c98e 100644
--- a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
+++ b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
@@ -13,28 +13,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import unittest
+import pytest
 
 from diffusers import AutoencoderKLLTX2Video
 
-from ...testing_utils import (
-    enable_full_determinism,
-    floats_tensor,
-    torch_device,
-)
-from ..test_modeling_common import ModelTesterMixin
-from .testing_utils import AutoencoderTesterMixin
+from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
+from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin
+from .testing_utils import NewAutoencoderTesterMixin
 
 
 enable_full_determinism()
 
 
-class AutoencoderKLLTX2VideoTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase):
-    model_class = AutoencoderKLLTX2Video
-    main_input_name = "sample"
-    base_precision = 1e-2
+class AutoencoderKLLTX2VideoTesterConfig(BaseModelTesterConfig):
+    @property
+    def main_input_name(self):
+        return "sample"
+
+    @property
+    def model_class(self):
+        return AutoencoderKLLTX2Video
 
-    def get_autoencoder_kl_ltx_video_config(self):
+    @property
+    def output_shape(self):
+        return (3, 9, 16, 16)
+
+    def get_init_dict(self):
         return {
             "in_channels": 3,
             "out_channels": 3,
@@ -59,30 +63,24 @@ def get_autoencoder_kl_ltx_video_config(self):
             "decoder_spatial_padding_mode": "zeros",
         }
 
-    @property
-    def dummy_input(self):
+    def get_dummy_inputs(self):
         batch_size = 2
         num_frames = 9
         num_channels = 3
         sizes = (16, 16)
-
         image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device)
+        return {"sample": image}
 
-        input_dict = {"sample": image}
-        return input_dict
 
-    @property
-    def input_shape(self):
-        return (3, 9, 16, 16)
+class TestAutoencoderKLLTX2Video(AutoencoderKLLTX2VideoTesterConfig, ModelTesterMixin):
+    base_precision = 1e-2
+
+    def test_outputs_equivalence(self):
+        pytest.skip("Unsupported test.")
 
-    @property
-    def output_shape(self):
-        return (3, 9, 16, 16)
 
-    def prepare_init_args_and_inputs_for_common(self):
-        init_dict = self.get_autoencoder_kl_ltx_video_config()
-        inputs_dict = self.dummy_input
-        return init_dict, inputs_dict
+class TestAutoencoderKLLTX2VideoTraining(AutoencoderKLLTX2VideoTesterConfig, TrainingTesterMixin):
+    """Training tests for AutoencoderKLLTX2Video."""
 
     def test_gradient_checkpointing_is_applied(self):
         expected_set = {
@@ -94,10 +92,10 @@ def test_gradient_checkpointing_is_applied(self):
         }
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
 
-    @unittest.skip("Unsupported test.")
-    def test_outputs_equivalence(self):
-        pass
 
-    @unittest.skip("AutoencoderKLLTXVideo does not support `norm_num_groups` because it does not use GroupNorm.")
-    def test_forward_with_norm_groups(self):
-        pass
+class TestAutoencoderKLLTX2VideoMemory(AutoencoderKLLTX2VideoTesterConfig, MemoryTesterMixin):
+    """Memory optimization tests for AutoencoderKLLTX2Video."""
+
+
+class TestAutoencoderKLLTX2VideoSlicingTiling(AutoencoderKLLTX2VideoTesterConfig, NewAutoencoderTesterMixin):
+    """Slicing and tiling tests for AutoencoderKLLTX2Video."""

From f2b06fbae092d202cd3d8a701a8ceecaf3566887 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 13 May 2026 18:00:02 +0900
Subject: [PATCH 2/4] use seeded randn_tensor inputs in ltx2 audio autoencoder tests

---
 .../test_models_autoencoder_kl_ltx2_audio.py       | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
index 07a56a3bfcfb..37ece8f4e0f7 100644
--- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
+++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
@@ -14,10 +14,12 @@
 # limitations under the License.
 
 import pytest
+import torch
 
 from diffusers import AutoencoderKLLTX2Audio
+from diffusers.utils.torch_utils import randn_tensor
 
-from ...testing_utils import floats_tensor, torch_device
+from ...testing_utils import torch_device
 from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin
 from .testing_utils import NewAutoencoderTesterMixin
 
@@ -35,6 +37,10 @@ def model_class(self):
     def output_shape(self):
         return (2, 5, 16)
 
+    @property
+    def generator(self):
+        return torch.Generator("cpu").manual_seed(0)
+
     def get_init_dict(self):
         return {
             "in_channels": 2,  # stereo,
@@ -60,7 +66,11 @@ def get_dummy_inputs(self):
         num_channels = 2
         num_frames = 8
         num_mel_bins = 16
-        spectrogram = floats_tensor((batch_size, num_channels, num_frames, num_mel_bins)).to(torch_device)
+        spectrogram = randn_tensor(
+            (batch_size, num_channels, num_frames, num_mel_bins),
+            generator=self.generator,
+            device=torch_device,
+        )
         return {"sample": spectrogram}
 
 

From fea4c5593702d46022f7fe00f7b4cd8ee6670604 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Wed, 13 May 2026 18:18:49 +0900
Subject: [PATCH 3/4] use seeded randn_tensor inputs in ltx2 video autoencoder tests

---
 .../test_models_autoencoder_ltx2_video.py            | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
index c5d4e934c98e..cc041baa5bc7 100644
--- a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
+++ b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py
@@ -14,10 +14,12 @@
 # limitations under the License.
 
 import pytest
+import torch
 
 from diffusers import AutoencoderKLLTX2Video
+from diffusers.utils.torch_utils import randn_tensor
 
-from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
+from ...testing_utils import enable_full_determinism, torch_device
 from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin
 from .testing_utils import NewAutoencoderTesterMixin
 
@@ -38,6 +40,10 @@ def model_class(self):
     def output_shape(self):
         return (3, 9, 16, 16)
 
+    @property
+    def generator(self):
+        return torch.Generator("cpu").manual_seed(0)
+
     def get_init_dict(self):
         return {
             "in_channels": 3,
@@ -68,7 +74,9 @@ def get_dummy_inputs(self):
         num_frames = 9
         num_channels = 3
         sizes = (16, 16)
-        image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device)
+        image = randn_tensor(
+            (batch_size, num_channels, num_frames, *sizes), generator=self.generator, device=torch_device
+        )
         return {"sample": image}
 
 

From 1771b9ddcdca47894338d5b9db77b8d396cddcab Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 14 May 2026 16:23:04 +0900
Subject: [PATCH 4/4] mark ltx2 audio test_group_offloading_with_disk as flaky

---
 .../autoencoders/test_models_autoencoder_kl_ltx2_audio.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
index 37ece8f4e0f7..2e16ba3f9953 100644
--- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
+++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py
@@ -19,7 +19,7 @@
 from diffusers import AutoencoderKLLTX2Audio
 from diffusers.utils.torch_utils import randn_tensor
 
-from ...testing_utils import torch_device
+from ...testing_utils import is_flaky, torch_device
 from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin
 from .testing_utils import NewAutoencoderTesterMixin
 
@@ -88,6 +88,12 @@ class TestAutoencoderKLLTX2AudioTraining(AutoencoderKLLTX2AudioTesterConfig, Tra
 class TestAutoencoderKLLTX2AudioMemory(AutoencoderKLLTX2AudioTesterConfig, MemoryTesterMixin):
     """Memory optimization tests for AutoencoderKLLTX2Audio."""
 
+    @is_flaky()
+    @pytest.mark.parametrize("record_stream", [False, True])
+    @pytest.mark.parametrize("offload_type", ["block_level", "leaf_level"])
+    def test_group_offloading_with_disk(self, tmp_path, record_stream, offload_type, atol=1e-5, rtol=0):
+        super().test_group_offloading_with_disk(tmp_path, record_stream, offload_type, atol=atol, rtol=rtol)
+
 
 class TestAutoencoderKLLTX2AudioSlicingTiling(AutoencoderKLLTX2AudioTesterConfig, NewAutoencoderTesterMixin):
     """Slicing and tiling tests for AutoencoderKLLTX2Audio."""