From d8baec36e92dc4dd1549e54455f3a60949275fe9 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 13 May 2026 17:13:43 +0900 Subject: [PATCH 1/4] refactor ltx2 autoencoder tests to use latest mixins --- .../test_models_autoencoder_kl_ltx2_audio.py | 67 +++++++++---------- .../test_models_autoencoder_ltx2_video.py | 66 +++++++++--------- 2 files changed, 63 insertions(+), 70 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py index ce93dfb42afe..07a56a3bfcfb 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py @@ -13,24 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import pytest from diffusers import AutoencoderKLLTX2Audio -from ...testing_utils import ( - floats_tensor, - torch_device, -) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import floats_tensor, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin -class AutoencoderKLLTX2AudioTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLLTX2Audio - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderKLLTX2AudioTesterConfig(BaseModelTesterConfig): + @property + def main_input_name(self): + return "sample" + + @property + def model_class(self): + return AutoencoderKLLTX2Audio + + @property + def output_shape(self): + return (2, 5, 16) - def get_autoencoder_kl_ltx_video_config(self): + def get_init_dict(self): return { "in_channels": 2, # stereo, "output_channels": 2, @@ -50,39 +55,29 @@ def get_autoencoder_kl_ltx_video_config(self): "double_z": True, } - @property - def dummy_input(self): + def get_dummy_inputs(self): batch_size = 2 num_channels = 2 num_frames = 8 num_mel_bins = 16 - spectrogram = floats_tensor((batch_size, num_channels, num_frames, num_mel_bins)).to(torch_device) + return {"sample": spectrogram} - input_dict = {"sample": spectrogram} - return input_dict - @property - def input_shape(self): - return (2, 5, 16) +class TestAutoencoderKLLTX2Audio(AutoencoderKLLTX2AudioTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @property - def output_shape(self): - return (2, 5, 16) + def test_outputs_equivalence(self): + pytest.skip("Unsupported test.") - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_ltx_video_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict - # Overriding as output shape is not the same as input shape for LTX 2.0 audio VAE - def test_output(self): - super().test_output(expected_output_shape=(2, 2, 5, 16)) +class TestAutoencoderKLLTX2AudioTraining(AutoencoderKLLTX2AudioTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLLTX2Audio.""" + + +class TestAutoencoderKLLTX2AudioMemory(AutoencoderKLLTX2AudioTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderKLLTX2Audio.""" - @unittest.skip("Unsupported test.") - def test_outputs_equivalence(self): - pass - @unittest.skip("AutoencoderKLLTX2Audio does not support `norm_num_groups` because it does not use GroupNorm.") - def test_forward_with_norm_groups(self): - pass +class TestAutoencoderKLLTX2AudioSlicingTiling(AutoencoderKLLTX2AudioTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderKLLTX2Audio.""" diff --git a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py index 146241361a82..c5d4e934c98e 100644 --- a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py +++ b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py @@ -13,28 +13,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import pytest from diffusers import AutoencoderKLLTX2Video -from ...testing_utils import ( - enable_full_determinism, - floats_tensor, - torch_device, -) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, floats_tensor, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLLTX2VideoTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLLTX2Video - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderKLLTX2VideoTesterConfig(BaseModelTesterConfig): + @property + def main_input_name(self): + return "sample" + + @property + def model_class(self): + return AutoencoderKLLTX2Video - def get_autoencoder_kl_ltx_video_config(self): + @property + def output_shape(self): + return (3, 9, 16, 16) + + def get_init_dict(self): return { "in_channels": 3, "out_channels": 3, @@ -59,30 +63,24 @@ def get_autoencoder_kl_ltx_video_config(self): "decoder_spatial_padding_mode": "zeros", } - @property - def dummy_input(self): + def get_dummy_inputs(self): batch_size = 2 num_frames = 9 num_channels = 3 sizes = (16, 16) - image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device) + return {"sample": image} - input_dict = {"sample": image} - return input_dict - @property - def input_shape(self): - return (3, 9, 16, 16) +class TestAutoencoderKLLTX2Video(AutoencoderKLLTX2VideoTesterConfig, ModelTesterMixin): + base_precision = 1e-2 + + def test_outputs_equivalence(self): + pytest.skip("Unsupported test.") - @property - def output_shape(self): - return (3, 9, 16, 16) - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_ltx_video_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderKLLTX2VideoTraining(AutoencoderKLLTX2VideoTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLLTX2Video.""" def test_gradient_checkpointing_is_applied(self): expected_set = { @@ -94,10 +92,10 @@ def test_gradient_checkpointing_is_applied(self): } super().test_gradient_checkpointing_is_applied(expected_set=expected_set) - @unittest.skip("Unsupported test.") - def test_outputs_equivalence(self): - pass - @unittest.skip("AutoencoderKLLTXVideo does not support `norm_num_groups` because it does not use GroupNorm.") - def test_forward_with_norm_groups(self): - pass +class TestAutoencoderKLLTX2VideoMemory(AutoencoderKLLTX2VideoTesterConfig, MemoryTesterMixin): + """Memory optimization tests for AutoencoderKLLTX2Video.""" + + +class TestAutoencoderKLLTX2VideoSlicingTiling(AutoencoderKLLTX2VideoTesterConfig, NewAutoencoderTesterMixin): + """Slicing and tiling tests for AutoencoderKLLTX2Video.""" From f2b06fbae092d202cd3d8a701a8ceecaf3566887 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 13 May 2026 18:00:02 +0900 Subject: [PATCH 2/4] fix more. --- .../test_models_autoencoder_kl_ltx2_audio.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py index 07a56a3bfcfb..37ece8f4e0f7 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py @@ -14,10 +14,12 @@ # limitations under the License. import pytest +import torch from diffusers import AutoencoderKLLTX2Audio +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import floats_tensor, torch_device +from ...testing_utils import torch_device from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin from .testing_utils import NewAutoencoderTesterMixin @@ -35,6 +37,10 @@ def model_class(self): def output_shape(self): return (2, 5, 16) + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) + def get_init_dict(self): return { "in_channels": 2, # stereo, @@ -60,7 +66,11 @@ def get_dummy_inputs(self): num_channels = 2 num_frames = 8 num_mel_bins = 16 - spectrogram = floats_tensor((batch_size, num_channels, num_frames, num_mel_bins)).to(torch_device) + spectrogram = randn_tensor( + (batch_size, num_channels, num_frames, num_mel_bins), + generator=self.generator, + device=torch_device, + ) return {"sample": spectrogram} From fea4c5593702d46022f7fe00f7b4cd8ee6670604 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 13 May 2026 18:18:49 +0900 Subject: [PATCH 3/4] fix tests --- .../test_models_autoencoder_ltx2_video.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py index c5d4e934c98e..cc041baa5bc7 100644 --- a/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py +++ b/tests/models/autoencoders/test_models_autoencoder_ltx2_video.py @@ -14,10 +14,12 @@ # limitations under the License. import pytest +import torch from diffusers import AutoencoderKLLTX2Video +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import enable_full_determinism, floats_tensor, torch_device +from ...testing_utils import enable_full_determinism, torch_device from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin from .testing_utils import NewAutoencoderTesterMixin @@ -38,6 +40,10 @@ def model_class(self): def output_shape(self): return (3, 9, 16, 16) + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) + def get_init_dict(self): return { "in_channels": 3, @@ -68,7 +74,9 @@ def get_dummy_inputs(self): num_frames = 9 num_channels = 3 sizes = (16, 16) - image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device) + image = randn_tensor( + (batch_size, num_channels, num_frames, *sizes), generator=self.generator, device=torch_device + ) return {"sample": image} From 1771b9ddcdca47894338d5b9db77b8d396cddcab Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 14 May 2026 16:23:04 +0900 Subject: [PATCH 4/4] is_flaky --- .../autoencoders/test_models_autoencoder_kl_ltx2_audio.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py index 37ece8f4e0f7..2e16ba3f9953 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_ltx2_audio.py @@ -19,7 +19,7 @@ from diffusers import AutoencoderKLLTX2Audio from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import torch_device +from ...testing_utils import is_flaky, torch_device from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin from .testing_utils import NewAutoencoderTesterMixin @@ -88,6 +88,12 @@ class TestAutoencoderKLLTX2AudioTraining(AutoencoderKLLTX2AudioTesterConfig, Tra class TestAutoencoderKLLTX2AudioMemory(AutoencoderKLLTX2AudioTesterConfig, MemoryTesterMixin): """Memory optimization tests for AutoencoderKLLTX2Audio.""" + @is_flaky() + @pytest.mark.parametrize("record_stream", [False, True]) + @pytest.mark.parametrize("offload_type", ["block_level", "leaf_level"]) + def test_group_offloading_with_disk(self, tmp_path, record_stream, offload_type, atol=1e-5, rtol=0): + super().test_group_offloading_with_disk(tmp_path, record_stream, offload_type, atol=atol, rtol=rtol) + class TestAutoencoderKLLTX2AudioSlicingTiling(AutoencoderKLLTX2AudioTesterConfig, NewAutoencoderTesterMixin): """Slicing and tiling tests for AutoencoderKLLTX2Audio."""