From 2d2796c6b19382ef0525a99d3e9573503e6c5784 Mon Sep 17 00:00:00 2001
From: Mikko Tukiainen <mikko.tukiainen@amd.com>
Date: Mon, 24 Mar 2025 09:23:55 +0000
Subject: [PATCH 1/4] Add missing 'gradient_checkpointing = False' attr

---
 src/diffusers/models/autoencoders/autoencoder_kl_mochi.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
index d69ec6252b00..703de6d4e2a5 100644
--- a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
@@ -497,6 +497,8 @@ def __init__(
         self.norm_out = MochiChunkedGroupNorm3D(block_out_channels[-1])
         self.proj_out = nn.Linear(block_out_channels[-1], 2 * out_channels, bias=False)
 
+        self.gradient_checkpointing = False
+
     def forward(
         self, hidden_states: torch.Tensor, conv_cache: Optional[Dict[str, torch.Tensor]] = None
     ) -> torch.Tensor:

From 0deac44c5f8ca905d1346a50c5e776eeff917029 Mon Sep 17 00:00:00 2001
From: Mikko Tukiainen <mikko.tukiainen@amd.com>
Date: Mon, 24 Mar 2025 09:24:57 +0000
Subject: [PATCH 2/4] Add (limited) tests for Mochi autoencoder

---
 .../test_models_autoencoder_mochi.py          | 121 ++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100755 tests/models/autoencoders/test_models_autoencoder_mochi.py

diff --git a/tests/models/autoencoders/test_models_autoencoder_mochi.py b/tests/models/autoencoders/test_models_autoencoder_mochi.py
new file mode 100755
index 000000000000..8243fecc9b0f
--- /dev/null
+++ b/tests/models/autoencoders/test_models_autoencoder_mochi.py
@@ -0,0 +1,121 @@
+# coding=utf-8
+# Copyright 2024 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from diffusers import AutoencoderKLMochi
+from diffusers.utils.testing_utils import (
+    enable_full_determinism,
+    floats_tensor,
+    torch_device,
+)
+
+from ..test_modeling_common import ModelTesterMixin, UNetTesterMixin
+
+
+enable_full_determinism()
+
+
+class AutoencoderKLMochiTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
+    model_class = AutoencoderKLMochi
+    main_input_name = "sample"
+    base_precision = 1e-2
+
+
+    def get_autoencoder_kl_mochi_config(self):
+        return {
+            "in_channels": 15,
+            "out_channels": 3,
+            "latent_channels": 4,
+            "encoder_block_out_channels": (32, 32, 32, 32),
+            "decoder_block_out_channels": (32, 32, 32, 32),
+            "layers_per_block": (1, 1, 1, 1, 1),
+            "act_fn": "silu",
+            "scaling_factor": 1,
+        }
+
+    @property
+    def dummy_input(self):
+        batch_size = 2
+        num_frames = 7
+        num_channels = 3
+        sizes = (16, 16)
+
+        image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device)
+
+        return {"sample": image}
+
+    @property
+    def input_shape(self):
+        return (3, 7, 16, 16)
+
+    @property
+    def output_shape(self):
+        return (3, 7, 16, 16)
+
+    def prepare_init_args_and_inputs_for_common(self):
+        init_dict = self.get_autoencoder_kl_mochi_config()
+        inputs_dict = self.dummy_input
+        return init_dict, inputs_dict
+
+    def test_gradient_checkpointing_is_applied(self):
+        expected_set = {
+            "MochiDecoder3D",
+            "MochiDownBlock3D",
+            "MochiEncoder3D",
+            "MochiMidBlock3D",
+            "MochiUpBlock3D",
+        }
+        super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
+
+    @unittest.skip("Unsupported test.")
+    def test_effective_gradient_checkpointing(self):
+       """ Fails because of conv_cache:
+       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_effective_gradient_checkpointing - 
+       TypeError: ModelMixin.enable_gradient_checkpointing.<locals>._gradient_checkpointing_func() got an unexpected keyword argument 'conv_cache'
+       """
+       pass
+
+    @unittest.skip("Unsupported test.")
+    def test_forward_with_norm_groups(self):
+       """
+       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_forward_with_norm_groups - 
+       TypeError: AutoencoderKLMochi.__init__() got an unexpected keyword argument 'norm_num_groups'
+       """
+       pass
+
+
+    @unittest.skip("Unsupported test.")
+    def test_model_parallelism(self):
+       """
+       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
+       RuntimeError: values expected sparse tensor layout but got Strided
+       """
+       pass
+
+    @unittest.skip("Unsupported test.")
+    def test_outputs_equivalence(self):
+       """
+       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
+       RuntimeError: values expected sparse tensor layout but got Strided
+       """
+       pass
+
+    @unittest.skip("Unsupported test.")
+    def test_sharded_checkpoints_device_map(self):
+       """
+       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_sharded_checkpoints_device_map - 
+       RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:5!
+       """

From ed55d7b692c9477799816daf0bb99db7603e9489 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Mon, 31 Mar 2025 22:19:00 +0000
Subject: [PATCH 3/4] Apply style fixes

---
 .../test_models_autoencoder_mochi.py          | 50 +++++++++----------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/tests/models/autoencoders/test_models_autoencoder_mochi.py b/tests/models/autoencoders/test_models_autoencoder_mochi.py
index 8243fecc9b0f..19d51665b8f2 100755
--- a/tests/models/autoencoders/test_models_autoencoder_mochi.py
+++ b/tests/models/autoencoders/test_models_autoencoder_mochi.py
@@ -33,7 +33,6 @@ class AutoencoderKLMochiTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCa
     main_input_name = "sample"
     base_precision = 1e-2
 
-
     def get_autoencoder_kl_mochi_config(self):
         return {
             "in_channels": 15,
@@ -82,40 +81,39 @@ def test_gradient_checkpointing_is_applied(self):
 
     @unittest.skip("Unsupported test.")
     def test_effective_gradient_checkpointing(self):
-       """ Fails because of conv_cache:
-       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_effective_gradient_checkpointing - 
-       TypeError: ModelMixin.enable_gradient_checkpointing.<locals>._gradient_checkpointing_func() got an unexpected keyword argument 'conv_cache'
-       """
-       pass
+        """Fails because of conv_cache:
+        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_effective_gradient_checkpointing -
+        TypeError: ModelMixin.enable_gradient_checkpointing.<locals>._gradient_checkpointing_func() got an unexpected keyword argument 'conv_cache'
+        """
+        pass
 
     @unittest.skip("Unsupported test.")
     def test_forward_with_norm_groups(self):
-       """
-       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_forward_with_norm_groups - 
-       TypeError: AutoencoderKLMochi.__init__() got an unexpected keyword argument 'norm_num_groups'
-       """
-       pass
-
+        """
+        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_forward_with_norm_groups -
+        TypeError: AutoencoderKLMochi.__init__() got an unexpected keyword argument 'norm_num_groups'
+        """
+        pass
 
     @unittest.skip("Unsupported test.")
     def test_model_parallelism(self):
-       """
-       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
-       RuntimeError: values expected sparse tensor layout but got Strided
-       """
-       pass
+        """
+        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
+        RuntimeError: values expected sparse tensor layout but got Strided
+        """
+        pass
 
     @unittest.skip("Unsupported test.")
     def test_outputs_equivalence(self):
-       """
-       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
-       RuntimeError: values expected sparse tensor layout but got Strided
-       """
-       pass
+        """
+        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence -
+        RuntimeError: values expected sparse tensor layout but got Strided
+        """
+        pass
 
     @unittest.skip("Unsupported test.")
     def test_sharded_checkpoints_device_map(self):
-       """
-       tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_sharded_checkpoints_device_map - 
-       RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:5!
-       """
+        """
+        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_sharded_checkpoints_device_map -
+        RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:5!
+        """

From e917098ece7ec10e2b41de800f7d9828e6e505e5 Mon Sep 17 00:00:00 2001
From: Mikko Tukiainen <Mikko.Tukiainen@amd.com>
Date: Tue, 1 Apr 2025 07:26:30 +0000
Subject: [PATCH 4/4] pass 'conv_cache' as arg instead of kwarg

---
 .../models/autoencoders/autoencoder_kl_mochi.py    | 14 +++++++-------
 .../autoencoders/test_models_autoencoder_mochi.py  |  8 --------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
index 703de6d4e2a5..f6d9b3bb4834 100644
--- a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py
@@ -210,7 +210,7 @@ def forward(
                 hidden_states, new_conv_cache[conv_cache_key] = self._gradient_checkpointing_func(
                     resnet,
                     hidden_states,
-                    conv_cache=conv_cache.get(conv_cache_key),
+                    conv_cache.get(conv_cache_key),
                 )
             else:
                 hidden_states, new_conv_cache[conv_cache_key] = resnet(
@@ -306,7 +306,7 @@ def forward(
 
             if torch.is_grad_enabled() and self.gradient_checkpointing:
                 hidden_states, new_conv_cache[conv_cache_key] = self._gradient_checkpointing_func(
-                    resnet, hidden_states, conv_cache=conv_cache.get(conv_cache_key)
+                    resnet, hidden_states, conv_cache.get(conv_cache_key)
                 )
             else:
                 hidden_states, new_conv_cache[conv_cache_key] = resnet(
@@ -382,7 +382,7 @@ def forward(
                 hidden_states, new_conv_cache[conv_cache_key] = self._gradient_checkpointing_func(
                     resnet,
                     hidden_states,
-                    conv_cache=conv_cache.get(conv_cache_key),
+                    conv_cache.get(conv_cache_key),
                 )
             else:
                 hidden_states, new_conv_cache[conv_cache_key] = resnet(
@@ -515,13 +515,13 @@ def forward(
 
         if torch.is_grad_enabled() and self.gradient_checkpointing:
             hidden_states, new_conv_cache["block_in"] = self._gradient_checkpointing_func(
-                self.block_in, hidden_states, conv_cache=conv_cache.get("block_in")
+                self.block_in, hidden_states, conv_cache.get("block_in")
             )
 
             for i, down_block in enumerate(self.down_blocks):
                 conv_cache_key = f"down_block_{i}"
                 hidden_states, new_conv_cache[conv_cache_key] = self._gradient_checkpointing_func(
-                    down_block, hidden_states, conv_cache=conv_cache.get(conv_cache_key)
+                    down_block, hidden_states, conv_cache.get(conv_cache_key)
                 )
         else:
             hidden_states, new_conv_cache["block_in"] = self.block_in(
@@ -625,13 +625,13 @@ def forward(
         # 1. Mid
         if torch.is_grad_enabled() and self.gradient_checkpointing:
             hidden_states, new_conv_cache["block_in"] = self._gradient_checkpointing_func(
-                self.block_in, hidden_states, conv_cache=conv_cache.get("block_in")
+                self.block_in, hidden_states, conv_cache.get("block_in")
             )
 
             for i, up_block in enumerate(self.up_blocks):
                 conv_cache_key = f"up_block_{i}"
                 hidden_states, new_conv_cache[conv_cache_key] = self._gradient_checkpointing_func(
-                    up_block, hidden_states, conv_cache=conv_cache.get(conv_cache_key)
+                    up_block, hidden_states, conv_cache.get(conv_cache_key)
                 )
         else:
             hidden_states, new_conv_cache["block_in"] = self.block_in(
diff --git a/tests/models/autoencoders/test_models_autoencoder_mochi.py b/tests/models/autoencoders/test_models_autoencoder_mochi.py
index 19d51665b8f2..77645d3c07d2 100755
--- a/tests/models/autoencoders/test_models_autoencoder_mochi.py
+++ b/tests/models/autoencoders/test_models_autoencoder_mochi.py
@@ -79,14 +79,6 @@ def test_gradient_checkpointing_is_applied(self):
         }
         super().test_gradient_checkpointing_is_applied(expected_set=expected_set)
 
-    @unittest.skip("Unsupported test.")
-    def test_effective_gradient_checkpointing(self):
-        """Fails because of conv_cache:
-        tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_effective_gradient_checkpointing -
-        TypeError: ModelMixin.enable_gradient_checkpointing.<locals>._gradient_checkpointing_func() got an unexpected keyword argument 'conv_cache'
-        """
-        pass
-
     @unittest.skip("Unsupported test.")
     def test_forward_with_norm_groups(self):
         """