From 9732ce5d1f217b96b4560953ce8474332ba0836c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <somoza.alvaro@gmail.com>
Date: Tue, 2 Dec 2025 12:48:38 -0300
Subject: [PATCH 1/7] Add Z-Image pipeline and transformer docs

---
 .../en/api/models/z_image_transformer2d.md    | 19 +++++++
 docs/source/en/api/pipelines/z_image.md       | 57 +++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 docs/source/en/api/models/z_image_transformer2d.md
 create mode 100644 docs/source/en/api/pipelines/z_image.md
diff --git a/docs/source/en/api/models/z_image_transformer2d.md b/docs/source/en/api/models/z_image_transformer2d.md
new file mode 100644
index 000000000000..2ecb9851febd
--- /dev/null
+++ b/docs/source/en/api/models/z_image_transformer2d.md
@@ -0,0 +1,19 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# ZImageTransformer2DModel
+
+A Transformer model for image-like data from [Z-Image](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo).
+
+## ZImageTransformer2DModel
+
+[[autodoc]] ZImageTransformer2DModel
diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md
new file mode 100644
index 000000000000..08ca0ba63748
--- /dev/null
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -0,0 +1,57 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Z-Image
+
+<div class="flex flex-wrap space-x-1">
+  <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+</div>
+
+Z-Image is a powerful and highly efficient image generation model with 6B parameters. Currently there's only one model with two more to be released:
+
+|Model|HuggingFace|
+|---|---|
+|Z-Image-Turbo|https://huggingface.co/Tongyi-MAI/Z-Image-Turbo|
+
+### Z-Image-Turbo
+
+Z-Image-Turbo – A distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers ⚡️sub-second inference latency⚡️ on enterprise-grade H800 GPUs and fits comfortably within 16G VRAM consumer devices. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.
+
+## ZImagePipeline
+
+[[autodoc]] ZImagePipeline
+	- all
+	- __call__
+
+## Citation
+```bibtex
+@article{team2025zimage,
+  title={Z-Image: An Efficient Image Generation Foundation Model with Single-Stream Diffusion Transformer},
+  author={Z-Image Team},
+  journal={arXiv preprint arXiv:2511.22699},
+  year={2025}
+}
+
+@article{liu2025decoupled,
+  title={Decoupled DMD: CFG Augmentation as the Spear, Distribution Matching as the Shield},
+  author={Dongyang Liu and Peng Gao and David Liu and Ruoyi Du and Zhen Li and Qilong Wu and Xin Jin and Sihan Cao and Shifeng Zhang and Hongsheng Li and Steven Hoi},
+  journal={arXiv preprint arXiv:2511.22677},
+  year={2025}
+}
+
+@article{jiang2025distribution,
+  title={Distribution Matching Distillation Meets Reinforcement Learning},
+  author={Jiang, Dengyang and Liu, Dongyang and Wang, Zanyi and Wu, Qilong and Jin, Xin and Liu, David and Li, Zhen and Wang, Mengmeng and Gao, Peng and Yang, Harry},
+  journal={arXiv preprint arXiv:2511.13649},
+  year={2025}
+}
+```
\ No newline at end of file

From 846f706d2867b98f52e22730a59e3c824f27faa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <somoza.alvaro@gmail.com>
Date: Tue, 2 Dec 2025 13:00:14 -0300
Subject: [PATCH 2/7] Add Z-Image entries to the docs toctree

---
 docs/source/en/_toctree.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index d2b4a0de915b..ec1881bd42cf 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -399,6 +399,8 @@
         title: WanAnimateTransformer3DModel
       - local: api/models/wan_transformer_3d
         title: WanTransformer3DModel
+      - local: z_image_transformer2d.md
+        title: ZImageTransformer2DModel        
       title: Transformers
     - sections:
       - local: api/models/stable_cascade_unet
@@ -642,6 +644,8 @@
         title: VisualCloze
       - local: api/pipelines/wuerstchen
         title: Wuerstchen
+      - local: api/pipelines/z_image.md
+        title: Z-Image        
       title: Image
     - sections:
       - local: api/pipelines/allegro

From 81c100a47ed177ee4b3bc86077655f1305f74b46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <somoza.alvaro@gmail.com>
Date: Tue, 2 Dec 2025 13:06:45 -0300
Subject: [PATCH 3/7] Drop .md extension from Z-Image toctree entries

---
 docs/source/en/_toctree.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index ec1881bd42cf..ee01b47a6df6 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -399,7 +399,7 @@
         title: WanAnimateTransformer3DModel
       - local: api/models/wan_transformer_3d
         title: WanTransformer3DModel
-      - local: z_image_transformer2d.md
+      - local: z_image_transformer2d
         title: ZImageTransformer2DModel        
       title: Transformers
     - sections:
@@ -644,7 +644,7 @@
         title: VisualCloze
       - local: api/pipelines/wuerstchen
         title: Wuerstchen
-      - local: api/pipelines/z_image.md
+      - local: api/pipelines/z_image
         title: Z-Image        
       title: Image
     - sections:

From cbe2f34e1b5b30b00fa72b2837b242c429ddc5ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <somoza.alvaro@gmail.com>
Date: Tue, 2 Dec 2025 13:19:50 -0300
Subject: [PATCH 4/7] apply review and fix

---
 docs/source/en/_toctree.yml             |  2 +-
 docs/source/en/api/pipelines/z_image.md | 28 ++-----------------------
 2 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index ee01b47a6df6..861b9ada77c9 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -399,7 +399,7 @@
         title: WanAnimateTransformer3DModel
       - local: api/models/wan_transformer_3d
         title: WanTransformer3DModel
-      - local: z_image_transformer2d
+      - local: api/models/z_image_transformer2d
         title: ZImageTransformer2DModel        
       title: Transformers
     - sections:
diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md
index 08ca0ba63748..1606a06b14a5 100644
--- a/docs/source/en/api/pipelines/z_image.md
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -16,7 +16,7 @@ specific language governing permissions and limitations under the License.
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
 </div>
 
-Z-Image is a powerful and highly efficient image generation model with 6B parameters. Currently there's only one model with two more to be released:
+[Z-Image](https://huggingface.co/papers/2511.22699) is a powerful and highly efficient image generation model with 6B parameters. Currently there's only one model with two more to be released:
 
 |Model|HuggingFace|
 |---|---|
@@ -30,28 +30,4 @@ Z-Image-Turbo – A distilled version of Z-Image that matches or exceeds leading
 
 [[autodoc]] ZImagePipeline
 	- all
-	- __call__
-
-## Citation
-```bibtex
-@article{team2025zimage,
-  title={Z-Image: An Efficient Image Generation Foundation Model with Single-Stream Diffusion Transformer},
-  author={Z-Image Team},
-  journal={arXiv preprint arXiv:2511.22699},
-  year={2025}
-}
-
-@article{liu2025decoupled,
-  title={Decoupled DMD: CFG Augmentation as the Spear, Distribution Matching as the Shield},
-  author={Dongyang Liu and Peng Gao and David Liu and Ruoyi Du and Zhen Li and Qilong Wu and Xin Jin and Sihan Cao and Shifeng Zhang and Hongsheng Li and Steven Hoi},
-  journal={arXiv preprint arXiv:2511.22677},
-  year={2025}
-}
-
-@article{jiang2025distribution,
-  title={Distribution Matching Distillation Meets Reinforcement Learning},
-  author={Jiang, Dengyang and Liu, Dongyang and Wang, Zanyi and Wu, Qilong and Jin, Xin and Liu, David and Li, Zhen and Wang, Mengmeng and Gao, Peng and Yang, Harry},
-  journal={arXiv preprint arXiv:2511.13649},
-  year={2025}
-}
-```
\ No newline at end of file
+	- __call__

From df774fc5ebdc113b068f390537a28c2a94c64a47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <asomoza@users.noreply.github.com>
Date: Thu, 4 Dec 2025 20:32:39 -0300
Subject: [PATCH 5/7] Update docs/source/en/api/pipelines/z_image.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
 docs/source/en/api/pipelines/z_image.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md
index 1606a06b14a5..bb7c88801db2 100644
--- a/docs/source/en/api/pipelines/z_image.md
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -18,7 +18,7 @@ specific language governing permissions and limitations under the License.
 
 [Z-Image](https://huggingface.co/papers/2511.22699) is a powerful and highly efficient image generation model with 6B parameters. Currently there's only one model with two more to be released:
 
-|Model|HuggingFace|
+|Model|Hugging Face|
 |---|---|
 |Z-Image-Turbo|https://huggingface.co/Tongyi-MAI/Z-Image-Turbo|
 

From 02af054c14bcf7a7b6490841fe4852095a1d9248 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <asomoza@users.noreply.github.com>
Date: Thu, 4 Dec 2025 20:32:49 -0300
Subject: [PATCH 6/7] Update docs/source/en/api/pipelines/z_image.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
 docs/source/en/api/pipelines/z_image.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md
index bb7c88801db2..b9377cbbfcb3 100644
--- a/docs/source/en/api/pipelines/z_image.md
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -22,7 +22,7 @@ specific language governing permissions and limitations under the License.
 |---|---|
 |Z-Image-Turbo|https://huggingface.co/Tongyi-MAI/Z-Image-Turbo|
 
-### Z-Image-Turbo
+## Z-Image-Turbo
 
 Z-Image-Turbo – A distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers ⚡️sub-second inference latency⚡️ on enterprise-grade H800 GPUs and fits comfortably within 16G VRAM consumer devices. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.
 

From 3ff17802473ba70896a85da497a60f07c1bb067f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?= <asomoza@users.noreply.github.com>
Date: Thu, 4 Dec 2025 20:33:24 -0300
Subject: [PATCH 7/7] Update docs/source/en/api/pipelines/z_image.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
 docs/source/en/api/pipelines/z_image.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/en/api/pipelines/z_image.md b/docs/source/en/api/pipelines/z_image.md
index b9377cbbfcb3..224db7ca01af 100644
--- a/docs/source/en/api/pipelines/z_image.md
+++ b/docs/source/en/api/pipelines/z_image.md
@@ -24,7 +24,7 @@ specific language governing permissions and limitations under the License.
 
 ## Z-Image-Turbo
 
-Z-Image-Turbo – A distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers ⚡️sub-second inference latency⚡️ on enterprise-grade H800 GPUs and fits comfortably within 16G VRAM consumer devices. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.
+Z-Image-Turbo is a distilled version of Z-Image that matches or exceeds leading competitors with only 8 NFEs (Number of Function Evaluations). It offers sub-second inference latency on enterprise-grade H800 GPUs and fits comfortably on consumer devices with 16 GB of VRAM. It excels in photorealistic image generation, bilingual text rendering (English & Chinese), and robust instruction adherence.
 
 ## ZImagePipeline