From 471e5c1e881627e65bfac489b7f95052095660a3 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-iam1.rr.intel.com>
Date: Sun, 18 Jun 2023 04:52:29 -0700
Subject: [PATCH 1/8] fixed typo

---
 .../stable_diffusion/pipeline_stable_diffusion_ldm3d.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
index c804d2f1918b..2df9c46f0be3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py
@@ -49,7 +49,7 @@
         >>> pipe = pipe.to("cuda")
 
         >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>> output = pipe_ldm3d(prompt)
+        >>> output = pipe(prompt)
         >>> rgb_image, depth_image = output.rgb, output.depth
         ```
 """

From 7f5edc50a603f771d55a4d6283aea7280262a8de Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-iam1.rr.intel.com>
Date: Mon, 19 Jun 2023 01:13:23 -0700
Subject: [PATCH 2/8] updated doc to be consistent in naming

---
 .../en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
index ca5798d93a8e..79aed8c1c278 100644
--- a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
+++ b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
@@ -35,9 +35,9 @@ Running LDM3D is straighforward with the [`StableDiffusionLDM3DPipeline`]:
 ```python
 >>> from diffusers import StableDiffusionLDM3DPipeline
 
->>> pipe_ldm3d = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d")
+>>> pipe= StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d")
 prompt ="A picture of some lemons on a table"
-output = pipe_ldm3d(prompt)
+output = pipe(prompt)
 rgb_image, depth_image = output.rgb, output.depth
 rgb_image[0].save("lemons_ldm3d_rgb.jpg")
 depth_image[0].save("lemons_ldm3d_depth.png")

From 5dddd0ea155b8497170ec35cf88aa80f9b9b9462 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-iam1.rr.intel.com>
Date: Mon, 19 Jun 2023 01:16:51 -0700
Subject: [PATCH 3/8] make style/quality

---
 .../en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
index 79aed8c1c278..d311fdb5f4f6 100644
--- a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
+++ b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx
@@ -35,7 +35,7 @@ Running LDM3D is straighforward with the [`StableDiffusionLDM3DPipeline`]:
 ```python
 >>> from diffusers import StableDiffusionLDM3DPipeline
 
->>> pipe= StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d")
+>>> pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d")
 prompt ="A picture of some lemons on a table"
 output = pipe(prompt)
 rgb_image, depth_image = output.rgb, output.depth

From ec18756f97835bb4ac407afae28be4b7a5ff5bde Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-gpu33.rr.intel.com>
Date: Thu, 22 Jun 2023 06:54:09 -0700
Subject: [PATCH 4/8] preprocessing for 4 channels and not 6

---
 src/diffusers/image_processor.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 4f3c61208539..0f712c399f78 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -314,12 +314,15 @@ def numpy_to_depth(self, images):
         """
         if images.ndim == 3:
             images = images[None, ...]
-        images = (images * 255).round().astype("uint8")
-        if images.shape[-1] == 1:
-            # special case for grayscale (single channel) images
-            raise Exception("Not supported")
+        images_depth = images[:, :, :, 3:]
+        if images.shape[-1] == 6:
+            images_depth = (images_depth * 255).round().astype("uint8")
+            pil_images = [Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth]
+        elif images.shape[-1] == 4:
+            images_depth = (images_depth * 65535.0).astype(np.uint16)
+            pil_images = [Image.fromarray(image_depth, mode="I;16") for image_depth in images_depth]
         else:
-            pil_images = [Image.fromarray(self.rgblike_to_depthmap(image[:, :, 3:]), mode="I;16") for image in images]
+            raise Exception("Not supported")
 
         return pil_images
 

From b6f1a7f6c6e5edf7f604126d2a2d15a05603f649 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-gpu33.rr.intel.com>
Date: Thu, 22 Jun 2023 06:55:00 -0700
Subject: [PATCH 5/8] make style

---
 src/diffusers/image_processor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 0f712c399f78..b7ebef1cb20c 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -317,7 +317,9 @@ def numpy_to_depth(self, images):
         images_depth = images[:, :, :, 3:]
         if images.shape[-1] == 6:
             images_depth = (images_depth * 255).round().astype("uint8")
-            pil_images = [Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth]
+            pil_images = [
+                Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth
+            ]
         elif images.shape[-1] == 4:
             images_depth = (images_depth * 65535.0).astype(np.uint16)
             pil_images = [Image.fromarray(image_depth, mode="I;16") for image_depth in images_depth]

From d7348d6b2573b806f655f6d7b92177e37e88de22 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-gpu38.rr.intel.com>
Date: Mon, 26 Jun 2023 02:53:35 -0700
Subject: [PATCH 6/8] test for 4c

---
 src/diffusers/image_processor.py              |  6 +++-
 .../test_stable_diffusion_ldm3d.py            | 32 +++++++++++++++----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 313923cb975c..78470e751e12 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -354,7 +354,11 @@ def postprocess(
         image = self.pt_to_numpy(image)
 
         if output_type == "np":
-            return image[:, :, :, :3], np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0)
+            if image.shape[-1]==6:
+                image_depth = np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0)
+            else:
+                image_depth =  image[:, :, :, 3:] 
+            return image[:, :, :, :3], image_depth
 
         if output_type == "pil":
             return self.numpy_to_pil(image), self.numpy_to_depth(image)
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
index 933e4307a41b..16e22d14ca26 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
@@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self):
         assert depth.shape == (1, 64, 64)
 
         expected_slice_rgb = np.array(
-            [0.37301102, 0.7023895, 0.7418312, 0.5163375, 0.5825485, 0.60929704, 0.4188174, 0.48407027, 0.46555096]
+            [0.37350047, 0.70260847, 0.74218935, 0.5166996 , 0.582674  , 0.6094541 , 0.41805607, 0.48368582, 0.46530965]
         )
-        expected_slice_depth = np.array([103.4673, 85.81202, 87.84926])
+        expected_slice_depth = np.array([103.46226 ,  85.828445,  87.86833])
 
         assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2
         assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2
@@ -280,10 +280,30 @@ def test_ldm3d(self):
         output = ldm3d_pipe(**inputs)
         rgb, depth = output.rgb, output.depth
 
-        expected_rgb_mean = 0.54461557
-        expected_rgb_std = 0.2806707
-        expected_depth_mean = 143.64595
-        expected_depth_std = 83.491776
+        expected_rgb_mean = 0.495586
+        expected_rgb_std = 0.33795515
+        expected_depth_mean = 112.48518
+        expected_depth_std = 98.489746
+        assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3
+        assert np.abs(expected_rgb_std - rgb.std()) < 1e-3
+        assert np.abs(expected_depth_mean - depth.mean()) < 1e-3
+        assert np.abs(expected_depth_std - depth.std()) < 1e-3
+
+    def test_ldm3d_v2(self):
+        ldm3d_pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d-4c").to(torch_device)
+        ldm3d_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_inputs(torch_device)
+        output = ldm3d_pipe(**inputs)
+        rgb, depth = output.rgb, output.depth
+        
+        expected_rgb_mean = 0.4194127
+        expected_rgb_std = 0.35375586
+        expected_depth_mean = 0.5638502
+        expected_depth_std = 0.34686103
+
+        assert rgb.shape == (1, 512, 512, 3)
+        assert depth.shape == (1, 512, 512, 1)
         assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3
         assert np.abs(expected_rgb_std - rgb.std()) < 1e-3
         assert np.abs(expected_depth_mean - depth.mean()) < 1e-3

From ef923aa699aa8cb7ee4452f207d82cb3f7f035c9 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-gpu38.rr.intel.com>
Date: Mon, 26 Jun 2023 02:54:59 -0700
Subject: [PATCH 7/8] make style/quality

---
 src/diffusers/image_processor.py                            | 4 ++--
 .../stable_diffusion/test_stable_diffusion_ldm3d.py         | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 78470e751e12..6ccf9b465ebd 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -354,10 +354,10 @@ def postprocess(
         image = self.pt_to_numpy(image)
 
         if output_type == "np":
-            if image.shape[-1]==6:
+            if image.shape[-1] == 6:
                 image_depth = np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0)
             else:
-                image_depth =  image[:, :, :, 3:] 
+                image_depth = image[:, :, :, 3:]
             return image[:, :, :, :3], image_depth
 
         if output_type == "pil":
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
index 16e22d14ca26..91272c58543a 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
@@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self):
         assert depth.shape == (1, 64, 64)
 
         expected_slice_rgb = np.array(
-            [0.37350047, 0.70260847, 0.74218935, 0.5166996 , 0.582674  , 0.6094541 , 0.41805607, 0.48368582, 0.46530965]
+            [0.37350047, 0.70260847, 0.74218935, 0.5166996, 0.582674, 0.6094541, 0.41805607, 0.48368582, 0.46530965]
         )
-        expected_slice_depth = np.array([103.46226 ,  85.828445,  87.86833])
+        expected_slice_depth = np.array([103.46226, 85.828445, 87.86833])
 
         assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2
         assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2
@@ -296,7 +296,7 @@ def test_ldm3d_v2(self):
         inputs = self.get_inputs(torch_device)
         output = ldm3d_pipe(**inputs)
         rgb, depth = output.rgb, output.depth
-        
+
         expected_rgb_mean = 0.4194127
         expected_rgb_std = 0.35375586
         expected_depth_mean = 0.5638502

From 3b242c3aa249fab51960b63dc0a137f7d5b5ee82 Mon Sep 17 00:00:00 2001
From: Aflalo <estellea@isl-iam1.rr.intel.com>
Date: Thu, 29 Jun 2023 01:02:16 -0700
Subject: [PATCH 8/8] fixed test on cpu

---
 .../pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
index 91272c58543a..e2164e8117ad 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py
@@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self):
         assert depth.shape == (1, 64, 64)
 
         expected_slice_rgb = np.array(
-            [0.37350047, 0.70260847, 0.74218935, 0.5166996, 0.582674, 0.6094541, 0.41805607, 0.48368582, 0.46530965]
+            [0.37338176, 0.70247, 0.74203193, 0.51643604, 0.58256793, 0.60932136, 0.4181095, 0.48355877, 0.46535262]
         )
-        expected_slice_depth = np.array([103.46226, 85.828445, 87.86833])
+        expected_slice_depth = np.array([103.46727, 85.812004, 87.849236])
 
         assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2
         assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2