From 471e5c1e881627e65bfac489b7f95052095660a3 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Sun, 18 Jun 2023 04:52:29 -0700 Subject: [PATCH 1/8] fixed typo --- .../stable_diffusion/pipeline_stable_diffusion_ldm3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index c804d2f1918b..2df9c46f0be3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -49,7 +49,7 @@ >>> pipe = pipe.to("cuda") >>> prompt = "a photo of an astronaut riding a horse on mars" - >>> output = pipe_ldm3d(prompt) + >>> output = pipe(prompt) >>> rgb_image, depth_image = output.rgb, output.depth ``` """ From 7f5edc50a603f771d55a4d6283aea7280262a8de Mon Sep 17 00:00:00 2001 From: Aflalo Date: Mon, 19 Jun 2023 01:13:23 -0700 Subject: [PATCH 2/8] updated doc to be consistent in naming --- .../en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx index ca5798d93a8e..79aed8c1c278 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx @@ -35,9 +35,9 @@ Running LDM3D is straighforward with the [`StableDiffusionLDM3DPipeline`]: ```python >>> from diffusers import StableDiffusionLDM3DPipeline ->>> pipe_ldm3d = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d") +>>> pipe= StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d") prompt ="A picture of some lemons on a table" -output = pipe_ldm3d(prompt) +output = pipe(prompt) rgb_image, depth_image = output.rgb, output.depth rgb_image[0].save("lemons_ldm3d_rgb.jpg") depth_image[0].save("lemons_ldm3d_depth.png") From 5dddd0ea155b8497170ec35cf88aa80f9b9b9462 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Mon, 19 Jun 2023 01:16:51 -0700 Subject: [PATCH 3/8] make style/quality --- .../en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx index 79aed8c1c278..d311fdb5f4f6 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx +++ b/docs/source/en/api/pipelines/stable_diffusion/ldm3d_diffusion.mdx @@ -35,7 +35,7 @@ Running LDM3D is straighforward with the [`StableDiffusionLDM3DPipeline`]: ```python >>> from diffusers import StableDiffusionLDM3DPipeline ->>> pipe= StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d") +>>> pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d") prompt ="A picture of some lemons on a table" output = pipe(prompt) rgb_image, depth_image = output.rgb, output.depth From ec18756f97835bb4ac407afae28be4b7a5ff5bde Mon Sep 17 00:00:00 2001 From: Aflalo Date: Thu, 22 Jun 2023 06:54:09 -0700 Subject: [PATCH 4/8] preprocessing for 4 channels and not 6 --- src/diffusers/image_processor.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 4f3c61208539..0f712c399f78 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -314,12 +314,15 @@ def numpy_to_depth(self, images): """ if images.ndim == 3: images = images[None, ...] - images = (images * 255).round().astype("uint8") - if images.shape[-1] == 1: - # special case for grayscale (single channel) images - raise Exception("Not supported") + images_depth = images[:, :, :, 3:] + if images.shape[-1] == 6: + images_depth = (images_depth * 255).round().astype("uint8") + pil_images = [Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth] + elif images.shape[-1] == 4: + images_depth = (images_depth * 65535.0).astype(np.uint16) + pil_images = [Image.fromarray(image_depth, mode="I;16") for image_depth in images_depth] else: - pil_images = [Image.fromarray(self.rgblike_to_depthmap(image[:, :, 3:]), mode="I;16") for image in images] + raise Exception("Not supported") return pil_images From b6f1a7f6c6e5edf7f604126d2a2d15a05603f649 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Thu, 22 Jun 2023 06:55:00 -0700 Subject: [PATCH 5/8] make style --- src/diffusers/image_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 0f712c399f78..b7ebef1cb20c 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -317,7 +317,9 @@ def numpy_to_depth(self, images): images_depth = images[:, :, :, 3:] if images.shape[-1] == 6: images_depth = (images_depth * 255).round().astype("uint8") - pil_images = [Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth] + pil_images = [ + Image.fromarray(self.rgblike_to_depthmap(image_depth), mode="I;16") for image_depth in images_depth + ] elif images.shape[-1] == 4: images_depth = (images_depth * 65535.0).astype(np.uint16) pil_images = [Image.fromarray(image_depth, mode="I;16") for image_depth in images_depth] From d7348d6b2573b806f655f6d7b92177e37e88de22 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Mon, 26 Jun 2023 02:53:35 -0700 Subject: [PATCH 6/8] test for 4c --- src/diffusers/image_processor.py | 6 +++- .../test_stable_diffusion_ldm3d.py | 32 +++++++++++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 313923cb975c..78470e751e12 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -354,7 +354,11 @@ def postprocess( image = self.pt_to_numpy(image) if output_type == "np": - return image[:, :, :, :3], np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0) + if image.shape[-1]==6: + image_depth = np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0) + else: + image_depth = image[:, :, :, 3:] + return image[:, :, :, :3], image_depth if output_type == "pil": return self.numpy_to_pil(image), self.numpy_to_depth(image) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py index 933e4307a41b..16e22d14ca26 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py @@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self): assert depth.shape == (1, 64, 64) expected_slice_rgb = np.array( - [0.37301102, 0.7023895, 0.7418312, 0.5163375, 0.5825485, 0.60929704, 0.4188174, 0.48407027, 0.46555096] + [0.37350047, 0.70260847, 0.74218935, 0.5166996 , 0.582674 , 0.6094541 , 0.41805607, 0.48368582, 0.46530965] ) - expected_slice_depth = np.array([103.4673, 85.81202, 87.84926]) + expected_slice_depth = np.array([103.46226 , 85.828445, 87.86833]) assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2 assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2 @@ -280,10 +280,30 @@ def test_ldm3d(self): output = ldm3d_pipe(**inputs) rgb, depth = output.rgb, output.depth - expected_rgb_mean = 0.54461557 - expected_rgb_std = 0.2806707 - expected_depth_mean = 143.64595 - expected_depth_std = 83.491776 + expected_rgb_mean = 0.495586 + expected_rgb_std = 0.33795515 + expected_depth_mean = 112.48518 + expected_depth_std = 98.489746 + assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3 + assert np.abs(expected_rgb_std - rgb.std()) < 1e-3 + assert np.abs(expected_depth_mean - depth.mean()) < 1e-3 + assert np.abs(expected_depth_std - depth.std()) < 1e-3 + + def test_ldm3d_v2(self): + ldm3d_pipe = StableDiffusionLDM3DPipeline.from_pretrained("Intel/ldm3d-4c").to(torch_device) + ldm3d_pipe.set_progress_bar_config(disable=None) + + inputs = self.get_inputs(torch_device) + output = ldm3d_pipe(**inputs) + rgb, depth = output.rgb, output.depth + + expected_rgb_mean = 0.4194127 + expected_rgb_std = 0.35375586 + expected_depth_mean = 0.5638502 + expected_depth_std = 0.34686103 + + assert rgb.shape == (1, 512, 512, 3) + assert depth.shape == (1, 512, 512, 1) assert np.abs(expected_rgb_mean - rgb.mean()) < 1e-3 assert np.abs(expected_rgb_std - rgb.std()) < 1e-3 assert np.abs(expected_depth_mean - depth.mean()) < 1e-3 From ef923aa699aa8cb7ee4452f207d82cb3f7f035c9 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Mon, 26 Jun 2023 02:54:59 -0700 Subject: [PATCH 7/8] make style/quality --- src/diffusers/image_processor.py | 4 ++-- .../stable_diffusion/test_stable_diffusion_ldm3d.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 78470e751e12..6ccf9b465ebd 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -354,10 +354,10 @@ def postprocess( image = self.pt_to_numpy(image) if output_type == "np": - if image.shape[-1]==6: + if image.shape[-1] == 6: image_depth = np.stack([self.rgblike_to_depthmap(im[:, :, 3:]) for im in image], axis=0) else: - image_depth = image[:, :, :, 3:] + image_depth = image[:, :, :, 3:] return image[:, :, :, :3], image_depth if output_type == "pil": diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py index 16e22d14ca26..91272c58543a 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py @@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self): assert depth.shape == (1, 64, 64) expected_slice_rgb = np.array( - [0.37350047, 0.70260847, 0.74218935, 0.5166996 , 0.582674 , 0.6094541 , 0.41805607, 0.48368582, 0.46530965] + [0.37350047, 0.70260847, 0.74218935, 0.5166996, 0.582674, 0.6094541, 0.41805607, 0.48368582, 0.46530965] ) - expected_slice_depth = np.array([103.46226 , 85.828445, 87.86833]) + expected_slice_depth = np.array([103.46226, 85.828445, 87.86833]) assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2 assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2 @@ -296,7 +296,7 @@ def test_ldm3d_v2(self): inputs = self.get_inputs(torch_device) output = ldm3d_pipe(**inputs) rgb, depth = output.rgb, output.depth - + expected_rgb_mean = 0.4194127 expected_rgb_std = 0.35375586 expected_depth_mean = 0.5638502 From 3b242c3aa249fab51960b63dc0a137f7d5b5ee82 Mon Sep 17 00:00:00 2001 From: Aflalo Date: Thu, 29 Jun 2023 01:02:16 -0700 Subject: [PATCH 8/8] fixed test on cpu --- .../pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py index 91272c58543a..e2164e8117ad 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_ldm3d.py @@ -130,9 +130,9 @@ def test_stable_diffusion_ddim(self): assert depth.shape == (1, 64, 64) expected_slice_rgb = np.array( - [0.37350047, 0.70260847, 0.74218935, 0.5166996, 0.582674, 0.6094541, 0.41805607, 0.48368582, 0.46530965] + [0.37338176, 0.70247, 0.74203193, 0.51643604, 0.58256793, 0.60932136, 0.4181095, 0.48355877, 0.46535262] ) - expected_slice_depth = np.array([103.46226, 85.828445, 87.86833]) + expected_slice_depth = np.array([103.46727, 85.812004, 87.849236]) assert np.abs(image_slice_rgb.flatten() - expected_slice_rgb).max() < 1e-2 assert np.abs(image_slice_depth.flatten() - expected_slice_depth).max() < 1e-2