From d65fbc2af36fccc309d8e0ea456e8490b9075ddd Mon Sep 17 00:00:00 2001
From: Mohammad Sadegh Salehi
Date: Sun, 26 Oct 2025 19:48:59 +0000
Subject: [PATCH 1/3] Fix overflow in rgblike_to_depthmap by safe dtype casting (torch & NumPy)

---
 src/diffusers/image_processor.py | 39 ++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 0e3082eada8a..db138ff8e01c 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -1045,16 +1045,37 @@ def depth_pil_to_numpy(images: Union[List[PIL.Image.Image], PIL.Image.Image]) ->
     def rgblike_to_depthmap(image: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, torch.Tensor]:
         r"""
         Convert an RGB-like depth image to a depth map.
-
-        Args:
-            image (`Union[np.ndarray, torch.Tensor]`):
-                The RGB-like depth image to convert.
-
-        Returns:
-            `Union[np.ndarray, torch.Tensor]`:
-                The corresponding depth map.
         """
-        return image[:, :, 1] * 2**8 + image[:, :, 2]
+        # 1. Cast the tensor to a larger integer type (e.g., int32)
+        # to safely perform the multiplication by 256.
+        # 2. Perform the 16-bit combination: High-byte * 256 + Low-byte.
+        # 3. Cast the final result to the desired depth map type (uint16) if needed
+        # before returning, though leaving it as int32/int64 is often safer
+        # for return value from a library function.
+
+        if isinstance(image, torch.Tensor):
+            # Cast to a safe dtype (e.g., int32 or int64) for the calculation
+            image_safe = image.to(torch.int32)
+
+            # Calculate the depth map
+            depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]
+
+            # You may want to cast the final result to uint16, but casting to a
+            # larger int type (like int32) is sufficient to fix the overflow.
+            # depth_map = depth_map.to(torch.uint16) # Uncomment if uint16 is strictly required
+            return depth_map
+
+        elif isinstance(image, np.ndarray):
+            # NumPy equivalent: Cast to a safe dtype (e.g., np.int32)
+            image_safe = image.astype(np.int32)
+
+            # Calculate the depth map
+            depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]
+
+            # depth_map = depth_map.astype(np.uint16) # Uncomment if uint16 is strictly required
+            return depth_map
+        else:
+            raise TypeError("Input image must be a torch.Tensor or np.ndarray")

     def numpy_to_depth(self, images: np.ndarray) -> List[PIL.Image.Image]:
         r"""

From c3b6e6e95d3d8825a65d37f75e72d4a14a75777a Mon Sep 17 00:00:00 2001
From: Mohammad Sadegh Salehi <34940948+MohammadSadeghSalehi@users.noreply.github.com>
Date: Thu, 6 Nov 2025 09:48:28 +0000
Subject: [PATCH 2/3] Fix: store original dtype and cast back after safe computation

---
 src/diffusers/image_processor.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index db138ff8e01c..4c3138b8f29a 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -1055,6 +1055,7 @@ def rgblike_to_depthmap(image: Union[np.ndar

         if isinstance(image, torch.Tensor):
             # Cast to a safe dtype (e.g., int32 or int64) for the calculation
+            original_dtype = image.dtype
             image_safe = image.to(torch.int32)

             # Calculate the depth map
@@ -1063,17 +1064,18 @@ def rgblike_to_depthmap(image: Union[np.ndar
             # You may want to cast the final result to uint16, but casting to a
             # larger int type (like int32) is sufficient to fix the overflow.
             # depth_map = depth_map.to(torch.uint16) # Uncomment if uint16 is strictly required
-            return depth_map
+            return depth_map.to(original_dtype)

         elif isinstance(image, np.ndarray):
             # NumPy equivalent: Cast to a safe dtype (e.g., np.int32)
+            original_dtype = image.dtype
             image_safe = image.astype(np.int32)

             # Calculate the depth map
             depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]

             # depth_map = depth_map.astype(np.uint16) # Uncomment if uint16 is strictly required
-            return depth_map
+            return depth_map.astype(original_dtype)
         else:
             raise TypeError("Input image must be a torch.Tensor or np.ndarray")


From 1c8b5dc36cbd727e3e7628e620fbc027494a92af Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 6 Nov 2025 16:16:21 +0000
Subject: [PATCH 3/3] Apply style fixes

---
 src/diffusers/image_processor.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
index 4c3138b8f29a..067d876ffcd8 100644
--- a/src/diffusers/image_processor.py
+++ b/src/diffusers/image_processor.py
@@ -1049,31 +1049,31 @@ def rgblike_to_depthmap(image: Union[np.ndar
         # 1. Cast the tensor to a larger integer type (e.g., int32)
         # to safely perform the multiplication by 256.
         # 2. Perform the 16-bit combination: High-byte * 256 + Low-byte.
-        # 3. Cast the final result to the desired depth map type (uint16) if needed 
-        # before returning, though leaving it as int32/int64 is often safer 
+        # 3. Cast the final result to the desired depth map type (uint16) if needed
+        # before returning, though leaving it as int32/int64 is often safer
         # for return value from a library function.
-        
+
         if isinstance(image, torch.Tensor):
             # Cast to a safe dtype (e.g., int32 or int64) for the calculation
             original_dtype = image.dtype
-            image_safe = image.to(torch.int32) 
-            
+            image_safe = image.to(torch.int32)
+
             # Calculate the depth map
             depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]
-            
-            # You may want to cast the final result to uint16, but casting to a 
+
+            # You may want to cast the final result to uint16, but casting to a
             # larger int type (like int32) is sufficient to fix the overflow.
             # depth_map = depth_map.to(torch.uint16) # Uncomment if uint16 is strictly required
             return depth_map.to(original_dtype)
-            
+
         elif isinstance(image, np.ndarray):
             # NumPy equivalent: Cast to a safe dtype (e.g., np.int32)
             original_dtype = image.dtype
             image_safe = image.astype(np.int32)
-            
+
             # Calculate the depth map
             depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]
-            
+
             # depth_map = depth_map.astype(np.uint16) # Uncomment if uint16 is strictly required
             return depth_map.astype(original_dtype)
         else:
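
Note (not part of the patch series above): a minimal, standalone NumPy sketch of the overflow the series addresses, using made-up pixel values rather than anything from the diffusers test suite. The RGB-like encoding stores a 16-bit depth value with the high byte in channel 1 and the low byte in channel 2; combining them directly on a uint8 array can wrap or raise under integer-promotion rules, while widening to int32 first, as the patched rgblike_to_depthmap does, recovers the intended value.

import numpy as np

# Hypothetical RGB-like depth image (uint8): channel 1 = high byte, channel 2 = low byte.
rgb_like = np.zeros((2, 2, 3), dtype=np.uint8)
rgb_like[..., 1] = 3    # high byte
rgb_like[..., 2] = 200  # low byte -> intended depth value: 3 * 256 + 200 = 968

# Safe combination, mirroring the patched function: widen the dtype before multiplying.
image_safe = rgb_like.astype(np.int32)
depth_map = image_safe[:, :, 1] * 256 + image_safe[:, :, 2]
print(depth_map.dtype, int(depth_map[0, 0]))  # int32 968

# The pre-patch expression `image[:, :, 1] * 2**8 + image[:, :, 2]` may instead be
# evaluated in uint8 under recent NumPy/PyTorch promotion rules, so 968 cannot be
# represented and the result wraps or errors out.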