From 8425cd48774e41c95fa0a2378c3033916aeccbbb Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Thu, 5 Oct 2023 06:34:18 +0530 Subject: [PATCH 01/15] Add a docstring to the AutoencoderTinyBlock class. This makes it easier for other developers to understand what the class does and where it is used. --- src/diffusers/models/unet_2d_blocks.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 8aebb3aad615..31a6a5241e09 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -453,6 +453,42 @@ def get_up_block( class AutoencoderTinyBlock(nn.Module): + """ + It's tiny autoencoder block. The block consists of three + convolutional layers with ReLU activation, followed by a + skip connection and a final ReLU activation. + + The skip connection helps to preserve the spatial information of the input signal, which + can be beneficial for autoencoders that are used for image compression or denoising. + This block is designed to be used in autoencoder architectures that are lightweight and + fast to train. + + Parameters: + * `in_channels:` The number of input channels. + * `out_channels:` The number of output channels. + * `act_fn:` The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. + + Example: + ```python + >>> # Define an autoencoder architecture using the AutoencoderTinyBlock. + >>> model = nn.Sequential( + >>> AutoencoderTinyBlock(3, 64, "relu"), + >>> AutoencoderTinyBlock(64, 128, "relu"), + >>> AutoencoderTinyBlock(128, 256, "relu"), + >>> ) + + >>> # Train the model on a dataset of images. + >>> # ... + + >>> # Use the model to reconstruct an image from a noisy input. + >>> noisy_image = ... + >>> reconstructed_image = model(noisy_image) + ``` + + Output: + A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. 
+ """ + def __init__(self, in_channels: int, out_channels: int, act_fn: str): super().__init__() act_fn = get_activation(act_fn) From 746a8e8e51efef6c73c34153c0585391ad000391 Mon Sep 17 00:00:00 2001 From: Chi Date: Sat, 7 Oct 2023 05:54:19 +0530 Subject: [PATCH 02/15] Update src/diffusers/models/unet_2d_blocks.py This change was suggested by the maintainer. Co-authored-by: Sayak Paul --- src/diffusers/models/unet_2d_blocks.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 6cd11c29ae11..5a109e338315 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -481,22 +481,6 @@ class AutoencoderTinyBlock(nn.Module): * `out_channels:` The number of output channels. * `act_fn:` The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. - Example: - ```python - >>> # Define an autoencoder architecture using the AutoencoderTinyBlock. - >>> model = nn.Sequential( - >>> AutoencoderTinyBlock(3, 64, "relu"), - >>> AutoencoderTinyBlock(64, 128, "relu"), - >>> AutoencoderTinyBlock(128, 256, "relu"), - >>> ) - - >>> # Train the model on a dataset of images. - >>> # ... - - >>> # Use the model to reconstruct an image from a noisy input. - >>> noisy_image = ... - >>> reconstructed_image = model(noisy_image) - ``` Output: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. 
From 6e5688647b0ba681f4b47e1a96fabe6ef733ff8e Mon Sep 17 00:00:00 2001 From: Chi Date: Sat, 7 Oct 2023 17:16:49 +0530 Subject: [PATCH 03/15] Update src/diffusers/models/unet_2d_blocks.py Add suggested text Co-authored-by: Sayak Paul --- src/diffusers/models/unet_2d_blocks.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 5a109e338315..f4ef776d1c8a 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -467,14 +467,7 @@ def get_up_block( class AutoencoderTinyBlock(nn.Module): """ - It's tiny autoencoder block. The block consists of three - convolutional layers with ReLU activation, followed by a - skip connection and a final ReLU activation. - - The skip connection helps to preserve the spatial information of the input signal, which - can be beneficial for autoencoders that are used for image compression or denoising. - This block is designed to be used in autoencoder architectures that are lightweight and - fast to train. + Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. Parameters: * `in_channels:` The number of input channels. From 627fd9fee2c0074c1ce2ca68de7afecc2f7a6b38 Mon Sep 17 00:00:00 2001 From: Chi Date: Sat, 7 Oct 2023 20:44:27 +0530 Subject: [PATCH 04/15] Update unet_2d_blocks.py I changed the Parameter to Args text. --- src/diffusers/models/unet_2d_blocks.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index f4ef776d1c8a..b4fc95afd083 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -469,11 +469,10 @@ class AutoencoderTinyBlock(nn.Module): """ Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. 
- Parameters: - * `in_channels:` The number of input channels. - * `out_channels:` The number of output channels. - * `act_fn:` The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. - + Args: + in_channels (`int`): The number of input channels. + out_channels (`int`): The number of output channels. + act_fn (`str`):` The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. Output: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. From ae4f7f272c902177ad736144dd03132cdc2f1c00 Mon Sep 17 00:00:00 2001 From: Chi Date: Sun, 8 Oct 2023 19:53:14 +0530 Subject: [PATCH 05/15] Update unet_2d_blocks.py proper indentation set in this file. --- src/diffusers/models/unet_2d_blocks.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index b4fc95afd083..28adb7ec6d1e 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -467,15 +467,15 @@ def get_up_block( class AutoencoderTinyBlock(nn.Module): """ - Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. + Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. - Args: - in_channels (`int`): The number of input channels. - out_channels (`int`): The number of output channels. - act_fn (`str`):` The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. + Args: + in_channels (`int`): The number of input channels. + out_channels (`int`): The number of output channels. + act_fn (`str`): The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. - Output: - A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. 
+ Returns: + `torch.FloatTensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. """ def __init__(self, in_channels: int, out_channels: int, act_fn: str): From f0bea430b8104f3a21ec1c7b8849aafd8463b26b Mon Sep 17 00:00:00 2001 From: Chi Date: Sun, 8 Oct 2023 20:03:51 +0530 Subject: [PATCH 06/15] Update unet_2d_blocks.py a little bit of change in the act_fun argument line. --- src/diffusers/models/unet_2d_blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 28adb7ec6d1e..d0342aac93af 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -472,7 +472,7 @@ class AutoencoderTinyBlock(nn.Module): Args: in_channels (`int`): The number of input channels. out_channels (`int`): The number of output channels. - act_fn (`str`): The activation function to use. Supported values are `relu`, `tanh`, and `sigmoid`. + act_fn (`str`):` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`. Returns: `torch.FloatTensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. From 3546f6d417565d16fe1692d961e9794254dff641 Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Mon, 9 Oct 2023 19:27:39 +0530 Subject: [PATCH 07/15] I run the black command to reformat style in the code --- src/diffusers/models/unet_2d_blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index d0342aac93af..18b0e41af738 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -467,13 +467,13 @@ def get_up_block( class AutoencoderTinyBlock(nn.Module): """ - Tiny Autoencoder block used in [`AutoencoderTiny`]. 
It is a mini residual module consisting of plain conv + ReLU blocks. + Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. Args: in_channels (`int`): The number of input channels. out_channels (`int`): The number of output channels. act_fn (`str`):` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`. - + Returns: `torch.FloatTensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. """ From 01a9fc96188f3f769c6bc3ace41e092ed2a9c67b Mon Sep 17 00:00:00 2001 From: Chi Date: Fri, 13 Oct 2023 21:17:00 +0530 Subject: [PATCH 08/15] Update unet_2d_blocks.py similar doc-string add to have in the original diffusion repository. --- src/diffusers/models/unet_2d_blocks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 18b0e41af738..d57949976d30 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -467,15 +467,18 @@ def get_up_block( class AutoencoderTinyBlock(nn.Module): """ - Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU blocks. + Tiny Autoencoder block used in [`AutoencoderTiny`]. It is a mini residual module consisting of plain conv + ReLU + blocks. Args: in_channels (`int`): The number of input channels. out_channels (`int`): The number of output channels. - act_fn (`str`):` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`. + act_fn (`str`): + ` The activation function to use. Supported values are `"swish"`, `"mish"`, `"gelu"`, and `"relu"`. Returns: - `torch.FloatTensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to `out_channels`. 
+ `torch.FloatTensor`: A tensor with the same shape as the input tensor, but with the number of channels equal to + `out_channels`. """ def __init__(self, in_channels: int, out_channels: int, act_fn: str): From eef06f43ddb2e37c4fffc1c2a4e5a29e93786269 Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Sat, 30 Dec 2023 06:41:49 +0530 Subject: [PATCH 09/15] Better way to write binarize function --- src/diffusers/image_processor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index ab96384fe9f1..b3c85bef5252 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -258,8 +258,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: `PIL.Image.Image`: The binarized image. Values less than 0.5 are set to 0, values greater than 0.5 are set to 1. """ - image[image < 0.5] = 0 - image[image >= 0.5] = 1 + image.point(lambda p: 0 if p < 0.5 else 1, '1') return image def preprocess( From 2dc1f64091c5a4e20ed415bb4fa64e6942953e10 Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Sat, 30 Dec 2023 06:57:11 +0530 Subject: [PATCH 10/15] Solve check_code_quality error --- src/diffusers/image_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index b3c85bef5252..45e339bedc53 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -258,7 +258,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: `PIL.Image.Image`: The binarized image. Values less than 0.5 are set to 0, values greater than 0.5 are set to 1. 
""" - image.point(lambda p: 0 if p < 0.5 else 1, '1') + image.point(lambda p: 0 if p < 0.5 else 1, "1") return image def preprocess( From f33c9b843f37e8b21237d58cefe7e96bffdf0b05 Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Sat, 30 Dec 2023 07:08:53 +0530 Subject: [PATCH 11/15] My mistake to run pull request but not reformated file --- src/diffusers/image_processor.py | 57 +------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 2d18877f85c3..6e9ffddc99d1 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -400,7 +400,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: `PIL.Image.Image`: The binarized image. Values less than 0.5 are set to 0, values greater than 0.5 are set to 1. """ - image.point(lambda p: 0 if p < 0.5 else 1, '1') + image.point(lambda p: 0 if p < 0.5 else 1, "1") return image def get_default_height_width( @@ -446,61 +446,6 @@ def get_default_height_width( return height, width -<<<<<<< HEAD - def resize( - self, - image: Union[PIL.Image.Image, np.ndarray, torch.Tensor], - height: Optional[int] = None, - width: Optional[int] = None, - ) -> Union[PIL.Image.Image, np.ndarray, torch.Tensor]: - """ - Resize image. - - Args: - image (`PIL.Image.Image`, `np.ndarray` or `torch.Tensor`): - The image input, can be a PIL image, numpy array or pytorch tensor. - height (`int`, *optional*, defaults to `None`): - The height to resize to. - width (`int`, *optional*`, defaults to `None`): - The width to resize to. - - Returns: - `PIL.Image.Image`, `np.ndarray` or `torch.Tensor`: - The resized image. 
- """ - if isinstance(image, PIL.Image.Image): - image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample]) - elif isinstance(image, torch.Tensor): - image = torch.nn.functional.interpolate( - image, - size=(height, width), - ) - elif isinstance(image, np.ndarray): - image = self.numpy_to_pt(image) - image = torch.nn.functional.interpolate( - image, - size=(height, width), - ) - image = self.pt_to_numpy(image) - return image - - def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: - """ - Create a mask. - - Args: - image (`PIL.Image.Image`): - The image input, should be a PIL image. - - Returns: - `PIL.Image.Image`: - The binarized image. Values less than 0.5 are set to 0, values greater than 0.5 are set to 1. - """ - image.point(lambda p: 0 if p < 0.5 else 1, "1") - return image - -======= ->>>>>>> 2013a2d9011964e9e0578f5aa2ebdebb6cc0110b def preprocess( self, image: PipelineImageInput, From d866d4dd8e9596ace318826a3be0a3102e9bcb79 Mon Sep 17 00:00:00 2001 From: Chi Date: Sat, 30 Dec 2023 11:02:51 +0530 Subject: [PATCH 12/15] Update image_processor.py --- src/diffusers/image_processor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 6e9ffddc99d1..c8037d8037c4 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -400,7 +400,9 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: `PIL.Image.Image`: The binarized image. Values less than 0.5 are set to 0, values greater than 0.5 are set to 1. 
""" - image.point(lambda p: 0 if p < 0.5 else 1, "1") + image[image < 0.5] = 0 + image[image >= 0.5] = 1 + return image def get_default_height_width( From d876f20f45e76e4136c2f4fff11b0553758a5d6e Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Sun, 31 Dec 2023 06:32:14 +0530 Subject: [PATCH 13/15] remove extra variable and space --- src/diffusers/image_processor.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index c8037d8037c4..a72eed13dd40 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -33,14 +33,7 @@ List[torch.FloatTensor], ] -PipelineDepthInput = Union[ - PIL.Image.Image, - np.ndarray, - torch.FloatTensor, - List[PIL.Image.Image], - List[np.ndarray], - List[torch.FloatTensor], -] +PipelineDepthInput = PipelineImageInput class VaeImageProcessor(ConfigMixin): @@ -153,27 +146,21 @@ def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image: """ Converts a PIL image to RGB format. """ - image = image.convert("RGB") - - return image + return image.convert("RGB") @staticmethod def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image: """ Converts a PIL image to grayscale format. """ - image = image.convert("L") - - return image + return image.convert("L") @staticmethod def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image: """ - Blurs an image. + Applies Gaussian blur to an image. 
""" - image = image.filter(ImageFilter.GaussianBlur(blur_factor)) - - return image + return image.filter(ImageFilter.GaussianBlur(blur_factor)) @staticmethod def get_crop_region(mask_image: PIL.Image.Image, width: int, height: int, pad=0): @@ -402,7 +389,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: """ image[image < 0.5] = 0 image[image >= 0.5] = 1 - + return image def get_default_height_width( From 393d1b199aa6600a04e82e07dd03bbaae47c1b58 Mon Sep 17 00:00:00 2001 From: Chi Date: Wed, 3 Jan 2024 06:12:32 +0530 Subject: [PATCH 14/15] Update image_processor.py --- src/diffusers/image_processor.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index a72eed13dd40..e38578770093 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -146,21 +146,27 @@ def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image: """ Converts a PIL image to RGB format. """ - return image.convert("RGB") + image = image.convert("RGB") + + return image @staticmethod def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image: """ Converts a PIL image to grayscale format. """ - return image.convert("L") + image = image.convert("L") + + return image @staticmethod def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image: """ Applies Gaussian blur to an image. 
""" - return image.filter(ImageFilter.GaussianBlur(blur_factor)) + image = image.filter(ImageFilter.GaussianBlur(blur_factor)) + + return image @staticmethod def get_crop_region(mask_image: PIL.Image.Image, width: int, height: int, pad=0): @@ -389,7 +395,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: """ image[image < 0.5] = 0 image[image >= 0.5] = 1 - + return image def get_default_height_width( From 5b9708a1b0dd6ffb647786b71adccfb5a9a0bf03 Mon Sep 17 00:00:00 2001 From: hi-sushanta Date: Wed, 3 Jan 2024 06:16:31 +0530 Subject: [PATCH 15/15] Run ruff library to reformat my file --- src/diffusers/image_processor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py index 20c4f6e98f8d..843052c1adf3 100644 --- a/src/diffusers/image_processor.py +++ b/src/diffusers/image_processor.py @@ -147,7 +147,7 @@ def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image: Converts a PIL image to RGB format. """ image = image.convert("RGB") - + return image @staticmethod @@ -156,7 +156,7 @@ def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image: Converts a PIL image to grayscale format. """ image = image.convert("L") - + return image @staticmethod @@ -165,7 +165,7 @@ def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image: Applies Gaussian blur to an image. """ image = image.filter(ImageFilter.GaussianBlur(blur_factor)) - + return image @staticmethod @@ -395,7 +395,7 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image: """ image[image < 0.5] = 0 image[image >= 0.5] = 1 - + return image def get_default_height_width(