huggingface · patrickvonplaten · Jan 4, 2023 · Jan 3, 2023 · Jan 3, 2023 · Jan 3, 2023
diff --git a/src/diffusers/experimental/rl/value_guided_sampling.py b/src/diffusers/experimental/rl/value_guided_sampling.py
@@ -19,6 +19,7 @@
 
 from ...models.unet_1d import UNet1DModel
 from ...pipelines import DiffusionPipeline
+from ...utils import randn_tensor
 from ...utils.dummy_pt_objects import DDPMScheduler
 
 
@@ -127,7 +128,7 @@ def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, sca
         shape = (batch_size, planning_horizon, self.state_dim + self.action_dim)
 
         # generate initial noise and apply our conditions (to make the trajectories start at current state)
-        x1 = torch.randn(shape, device=self.unet.device)
+        x1 = randn_tensor(shape, device=self.unet.device)
         x = self.reset_x0(x1, conditions, self.action_dim)
         x = self.to_torch(x)
 

diff --git a/src/diffusers/models/prior_transformer.py b/src/diffusers/models/prior_transformer.py
@@ -95,7 +95,7 @@ def __init__(
         self.proj_to_clip_embeddings = nn.Linear(inner_dim, embedding_dim)
 
         causal_attention_mask = torch.full(
-            [num_embeddings + additional_embeddings, num_embeddings + additional_embeddings], float("-inf")
+            [num_embeddings + additional_embeddings, num_embeddings + additional_embeddings], -10000.0
         )
         causal_attention_mask.triu_(1)
         causal_attention_mask = causal_attention_mask[None, ...]

diff --git a/src/diffusers/models/vae.py b/src/diffusers/models/vae.py
@@ -18,7 +18,7 @@
 import torch
 import torch.nn as nn
 
-from ..utils import BaseOutput
+from ..utils import BaseOutput, randn_tensor
 from .unet_2d_blocks import UNetMidBlock2D, get_down_block, get_up_block
 
 
@@ -323,11 +323,10 @@ def __init__(self, parameters, deterministic=False):
             )
 
     def sample(self, generator: Optional[torch.Generator] = None) -> torch.FloatTensor:
-        device = self.parameters.device
-        sample_device = "cpu" if device.type == "mps" else device
-        sample = torch.randn(self.mean.shape, generator=generator, device=sample_device)
         # make sure sample is on the same device as the parameters and has same dtype
-        sample = sample.to(device=device, dtype=self.parameters.dtype)
+        sample = randn_tensor(
+            self.mean.shape, generator=generator, device=self.parameters.device, dtype=self.parameters.dtype
+        )
         x = self.mean + self.std * sample
         return x
 

diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
@@ -31,7 +31,7 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import deprecate, logging, replace_example_docstring
+from ...utils import deprecate, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
@@ -401,20 +401,8 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
             )
 
         if latents is None:
-            rand_device = "cpu" if device.type == "mps" else device
-
-            if isinstance(generator, list):
-                shape = (1,) + shape[1:]
-                latents = [
-                    torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
-                    for i in range(batch_size)
-                ]
-                latents = torch.cat(latents, dim=0).to(device)
-            else:
-                latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
+            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
         else:
-            if latents.shape != shape:
-                raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
             latents = latents.to(device)
 
         # scale the initial noise by the standard deviation required by the scheduler

diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -33,7 +33,7 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import PIL_INTERPOLATION, deprecate, logging, replace_example_docstring
+from ...utils import PIL_INTERPOLATION, deprecate, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from . import AltDiffusionPipelineOutput, RobertaSeriesModelWithTransformation
@@ -461,16 +461,8 @@ def prepare_latents(self, image, timestep, batch_size, num_images_per_prompt, dt
         else:
             init_latents = torch.cat([init_latents], dim=0)
 
-        rand_device = "cpu" if device.type == "mps" else device
         shape = init_latents.shape
-        if isinstance(generator, list):
-            shape = (1,) + shape[1:]
-            noise = [
-                torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) for i in range(batch_size)
-            ]
-            noise = torch.cat(noise, dim=0).to(device)
-        else:
-            noise = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
+        noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
 
         # get latents
         init_latents = self.scheduler.add_noise(init_latents, noise, timestep)

diff --git a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py
@@ -23,6 +23,7 @@
 
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import DDIMScheduler, DDPMScheduler
+from ...utils import randn_tensor
 from ..pipeline_utils import AudioPipelineOutput, BaseOutput, DiffusionPipeline, ImagePipelineOutput
 from .mel import Mel
 
@@ -126,7 +127,7 @@ def __call__(
         input_dims = self.get_input_dims()
         self.mel.set_resolution(x_res=input_dims[1], y_res=input_dims[0])
         if noise is None:
-            noise = torch.randn(
+            noise = randn_tensor(
                 (
                     batch_size,
                     self.unet.in_channels,

diff --git a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py
@@ -17,7 +17,7 @@
 
 import torch
 
-from ...utils import logging
+from ...utils import logging, randn_tensor
 from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
 
 
@@ -100,16 +100,7 @@ def __call__(
                 f" size of {batch_size}. Make sure the batch size matches the length of the generators."
             )
 
-        rand_device = "cpu" if self.device.type == "mps" else self.device
-        if isinstance(generator, list):
-            shape = (1,) + shape[1:]
-            audio = [
-                torch.randn(shape, generator=generator[i], device=rand_device, dtype=self.unet.dtype)
-                for i in range(batch_size)
-            ]
-            audio = torch.cat(audio, dim=0).to(self.device)
-        else:
-            audio = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(self.device)
+        audio = randn_tensor(shape, generator=generator, device=self.device, dtype=dtype)
 
         # set step values
         self.scheduler.set_timesteps(num_inference_steps, device=audio.device)

diff --git a/src/diffusers/pipelines/ddim/pipeline_ddim.py b/src/diffusers/pipelines/ddim/pipeline_ddim.py
@@ -16,7 +16,7 @@
 
 import torch
 
-from ...utils import deprecate
+from ...utils import deprecate, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -103,17 +103,7 @@ def __call__(
                 f" size of {batch_size}. Make sure the batch size matches the length of the generators."
             )
 
-        rand_device = "cpu" if self.device.type == "mps" else self.device
-        if isinstance(generator, list):
-            shape = (1,) + image_shape[1:]
-            image = [
-                torch.randn(shape, generator=generator[i], device=rand_device, dtype=self.unet.dtype)
-                for i in range(batch_size)
-            ]
-            image = torch.cat(image, dim=0).to(self.device)
-        else:
-            image = torch.randn(image_shape, generator=generator, device=rand_device, dtype=self.unet.dtype)
-            image = image.to(self.device)
+        image = randn_tensor(image_shape, generator=generator, device=self.device, dtype=self.unet.dtype)
 
         # set step values
         self.scheduler.set_timesteps(num_inference_steps)

diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py
@@ -18,7 +18,7 @@
 import torch
 
 from ...configuration_utils import FrozenDict
-from ...utils import deprecate
+from ...utils import deprecate, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -100,10 +100,10 @@ def __call__(
 
         if self.device.type == "mps":
             # randn does not work reproducibly on mps
-            image = torch.randn(image_shape, generator=generator)
+            image = randn_tensor(image_shape, generator=generator)
             image = image.to(self.device)
         else:
-            image = torch.randn(image_shape, generator=generator, device=self.device)
+            image = randn_tensor(image_shape, generator=generator, device=self.device)
 
         # set step values
         self.scheduler.set_timesteps(num_inference_steps)

diff --git a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py
@@ -26,6 +26,7 @@
 
 from ...models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel
 from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
+from ...utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -143,20 +144,7 @@ def __call__(
             )
 
         if latents is None:
-            rand_device = "cpu" if self.device.type == "mps" else self.device
-
-            if isinstance(generator, list):
-                latents_shape = (1,) + latents_shape[1:]
-                latents = [
-                    torch.randn(latents_shape, generator=generator[i], device=rand_device, dtype=text_embeddings.dtype)
-                    for i in range(batch_size)
-                ]
-                latents = torch.cat(latents, dim=0)
-            else:
-                latents = torch.randn(
-                    latents_shape, generator=generator, device=rand_device, dtype=text_embeddings.dtype
-                )
-            latents = latents.to(self.device)
+            latents = randn_tensor(latents_shape, generator=generator, device=self.device, dtype=text_embeddings.dtype)
         else:
             if latents.shape != latents_shape:
                 raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")

diff --git a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py
@@ -16,7 +16,7 @@
     LMSDiscreteScheduler,
     PNDMScheduler,
 )
-from ...utils import PIL_INTERPOLATION, deprecate
+from ...utils import PIL_INTERPOLATION, deprecate, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -121,12 +121,7 @@ def __call__(
         latents_shape = (batch_size, self.unet.in_channels // 2, height, width)
         latents_dtype = next(self.unet.parameters()).dtype
 
-        if self.device.type == "mps":
-            # randn does not work reproducibly on mps
-            latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype)
-            latents = latents.to(self.device)
-        else:
-            latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype)
+        latents = randn_tensor(latents_shape, generator=generator, device=self.device, dtype=latents_dtype)
 
         image = image.to(device=self.device, dtype=latents_dtype)
 

diff --git a/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py b/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py
@@ -19,6 +19,7 @@
 
 from ...models import UNet2DModel, VQModel
 from ...schedulers import DDIMScheduler
+from ...utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -71,7 +72,7 @@ def __call__(
             True, otherwise a `tuple. When returning a tuple, the first element is a list with the generated images.
         """
 
-        latents = torch.randn(
+        latents = randn_tensor(
             (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size),
             generator=generator,
         )

diff --git a/src/diffusers/pipelines/paint_by_example/image_encoder.py b/src/diffusers/pipelines/paint_by_example/image_encoder.py
@@ -34,7 +34,7 @@ def __init__(self, config, proj_size=768):
         self.proj_out = nn.Linear(config.hidden_size, self.proj_size)
 
         # uncondition for scaling
-        self.uncond_vector = nn.Parameter(torch.rand((1, 1, self.proj_size)))
+        self.uncond_vector = nn.Parameter(torch.randn((1, 1, self.proj_size)))
 
     def forward(self, pixel_values):
         clip_output = self.model(pixel_values=pixel_values)

diff --git a/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py b/src/diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py
@@ -24,7 +24,7 @@
 
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from ...utils import logging
+from ...utils import logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -300,20 +300,8 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
             )
 
         if latents is None:
-            rand_device = "cpu" if device.type == "mps" else device
-
-            if isinstance(generator, list):
-                shape = (1,) + shape[1:]
-                latents = [
-                    torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
-                    for i in range(batch_size)
-                ]
-                latents = torch.cat(latents, dim=0).to(device)
-            else:
-                latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
+            latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
         else:
-            if latents.shape != shape:
-                raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
             latents = latents.to(device)
 
         # scale the initial noise by the standard deviation required by the scheduler

diff --git a/src/diffusers/pipelines/pndm/pipeline_pndm.py b/src/diffusers/pipelines/pndm/pipeline_pndm.py
@@ -19,6 +19,7 @@
 
 from ...models import UNet2DModel
 from ...schedulers import PNDMScheduler
+from ...utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -72,11 +73,11 @@ def __call__(
         # the official paper: https://arxiv.org/pdf/2202.09778.pdf
 
         # Sample gaussian noise to begin loop
-        image = torch.randn(
+        image = randn_tensor(
             (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size),
             generator=generator,
+            device=self.device,
         )
-        image = image.to(self.device)
 
         self.scheduler.set_timesteps(num_inference_steps)
         for t in self.progress_bar(self.scheduler.timesteps):

diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py
@@ -22,7 +22,7 @@
 
 from ...models import UNet2DModel
 from ...schedulers import RePaintScheduler
-from ...utils import PIL_INTERPOLATION, deprecate, logging
+from ...utils import PIL_INTERPOLATION, deprecate, logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -143,18 +143,8 @@ def __call__(
                 f" size of {batch_size}. Make sure the batch size matches the length of the generators."
             )
 
-        rand_device = "cpu" if self.device.type == "mps" else self.device
         image_shape = original_image.shape
-        if isinstance(generator, list):
-            shape = (1,) + image_shape[1:]
-            image = [
-                torch.randn(shape, generator=generator[i], device=rand_device, dtype=self.unet.dtype)
-                for i in range(batch_size)
-            ]
-            image = torch.cat(image, dim=0).to(self.device)
-        else:
-            image = torch.randn(image_shape, generator=generator, device=rand_device, dtype=self.unet.dtype)
-            image = image.to(self.device)
+        image = randn_tensor(image_shape, generator=generator, device=self.device, dtype=self.unet.dtype)
 
         # set step values
         self.scheduler.set_timesteps(num_inference_steps, jump_length, jump_n_sample, self.device)

diff --git a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py
@@ -18,6 +18,7 @@
 
 from ...models import UNet2DModel
 from ...schedulers import ScoreSdeVeScheduler
+from ...utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
@@ -69,7 +70,7 @@ def __call__(
 
         model = self.unet
 
-        sample = torch.randn(*shape, generator=generator) * self.scheduler.init_noise_sigma
+        sample = randn_tensor(shape, generator=generator) * self.scheduler.init_noise_sigma
         sample = sample.to(self.device)
 
         self.scheduler.set_timesteps(num_inference_steps)