From f82a4aa213381867a200f9d02f12207d3c0638f9 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Mon, 26 Feb 2024 23:49:26 +0530 Subject: [PATCH 1/8] fix returns and docs --- .../animatediff/pipeline_animatediff.py | 15 +++++-------- .../pipeline_animatediff_video2video.py | 17 ++++---------- .../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 13 ++++------- src/diffusers/pipelines/pia/pipeline_pia.py | 13 ++++------- .../pipeline_stable_video_diffusion.py | 7 ++---- .../pipeline_text_to_video_synth.py | 13 ++++------- .../pipeline_text_to_video_synth_img2img.py | 22 ++++++------------- 7 files changed, 31 insertions(+), 69 deletions(-) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index c794bd00ce85..166242e98e72 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -71,17 +71,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -812,6 +809,8 @@ def __call__( self._num_timesteps = len(timesteps) num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + + # 8. Denoising loop with self.progress_bar(total=num_inference_steps) as progress_bar: for i, t in enumerate(timesteps): # expand the latents if we are doing classifier free guidance @@ -851,13 +850,11 @@ def __call__( if callback is not None and i % callback_steps == 0: callback(i, t, latents) - if output_type == "latent": - return AnimateDiffPipelineOutput(frames=latents) - + # 9. Post processing video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) - # 9. Offload all models + # 10. Offload all models self.maybe_free_model_hooks() if not return_dict: diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 4b5cc12b1265..d8ba2ec35f65 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -90,17 +90,14 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -956,6 +953,7 @@ def __call__( self._num_timesteps = len(timesteps) num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + # 8. Denoising loop with self.progress_bar(total=num_inference_steps) as progress_bar: for i, t in enumerate(timesteps): @@ -994,16 +992,9 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() - if output_type == "latent": - return AnimateDiffPipelineOutput(frames=latents) - # 9. Post-processing video_tensor = self.decode_latents(latents) - - if output_type == "pt": - video = video_tensor - else: - video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) + video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 10. Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 5354f6643cb7..4d629d8fe485 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -73,17 +73,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -784,13 +781,11 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() - if output_type == "latent": - return I2VGenXLPipelineOutput(frames=latents) - + # 8. Post processing video_tensor = self.decode_latents(latents, decode_chunk_size=decode_chunk_size) video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) - # Offload all models + # 9. Offload all models self.maybe_free_model_hooks() if not return_dict: diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 071caa1a33dd..48111376476b 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -97,17 +97,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -1040,13 +1037,11 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() - if output_type == "latent": - return PIAPipelineOutput(frames=latents) - + # 9. Post processing video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) - # 9. Offload all models + # 10. Offload all models self.maybe_free_model_hooks() if not return_dict: diff --git a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py index f53ebbafee2e..b58e10ef5a26 100644 --- a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +++ b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py @@ -47,17 +47,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index eb34910b7008..3e87f625ac8a 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -66,17 +66,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -707,13 +704,11 @@ def __call__( step_idx = i // getattr(self.scheduler, "order", 1) callback(step_idx, t, latents) - if output_type == "latent": - return TextToVideoSDPipelineOutput(frames=latents) - + # 8. Post processing video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type) - # Offload all models + # 9. 
Offload all models self.maybe_free_model_hooks() if not return_dict: diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 2a41d9a8f735..74ea1c9262be 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -101,17 +101,14 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) - outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) - - elif output_type == "pt": + elif output_type == "pt" or output_type == "latent": outputs = torch.stack(outputs) - elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") return outputs @@ -755,13 +752,13 @@ def __call__( timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - # 5. Prepare latent variables + # 6. Prepare latent variables latents = self.prepare_latents(video, latent_timestep, batch_size, prompt_embeds.dtype, device, generator) - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - # 7. Denoising loop + # 8. Denoising loop num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order with self.progress_bar(total=num_inference_steps) as progress_bar: for i, t in enumerate(timesteps): @@ -801,20 +798,15 @@ def __call__( step_idx = i // getattr(self.scheduler, "order", 1) callback(step_idx, t, latents) - if output_type == "latent": - return TextToVideoSDPipelineOutput(frames=latents) - # manually for max memory savings if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: self.unet.to("cpu") - if output_type == "latent": - return TextToVideoSDPipelineOutput(frames=latents) - + # 9. Post processing video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type) - # Offload all models + # 10. 
Offload all models self.maybe_free_model_hooks() if not return_dict: From 21596569dc0fa11b11636e24636af337962b1059 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:39:18 +0530 Subject: [PATCH 2/8] handle latent output_type correctly --- .../pipelines/animatediff/pipeline_animatediff.py | 9 ++++++--- .../animatediff/pipeline_animatediff_video2video.py | 9 ++++++--- src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 9 ++++++--- src/diffusers/pipelines/pia/pipeline_pia.py | 9 ++++++--- .../pipeline_stable_video_diffusion.py | 2 +- .../pipeline_text_to_video_synth.py | 9 ++++++--- .../pipeline_text_to_video_synth_img2img.py | 9 ++++++--- 7 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index 166242e98e72..026dcc0012f1 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -75,7 +75,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -851,8 +851,11 @@ def __call__( callback(i, t, latents) # 9. Post processing - video_tensor = self.decode_latents(latents) - video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents) + video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 10. Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index d8ba2ec35f65..0e1b0c738963 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -94,7 +94,7 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -993,8 +993,11 @@ def __call__( progress_bar.update() # 9. Post-processing - video_tensor = self.decode_latents(latents) - video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents) + video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 10. 
Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 4d629d8fe485..e3c072ce22b8 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -77,7 +77,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -782,8 +782,11 @@ def __call__( progress_bar.update() # 8. Post processing - video_tensor = self.decode_latents(latents, decode_chunk_size=decode_chunk_size) - video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents, decode_chunk_size=decode_chunk_size) + video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 9. Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 48111376476b..8b8bc190e2e8 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -101,7 +101,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -1038,8 +1038,11 @@ def __call__( progress_bar.update() # 9. Post processing - video_tensor = self.decode_latents(latents) - video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents) + video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 10. Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py index b58e10ef5a26..91ba970ad416 100644 --- a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +++ b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py @@ -51,7 +51,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil', 'latent']") diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 3e87f625ac8a..36e0b0299020 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -70,7 +70,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -705,8 +705,11 @@ def __call__( callback(step_idx, t, latents) # 8. Post processing - video_tensor = self.decode_latents(latents) - video = tensor2vid(video_tensor, self.image_processor, output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents) + video = tensor2vid(video_tensor, self.image_processor, output_type) # 9. Offload all models self.maybe_free_model_hooks() diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 74ea1c9262be..cd84c2d1da20 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -105,7 +105,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: if output_type == "np": outputs = np.stack(outputs) - elif output_type == "pt" or output_type == "latent": + elif output_type == "pt": outputs = torch.stack(outputs) elif not output_type == "pil": raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") @@ -803,8 +803,11 @@ def __call__( self.unet.to("cpu") # 9. Post processing - video_tensor = self.decode_latents(latents) - video = tensor2vid(video_tensor, self.image_processor, output_type) + if output_type == "latent": + video = latents + else: + video_tensor = self.decode_latents(latents) + video = tensor2vid(video_tensor, self.image_processor, output_type) # 10. 
Offload all models self.maybe_free_model_hooks() From 15a2f4d0d3529b7fee1ed430ba31509d2ec9e884 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:40:31 +0530 Subject: [PATCH 3/8] revert to old tensor2vid impl --- src/diffusers/pipelines/animatediff/pipeline_animatediff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index 026dcc0012f1..ab22ff85d09a 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -71,14 +71,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") return outputs From 1ddc565d653229350e2a88722a1601ec8e5219d3 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:40:57 +0530 Subject: [PATCH 4/8] make fix-copies --- .../animatediff/pipeline_animatediff_video2video.py | 5 ++++- src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 5 ++++- src/diffusers/pipelines/pia/pipeline_pia.py | 5 ++++- .../pipeline_stable_video_diffusion.py | 5 ++++- .../text_to_video_synthesis/pipeline_text_to_video_synth.py | 5 ++++- .../pipeline_text_to_video_synth_img2img.py | 5 ++++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 0e1b0c738963..e9434aecdf2b 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -90,14 +90,17 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil]") return outputs diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index e3c072ce22b8..0d11f8215a12 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -73,14 +73,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") return outputs diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 8b8bc190e2e8..40834e701775 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -97,14 +97,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") return outputs diff --git a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py index 91ba970ad416..f53ebbafee2e 100644 --- a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +++ b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py @@ -47,14 +47,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil]") return outputs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 36e0b0299020..827f5ef5fe1b 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -66,14 +66,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") return outputs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index cd84c2d1da20..f2bc1e9fdfe3 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -101,14 +101,17 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: for batch_idx in range(batch_size): batch_vid = video[batch_idx].permute(1, 0, 2, 3) batch_output = processor.postprocess(batch_vid, output_type) + outputs.append(batch_output) if output_type == "np": outputs = np.stack(outputs) + elif output_type == "pt": outputs = torch.stack(outputs) + elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil', 'latent']") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil]") return outputs From 3d489093d6455ecc6cbf097729c16f3b1d28eb2c Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:46:12 +0530 Subject: [PATCH 5/8] fix return in community animatediff pipes --- .../pipeline_animatediff_controlnet.py | 27 ++++++++++--------- .../pipeline_animatediff_img2video.py | 20 +++++++++----- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index 5873ceaa8d70..833b73f665ae 100644 --- a/examples/community/pipeline_animatediff_controlnet.py +++ b/examples/community/pipeline_animatediff_controlnet.py @@ -91,10 +91,8 @@ """ +# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid def tensor2vid(video: torch.Tensor, processor, output_type="np"): - # Based on: - # https://github.com/modelscope/modelscope/blob/1509fdb973e5871f37148a4b5e5964cafd43e64d/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py#L78 - batch_size, channels, num_frames, height, width = video.shape outputs = [] for batch_idx in range(batch_size): @@ -103,6 +101,15 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): outputs.append(batch_output) + if output_type == "np": + outputs = np.stack(outputs) + + elif output_type == "pt": + outputs = torch.stack(outputs) + + elif not output_type == "pil": + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + return outputs @@ -1077,7 +1084,7 @@ def __call__( ] controlnet_keep.append(keeps[0] if isinstance(controlnet, ControlNetModel) else keeps) - # Denoising loop + # 8. Denoising loop num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order with self.progress_bar(total=num_inference_steps) as progress_bar: for i, t in enumerate(timesteps): @@ -1153,18 +1160,14 @@ def __call__( if callback is not None and i % callback_steps == 0: callback(i, t, latents) + # 9. Post processing if output_type == "latent": - return AnimateDiffControlNetPipelineOutput(frames=latents) - - # Post-processing - video_tensor = self.decode_latents(latents) - - if output_type == "pt": - video = video_tensor + video = latents else: + video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) - # Offload all models + # 10. 
Offload all models self.maybe_free_model_hooks() if not return_dict: diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index e77e26592d3e..9b2d1a0382ee 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -158,10 +158,8 @@ def slerp( return v2 +# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.tensor2vid def tensor2vid(video: torch.Tensor, processor, output_type="np"): - # Based on: - # https://github.com/modelscope/modelscope/blob/1509fdb973e5871f37148a4b5e5964cafd43e64d/modelscope/pipelines/multi_modal/text_to_video_synthesis_pipeline.py#L78 - batch_size, channels, num_frames, height, width = video.shape outputs = [] for batch_idx in range(batch_size): @@ -170,6 +168,15 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): outputs.append(batch_output) + if output_type == "np": + outputs = np.stack(outputs) + + elif output_type == "pt": + outputs = torch.stack(outputs) + + elif not output_type == "pil": + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + return outputs @@ -1015,11 +1022,10 @@ def __call__( return AnimateDiffPipelineOutput(frames=latents) # 10. Post-processing - video_tensor = self.decode_latents(latents) - - if output_type == "pt": - video = video_tensor + if output_type == "latent": + video = latents else: + video_tensor = self.decode_latents(latents) video = tensor2vid(video_tensor, self.image_processor, output_type=output_type) # 11. Offload all models From f9383e43848290d60f1daa22798225d4c3e48f57 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:49:25 +0530 Subject: [PATCH 6/8] fix return docstring --- .../community/pipeline_animatediff_controlnet.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index 833b73f665ae..1e2e2ed6bcd6 100644 --- a/examples/community/pipeline_animatediff_controlnet.py +++ b/examples/community/pipeline_animatediff_controlnet.py @@ -13,7 +13,6 @@ # limitations under the License. 
import inspect -from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -27,6 +26,7 @@ from diffusers.models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel, UNetMotionModel from diffusers.models.lora import adjust_lora_scale_text_encoder from diffusers.models.unets.unet_motion_model import MotionAdapter +from diffusers.pipelines.animatediff.pipeline_output import AnimateDiffPipelineOutput from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers import ( @@ -37,7 +37,7 @@ LMSDiscreteScheduler, PNDMScheduler, ) -from diffusers.utils import USE_PEFT_BACKEND, BaseOutput, deprecate, logging, scale_lora_layers, unscale_lora_layers +from diffusers.utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers from diffusers.utils.torch_utils import is_compiled_module, randn_tensor @@ -113,11 +113,6 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): return outputs -@dataclass -class AnimateDiffControlNetPipelineOutput(BaseOutput): - frames: Union[torch.Tensor, np.ndarray] - - class AnimateDiffControlNetPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin): r""" Pipeline for text-to-video generation. @@ -907,8 +902,8 @@ def __call__( Examples: Returns: - [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] or `tuple`: - If `return_dict` is `True`, [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] is + [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`: + If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is returned, otherwise a `tuple` is returned where the first element is a list with the generated frames. """ @@ -1173,4 +1168,4 @@ def __call__( if not return_dict: return (video,) - return AnimateDiffControlNetPipelineOutput(frames=video) + return AnimateDiffPipelineOutput(frames=video) From c03270d65ebffd8c3934fe0b0e02a40eb131d4a6 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:53:01 +0530 Subject: [PATCH 7/8] fix return docs --- examples/community/pipeline_animatediff_img2video.py | 4 ++-- src/diffusers/pipelines/animatediff/pipeline_animatediff.py | 4 ++-- .../pipelines/animatediff/pipeline_animatediff_video2video.py | 4 ++-- src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index 9b2d1a0382ee..27b9e96ff2ef 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -890,8 +890,8 @@ def __call__( Examples: Returns: - [`AnimateDiffPipelineOutput`] or `tuple`: - If `return_dict` is `True`, [`AnimateDiffPipelineOutput`] is + [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`: + If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is returned, otherwise a `tuple` is returned where the first element is a list with the generated frames. """ # 0. 
Default height and width to unet diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index ab22ff85d09a..d7347342083a 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -694,8 +694,8 @@ def __call__( Examples: Returns: - [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] or `tuple`: - If `return_dict` is `True`, [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] is + [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`: + If `return_dict` is `True`, [`~pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is returned, otherwise a `tuple` is returned where the first element is a list with the generated frames. """ diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index e9434aecdf2b..679ef9d62f0d 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -848,8 +848,8 @@ def __call__( Examples: Returns: - [`AnimateDiffPipelineOutput`] or `tuple`: - If `return_dict` is `True`, [`AnimateDiffPipelineOutput`] is + [`pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] or `tuple`: + If `return_dict` is `True`, [`pipelines.animatediff.pipeline_output.AnimateDiffPipelineOutput`] is returned, otherwise a `tuple` is returned where the first element is a list with the generated frames. """ diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 40834e701775..04d7eea22e42 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -886,8 +886,8 @@ def __call__( Examples: Returns: - [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] or `tuple`: - If `return_dict` is `True`, [`~pipelines.text_to_video_synthesis.TextToVideoSDPipelineOutput`] is + [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] or `tuple`: + If `return_dict` is `True`, [`~pipelines.pia.pipeline_pia.PIAPipelineOutput`] is returned, otherwise a `tuple` is returned where the first element is a list with the generated frames. """ # 0. 
Default height and width to unet From 608d9ebfd7217bfa72d806aa11afcb54e6366b28 Mon Sep 17 00:00:00 2001 From: a-r-r-o-w Date: Tue, 27 Feb 2024 07:56:44 +0530 Subject: [PATCH 8/8] add missing quote --- examples/community/pipeline_animatediff_controlnet.py | 2 +- examples/community/pipeline_animatediff_img2video.py | 2 +- src/diffusers/pipelines/animatediff/pipeline_animatediff.py | 2 +- .../pipelines/animatediff/pipeline_animatediff_video2video.py | 2 +- src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 2 +- src/diffusers/pipelines/pia/pipeline_pia.py | 2 +- .../stable_video_diffusion/pipeline_stable_video_diffusion.py | 2 +- .../text_to_video_synthesis/pipeline_text_to_video_synth.py | 2 +- .../pipeline_text_to_video_synth_img2img.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/community/pipeline_animatediff_controlnet.py b/examples/community/pipeline_animatediff_controlnet.py index 1e2e2ed6bcd6..23c38116b2f7 100644 --- a/examples/community/pipeline_animatediff_controlnet.py +++ b/examples/community/pipeline_animatediff_controlnet.py @@ -108,7 +108,7 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/examples/community/pipeline_animatediff_img2video.py b/examples/community/pipeline_animatediff_img2video.py index 27b9e96ff2ef..92b60ab0fbb5 100644 --- a/examples/community/pipeline_animatediff_img2video.py +++ b/examples/community/pipeline_animatediff_img2video.py @@ -175,7 +175,7 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index d7347342083a..f4e22ad71958 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -81,7 +81,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 679ef9d62f0d..cc05eb37f0bf 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -100,7 +100,7 @@ def tensor2vid(video: torch.Tensor, processor, output_type="np"): outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py index 0d11f8215a12..bc362d47467f 100644 --- a/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +++ b/src/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py @@ -83,7 +83,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 04d7eea22e42..d0b6ee2e037e 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -107,7 +107,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py index f53ebbafee2e..5cc4024a4acc 100644 --- a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +++ b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py @@ -57,7 +57,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 827f5ef5fe1b..6546d985580f 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -76,7 +76,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil']") return outputs diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index f2bc1e9fdfe3..531f90539aef 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -111,7 +111,7 @@ def tensor2vid(video: torch.Tensor, processor: "VaeImageProcessor", output_type: outputs = torch.stack(outputs) elif not output_type == "pil": - raise ValueError(f"{output_type} does not exist. Please choose one of ['np', 'pt', 'pil]") + raise ValueError(f"{output_type} does not exist. 
Please choose one of ['np', 'pt', 'pil']") return outputs
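
---

Usage sketch (illustrative only, not part of the diffs above): after this series, passing
output_type="latent" no longer returns early from __call__; each pipeline still builds its
regular output object (or a plain tuple when return_dict=False) with .frames holding the raw
latents, and tensor2vid keeps accepting only "np", "pt", and "pil". The checkpoint names below
are placeholders chosen for the example, not anything mandated by these patches.

    import torch
    from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter

    # Placeholder checkpoints; substitute whichever AnimateDiff-compatible weights you use.
    adapter = MotionAdapter.from_pretrained(
        "guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16
    )
    pipe = AnimateDiffPipeline.from_pretrained(
        "emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16
    )
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")

    prompt = "a rocket launching into space, masterpiece, high quality"

    # Raw latents: with these patches the pipeline skips decoding but still runs its
    # offload/return_dict handling and wraps the latents in AnimateDiffPipelineOutput.
    latents = pipe(
        prompt, num_frames=16, num_inference_steps=25, output_type="latent"
    ).frames

    # Decoded frames: tensor2vid handles "np", "pt", and "pil"; any other value raises
    # the ValueError shown in the hunks above.
    frames = pipe(
        prompt, num_frames=16, num_inference_steps=25, output_type="pil"
    ).frames[0]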