From bef201147f377e435c454999230fc68c2b719bf0 Mon Sep 17 00:00:00 2001 From: Mukunda Katta Date: Sat, 25 Apr 2026 11:12:25 -0700 Subject: [PATCH] SUPIR: add scaffold pipeline + design doc (refs #7219) Adds the public API surface for an upcoming SUPIR (Scaling Up to Excellence) image restoration pipeline so docs, typing, and tests can land incrementally while the heavy porting work proceeds. This is a SCAFFOLD ONLY: - new src/diffusers/pipelines/supir/ with SUPIRPipeline class wiring vae / text_encoder(_2) / tokenizer(_2) / unet / controlnet / scheduler components and the documented __call__ argument surface; - all heavy paths (encode_prompt, prepare_low_quality_latents, restoration_guided_step, __call__) raise NotImplementedError and point at SUPIR_DESIGN.md; - gated behind is_torch_available + is_transformers_available with a matching dummy class in dummy_torch_and_transformers_objects.py; - exposed from the top-level diffusers namespace and the pipelines subpackage following the existing _LazyModule pattern; - tests/pipelines/supir/test_supir.py pins the public __call__ and __init__ signatures and xfails the (not-yet-existing) end-to-end run. SUPIR_DESIGN.md at the repo root captures the porting plan from Fanghua-Yu/SUPIR: degradation-robust encoder, trimmed ControlNet adaptor with ZeroSFT connector, SDXL UNet generative prior, restoration-guided sampler, optional LLaVA caption guidance, and the weight-conversion path. No working pipeline yet; calling SUPIRPipeline raises NotImplementedError. 
--- SUPIR_DESIGN.md | 131 +++++++ src/diffusers/__init__.py | 2 + src/diffusers/pipelines/__init__.py | 2 + src/diffusers/pipelines/supir/__init__.py | 45 +++ .../pipelines/supir/pipeline_supir.py | 348 ++++++++++++++++++ .../dummy_torch_and_transformers_objects.py | 15 + tests/pipelines/supir/__init__.py | 0 tests/pipelines/supir/test_supir.py | 145 ++++++++ 8 files changed, 688 insertions(+) create mode 100644 SUPIR_DESIGN.md create mode 100644 src/diffusers/pipelines/supir/__init__.py create mode 100644 src/diffusers/pipelines/supir/pipeline_supir.py create mode 100644 tests/pipelines/supir/__init__.py create mode 100644 tests/pipelines/supir/test_supir.py diff --git a/SUPIR_DESIGN.md b/SUPIR_DESIGN.md new file mode 100644 index 000000000000..cc6e4ace279a --- /dev/null +++ b/SUPIR_DESIGN.md @@ -0,0 +1,131 @@ +# SUPIR Pipeline Design Document + +Status: scaffold landed, full implementation pending. Tracks +[huggingface/diffusers#7219](https://github.com/huggingface/diffusers/issues/7219). + +## What is SUPIR + +SUPIR (Scaling Up to Excellence) is an image restoration / super-resolution +system from Yu et al. that combines a large generative prior (SDXL), a +degradation-robust encoder, a trimmed ControlNet-style adaptor with a +ZeroSFT connector, and a restoration-guided sampler. Optional caption +guidance is sourced from an external multi-modal LLM (LLaVA in the +reference implementation). + +Reference materials: + +- Paper: https://arxiv.org/abs/2401.13627 +- Project page: https://supir.xpixel.group/ +- Reference repo: https://github.com/Fanghua-Yu/SUPIR +- Pretrained weights / model card: https://huggingface.co/camenduru/SUPIR + +## Why a scaffold first + +The full SUPIR pipeline is large. It introduces new modules +(`GLVControl`, `LightGLVUNet`, `ZeroSFT`), a fine-tuned VAE encoder, a +custom EDM-style sampler, and a non-trivial weight-conversion path from +the upstream `.ckpt` files into diffusers `from_pretrained` layout. 
+Landing the public API surface first lets us: + + - expose `SUPIRPipeline` in diffusers' import structure (and dummy + fallbacks), so docs / typing / downstream packaging can refer to it; + - add gating tests that lock the `__call__` argument shape; + - decouple module porting work from API churn; + - give contributors clear seams to fill in (the helper stubs in + `pipeline_supir.py` map 1:1 to the planned modules below). + + The scaffold raises `NotImplementedError` for any path that requires the + real model. It is not a working pipeline. + + ## Planned components and porting plan + + The reference repo lives in + [Fanghua-Yu/SUPIR/SUPIR/modules](https://github.com/Fanghua-Yu/SUPIR/tree/master/SUPIR/modules). + Mapping each piece into diffusers: + + ### Stage 1 - degradation-robust encoder + + - Source: fine-tuned SDXL VAE encoder. Reference repo loads it from + `SUPIR/SUPIR_v0_Q_F.ckpt` together with the standard SDXL VAE decoder. + - Diffusers home: reuse `AutoencoderKL`. Add an optional + `from_single_file` weight-conversion script under + `scripts/convert_supir_to_diffusers.py` that splits the SUPIR + checkpoint into the standard `vae` / `unet` / `controlnet` subfolders. + - Pipeline integration: to be implemented in + `SUPIRPipeline.prepare_low_quality_latents` (currently a stub). + + ### Stage 2 - trimmed ControlNet adaptor with ZeroSFT + + - Source: `GLVControl` and `ZeroSFT` in + [`SUPIR_v0.py`](https://github.com/Fanghua-Yu/SUPIR/blob/master/SUPIR/modules/SUPIR_v0.py). + - Two options: + 1. introduce a new `SUPIRControlNetModel` under + `src/diffusers/models/controlnets/supir.py` that subclasses + `ControlNetModel`, replaces the encoder block ViT layers with the + trimmed variant, and swaps the residual injection for ZeroSFT; + 2. keep `ControlNetModel` and add a `ZeroSFTBlock` mixin applied at + load time. Option 1 is preferred for clarity. + - Pipeline integration: registered under `controlnet=` in `__init__`. 
+ The scaffold currently types this as `ControlNetModel` to keep the + public surface stable; this will be widened to a union with the new + type once option 1 lands. + + ### Stage 3 - generative prior (SDXL UNet) + + - No model changes. Reuse `UNet2DConditionModel` and the dual-encoder + text path from `StableDiffusionXLPipeline`. The scaffold's + `encode_prompt` stub will be filled in by porting that method. + + ### Stage 4 - restoration-guided sampler + + - Source: SUPIR's modified EDM sampler (paper section 3.4) - LQ-anchored + guidance plus EDM stochasticity (`s_churn`, `s_noise`). + - Diffusers home: a new `restoration_guided_step` helper on the + pipeline (already stubbed). The base scheduler stays a + `KarrasDiffusionSchedulers` instance; SUPIR wraps each step rather + than replacing the scheduler. + + ### Stage 5 - optional caption guidance + + - The reference repo invokes LLaVA out-of-process to caption the LQ + image and feed the result into the SDXL prompt path. + - Diffusers home: keep this **out** of `SUPIRPipeline` itself. Provide + a small helper in `examples/community/supir_llava_caption.py` so + users opt in. The pipeline only consumes the resulting `prompt`. + + ## Weight conversion + + - `SUPIR/SUPIR_v0_Q_F.ckpt` and `SUPIR_v0_Q.ckpt` (Stage I and Stage II + checkpoints) need to be mapped onto the diffusers folder layout. + - Plan: add `scripts/convert_supir_to_diffusers.py` that: + 1. loads the upstream checkpoint with `torch.load` (the released + `.ckpt` files are pickled torch checkpoints, not safetensors); + 2. emits the SDXL UNet shards untouched (they match the public + SDXL release) and splits out the fine-tuned degradation-robust + VAE encoder weights described in Stage 1; + 3. converts the SUPIR-specific ControlNet+ZeroSFT weights into the + keys expected by `SUPIRControlNetModel`; + 4. writes a `model_index.json` referencing the SUPIR pipeline class. + - Out of scope for the scaffold PR. 
+ +## Testing strategy + +- Scaffold PR: a single `tests/pipelines/supir/test_supir.py` module + that pins the `__call__` argument shape (so future implementation + PRs cannot silently break the public API) and `xfail`s the actual + inference path until it lands. +- Implementation PR(s): add the standard pipeline test matrix + (`PipelineTesterMixin`), a slow integration test against the + upstream weights, and a tiled-inference test for >= 1024 px inputs. + +## Open questions + +- Does diffusers want the trimmed-ControlNet variant living under + `models/controlnets/` (alongside `ControlNetModel`) or namespaced to + `models/supir/`? +- Should the LLaVA caption path live in `examples/community/` (current + plan) or as a `from_pretrained` hook on the pipeline? +- The reference repo conditions on negative prompts plus an + EDM-derived "clean prompt" - we need to decide whether to expose the + clean prompt as a separate `__call__` argument or fold it into + `prompt_2`. + +These are tracked on the GitHub issue. 
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 2cbfd6e29305..00627d07177c 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -724,6 +724,7 @@ "StableUnCLIPImg2ImgPipeline", "StableUnCLIPPipeline", "StableVideoDiffusionPipeline", + "SUPIRPipeline", "TextToVideoSDPipeline", "TextToVideoZeroPipeline", "TextToVideoZeroSDXLPipeline", @@ -1507,6 +1508,7 @@ StableUnCLIPImg2ImgPipeline, StableUnCLIPPipeline, StableVideoDiffusionPipeline, + SUPIRPipeline, TextToVideoSDPipeline, TextToVideoZeroPipeline, TextToVideoZeroSDXLPipeline, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index ae1849a587e8..c0dfca903c44 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -395,6 +395,7 @@ "StableDiffusionXLPipeline", ] ) + _import_structure["supir"] = ["SUPIRPipeline"] _import_structure["t2i_adapter"] = [ "StableDiffusionAdapterPipeline", "StableDiffusionXLAdapterPipeline", @@ -852,6 +853,7 @@ StableDiffusionXLPipeline, ) from .stable_video_diffusion import StableVideoDiffusionPipeline + from .supir import SUPIRPipeline from .t2i_adapter import ( StableDiffusionAdapterPipeline, StableDiffusionXLAdapterPipeline, diff --git a/src/diffusers/pipelines/supir/__init__.py b/src/diffusers/pipelines/supir/__init__.py new file mode 100644 index 000000000000..a90ca85def10 --- /dev/null +++ b/src/diffusers/pipelines/supir/__init__.py @@ -0,0 +1,45 @@ +from typing import TYPE_CHECKING + +from ...utils import ( + DIFFUSERS_SLOW_IMPORT, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_torch_available, + is_transformers_available, +) + + +_dummy_objects = {} +_import_structure = {} + +try: + if not (is_transformers_available() and is_torch_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ...utils import dummy_torch_and_transformers_objects # noqa F403 + + 
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects)) +else: + _import_structure["pipeline_supir"] = ["SUPIRPipeline"] + + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + try: + if not (is_transformers_available() and is_torch_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ...utils.dummy_torch_and_transformers_objects import * # noqa F403 + else: + from .pipeline_supir import SUPIRPipeline +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + module_spec=__spec__, + ) + for name, value in _dummy_objects.items(): + setattr(sys.modules[__name__], name, value) diff --git a/src/diffusers/pipelines/supir/pipeline_supir.py b/src/diffusers/pipelines/supir/pipeline_supir.py new file mode 100644 index 000000000000..58985aaa040f --- /dev/null +++ b/src/diffusers/pipelines/supir/pipeline_supir.py @@ -0,0 +1,348 @@ +# Copyright 2026 Fanghua-Yu and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +SUPIR (Scaling Up to Excellence) image restoration / upscaler pipeline scaffold. + +This is a SCAFFOLD ONLY. The full pipeline implementation is intentionally +deferred. The class signature, constructor wiring, and `__call__` argument +surface are exposed so downstream packaging, tests, and documentation can +land incrementally. All heavy-lifting paths raise NotImplementedError. 
+ +See the repo-root SUPIR_DESIGN.md for the porting plan from +https://github.com/Fanghua-Yu/SUPIR and the planned diffusers component +layout (degradation-robust encoder, trimmed ControlNet adaptor with +ZeroSFT connector, SDXL UNet generative prior, restoration-guided +sampler, optional LLaVA caption guidance). +""" + +from dataclasses import dataclass +from typing import Any, Callable + +import numpy as np +import PIL.Image +import torch +from transformers import ( + CLIPTextModel, + CLIPTextModelWithProjection, + CLIPTokenizer, +) + +from ...image_processor import PipelineImageInput, VaeImageProcessor +from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel +from ...schedulers import KarrasDiffusionSchedulers +from ...utils import BaseOutput, logging +from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +EXAMPLE_DOC_STRING = """ + Examples: + ```py + >>> # NOTE: scaffold only. Calling SUPIRPipeline today raises NotImplementedError. + >>> # Once the implementation lands the canonical usage will look like: + >>> import torch + >>> from diffusers import SUPIRPipeline + >>> from diffusers.utils import load_image + >>> + >>> pipe = SUPIRPipeline.from_pretrained( + ... "Fanghua-Yu/SUPIR", + ... torch_dtype=torch.float16, + ... ).to("cuda") + >>> + >>> low_quality = load_image("https://example.com/lq.png") + >>> result = pipe( + ... prompt="a high quality photo, sharp details", + ... image=low_quality, + ... num_inference_steps=50, + ... upscale=2, + ... ).images[0] + ``` +""" + + +@dataclass +class SUPIRPipelineOutput(BaseOutput): + """ + Output class for SUPIR pipeline runs. + + Args: + images (`list[PIL.Image.Image]` or `np.ndarray`): + Restored / upscaled images, returned as a list of PIL images of + length `batch_size` or as a numpy array of shape + `(batch_size, height, width, num_channels)`. 
+ """ + + images: list[PIL.Image.Image] | np.ndarray + + +class SUPIRPipeline(DiffusionPipeline, StableDiffusionMixin): + """ + Pipeline for image restoration and super-resolution with SUPIR. + + SUPIR (Scaling Up to Excellence) restores and upscales degraded images by + combining an SDXL generative prior, a degradation-robust encoder, and a + trimmed ControlNet-style adaptor with ZeroSFT connectors. See the + upstream paper at https://arxiv.org/abs/2401.13627 and + https://github.com/Fanghua-Yu/SUPIR for the reference implementation. + + NOTE: this class is currently a SCAFFOLD. The constructor wires the + expected components and `__call__` exposes the documented argument + surface, but the actual restoration logic is not yet implemented and + will raise `NotImplementedError`. The intent is to land the public API + shape first so tests, docs, and downstream packaging can stabilise + while the porting work in `SUPIR_DESIGN.md` proceeds. + + This model inherits from [`DiffusionPipeline`]. Check the superclass + documentation for the generic methods implemented for all pipelines + (downloading, saving, running on a particular device, etc.). + + Args: + vae ([`AutoencoderKL`]): + VAE used to encode/decode images to/from latent space. SUPIR uses + an SDXL VAE plus a fine-tuned degradation-robust encoder; for the + scaffold only the standard VAE is wired here. + text_encoder ([`CLIPTextModel`]): + Frozen text encoder (SDXL primary). SUPIR can optionally consume + captions produced by an external LLaVA model. + text_encoder_2 ([`CLIPTextModelWithProjection`]): + Second frozen text encoder used by SDXL. + tokenizer ([`CLIPTokenizer`]): + Tokenizer for the primary text encoder. + tokenizer_2 ([`CLIPTokenizer`]): + Tokenizer for the secondary text encoder. + unet ([`UNet2DConditionModel`]): + SDXL UNet used as the generative prior. 
+ controlnet ([`ControlNetModel`]): + Trimmed ControlNet-style adaptor that injects degraded-image + features into the UNet via ZeroSFT connectors. The reference + SUPIR repo ships dedicated `GLVControl` / `LightGLVUNet` modules; + once ported they will replace the standard `ControlNetModel`. + scheduler ([`KarrasDiffusionSchedulers`]): + Sampler used during denoising. SUPIR layers a restoration-guided + sampling step on top of a Karras-style scheduler. + """ + + model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae" + _optional_components = ["controlnet"] + _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"] + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + text_encoder_2: CLIPTextModelWithProjection, + tokenizer: CLIPTokenizer, + tokenizer_2: CLIPTokenizer, + unet: UNet2DConditionModel, + controlnet: ControlNetModel, + scheduler: KarrasDiffusionSchedulers, + ): + super().__init__() + + self.register_modules( + vae=vae, + text_encoder=text_encoder, + text_encoder_2=text_encoder_2, + tokenizer=tokenizer, + tokenizer_2=tokenizer_2, + unet=unet, + controlnet=controlnet, + scheduler=scheduler, + ) + # VAE downscale factor mirrors the SDXL conventions; matches the + # reference repo where degraded inputs are encoded in pixel space + # then driven through the same latent topology as SDXL. + self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if vae is not None else 8 + self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + + # ------------------------------------------------------------------ + # Helper hooks intentionally left as TODOs. Once implemented they will + # mirror the structure used by `StableDiffusionXLControlNetPipeline`. 
+ # ------------------------------------------------------------------ + + def encode_prompt( + self, + prompt: str | list[str] | None = None, + prompt_2: str | list[str] | None = None, + device: torch.device | None = None, + num_images_per_prompt: int = 1, + do_classifier_free_guidance: bool = True, + negative_prompt: str | list[str] | None = None, + negative_prompt_2: str | list[str] | None = None, + ): + """Encode text prompts using the SDXL dual-encoder stack. + + TODO: port from `StableDiffusionXLPipeline.encode_prompt`. Kept as a + stub so the scaffold remains import-clean and so callers can locate + the eventual extension point (e.g. for LLaVA-derived captions). + """ + raise NotImplementedError( + "SUPIRPipeline.encode_prompt is part of the scaffold and is not implemented yet. " + "See SUPIR_DESIGN.md for the porting plan." + ) + + def prepare_low_quality_latents( + self, + image: PipelineImageInput, + height: int, + width: int, + dtype: torch.dtype, + device: torch.device, + generator: torch.Generator | list[torch.Generator] | None = None, + ) -> torch.Tensor: + """Encode the degraded input image through the degradation-robust + encoder (a fine-tuned SDXL VAE encoder in the reference repo) into + the latent space used by the UNet. + + TODO: implement degradation-aware encoding and tiled inference for + large inputs. Tracked in SUPIR_DESIGN.md > Stage 1. + """ + raise NotImplementedError( + "SUPIRPipeline.prepare_low_quality_latents is part of the scaffold " + "and is not implemented yet." + ) + + def restoration_guided_step( + self, + latents: torch.Tensor, + timestep: torch.Tensor, + lq_latents: torch.Tensor, + guidance_scale: float, + s_churn: float = 0.0, + s_noise: float = 1.003, + ) -> torch.Tensor: + """One step of SUPIR's restoration-guided EDM-style sampler. + + TODO: implement the modified denoising step (LQ-anchored guidance, + EDM noise injection) described in the paper, section 3.4. 
+ """ + raise NotImplementedError( + "SUPIRPipeline.restoration_guided_step is part of the scaffold and " + "is not implemented yet." + ) + + @torch.no_grad() + def __call__( + self, + prompt: str | list[str] | None = None, + prompt_2: str | list[str] | None = None, + image: PipelineImageInput = None, + height: int | None = None, + width: int | None = None, + upscale: int = 1, + num_inference_steps: int = 50, + timesteps: list[int] | None = None, + denoising_end: float | None = None, + guidance_scale: float = 7.5, + negative_prompt: str | list[str] | None = None, + negative_prompt_2: str | list[str] | None = None, + num_images_per_prompt: int | None = 1, + eta: float = 0.0, + generator: torch.Generator | list[torch.Generator] | None = None, + latents: torch.Tensor | None = None, + prompt_embeds: torch.Tensor | None = None, + negative_prompt_embeds: torch.Tensor | None = None, + pooled_prompt_embeds: torch.Tensor | None = None, + negative_pooled_prompt_embeds: torch.Tensor | None = None, + output_type: str | None = "pil", + return_dict: bool = True, + cross_attention_kwargs: dict[str, Any] | None = None, + controlnet_conditioning_scale: float = 1.0, + s_churn: float = 0.0, + s_noise: float = 1.003, + callback_on_step_end: Callable[[Any, int, int, dict[str, Any]], dict[str, Any]] | None = None, + callback_on_step_end_tensor_inputs: list[str] | None = None, + ) -> SUPIRPipelineOutput | tuple[list[PIL.Image.Image] | np.ndarray]: + """Run SUPIR restoration / upscaling. + + Args: + prompt (`str` or `list[str]`, optional): + Text prompt describing the desired restoration. May be + generated automatically from an LLaVA caption in a future + revision; today it is the only text-conditioning surface. + prompt_2 (`str` or `list[str]`, optional): + Prompt for the second SDXL text encoder. Defaults to `prompt`. + image (`PipelineImageInput`): + The low-quality input image (or batch of images) to restore. + Required. + height (`int`, optional): + Output height in pixels. 
Defaults to `image.height * upscale`. + width (`int`, optional): + Output width in pixels. Defaults to `image.width * upscale`. + upscale (`int`, defaults to `1`): + Convenience scale factor used when `height` and `width` are + not provided. SUPIR commonly runs at 2x or 4x. + num_inference_steps (`int`, defaults to `50`): + Number of denoising steps. + timesteps (`list[int]`, optional): + Custom timesteps to use; bypasses scheduler defaults. + denoising_end (`float`, optional): + Fraction of the denoising schedule to complete; useful for + pipelining a refiner. + guidance_scale (`float`, defaults to `7.5`): + Classifier-free guidance scale. + negative_prompt / negative_prompt_2 (`str` or `list[str]`, optional): + Negative text prompts for CFG. + num_images_per_prompt (`int`, defaults to `1`): + Number of restored images per prompt. + eta (`float`, defaults to `0.0`): + DDIM eta parameter. + generator (`torch.Generator` or list, optional): + Generator(s) for deterministic sampling. + latents (`torch.Tensor`, optional): + Pre-computed initial latents. + prompt_embeds / negative_prompt_embeds / + pooled_prompt_embeds / negative_pooled_prompt_embeds + (`torch.Tensor`, optional): + Pre-computed embeddings; skip text encoding if provided. + output_type (`str`, defaults to `"pil"`): + Output format: `"pil"`, `"np"`, or `"latent"`. + return_dict (`bool`, defaults to `True`): + Whether to return a [`SUPIRPipelineOutput`] or a plain tuple. + cross_attention_kwargs (`dict`, optional): + Forwarded to attention processors (e.g. for LoRA scale). + controlnet_conditioning_scale (`float`, defaults to `1.0`): + Scale applied to the SUPIR adaptor outputs. + s_churn (`float`, defaults to `0.0`): + EDM stochasticity parameter for the SUPIR sampler. + s_noise (`float`, defaults to `1.003`): + EDM noise scaling parameter for the SUPIR sampler. + callback_on_step_end / callback_on_step_end_tensor_inputs: + Standard diffusers callback hooks. 
+ + Returns: + [`SUPIRPipelineOutput`] or `tuple`. When `return_dict=True`, + returns `SUPIRPipelineOutput` containing the restored images. + + Raises: + NotImplementedError: this scaffold does not yet implement the + restoration loop. See SUPIR_DESIGN.md for status. + """ + # Argument validation that doesn't require the model is intentionally + # kept lightweight here so that `--dry-run` style smoke tests can + # still exercise the call surface; the heavy lifting is gated. + if image is None: + raise ValueError("`image` is required: SUPIR is a restoration pipeline.") + if upscale < 1: + raise ValueError(f"`upscale` must be >= 1, got {upscale}.") + if num_inference_steps < 1: + raise ValueError(f"`num_inference_steps` must be >= 1, got {num_inference_steps}.") + + raise NotImplementedError( + "SUPIRPipeline.__call__ is a scaffold and the restoration loop is not yet " + "implemented. Track progress in SUPIR_DESIGN.md and the linked GitHub issue " + "(huggingface/diffusers#7219)." + ) diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index c95c56789e37..ae06bdf011b9 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -4112,6 +4112,21 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch", "transformers"]) +class SUPIRPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + class TextToVideoSDPipeline(metaclass=DummyObject): _backends = ["torch", "transformers"] diff --git a/tests/pipelines/supir/__init__.py 
b/tests/pipelines/supir/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/pipelines/supir/test_supir.py b/tests/pipelines/supir/test_supir.py new file mode 100644 index 000000000000..0138c94fbf09 --- /dev/null +++ b/tests/pipelines/supir/test_supir.py @@ -0,0 +1,145 @@ +# Copyright 2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +API-surface tests for the SUPIR scaffold. + +The full SUPIR pipeline is not yet implemented (see SUPIR_DESIGN.md). +These tests intentionally avoid running the model. They exist to: + +1. lock the public `__call__` argument list so future implementation + PRs cannot drift the documented API silently; +2. confirm `SUPIRPipeline` is importable from the top-level + `diffusers` namespace and from the pipeline subpackage; +3. confirm the scaffold raises `NotImplementedError` on the heavy + paths (so accidentally calling it is loud, not silent). + +Once the pipeline implementation lands, the `xfail` markers on the +inference tests should be flipped to real assertions. 
+""" + +import inspect + +import pytest + + +def test_supir_pipeline_is_importable_from_top_level(): + from diffusers import SUPIRPipeline + + assert SUPIRPipeline is not None + + +def test_supir_pipeline_is_importable_from_subpackage(): + from diffusers.pipelines.supir import SUPIRPipeline + + assert SUPIRPipeline is not None + + +def test_supir_pipeline_call_signature_locks_public_api(): + """Pin the documented `__call__` parameter list. + + If you intentionally change the SUPIR public API, update this test in + the same PR so reviewers see the diff. + """ + from diffusers import SUPIRPipeline + + sig = inspect.signature(SUPIRPipeline.__call__) + params = list(sig.parameters.keys()) + + expected = [ + "self", + "prompt", + "prompt_2", + "image", + "height", + "width", + "upscale", + "num_inference_steps", + "timesteps", + "denoising_end", + "guidance_scale", + "negative_prompt", + "negative_prompt_2", + "num_images_per_prompt", + "eta", + "generator", + "latents", + "prompt_embeds", + "negative_prompt_embeds", + "pooled_prompt_embeds", + "negative_pooled_prompt_embeds", + "output_type", + "return_dict", + "cross_attention_kwargs", + "controlnet_conditioning_scale", + "s_churn", + "s_noise", + "callback_on_step_end", + "callback_on_step_end_tensor_inputs", + ] + + assert params == expected, ( + f"SUPIRPipeline.__call__ parameter list drifted.\n" + f" expected: {expected}\n got: {params}" + ) + + +def test_supir_pipeline_constructor_components(): + """The constructor should accept the components SDXL-derived pipelines expect.""" + from diffusers import SUPIRPipeline + + sig = inspect.signature(SUPIRPipeline.__init__) + params = list(sig.parameters.keys()) + + expected = [ + "self", + "vae", + "text_encoder", + "text_encoder_2", + "tokenizer", + "tokenizer_2", + "unet", + "controlnet", + "scheduler", + ] + assert params == expected, ( + f"SUPIRPipeline.__init__ parameter list drifted.\n" + f" expected: {expected}\n got: {params}" + ) + + +@pytest.mark.xfail( + 
reason="SUPIR pipeline is a scaffold; restoration loop is not yet implemented. " + "See SUPIR_DESIGN.md and huggingface/diffusers#7219.", + strict=True, +) +def test_supir_pipeline_runs_end_to_end(): + """Placeholder for the eventual end-to-end smoke test. + + Marked `xfail(strict=True)` so that whoever implements the pipeline + has to remove this marker (or change the assertion) - the test + cannot silently start passing without intent. + """ + from diffusers import SUPIRPipeline + + # Intentionally not constructing a real instance; the real test will + # build a tiny dummy pipeline (matching `PipelineTesterMixin` style) + # and assert that `__call__` returns an `SUPIRPipelineOutput` with + # the requested shape. + raise NotImplementedError( + "SUPIRPipeline is currently a scaffold; replace this with a real " + "smoke test once the implementation lands." + ) + # Unreachable, but keeps SUPIRPipeline marked as 'used' for linters + # that don't understand xfail strict semantics. + assert SUPIRPipeline is not None