Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions tests/models/unets/test_models_unet_stable_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def test_stable_cascade_unet_prior_single_file_components(self):
gc.collect()
torch.cuda.empty_cache()

unet = StableCascadeUNet.from_pretrained(
"stabilityai/stable-cascade-prior", subfolder="prior", revision="refs/pr/2", variant="bf16"
)
unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder="prior", variant="bf16")
unet_config = unet.config
del unet
gc.collect()
Expand All @@ -74,9 +72,7 @@ def test_stable_cascade_unet_decoder_single_file_components(self):
gc.collect()
torch.cuda.empty_cache()

unet = StableCascadeUNet.from_pretrained(
"stabilityai/stable-cascade", subfolder="decoder", revision="refs/pr/44", variant="bf16"
)
unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder="decoder", variant="bf16")
unet_config = unet.config
del unet
gc.collect()
Expand Down
28 changes: 13 additions & 15 deletions tests/pipelines/stable_cascade/test_stable_cascade_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import DDPMWuerstchenScheduler, StableCascadeDecoderPipeline
from diffusers.image_processor import VaeImageProcessor
from diffusers.models import StableCascadeUNet
from diffusers.pipelines.wuerstchen import PaellaVQModel
from diffusers.utils.testing_utils import (
enable_full_determinism,
load_image,
load_numpy,
load_pt,
numpy_cosine_similarity_distance,
require_torch_gpu,
skip_mps,
slow,
Expand Down Expand Up @@ -258,7 +258,7 @@ def tearDown(self):

def test_stable_cascade_decoder(self):
pipe = StableCascadeDecoderPipeline.from_pretrained(
"diffusers/StableCascade-decoder", torch_dtype=torch.bfloat16
"stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)
Expand All @@ -271,18 +271,16 @@ def test_stable_cascade_decoder(self):
)

image = pipe(
prompt=prompt, image_embeddings=image_embedding, num_inference_steps=10, generator=generator
prompt=prompt,
image_embeddings=image_embedding,
output_type="np",
num_inference_steps=2,
generator=generator,
).images[0]

assert image.size == (1024, 1024)

expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/t2i.png"
assert image.shape == (1024, 1024, 3)
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy"
)

image_processor = VaeImageProcessor()

image_np = image_processor.pil_to_numpy(image)
expected_image_np = image_processor.pil_to_numpy(expected_image)

self.assertTrue(np.allclose(image_np, expected_image_np, atol=53e-2))
max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
assert max_diff < 1e-4
22 changes: 10 additions & 12 deletions tests/pipelines/stable_cascade/test_stable_cascade_prior.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
from diffusers.utils.import_utils import is_peft_available
from diffusers.utils.testing_utils import (
enable_full_determinism,
load_pt,
load_numpy,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
skip_mps,
Expand Down Expand Up @@ -319,25 +320,22 @@ def tearDown(self):
torch.cuda.empty_cache()

def test_stable_cascade_prior(self):
pipe = StableCascadePriorPipeline.from_pretrained("diffusers/StableCascade-prior", torch_dtype=torch.bfloat16)
pipe = StableCascadePriorPipeline.from_pretrained(
"stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
pipe.set_progress_bar_config(disable=None)

prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."

generator = torch.Generator(device="cpu").manual_seed(0)

output = pipe(prompt, num_inference_steps=10, generator=generator)
output = pipe(prompt, num_inference_steps=2, output_type="np", generator=generator)
image_embedding = output.image_embeddings

expected_image_embedding = load_pt(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt"
expected_image_embedding = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_prior_image_embeddings.npy"
)

assert image_embedding.shape == (1, 16, 24, 24)

self.assertTrue(
np.allclose(
image_embedding.cpu().float().numpy(), expected_image_embedding.cpu().float().numpy(), atol=5e-2
)
)
max_diff = numpy_cosine_similarity_distance(image_embedding.flatten(), expected_image_embedding.flatten())
assert max_diff < 1e-4