Skip to content

Commit ad8068e

Browse files
arkajyotimitra and DN6 authored
changed channel parameters for UNET and VAE. Changed configs parameters of CLIPText (#5370)
* changed channel parameters for UNET and VAE. Decreased hidden layers size with increased attention heads and intermediate size
* changed the assertion check range
* clean up

---------

Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
1 parent b4cbbd5 commit ad8068e

File tree

1 file changed

+18
-15
lines changed

1 file changed

+18
-15
lines changed

tests/pipelines/stable_diffusion/test_stable_diffusion.py

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
8282

8383
assert image.shape == (1, 512, 512, 3)
8484
expected_slice = np.array([0.38019, 0.28647, 0.27321, 0.40377, 0.38290, 0.35446, 0.39218, 0.38165, 0.42239])
85+
8586
assert np.abs(image_slice - expected_slice).max() < 5e-3
8687
except Exception:
8788
error = f"{traceback.format_exc()}"
@@ -103,14 +104,15 @@ class StableDiffusionPipelineFastTests(
103104
def get_dummy_components(self):
104105
torch.manual_seed(0)
105106
unet = UNet2DConditionModel(
106-
block_out_channels=(32, 64),
107-
layers_per_block=2,
107+
block_out_channels=(4, 8),
108+
layers_per_block=1,
108109
sample_size=32,
109110
in_channels=4,
110111
out_channels=4,
111112
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
112113
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
113114
cross_attention_dim=32,
115+
norm_num_groups=2,
114116
)
115117
scheduler = DDIMScheduler(
116118
beta_start=0.00085,
@@ -121,22 +123,23 @@ def get_dummy_components(self):
121123
)
122124
torch.manual_seed(0)
123125
vae = AutoencoderKL(
124-
block_out_channels=[32, 64],
126+
block_out_channels=[4, 8],
125127
in_channels=3,
126128
out_channels=3,
127129
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
128130
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
129131
latent_channels=4,
132+
norm_num_groups=2,
130133
)
131134
torch.manual_seed(0)
132135
text_encoder_config = CLIPTextConfig(
133136
bos_token_id=0,
134137
eos_token_id=2,
135138
hidden_size=32,
136-
intermediate_size=37,
139+
intermediate_size=64,
137140
layer_norm_eps=1e-05,
138-
num_attention_heads=4,
139-
num_hidden_layers=5,
141+
num_attention_heads=8,
142+
num_hidden_layers=3,
140143
pad_token_id=1,
141144
vocab_size=1000,
142145
)
@@ -183,7 +186,7 @@ def test_stable_diffusion_ddim(self):
183186
image_slice = image[0, -3:, -3:, -1]
184187

185188
assert image.shape == (1, 64, 64, 3)
186-
expected_slice = np.array([0.5756, 0.6118, 0.5005, 0.5041, 0.5471, 0.4726, 0.4976, 0.4865, 0.4864])
189+
expected_slice = np.array([0.3203, 0.4555, 0.4711, 0.3505, 0.3973, 0.4650, 0.5137, 0.3392, 0.4045])
187190

188191
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
189192

@@ -317,7 +320,7 @@ def test_stable_diffusion_ddim_factor_8(self):
317320
image_slice = image[0, -3:, -3:, -1]
318321

319322
assert image.shape == (1, 136, 136, 3)
320-
expected_slice = np.array([0.5524, 0.5626, 0.6069, 0.4727, 0.386, 0.3995, 0.4613, 0.4328, 0.4269])
323+
expected_slice = np.array([0.4346, 0.5621, 0.5016, 0.3926, 0.4533, 0.4134, 0.5625, 0.5632, 0.5265])
321324

322325
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
323326

@@ -335,7 +338,7 @@ def test_stable_diffusion_pndm(self):
335338
image_slice = image[0, -3:, -3:, -1]
336339

337340
assert image.shape == (1, 64, 64, 3)
338-
expected_slice = np.array([0.5122, 0.5712, 0.4825, 0.5053, 0.5646, 0.4769, 0.5179, 0.4894, 0.4994])
341+
expected_slice = np.array([0.3411, 0.5032, 0.4704, 0.3135, 0.4323, 0.4740, 0.5150, 0.3498, 0.4022])
339342

340343
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
341344

@@ -375,7 +378,7 @@ def test_stable_diffusion_k_lms(self):
375378
image_slice = image[0, -3:, -3:, -1]
376379

377380
assert image.shape == (1, 64, 64, 3)
378-
expected_slice = np.array([0.4873, 0.5443, 0.4845, 0.5004, 0.5549, 0.4850, 0.5191, 0.4941, 0.5065])
381+
expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
379382

380383
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
381384

@@ -394,7 +397,7 @@ def test_stable_diffusion_k_euler_ancestral(self):
394397
image_slice = image[0, -3:, -3:, -1]
395398

396399
assert image.shape == (1, 64, 64, 3)
397-
expected_slice = np.array([0.4872, 0.5444, 0.4846, 0.5003, 0.5549, 0.4850, 0.5189, 0.4941, 0.5067])
400+
expected_slice = np.array([0.3151, 0.5243, 0.4794, 0.3217, 0.4468, 0.4728, 0.5152, 0.3598, 0.3954])
398401

399402
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
400403

@@ -413,7 +416,7 @@ def test_stable_diffusion_k_euler(self):
413416
image_slice = image[0, -3:, -3:, -1]
414417

415418
assert image.shape == (1, 64, 64, 3)
416-
expected_slice = np.array([0.4873, 0.5443, 0.4845, 0.5004, 0.5549, 0.4850, 0.5191, 0.4941, 0.5065])
419+
expected_slice = np.array([0.3149, 0.5246, 0.4796, 0.3218, 0.4469, 0.4729, 0.5151, 0.3597, 0.3954])
417420

418421
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
419422

@@ -485,7 +488,7 @@ def test_stable_diffusion_negative_prompt(self):
485488
image_slice = image[0, -3:, -3:, -1]
486489

487490
assert image.shape == (1, 64, 64, 3)
488-
expected_slice = np.array([0.5114, 0.5706, 0.4772, 0.5028, 0.5637, 0.4732, 0.5169, 0.4881, 0.4977])
491+
expected_slice = np.array([0.3458, 0.5120, 0.4800, 0.3116, 0.4348, 0.4802, 0.5237, 0.3467, 0.3991])
489492

490493
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
491494

@@ -638,7 +641,7 @@ def test_stable_diffusion_1_1_pndm(self):
638641
image_slice = image[0, -3:, -3:, -1].flatten()
639642

640643
assert image.shape == (1, 512, 512, 3)
641-
expected_slice = np.array([0.43625, 0.43554, 0.36670, 0.40660, 0.39703, 0.38658, 0.43936, 0.43557, 0.40592])
644+
expected_slice = np.array([0.4363, 0.4355, 0.3667, 0.4066, 0.3970, 0.3866, 0.4394, 0.4356, 0.4059])
642645
assert np.abs(image_slice - expected_slice).max() < 3e-3
643646

644647
def test_stable_diffusion_v1_4_with_freeu(self):
@@ -665,7 +668,7 @@ def test_stable_diffusion_1_4_pndm(self):
665668
image_slice = image[0, -3:, -3:, -1].flatten()
666669

667670
assert image.shape == (1, 512, 512, 3)
668-
expected_slice = np.array([0.57400, 0.47841, 0.31625, 0.63583, 0.58306, 0.55056, 0.50825, 0.56306, 0.55748])
671+
expected_slice = np.array([0.5740, 0.4784, 0.3162, 0.6358, 0.5831, 0.5506, 0.5082, 0.5631, 0.5575])
669672
assert np.abs(image_slice - expected_slice).max() < 3e-3
670673

671674
def test_stable_diffusion_ddim(self):

0 commit comments

Comments
 (0)