The goal of this notebook is to compute the expected image slices against which the unit test results will be checked.

In [1]:
# Refresh the apt package index, then install the libgl1-mesa-glx system package (quiet, non-interactive).
# NOTE(review): presumably a shared-library dependency of one of the Python packages imported below - confirm which one needs it.
! apt-get update -qq
! apt-get install -y -qq libgl1-mesa-glx

debconf: delaying package configuration, since apt-utils is not installed
Selecting previously unselected package libelf1:amd64.
(Reading database ... 33355 files and directories currently installed.)
Preparing to unpack .../00-libelf1_0.176-1.1build1_amd64.deb ...
Unpacking libelf1:amd64 (0.176-1.1build1) ...
Selecting previously unselected package libdrm-common.
Preparing to unpack .../01-libdrm-common_2.4.107-8ubuntu1~20.04.2_all.deb ...
Unpacking libdrm-common (2.4.107-8ubuntu1~20.04.2) ...
Selecting previously unselected package libdrm2:amd64.
Preparing to unpack .../02-libdrm2_2.4.107-8ubuntu1~20.04.2_amd64.deb ...
Unpacking libdrm2:amd64 (2.4.107-8ubuntu1~20.04.2) ...
Selecting previously unselected package libdrm-amdgpu1:amd64.
Preparing to unpack .../03-libdrm-amdgpu1_2.4.107-8ubuntu1~20.04.2_amd64.deb ...
Unpacking libdrm-amdgpu1:amd64 (2.4.107-8ubuntu1~20.04.2) ...
Selecting previously unselected package libpciaccess0:amd64.
Preparing to unpack .../04-libpciaccess0_0.16-0ubu

In [2]:
# NOTE(review): this cell fails in this environment (`pipenv: command not found`, see the output below).
# `pipenv shell` is meant to spawn an interactive subshell, so it presumably would not change the
# kernel's environment even when pipenv is installed - consider removing this cell.
!pipenv shell

/bin/bash: pipenv: command not found


In [3]:
import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import Prompt2PromptPipeline, DDIMScheduler, UNet2DConditionModel, AutoencoderKL
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device

# Wide lines and 3-decimal precision so that each 9-value image slice prints on one compact row.
np.set_printoptions(linewidth=140, precision=3)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Turn on the diffusers testing helper for deterministic runs before any model is built,
# so that the slices computed below can be reproduced.
# NOTE(review): the exact effects are defined in diffusers.utils.testing_utils - confirm there.
enable_full_determinism()

In [5]:
# Step settings shared by every entry of the test matrix (merged into each case's edit_kwargs).
replace_steps = dict(cross_replace_steps=0.4, self_replace_steps=0.4)

In [6]:
# Each entry is (prompts, edit_type, edit_kwargs, expected_slice), unpacked positionally into
# test_fast_inference. The expected slices are the values printed by the final cells of this
# notebook (3-decimal precision); they replace the former copy-pasted placeholder that was
# identical for every case.
# NOTE(review): re-generate these values if the pipeline or the dummy components change.
test_matrix = [
    (
        ["A turtle playing with a ball", "A monkey playing with a ball"],
        "replace",
        {**replace_steps},
        [0.582, 0.610, 0.503, 0.507, 0.542, 0.471, 0.498, 0.490, 0.487],
    ),
    (
        ["A turtle playing with a ball", "A monkey playing with a ball"],
        "replace",
        {**replace_steps, "local_blend_words": ["turtle", "monkey"]},
        [0.582, 0.610, 0.503, 0.507, 0.542, 0.471, 0.498, 0.490, 0.487],
    ),
    (
        ["A turtle", "A turtle in a forest"],
        "refine",
        {**replace_steps},
        [0.571, 0.605, 0.499, 0.502, 0.541, 0.468, 0.500, 0.484, 0.483],
    ),
    (
        ["A turtle", "A turtle in a forest"],
        "refine",
        {**replace_steps, "local_blend_words": ["in", "a", "forest"]},
        [0.571, 0.605, 0.499, 0.502, 0.541, 0.468, 0.500, 0.484, 0.483],
    ),
    (
        ["A smiling turtle"] * 2,
        "reweight",
        {**replace_steps, "equalizer_words": ["smiling"], "equalizer_strengths": [5]},
        [0.573, 0.607, 0.502, 0.504, 0.540, 0.469, 0.500, 0.486, 0.483],
    ),
]

In [7]:
def get_dummy_components():
    """Build the small pipeline components used by the fast inference tests.

    Constructs a UNet2DConditionModel, a DDIMScheduler, an AutoencoderKL and a
    CLIPTextModel from the tiny configurations below, and loads the
    "hf-internal-testing/tiny-random-clip" tokenizer.

    The torch RNG is re-seeded with 0 before the UNet, the VAE and the text
    encoder are created, so the order of the statements in this function must
    be kept as is (presumably the randomly initialised weights, and hence the
    generated slices, depend on it).

    Returns:
        dict: keyword arguments for the pipeline constructor with the keys
        "unet", "scheduler", "vae", "text_encoder", "tokenizer",
        "safety_checker" and "feature_extractor" (the last two are None).
    """
    # Seed before building the UNet.
    torch.manual_seed(0)
    unet = UNet2DConditionModel(
        block_out_channels=(32, 64),
        layers_per_block=2,
        sample_size=32,
        in_channels=4,
        out_channels=4,
        down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
        up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
        cross_attention_dim=32,
        # SD2-specific config below
        attention_head_dim=(2, 4),
        use_linear_projection=True,
    )
    scheduler = DDIMScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
    )
    # Re-seed before building the VAE.
    torch.manual_seed(0)
    vae = AutoencoderKL(
        block_out_channels=[32, 64],
        in_channels=3,
        out_channels=3,
        down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
        up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
        latent_channels=4,
        sample_size=128,
    )
    # Re-seed before building the text encoder.
    torch.manual_seed(0)
    text_encoder_config = CLIPTextConfig(
        bos_token_id=0,
        eos_token_id=2,
        hidden_size=32,
        intermediate_size=37,
        layer_norm_eps=1e-05,
        num_attention_heads=4,
        num_hidden_layers=5,
        pad_token_id=1,
        vocab_size=1000,
        # SD2-specific config below
        hidden_act="gelu",
        projection_dim=512,
    )
    text_encoder = CLIPTextModel(text_encoder_config)
    tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

    # The safety checker and feature extractor are deliberately left out (None).
    components = {
        "unet": unet,
        "scheduler": scheduler,
        "vae": vae,
        "text_encoder": text_encoder,
        "tokenizer": tokenizer,
        "safety_checker": None,
        "feature_extractor": None,
    }
    return components

In [8]:
from functools import partial
def expand(test, matrix):
    """Bind every parameter tuple of ``matrix`` to ``test``.

    Args:
        test: callable to parametrise.
        matrix: iterable of argument tuples; each tuple is unpacked into the
            leading positional arguments of ``test``.

    Returns:
        list: one ``functools.partial`` per tuple, in the order of ``matrix``.
    """
    bound_tests = []
    for params in matrix:
        bound_tests.append(partial(test, *params))
    return bound_tests

In [9]:
def test_fast_inference(prompts, edit_type, edit_kwargs, expected_slice, seed=0, n_steps=2):
    """Run the dummy Prompt2Prompt pipeline twice and return the resulting image slices.

    The pipeline is called once with its default (dict-like) output and once with
    ``return_dict=False``; both calls use a fresh CPU generator seeded with ``seed``.

    Args:
        prompts: list of prompts passed to the pipeline.
        edit_type: edit mode passed to the pipeline (e.g. "replace", "refine", "reweight").
        edit_kwargs: edit options; a copy is handed to each pipeline call.
        expected_slice: accepted so the signature matches the test matrix entries, but
            not compared here - this notebook only computes the values.
        seed: seed for the torch generator of each call.
        n_steps: number of inference steps.

    Returns:
        tuple: ``(image_slice, image_from_tuple_slice)``, the flattened ``[0, -3:, -3:, -1]``
        slices of the images from the two calls.

    Raises:
        AssertionError: if the first call's images do not have shape (2, 64, 64, 3).
    """
    print(f"Starting next test 🎢 (prompts={prompts}, edit_type={edit_type}, edit_kwargs={edit_kwargs})")

    device = "cpu"
    pipe = Prompt2PromptPipeline(**get_dummy_components())
    pipe.to(device)
    pipe.set_progress_bar_config(disable=None)

    def run_pipeline(**extra_kwargs):
        # Fresh generator per call so both runs start from the same random state.
        generator = torch.Generator(device=device).manual_seed(seed)
        # edit_kwargs is copied per call, presumably because the pipeline may modify it - TODO confirm.
        return pipe(
            prompts,
            height=64,
            width=64,
            num_inference_steps=n_steps,
            generator=generator,
            edit_type=edit_type,
            edit_kwargs=edit_kwargs.copy(),
            output_type="numpy",
            **extra_kwargs,
        )

    image = run_pipeline().images
    image_from_tuple = run_pipeline(return_dict=False)[0]

    # Last channel of the bottom-right 3x3 patch of the first image.
    image_slice = image[0, -3:, -3:, -1]
    image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

    assert image.shape == (2, 64, 64, 3)

    return image_slice.flatten(), image_from_tuple_slice.flatten()

In [10]:
# One pre-bound callable per test_matrix entry; calling it runs that case with default seed and step count.
unit_tests = expand(test_fast_inference, test_matrix)

In [11]:
# Try the first case on its own; the cell displays the pair of flattened slices it returns.
unit_tests[0]()

Starting next test 🎢 (prompts=['A turtle playing with a ball', 'A monkey playing with a ball'], edit_type=replace, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4})


  "_class_name": "DDIMScheduler",
  "_diffusers_version": "0.20.0.dev0",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "clip_sample": false,
  "clip_sample_range": 1.0,
  "dynamic_thresholding_ratio": 0.995,
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "rescale_betas_zero_snr": false,
  "sample_max_value": 1.0,
  "set_alpha_to_one": false,
  "steps_offset": 0,
  "thresholding": false,
  "timestep_spacing": "leading",
  "trained_betas": null
}
 is outdated. `steps_offset` should be set to 1 instead of 0. Please make sure to update the config accordingly as leaving `steps_offset` might led to incorrect results in future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json` file
  deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False)
You have disabled the safety checker for <class 'diffuse

(array([0.582, 0.61 , 0.503, 0.507, 0.542, 0.471, 0.498, 0.49 , 0.487], dtype=float32),
 array([0.582, 0.61 , 0.503, 0.507, 0.542, 0.471, 0.498, 0.49 , 0.487], dtype=float32))

Now let's run it for all test cases

In [12]:
# Collect the pair of slices returned by every test case, in matrix order.
res = []
for run_case in unit_tests:
    res.append(run_case())

Starting next test 🎢 (prompts=['A turtle playing with a ball', 'A monkey playing with a ball'], edit_type=replace, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4})


You have disabled the safety checker for <class 'diffusers.pipelines.prompt2prompt.pipeline_prompt2prompt.Prompt2PromptPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Starting next test 🎢 (prompts=['A turtle playing with a ball', 'A monkey playing with a ball'], edit_type=replace, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4, 'local_blend_words': ['turtle', 'monkey']})


You have disabled the safety checker for <class 'diffusers.pipelines.prompt2prompt.pipeline_prompt2prompt.Prompt2PromptPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Starting next test 🎢 (prompts=['A turtle', 'A turtle in a forest'], edit_type=refine, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4})


You have disabled the safety checker for <class 'diffusers.pipelines.prompt2prompt.pipeline_prompt2prompt.Prompt2PromptPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Starting next test 🎢 (prompts=['A turtle', 'A turtle in a forest'], edit_type=refine, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4, 'local_blend_words': ['in', 'a', 'forest']})


You have disabled the safety checker for <class 'diffusers.pipelines.prompt2prompt.pipeline_prompt2prompt.Prompt2PromptPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Starting next test 🎢 (prompts=['A smiling turtle', 'A smiling turtle'], edit_type=reweight, edit_kwargs={'cross_replace_steps': 0.4, 'self_replace_steps': 0.4, 'equalizer_words': ['smiling'], 'equalizer_strengths': [5]})


You have disabled the safety checker for <class 'diffusers.pipelines.prompt2prompt.pipeline_prompt2prompt.Prompt2PromptPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [14]:
# Print both slices of each case side by side to check by eye that the two output paths agree.
for dict_slice, tuple_slice in res:
    print(dict_slice, '\t',tuple_slice)

[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487] 	 [0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487] 	 [0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483] 	 [0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483] 	 [0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.573 0.607 0.502 0.504 0.54  0.469 0.5   0.486 0.483] 	 [0.573 0.607 0.502 0.504 0.54  0.469 0.5   0.486 0.483]


In [16]:
# Print the first slice of every case, then a blank line, then the second slice of every case.
for position in range(2):
    if position == 1:
        print()
    for slices in res:
        print(slices[position])

[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.573 0.607 0.502 0.504 0.54  0.469 0.5   0.486 0.483]

[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.582 0.61  0.503 0.507 0.542 0.471 0.498 0.49  0.487]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.571 0.605 0.499 0.502 0.541 0.468 0.5   0.484 0.483]
[0.573 0.607 0.502 0.504 0.54  0.469 0.5   0.486 0.483]
