In [13]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import EulerDiscreteScheduler # Import samplers
from diffusers import DPMSolverMultistepScheduler # Import samplers
from diffusers.utils import load_image
from tqdm import tqdm
# Prompt generation
from prompt import prompt_generator, prompt_generator_no_human

Samplers (schedulers): https://huggingface.co/docs/diffusers/api/schedulers/overview

In [2]:
# Sanity check of the runtime: report the installed PyTorch build and
# whether a CUDA device can be used.
torch_version = torch.__version__
cuda_available = torch.cuda.is_available()
print("Torch version:", torch_version)
print("Is CUDA enabled?", cuda_available)

Torch version: 2.2.2+cu121
Is CUDA enabled? True


https://thepythoncode.com/article/control-generated-images-with-controlnet-with-huggingface#code

# First Pipeline: ControlNet + Stable Diffusion 

In [8]:
# Hugging Face Hub identifiers of the two checkpoints used by this pipeline.
controlnet_model_id = "lllyasviel/control_v11p_sd15_scribble"
# realistic-vision-v20-2047 is fine-tuned on realistic images of people.
sd_model_id = "stablediffusionapi/realistic-vision-v20-2047"

# Load the ControlNet weights in half precision.
controlnet = ControlNetModel.from_pretrained(controlnet_model_id, torch_dtype=torch.float16)

# Assemble the Stable Diffusion pipeline around the ControlNet
# (half precision, safety checker disabled).
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    sd_model_id,
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16,
)

# Swap the sampler for Euler, reusing the configuration of the scheduler
# that shipped with the checkpoint.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

# Enable the efficient xformers attention implementation for faster
# inference, plus model CPU offloading.
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()

unet\diffusion_pytorch_model.safetensors not found
Loading pipeline components...: 100%|██████████| 6/6 [00:11<00:00,  1.97s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [22]:
# Load wireframe image.
# A forward slash keeps the relative path portable: it is accepted on Windows
# as well as on Linux/macOS, whereas a hard-coded backslash only resolves on
# Windows.
image_input = load_image("wireframes_set1/img3_4.jpg")

# Prompt: random positive prompt (variant without a person) plus a fixed
# negative prompt.
prompt = prompt_generator_no_human()
print("Prompt:", prompt)
neg_prompt = "multiple boxes in the background, cardboard, a person in the foreground, not realistic"

# Run the pipeline, conditioned on the wireframe image, and keep the first
# (only) generated image.
image_output = pipe(
    prompt=prompt,
    negative_prompt=neg_prompt,
    image=image_input,
    num_inference_steps=30,
).images[0]

# Save the output
image_output.save("test.jpg")
print("Save test.jpg!")

Prompt: A medium shot view of a colorful cardboard box with red tape from the Bol.com webshop, in front of a quite neighborhood street and cars background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.94it/s]


Save test.jpg!


In [23]:
# Sample one random prompt from prompt_generator (imported from the local
# `prompt` module) and print it for inspection.
# NOTE(review): presumably this is the variant that includes a person, as
# opposed to prompt_generator_no_human — confirm in prompt.py.
prompt = prompt_generator()
print(prompt)

A mid shot view of an angry man holding a white cardboard box with red tape from the Adidas webshop in his hands, standing in front of a garbage curb background, the weather is cloudy, high photorealistic quality.


# Generating first synthetic dataset

`wireframes_set1` contains wireframes drawn in the bottom half of the window, so they can easily be held by a person.
Next, we will generate synthetic images from those wireframes and store them in the directory `output_set1`.
- I generate random prompts
- Next I will validate them
- Finally, I will only keep the images with a single box in the image

ControlNet model: lllyasviel/control_v11p_sd15_scribble
Stable diffusion model: stablediffusionapi/realistic-vision-v20-2047

In [26]:
import glob
import os
from prompt import prompt_generator

def _build_controlnet_pipeline(controlnet_model_path, sd_model_path):
    """Load a Stable Diffusion + ControlNet pipeline in float16 with the Euler sampler."""
    # ControlNet model
    controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch.float16)

    # Stable Diffusion pipeline wrapped around the ControlNet; the safety
    # checker is disabled.
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        sd_model_path,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=torch.float16,
    )
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    # Enable efficient implementations using xformers for faster inference
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()
    return pipe


def generate_images(directory_path, output_directory_path, controlnet_model_path, sd_model_path, prompt_human,
                    negative_prompt=None, num_inference_steps=30):
    """Generate one synthetic image per ``*.jpg`` wireframe found in a directory.

    Each wireframe is used as the ControlNet conditioning image together with
    a freshly generated random prompt. The result is saved in
    ``output_directory_path`` under the input file name with its first three
    characters removed (e.g. ``img3_4.jpg`` -> ``3_4.jpg``).

    Args:
        directory_path: Directory containing the input ``*.jpg`` wireframes.
        output_directory_path: Directory receiving the generated images; it is
            created if it does not exist yet.
        controlnet_model_path: Model id or path passed to
            ``ControlNetModel.from_pretrained``.
        sd_model_path: Model id or path passed to
            ``StableDiffusionControlNetPipeline.from_pretrained``.
        prompt_human: ``0`` draws prompts from ``prompt_generator_no_human``;
            any other value draws them from ``prompt_generator``.
        negative_prompt: Optional negative prompt forwarded to the pipeline
            for every image (default ``None``: no negative prompt).
        num_inference_steps: Number of denoising steps per image (default 30).

    Returns:
        None. Images are written to disk and each prompt is printed.
    """
    # Sorted so the processing order does not depend on the OS listing order.
    wireframe_files = sorted(glob.glob(os.path.join(directory_path, '*.jpg')))
    if not wireframe_files:
        # Avoid loading the (large) models when there is nothing to process.
        print("No .jpg files found in", directory_path)
        return

    # Saving fails if the target directory is missing, so create it up front.
    os.makedirs(output_directory_path, exist_ok=True)

    pipe = _build_controlnet_pipeline(controlnet_model_path, sd_model_path)
    make_prompt = prompt_generator_no_human if prompt_human == 0 else prompt_generator

    for file in tqdm(wireframe_files):
        image_input = load_image(file)
        prompt = make_prompt()
        print(prompt)
        image_output = pipe(
            prompt=prompt,
            image=image_input,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
        ).images[0]
        # The output name drops the first three characters of the input file name.
        image_output.save(os.path.join(output_directory_path, os.path.basename(file)[3:]))

In [27]:
# Generate the first synthetic set: wireframes_set1 -> output_set1.
generate_images(
    directory_path="wireframes_set1",
    output_directory_path="output_set1",
    controlnet_model_path="lllyasviel/control_v11p_sd15_scribble",
    sd_model_path="stablediffusionapi/realistic-vision-v20-2047",
    prompt_human=1,
)

unet\diffusion_pytorch_model.safetensors not found
Loading pipeline components...: 100%|██████████| 6/6 [00:13<00:00,  2.30s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
  0%|          | 0/25 [00:00<?, ?it/s]

A close-up view of a brown cardboard box with green tape from the H&M webshop, in front of a quite neighborhood street and garden background, the weather is rainy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.84it/s]
  4%|▍         | 1/25 [00:20<08:06, 20.26s/it]

A close-up view of a colorful cardboard box with black tape from the Amazon webshop, in front of a garbage curb background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.83it/s]
  8%|▊         | 2/25 [00:39<07:27, 19.47s/it]

A low angle view of a white cardboard box with a barcode from the Nike webshop, in front of a in the street background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.77it/s]
 12%|█▏        | 3/25 [00:58<07:07, 19.42s/it]

A close-up view of a white cardboard box with a barcode from the Amazon webshop, in front of a garbage curb background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.94it/s]
 16%|█▌        | 4/25 [01:16<06:33, 18.74s/it]

A high angle view of a brown cardboard box with red tape from the Amazon webshop, in front of a quite neighborhood street and cars background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 20%|██        | 5/25 [01:34<06:08, 18.42s/it]

A high angle view of a black cardboard box with blue tape from the Coolblue webshop, in front of a quite neighborhood street and trees background, the weather is rainy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.94it/s]
 24%|██▍       | 6/25 [01:51<05:45, 18.20s/it]

A close-up view of a black cardboard box with a barcode from the H&M webshop, in front of a city center background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 28%|██▊       | 7/25 [02:09<05:25, 18.06s/it]

A close-up view of a black cardboard box with a number from the Bol.com webshop, in front of a busy city street and cars background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.94it/s]
 32%|███▏      | 8/25 [02:27<05:05, 17.96s/it]

A full shot view of a white cardboard box with a number from the Zalando webshop, in front of a in the street background, the weather is rainy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 36%|███▌      | 9/25 [02:45<04:46, 17.90s/it]

A close-up view of a brown cardboard box with red tape from the Nike webshop, in front of a busy city street and people background, the weather is rainy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.94it/s]
 40%|████      | 10/25 [03:02<04:27, 17.86s/it]

A close-up view of a white cardboard box with a barcode from the Zara webshop, in front of a in the street background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 44%|████▍     | 11/25 [03:20<04:09, 17.84s/it]

A long shot view of a white cardboard box with blue tape from the Zara webshop, in front of a city center background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 48%|████▊     | 12/25 [03:38<03:51, 17.82s/it]

A long shot view of a black cardboard box with a number from the Nike webshop, in front of a city center background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.86it/s]
 52%|█████▏    | 13/25 [03:57<03:36, 18.07s/it]

A long shot view of a white cardboard box with a qr code from the Coolblue webshop, in front of a busy city street and people background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.90it/s]
 56%|█████▌    | 14/25 [04:15<03:19, 18.09s/it]

A long shot view of a black cardboard box with blue tape from the Bol.com webshop, in front of a quite neighborhood street and cars background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:14<00:00,  2.05it/s]
 60%|██████    | 15/25 [04:32<02:56, 17.68s/it]

A low angle view of a colorful cardboard box with a number from the Coolblue webshop, in front of a garbage curb background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:14<00:00,  2.10it/s]
 64%|██████▍   | 16/25 [04:48<02:37, 17.45s/it]

A high angle view of a brown cardboard box with red tape from a webshop, in front of a busy city street and people background, the weather is snowy, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.93it/s]
 68%|██████▊   | 17/25 [05:07<02:21, 17.68s/it]

A high angle view of a white cardboard box with red tape from a webshop, in front of a quite neighborhood street and cars background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.86it/s]
 72%|███████▏  | 18/25 [05:25<02:05, 17.99s/it]

A close-up view of a colorful cardboard box with blue tape from a webshop, in front of a busy city street and people background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.84it/s]
 76%|███████▌  | 19/25 [05:44<01:49, 18.27s/it]

A medium shot view of a brown cardboard box with blue tape from the H&M webshop, in front of a garbage curb background, the weather is rainy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.85it/s]
 80%|████████  | 20/25 [06:03<01:32, 18.45s/it]

A full shot view of a brown cardboard box with red tape from the Zara webshop, in front of a quite neighborhood street and garden background, the weather is cloudy, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.83it/s]
 84%|████████▍ | 21/25 [06:22<01:14, 18.59s/it]

A high angle view of a colorful cardboard box with yellow tape from the Zara webshop, in front of a garbage curb background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:15<00:00,  1.88it/s]
 88%|████████▊ | 22/25 [06:41<00:55, 18.59s/it]

A high angle view of a brown cardboard box with green tape from the Zalando webshop, in front of a garbage curb background, the weather is sunny, high photorealistic quality.


100%|██████████| 30/30 [00:16<00:00,  1.86it/s]
 92%|█████████▏| 23/25 [06:59<00:37, 18.61s/it]

A medium shot view of a brown cardboard box with a barcode from the Coolblue webshop, in front of a garbage curb background, the weather is snowy, high photorealistic quality.


