In [1]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import EulerDiscreteScheduler # Euler Discrete Scheduler
from diffusers import DPMSolverMultistepScheduler # Import samplers
from diffusers import DDIMScheduler # Import samplers
from diffusers.utils import load_image
from tqdm import tqdm
from prompt import prompt_generator, prompt_generator_no_human # Prompt generation
import glob
import os

  from .autonotebook import tqdm as notebook_tqdm


Samplers (schedulers): https://huggingface.co/docs/diffusers/api/schedulers/overview

In [2]:
# Report the installed PyTorch build and whether torch can see a CUDA device.
print(f"Torch version: {torch.__version__}")
print(f"Is CUDA enabled? {torch.cuda.is_available()}")

Torch version: 2.2.2+cu121
Is CUDA enabled? True


https://thepythoncode.com/article/control-generated-images-with-controlnet-with-huggingface#code

# First Pipeline: ControlNet + Stable Diffusion 

In [15]:
# ControlNet model, loaded in half precision (float16)
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_scribble", torch_dtype=torch.float16)

# Define stable diffusion pipeline with controlnet
# We use the realistic-vision-v20-2047 model for this example (fine-tuned on realistic images of people)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "stablediffusionapi/realistic-vision-v20-2047",
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16,
)

# DPM-Solver multistep scheduler with Karras sigmas.
# `from_config` is a classmethod: it builds a brand-new scheduler from the config it is given
# and ignores the instance it is called on. The `use_karras_sigmas=True` override therefore has
# to be passed to `from_config` itself; setting it on a throwaway instance would be discarded.
DPM = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
pipe.scheduler = DPM

# Enable efficient implementations using xformers for faster inference
pipe.enable_xformers_memory_efficient_attention()
# Let diffusers move sub-models between CPU and GPU on demand to reduce GPU memory usage
pipe.enable_model_cpu_offload()

unet\diffusion_pytorch_model.safetensors not found
Loading pipeline components...: 100%|██████████| 6/6 [00:10<00:00,  1.82s/it]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [26]:
# Load wireframe image.
# os.path.join picks the right separator for the current OS; the previous hard-coded
# "wireframes\\3_4.jpg" only resolved to a file inside the directory on Windows.
image_input = load_image(os.path.join("wireframes", "3_4.jpg"))

# Prompt
prompt = prompt_generator()
print("Prompt:",prompt)
neg_prompt = "disfigured, not realistic, low quality"
# Run the pipeline (the wireframe is the ControlNet conditioning image)
image_output = pipe(prompt=prompt, negative_prompt=neg_prompt, image=image_input, num_inference_steps=18).images[0]

# Save the output
image_output.save("test.jpg")
print("Save test.jpg!")

Prompt: A medium shot view of an old man holding a brown cardboard box with black tape from the Zara webshop in his hands, standing in front of a quite neighborhood street and cars background, the weather is rainy, high photorealistic quality.


100%|██████████| 18/18 [00:09<00:00,  1.97it/s]


Save test.jpg!


# Generating first synthetic dataset

"wireframes" are drawn in the bottom half region of the window, so they can easily be held  by persons.
Next, we will generate a synthetic image for each of those wireframes and save it in the directory "outputs".
- I generate random prompts
- Next I will validate them
- Finally, I will only keep the images with a single box in the image (classifier)

ControlNet model: lllyasviel/control_v11p_sd15_scribble
Stable diffusion model: stablediffusionapi/realistic-vision-v20-2047

In [9]:
def _wireframe_sort_key(filename):
    """Return the ``(a, b)`` integer pair encoded in a name such as ``"3_4.jpg"``.

    Returns ``None`` when the name does not start with ``<int>_<int>``, so callers can
    skip stray directory entries instead of crashing while sorting.
    """
    parts = filename.split('_')
    try:
        return int(parts[0]), int(parts[1].split('.')[0])
    except (IndexError, ValueError):
        return None


def generate_images(controlnet_model_path, sd_model_path, prompt_human, infer_steps,
                    input_dir="wireframes", output_dir="outputs"):
    """Generate one synthetic image per wireframe with ControlNet + Stable Diffusion.

    Every file in ``input_dir`` whose name starts with ``<int>_<int>`` (e.g. ``3_4.jpg``)
    is used as the ControlNet conditioning image, processed in numeric order, and the
    result is saved under the same file name in ``output_dir``.

    Args:
        controlnet_model_path: Model id or path passed to ``ControlNetModel.from_pretrained``.
        sd_model_path: Model id or path passed to
            ``StableDiffusionControlNetPipeline.from_pretrained``.
        prompt_human: ``0`` draws prompts from ``prompt_generator_no_human()``; any other
            value draws them from ``prompt_generator()``.
        infer_steps: Forwarded to the pipeline as ``num_inference_steps``.
        input_dir: Directory containing the wireframe images (default ``"wireframes"``).
        output_dir: Directory the generated images are written to; created if missing
            (default ``"outputs"``).

    Returns:
        None. Images are written to ``output_dir`` as a side effect.
    """
    # ControlNet model, loaded in half precision (float16)
    controlnet = ControlNetModel.from_pretrained(controlnet_model_path, torch_dtype=torch.float16)

    # Define stable diffusion pipeline with controlnet
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        sd_model_path,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=torch.float16,
    )

    # DPM++ 2M Karras. `from_config` is a classmethod that ignores the instance it is called
    # on, so the Karras override must be passed to `from_config` directly to take effect.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)

    # Enable efficient implementations using xformers for faster inference
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()

    # Collect the wireframe files, skipping sub-directories and names that do not follow
    # the "<int>_<int>.<ext>" pattern, then order them numerically (so 10_1 sorts after 2_1).
    keyed_files = []
    for name in os.listdir(input_dir):
        key = _wireframe_sort_key(name)
        if key is not None and os.path.isfile(os.path.join(input_dir, name)):
            keyed_files.append((key, name))
    sorted_files = [name for _, name in sorted(keyed_files, key=lambda pair: pair[0])]

    # Saving fails if the target directory does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    # NOTE(review): the negative prompt always contains "person, human", even when
    # prompt_human != 0 requests prompts from prompt_generator() -- confirm this is intended.
    negative_prompt = "flat background, person, human, disfigured, unrealistic, low quality"

    for file in tqdm(sorted_files):
        image_input = load_image(os.path.join(input_dir, file))
        prompt = prompt_generator_no_human() if prompt_human == 0 else prompt_generator()
        image_output = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=image_input,
            num_inference_steps=infer_steps,
        ).images[0]
        image_output.save(os.path.join(output_dir, os.path.basename(file)))

In [10]:
# prompt_human=0 makes generate_images draw its prompts from prompt_generator_no_human();
# infer_steps is the number of inference steps used for every image.
generate_images(
    controlnet_model_path="lllyasviel/control_v11p_sd15_scribble",
    sd_model_path="stablediffusionapi/realistic-vision-v20-2047",
    prompt_human=0,
    infer_steps=15,
)

vae\diffusion_pytorch_model.safetensors not found
Loading pipeline components...: 100%|██████████| 6/6 [00:05<00:00,  1.17it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
100%|██████████| 15/15 [00:08<00:00,  1.74it/s]
100%|██████████| 15/15 [00:08<00:00,  1.67it/s]it]
100%|██████████| 15/15 [00:08<00:00,  1.67it/s]it]
100%|██████████| 15/15 [00:08<00:00,  1.69it/s]it]
100%|██████████| 15/1