In [None]:
# Shell escape: run `nvidia-smi` and show its output in the cell.
# Presumably a sanity check that a GPU is visible before the cells below move pipelines to "cuda".
!nvidia-smi

First, generate prompts with LLaVA and BLIP.

In [None]:
# Shell escape: run the project's LLaVA dataset-creation script in a separate Python process.
# NOTE(review): presumably this writes the prompt CSVs read by the cells below — confirm the output location.
!python T2I-Adriver/llava_dataset_creation.py

## Other models

Run Stable Diffusion (SD), Stable Diffusion image-to-image (SDI2I), ControlNet, and T2I-Adapter (T2I) with each prompt.

In [12]:
from transformers import pipeline
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, T2IAdapter, EulerDiscreteScheduler, ControlNetModel
import torch
import os
from PIL import Image
import cv2
import numpy as np
from diffusers import T2IAdapter, StableDiffusionAdapterPipeline
from diffusers.utils import load_image
import ipyplot
from utils_controlnet import reshape, to_canny, to_depth, to_segmentation
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# INPUTS
# Global configuration shared by the generation cells below.
# MODEL is reassigned at the top of each generation cell before it is checked.
MODEL = "SD"  # Values the cells below compare against: "SD", "SDI2I", "T2I", "ControlNet"
# Prompt-source tag; passed to create_filename() as the prompt part of output file names.
PROMPT = "Manual_Entry"
#FILE = pd.read_csv("T2I-Adapter/dataset/other_models/llava_long_prompts.csv") if PROMPT == "LLAVA" else pd.read_csv("T2I-Adapter/dataset/other_models/blip_prompts.csv") if PROMPT == "BLIP" else pd.read_csv("T2I-Adapter/dataset/other_models/manual_prompts.csv")
# Validation CSV; the "Labels" column supplies the prompts and the "Image" column the image file names.
FILE = pd.read_csv("T2I-Adriver/dataset/val_short_prompts.csv")
PROMPTS = FILE["Labels"]
# Image file names prefixed with the validation image directory.
IMAGE_PATHS = "T2I-Adriver/dataset/val_6/"+FILE["Image"]
# Seeded generator handed to the ControlNet and SD pipeline calls below.
# NOTE(review): the same generator object is reused for every call, so individual images
# presumably depend on the order in which cells/iterations run — confirm if that matters.
generator = torch.manual_seed(42)

In [3]:
import cv2
from PIL import Image
# Build a coarse colour-palette version of a single image and write it to disk:
# the picture is shrunk to a tiny grid and then enlarged again with nearest-neighbour
# interpolation, so each grid cell becomes one flat block of colour.
palette_source_path = "cloudy_road.png"
palette_output_size = 512   # side length (pixels) of the saved square image
palette_downscale = 64      # the intermediate grid is output_size // downscale pixels per side

color = cv2.imread(palette_source_path)
if color is None:
    # cv2.imread reports a missing or unreadable file by returning None instead of
    # raising, which would otherwise surface as an opaque error inside cvtColor.
    raise FileNotFoundError(f"cv2.imread could not read image: {palette_source_path}")
color = cv2.cvtColor(color, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; PIL expects RGB

# Resize down (cubic) and back up (nearest) to obtain the blocky palette
palette_grid = palette_output_size // palette_downscale
color = cv2.resize(color, (palette_grid, palette_grid), interpolation=cv2.INTER_CUBIC)
color = cv2.resize(color, (palette_output_size, palette_output_size), interpolation=cv2.INTER_NEAREST)

# Convert back to a PIL image and store it
color_image = Image.fromarray(color)
color_image.save("cloudy_road_color.png")

In [None]:
# Sampling settings and output location used by the generation cells below.
num_inference_steps = 50  # Passed as num_inference_steps to every pipeline call below
guidance_scale = 7.5  # Initial value only: each generation cell rebinds guidance_scale as its loop variable

# Directory joined with create_filename() output when building save paths; created if missing.
output_dir = "T2I-Adriver/dataset/other_models/"
os.makedirs(output_dir, exist_ok=True)

# Checkpoint identifiers passed to the various `from_pretrained` calls in the cells below.

# Base Stable Diffusion models
sd14 = "CompVis/stable-diffusion-v1-4"
sd15 = "runwayml/stable-diffusion-v1-5"
sdxl = "stabilityai/stable-diffusion-xl-base-1.0"

# T2I SD 1.4/1.5 checkpoints
canny_t2i = "TencentARC/t2iadapter_canny_sd15v2"
depth_t2i = "TencentARC/t2iadapter_depth_sd15v2"
sketch_t2i = "TencentARC/t2iadapter_sketch_sd15v2"
segmentation_t2i = "TencentARC/t2iadapter_seg_sd14v1"
zoedepth_t2i = "TencentARC/t2iadapter_zoedepth_sd15v1"
openpose_t2i = "TencentARC/t2iadapter_openpose_sd14v1"
color_t2i = "TencentARC/t2iadapter_color_sd14v1"

# ControlNet checkpoints
canny_cn = "lllyasviel/sd-controlnet-canny"
depth_cn = "lllyasviel/control_v11f1p_sd15_depth"
# Must be a segmentation-conditioned ControlNet (not the depth checkpoint): the
# ControlNet cell pairs this model with the segmentation maps.
segmentation_cn = "lllyasviel/control_v11p_sd15_seg"
openpose_cn = "lllyasviel/sd-controlnet-openpose"
#sketch_cn = "lllyasviel/control_sd15_sketch"

# Create conditionings
# Every source and conditioning image is brought to this (width, height).
CONDITIONING_SIZE = (512, 512)
# The colour palette is produced by shrinking each side by this factor and enlarging again.
COLOR_DOWNSCALE = 64


def _load_init_image(image_path):
    """Open ``image_path``, convert it to RGB and resize it to ``CONDITIONING_SIZE``.

    The file is opened in a ``with`` block so the underlying file handle is
    closed once the converted copy has been produced.
    """
    with Image.open(image_path) as source:
        return source.convert("RGB").resize(CONDITIONING_SIZE)


def _make_conditionings(extract):
    """Run ``extract`` on every path in ``IMAGE_PATHS`` and reshape each result.

    Args:
        extract: one of the ``utils_controlnet`` helpers (``to_canny``,
            ``to_depth``, ``to_segmentation``); it is called with an image path.

    Returns:
        A list with one reshaped conditioning image per entry of ``IMAGE_PATHS``.
    """
    return [reshape(extract(image_path), CONDITIONING_SIZE) for image_path in IMAGE_PATHS]


def _make_color_palette(image_path):
    """Return a blocky colour-palette PIL image for the file at ``image_path``.

    The image is shrunk with cubic interpolation and enlarged back to
    ``CONDITIONING_SIZE`` with nearest-neighbour interpolation.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns ``None`` (file missing or unreadable).
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; PIL expects RGB
    width, height = CONDITIONING_SIZE
    coarse = cv2.resize(
        rgb,
        (width // COLOR_DOWNSCALE, height // COLOR_DOWNSCALE),
        interpolation=cv2.INTER_CUBIC,
    )
    blocky = cv2.resize(coarse, CONDITIONING_SIZE, interpolation=cv2.INTER_NEAREST)
    return Image.fromarray(blocky)


# Load images
init_images = [_load_init_image(image_path) for image_path in IMAGE_PATHS]

# One list per conditioning type, all in the same order as IMAGE_PATHS
canny_images = _make_conditionings(to_canny)
depth_images = _make_conditionings(to_depth)
segmentation_images = _make_conditionings(to_segmentation)
color_images = [_make_color_palette(image_path) for image_path in IMAGE_PATHS]

ipyplot.plot_images(color_images, max_images=20, img_width=200)

In [None]:
import os

# Define a function to create filenames
def create_filename(model_name, prompt, conditionings, index, guidance_scale):
    """Build the PNG file name for one generated image.

    Args:
        model_name: label of the model that produced the image.
        prompt: prompt text; spaces, forward slashes and backslashes are each
            replaced by an underscore.
        conditionings: iterable of conditioning names, joined with underscores.
        index: zero-based image index; the file name carries ``index + 1``.
        guidance_scale: value embedded verbatim in the file name.

    Returns:
        ``"<model>_<prompt>_<conditionings>_<guidance_scale>_<index+1>.png"``.
    """
    # Map each character that is unwanted in the file name to an underscore in one pass
    to_underscore = str.maketrans({' ': '_', '/': '_', '\\': '_'})
    prompt_slug = prompt.translate(to_underscore)
    conditioning_tag = '_'.join(conditionings)
    stem = f"{model_name}_{prompt_slug}_{conditioning_tag}_{guidance_scale}_{index + 1}"
    return f"{stem}.png"

# Model definition and generation of images
# T2I-Adapter run: three adapters (sketch, segmentation, depth) steer one Stable
# Diffusion pipeline. Every prompt is generated against the conditioning maps of
# a single dataset image.
MODEL = "T2I"
guidance_scales = [7.5]
if MODEL == "T2I":
    from diffusers import StableDiffusionAdapterPipeline, T2IAdapter
    import torch

    # Load the adapters and the pipeline once; none of this depends on the
    # guidance scale, so it must not be repeated for every scale.
    adapter_sketch = T2IAdapter.from_pretrained(sketch_t2i, torch_dtype=torch.float16)
    #adapter_color = T2IAdapter.from_pretrained(color_t2i, torch_dtype=torch.float16)
    adapter_segmentation = T2IAdapter.from_pretrained(segmentation_t2i, torch_dtype=torch.float16)
    adapter_depth = T2IAdapter.from_pretrained(depth_t2i, torch_dtype=torch.float16)

    # The order of this list fixes the order of the conditioning images and scales below
    adapters = [adapter_sketch, adapter_segmentation, adapter_depth]

    pipe = StableDiffusionAdapterPipeline.from_pretrained(
        sd14,
        adapter=adapters,
        torch_dtype=torch.float16,
    )
    pipe.to("cuda")

    # One conditioning scale per adapter; adjust these values as needed
    adapter_scales = [1.0, 1.0, 1.0]

    # Index of the dataset image whose conditioning maps drive every prompt
    conditioning_index = 1
    # The sketch adapter is fed the Canny edge map of that image
    conditioning_inputs = [
        canny_images[conditioning_index],
        segmentation_images[conditioning_index],
        depth_images[conditioning_index],
    ]

    for guidance_scale in guidance_scales:
        t2i_images = []
        for i, prompt in enumerate(PROMPTS):
            generated_image = pipe(
                prompt,
                image=conditioning_inputs,  # Order matches the adapters list
                #generator=generator,  # left disabled, as before
                num_inference_steps=num_inference_steps,
                negative_prompt="",
                adapter_conditioning_scale=adapter_scales,
                # Forward the loop value so the scale written into the file name
                # is the one actually used for sampling.
                guidance_scale=guidance_scale,
            ).images[0]

            t2i_images.append(generated_image)

            #filename = create_filename(MODEL, PROMPT, ["color", "depth", "seg"], i, guidance_scale)
            #file_path = os.path.join(output_dir, filename)
            generated_image.save(f"T2I_{i}_{guidance_scale}.png")

        # Keep the conditioning maps next to the outputs for reference
        canny_images[conditioning_index].save("T2I_sketch.png")
        segmentation_images[conditioning_index].save("T2I_seg.png")
        depth_images[conditioning_index].save("T2I_depth.png")
        ipyplot.plot_images(t2i_images, max_images=20, img_width=200)


In [None]:
# Preview the source images (init_images: RGB, resized to 512x512 when they were loaded).
ipyplot.plot_images(init_images, max_images=20, img_width=200)

In [24]:
#ipyplot.plot_images(segmentation_images, max_images=20, img_width=200)
# Export the colour palette, Canny map and source image of one dataset entry
# under fixed file names in the working directory.
sample_index = 11
for image_list, out_name in (
    (color_images, "road_t2i_1_color.png"),
    (canny_images, "road_t2i_1_sketch.png"),
    (init_images, "road_t2i_1.png"),
):
    image_list[sample_index].save(out_name)

In [None]:
# ControlNet run: two ControlNets (Canny edges + segmentation) steer one Stable
# Diffusion pipeline; every prompt is generated once per guidance scale, using
# the conditioning maps of the image at the same index as the prompt.
MODEL = "ControlNet"
guidance_scales = [0,0.5,1,1.5, 5, 10]
if MODEL == "ControlNet":
    from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler

    # Load the models once: nothing here depends on the guidance scale, so
    # reloading for every scale only costs time and memory.
    controlnet_sketch = ControlNetModel.from_pretrained(canny_cn, torch_dtype=torch.float16)
    controlnet_segmentation = ControlNetModel.from_pretrained(segmentation_cn, torch_dtype=torch.float16)
    # A depth ControlNet is not part of this run. To add it, load
    # ControlNetModel.from_pretrained(depth_cn, torch_dtype=torch.float16), append it
    # to `controlnets`, and append depth_images[i] to the `image` list below.

    # The order of this list fixes the order of the conditioning images below
    controlnets = [controlnet_sketch, controlnet_segmentation]

    # No manual .to("cuda"): enable_model_cpu_offload() below takes over device
    # placement and moves each sub-model to the GPU only while it is running.
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        sd14, controlnet=controlnets, torch_dtype=torch.float16,
    )

    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()

    for guidance_scale in guidance_scales:
        cn_images = []
        for i, prompt in enumerate(PROMPTS):
            # Pass the conditioning images in the same order as the ControlNet models
            cn_images.append(pipe(
                prompt,
                image=[canny_images[i], segmentation_images[i]],
                num_inference_steps=num_inference_steps,
                generator=generator,
                negative_prompt="",
                guidance_scale=guidance_scale,
            ).images[0])

            #filename = create_filename(MODEL, PROMPT, ["sketch", "depth", "seg"], i, guidance_scale)
            #file_path = os.path.join(output_dir, filename)
            #cn_images[-1].save(file_path)
            cn_images[-1].save(f"ControlNetFail_test_{i}_{guidance_scale}.png")

        ipyplot.plot_images(cn_images, max_images=20, img_width=200)


In [None]:
# Plain Stable Diffusion run: generate a fixed text prompt at two guidance scales.
MODEL = "SD"
# Kept in a cell-local name so the dataset-wide PROMPTS (read by the other
# generation cells) is not overwritten by this one-off prompt.
sd_prompts = ["a sunset over the mountains"]
if MODEL == "SD":
    from diffusers import StableDiffusionPipeline, UniPCMultistepScheduler

    # Load the pipeline once; it does not depend on the guidance scale
    pipe = StableDiffusionPipeline.from_pretrained(
        sd14, torch_dtype=torch.float16,
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")

    for guidance_scale in [5, 10]:
        sd_images = []
        for prompt in sd_prompts:
            sd_images.append(pipe(
                prompt,
                generator=generator,
                num_inference_steps=num_inference_steps,
                negative_prompt="",  # Optionally include negative prompts if needed
                guidance_scale=guidance_scale,
            ).images[0])

            # The file name carries only the guidance scale, so with more than
            # one prompt each image would overwrite the previous one.
            sd_images[-1].save(f"sunset_{guidance_scale}.png")

        ipyplot.plot_images(sd_images, max_images=20, img_width=200)


In [None]:
# Stable Diffusion image-to-image run: re-render one dataset image from its prompt,
# once per guidance scale, and save each result under output_dir.
MODEL = "SDI2I"
if MODEL == "SDI2I":
    from diffusers import StableDiffusionImg2ImgPipeline
    import torch

    # Load the pipeline once; it does not depend on the guidance scale
    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        sd14,  # Replace with your desired Stable Diffusion model version
        torch_dtype=torch.float16,
    ).to("cuda")

    pipe.enable_xformers_memory_efficient_attention()

    # Only the prompt/image pair at this position is processed
    target_index = 6

    # NOTE(review): guidance_scales is not defined in this cell; it is whatever an
    # earlier cell left in the namespace. Because strength is guidance_scale/10,
    # values outside 0-10 presumably fall outside the accepted strength range — confirm.
    for guidance_scale in guidance_scales:
        sdi2i_images = []

        for i, prompt in enumerate(PROMPTS):
            if i != target_index:
                continue
            # Generate the image based on the corresponding prompt and initial image
            generated_image = pipe(
                prompt=prompt,
                image=init_images[i],  # Use the preloaded image
                strength=guidance_scale/10,  # Strength of the transformation (0.0-1.0), tied to the guidance scale
                guidance_scale=guidance_scale,  # How closely the image should match the prompt
                generator=torch.manual_seed(42),  # Fresh seed per call for reproducibility
                num_inference_steps=num_inference_steps,
                negative_prompt="",  # Optionally include a negative prompt
            ).images[0]

            sdi2i_images.append(generated_image)

            filename = create_filename(MODEL, PROMPT, [""], i, guidance_scale)
            file_path = os.path.join(output_dir, filename)
            generated_image.save(file_path)

        ipyplot.plot_images(sdi2i_images, max_images=20, img_width=200)
