Bu çalışmada T2I-Adapter incelendi. T2I-Adapter, ControlNet'e çok benzer; aralarındaki farklara göz atalım. Denemeler ilk olarak tek bir adapter ile, ardından birden fazla adapter'ı birleştiren MultiAdapter ile yapıldı.

In [None]:
from diffusers import DPMSolverMultistepScheduler 
from diffusers import T2IAdapter
from diffusers import MultiAdapter
from diffusers import StableDiffusionAdapterPipeline
import torch
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Single-adapter setup: load the "color" T2I-Adapter checkpoint and attach it
# to a Stable Diffusion adapter pipeline built from the local model folder.
adapter = T2IAdapter.from_pretrained("TencentARC/t2iadapter_color_sd14v1")
pipe_t2i = StableDiffusionAdapterPipeline.from_pretrained(
    "/home/sd_models/deliberate_v2/",
    adapter=adapter,
)

# Replace the pipeline's scheduler with DPMSolverMultistepScheduler, reusing
# the configuration of the scheduler that was loaded with the model.
pipe_t2i.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe_t2i.scheduler.config
)

# Move the pipeline to CUDA in float16.
pipe_t2i.to("cuda", torch.float16)

In [None]:
# Open the reference picture and the two conditioning pictures from disk.
input_image = Image.open("../media/input_images/167.png")
color_image = Image.open("../media/input_images/color.png")
depth_image = Image.open("../media/input_images/depth.png")

# Preview the three pictures side by side; only the depth picture is drawn
# with the gray colormap.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
preview_panels = (
    ("Input Image", input_image, {}),
    ("Color Image", color_image, {}),
    ("Depth Image", depth_image, {"cmap": "gray"}),
)
for axis, (panel_title, picture, imshow_kwargs) in zip(ax, preview_panels):
    axis.title.set_text(panel_title)
    axis.imshow(picture, **imshow_kwargs)


In [None]:
def resize_divisible(img, divisor=8):
    """Resize ``img`` so that both sides are multiples of ``divisor``.

    Each side is rounded down to the nearest multiple of ``divisor``. A side
    shorter than ``divisor`` would round down to 0, which is not a usable
    image size, so such a side is set to one ``divisor`` instead.

    Args:
        img: Object exposing ``width``, ``height`` and ``resize((w, h))``
            (the notebook passes images opened with ``PIL.Image.open``).
        divisor: Step that both output sides must be a multiple of.

    Returns:
        Whatever ``img.resize`` returns for the adjusted ``(width, height)``.
    """
    def snap(side):
        # Floor to a multiple of divisor, but never let the side collapse to 0.
        return max(divisor, side // divisor * divisor)

    return img.resize((snap(img.width), snap(img.height)))

# Snap both conditioning images to sizes that are multiples of the default
# divisor, then report the resulting color-image width and height.
color_image, depth_image = (
    resize_divisible(picture) for picture in (color_image, depth_image)
)
print(*color_image.size)

In [None]:
# Fixed-seed CUDA generator so this single-adapter run can be repeated.
seeded_generator = torch.Generator(device="cuda").manual_seed(42)

# Generate at the size of the (already resized) color image, using it as the
# adapter conditioning image.
t2i_result = pipe_t2i(
    prompt="red and pink flowers with green leaves",
    image=color_image,
    height=color_image.height,
    width=color_image.width,
    num_inference_steps=25,
    generator=seeded_generator,
)
output_image = t2i_result.images[0]

# Show the conditioning image next to the generated image.
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
comparison_panels = (
    ("Input Image", color_image),
    ("Output Image", output_image),
)
for axis, (panel_title, picture) in zip(ax, comparison_panels):
    axis.title.set_text(panel_title)
    axis.imshow(picture)

In [None]:
from diffusers.utils import load_image, make_image_grid

# Conditioning inputs for the multi-adapter run: both images are resized to
# the same square size, and the depth image is additionally converted to RGB.
conditioning_size = (512, 512)
cond_color = color_image.resize(conditioning_size)
cond_depth = depth_image.resize(conditioning_size).convert("RGB")

# Print size and mode of each conditioning image as a sanity check.
for conditioning_image in (cond_color, cond_depth):
    print(conditioning_image.size, conditioning_image.mode)

cond = [cond_color, cond_depth]

prompt = ["pink and red flowers with green leaves, a beautiful flower with green leaves"]

In [None]:
# Multi-adapter setup: combine the color and depth T2I-Adapter checkpoints.
# They are listed in the same order as the images in `cond` (color, depth).
adapters = MultiAdapter(
    [
        T2IAdapter.from_pretrained("TencentARC/t2iadapter_color_sd14v1"),
        T2IAdapter.from_pretrained("TencentARC/t2iadapter_depth_sd14v1"),
    ]
)
adapters = adapters.to(torch.float16)

# Build a second pipeline from the same local model folder, loaded in float16
# with the combined adapters attached, and move it to CUDA.
pipe = StableDiffusionAdapterPipeline.from_pretrained(
    "/home/sd_models/deliberate_v2/",
    torch_dtype=torch.float16,
    adapter=adapters,
).to("cuda")

# Seed the run (same seed as the single-adapter experiment) so the result is
# reproducible; without a generator every execution yields a different image.
# NOTE(review): adapter_conditioning_scale carries one value per adapter here.
image = pipe(
    prompt,
    cond,
    width=512,
    height=512,
    adapter_conditioning_scale=[0.8, 0.8],
    generator=torch.Generator(device="cuda").manual_seed(42),
).images[0]

# Show both conditioning images and the generated image in one row.
make_image_grid([cond_color, cond_depth, image], rows=1, cols=3)

