In [None]:
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL, ControlNetModel, DDIMScheduler
from diffusers.utils import load_image
import torch
from PIL import Image, ImageOps
import numpy as np
import os, sys

In [None]:
# Synthesise a __file__ from the current working directory (os.path.abspath('')
# resolves to the cwd), presumably because the notebook kernel does not define one.
__file__ = os.path.abspath('')
# Prepend three ancestors of that path to sys.path in a single slice assignment.
# dirname() is applied to the cwd itself, so the entries are the cwd's parent,
# grandparent and great-grandparent, with the furthest ancestor first.
# NOTE(review): the cwd itself is never inserted here -- confirm that is intended.
sys.path[:0] = [
    os.path.abspath(os.path.join(os.path.dirname(__file__), relative))
    for relative in ('../..', '..', '')
]
from diffusion.common.SDXLLongPromptWeightingPipeline import get_weighted_text_embeddings_sdxl

def set_lpw_embeeds_sdxl(pipeline, prompt, neg_prompt, num_images_per_prompt, kwargs):
    """Compute weighted SDXL text embeddings and store them in ``kwargs``.

    Calls ``get_weighted_text_embeddings_sdxl`` and writes the four values it
    returns, in order, under the keys ``prompt_embeds``,
    ``negative_prompt_embeds``, ``pooled_prompt_embeds`` and
    ``negative_pooled_prompt_embeds``.

    Args:
        pipeline: Forwarded to the helper as ``pipe``.
        prompt: Forwarded to the helper as ``prompt``.
        neg_prompt: Forwarded to the helper as ``neg_prompt``.
        num_images_per_prompt: Forwarded to the helper unchanged.
        kwargs: Mapping that is updated in place.

    Returns:
        The same ``kwargs`` object that was passed in.
    """
    positive, negative, pooled_positive, pooled_negative = get_weighted_text_embeddings_sdxl(
        pipe=pipeline,
        prompt=prompt,
        neg_prompt=neg_prompt,
        num_images_per_prompt=num_images_per_prompt,
    )

    embedding_keys = (
        'prompt_embeds',
        'negative_prompt_embeds',
        'pooled_prompt_embeds',
        'negative_pooled_prompt_embeds',
    )
    embedding_values = (positive, negative, pooled_positive, pooled_negative)
    for key, value in zip(embedding_keys, embedding_values):
        kwargs[key] = value
    return kwargs

def resize_for_condition_image(input_image: Image.Image, resolution: int) -> Image.Image:
    """Resize an image so its shorter side is about ``resolution`` pixels.

    The image is converted to RGB, scaled so that its shorter side equals
    ``resolution``, and both sides are then snapped to the nearest multiple of
    64 (never below 64) before resampling with LANCZOS.

    Args:
        input_image: PIL image to resize; it is not modified.
        resolution: Target length of the shorter side, in pixels. Must be > 0.

    Returns:
        A new RGB PIL image whose width and height are multiples of 64.

    Raises:
        ValueError: If ``resolution`` is not positive.
    """
    if resolution <= 0:
        raise ValueError(f"resolution must be positive, got {resolution}")

    rgb_image = input_image.convert("RGB")
    width, height = rgb_image.size
    scale = float(resolution) / min(height, width)

    # Snap each side to a multiple of 64. The max() guard stops a small
    # resolution from rounding a side down to 0, which resize() rejects.
    target_height = max(64, int(round(height * scale / 64.0)) * 64)
    target_width = max(64, int(round(width * scale / 64.0)) * 64)

    return rgb_image.resize((target_width, target_height), resample=Image.LANCZOS)

In [None]:
class NoWatermark:
    """Pass-through object whose ``apply_watermark`` leaves its input untouched."""

    def apply_watermark(self, img):
        """Return ``img`` exactly as received."""
        return img

# Load the QR-code ControlNet weights in float16 and move them to the GPU.
controlnet = ControlNetModel.from_pretrained(
  "monster-labs/control_v1p_sdxl_qrcode_monster",
  torch_dtype=torch.float16
).to("cuda")

# Commented-out alternative: a non-XL img2img pipeline loaded from a different checkpoint.
# base_pipeline = StableDiffusionImg2ImgPipeline.from_single_file("/home/oleksandr/projects/upwork/esov-api/models/realisticVisionV60B1_v51VAE.safetensors", torch_dtype=torch.float16).to("cuda")
# Build the SDXL img2img pipeline from a single local .safetensors checkpoint (float16, on the GPU).
# NOTE(review): the checkpoint path is an absolute path on one machine; the cell fails anywhere else.
base_pipeline = StableDiffusionXLImg2ImgPipeline.from_single_file(
    "/home/oleksandr/projects/upwork/LoDi-Engine-Runpod/models_hotswap/zavychromaxl_v30.10003.safetensors", 

    torch_dtype=torch.float16).to(torch.device('cuda'))
# Commented-out alternative: swapping in a separately loaded VAE.
# vae = AutoencoderKL.from_single_file("/home/oleksandr/projects/upwork/esov-api/models/vae-ft-mse-840000-ema-pruned.ckpt", torch_dtype=torch.float16).to("cuda")
# base_pipeline.vae = vae
# Reuse the components of base_pipeline and add the ControlNet to build the
# ControlNet img2img pipeline, rather than loading the checkpoint a second time.
cn_pipeline = StableDiffusionXLControlNetImg2ImgPipeline(**base_pipeline.components, controlnet=controlnet).to("cuda")


# Commented-out alternative: loading a ControlNet img2img pipeline from the Hub instead.
# cn_pipeline = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
#     "stabilityai/stable-diffusion-2-1",
#     safety_checker=None,
#     torch_dtype=torch.float16,
#     controlnet=controlnet
# ).to("cuda")
# Replace the scheduler with a DDIMScheduler built from the current scheduler's config.
cn_pipeline.scheduler = DDIMScheduler.from_config(cn_pipeline.scheduler.config)
# Install the pass-through NoWatermark as the pipeline's watermark attribute.
# NOTE(review): presumably the pipeline calls watermark.apply_watermark on its
# outputs -- confirm against the installed diffusers version.
cn_pipeline.watermark = NoWatermark()

In [None]:
# Input locations for the QR code, the style references and the scene images.
# NOTE(review): these are absolute paths on one machine; the cell fails anywhere else.
qrimg = Image.open("/home/oleksandr/projects/upwork/esov-api/tmp/qr/Piece 01.png")
style_fold = "/home/oleksandr/projects/upwork/esov-api/tmp/qr/Reference - General Style"
img_fold = "/home/oleksandr/projects/upwork/esov-api/tmp/qr/Style Scene 001"

# os.listdir() returns entries in arbitrary order, so the listings are sorted to
# make style_imgs[0] / imgs[0] the same file on every run and every machine.
# Each file is opened in a `with` block so its handle is closed promptly; the
# image produced by convert("RGB") is kept and used after the file is closed.
style_imgs = []
for file in sorted(os.listdir(style_fold)):
    with Image.open(os.path.join(style_fold, file)) as opened:
        style_img = opened.convert("RGB")
    style_imgs.append(style_img)

imgs = []
for file in sorted(os.listdir(img_fold)):
    with Image.open(os.path.join(img_fold, file)) as opened:
        img = opened.convert("RGB")
    imgs.append(img)
# Sample QR code fetched over the network; it is not referenced by the other visible cells.
testqr = load_image("https://boofcv.org/images/3/35/Example_rendered_qrcode.png")
print(f"QR Image size: {qrimg.size}")
print(f"Style Image size: {style_imgs[0].size}")
print(f"Image size: {imgs[0].size}")

In [None]:
# Preview: blend the QR code into imgs[0] with alpha 0.7 (weighted towards the
# QR), then paste that blend onto a copy of imgs[0] through a mask made from the
# inverted grayscale QR image.
scene = imgs[0]
qr_img_resized = qrimg.resize(scene.size).convert("RGBA")
to_blend = scene.copy().convert("RGBA")
blended = Image.blend(to_blend, qr_img_resized, 0.7)
# Inverted grayscale QR, resized to the scene; presumably this selects the dark
# QR modules as the region that receives the blend -- confirm against the input.
qr_mask = ImageOps.invert(qrimg.convert("L")).resize(scene.size)
pasted = scene.copy()
pasted.paste(blended, (0, 0), mask=qr_mask)
pasted

In [None]:
w=1024
h=1024
# Seed the generator up front and pass it to the pipeline below. Previously it
# was created after kwargs and never passed on, so the seed had no effect.
generator = torch.Generator(device="cuda").manual_seed(1)

prompt = "Fantasy painting in the style of Frank Frazetta, a blue rat running in a spice storage room, Comic book cover art, Sword and sorcery"
# prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "humans, swords, monsters"

# Arguments for the ControlNet img2img call: the scene image is the img2img
# input and the QR code is the ControlNet conditioning image.
# NOTE(review): "w" and "h" do not look like pipeline call arguments (the
# pipeline's size arguments are named "height"/"width") and are presumably
# ignored -- confirm, then either rename them or drop them.
kwargs = {
    "image": resize_for_condition_image(imgs[0], w),
    "control_image": resize_for_condition_image(qrimg, w),
    "strength": 0.4,
    "guidance_scale": 7,
    "num_inference_steps": 60,
    "w": w,
    "h": h,
    "controlnet_conditioning_scale": 4.0,
    "control_guidance_start": 0.0,
    "control_guidance_end": 1.0,
    "generator": generator,
}

# Adds the weighted prompt embeddings to kwargs; the return value is assigned to
# match the later generation cell.
kwargs = set_lpw_embeeds_sdxl(cn_pipeline, prompt, negative_prompt, 1, kwargs)

cn_pipeline(**kwargs).images[0]

In [None]:
# Detach any IP-Adapter currently attached to cn_pipeline.
# NOTE(review): in top-to-bottom order this runs before the cell that calls
# load_ip_adapter, so on a fresh kernel nothing has been loaded yet -- confirm
# the call is safe and needed in that state.
cn_pipeline.unload_ip_adapter()

In [None]:
# Attach the IP-Adapter weights "ip-adapter-plus_sdxl_vit-h.safetensors" from the
# "sdxl_models" subfolder of the "h94/IP-Adapter" repository.
# NOTE(review): image_encoder_folder contains a "/", so it is presumably resolved
# from the repository root rather than inside `subfolder` -- confirm against the
# diffusers load_ip_adapter documentation for the installed version.
cn_pipeline.load_ip_adapter(
            "h94/IP-Adapter", 
            subfolder="sdxl_models", 
            weight_name="ip-adapter-plus_sdxl_vit-h.safetensors", 
            image_encoder_folder="models/image_encoder")

In [None]:
w = 1024
h = 1024
# Fixed seed; the generator is passed to the pipeline through kwargs below.
generator = torch.Generator(device="cuda").manual_seed(1)

# NOTE(review): a scale of 0.0 presumably removes the adapter's influence even
# though ip_adapter_image is still supplied -- confirm this is intended.
cn_pipeline.set_ip_adapter_scale(0.0)

prompt = "Fantasy painting in the style of Frank Frazetta, a blue rat running in a spice storage room, Comic book cover art, Sword and sorcery"
# prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "humans, swords, monsters"

# Arguments for the ControlNet img2img call with the IP-Adapter attached.
# NOTE(review): "w" and "h" do not look like pipeline call arguments (the
# pipeline's size arguments are named "height"/"width") and are presumably
# ignored -- confirm, then either rename them or drop them.
kwargs = dict(
    ip_adapter_image=imgs[0],
    image=resize_for_condition_image(imgs[0], w),
    control_image=resize_for_condition_image(qrimg, w),
    strength=0.4,
    guidance_scale=15,
    num_inference_steps=60,
    w=w,
    h=h,
    controlnet_conditioning_scale=1.0,
    control_guidance_start=0.0,
    control_guidance_end=1.0,
    generator=generator,
)

# Add the weighted prompt embeddings to the call arguments.
kwargs = set_lpw_embeeds_sdxl(cn_pipeline, prompt, negative_prompt, 1, kwargs)

cn_pipeline(**kwargs).images[0]