In [None]:
from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionControlNetImg2ImgPipeline, AutoencoderKL, ControlNetModel, DDIMScheduler
from diffusers.utils import load_image
import torch
from PIL import Image, ImageOps, ImageFilter
import numpy as np
import cv2
import os, sys
from transformers import pipeline

In [None]:
def cropPILImage(im: "Image.Image", maxWidth=1024, maxHeight=1024, log = True):
    """Scale an image so it covers a target box, then center-crop it to that box.

    The image is resized proportionally so that one side equals the target
    and the other side is at least as large as the target. The overflow is
    then trimmed evenly from both ends of the longer side, so the aspect
    ratio is preserved and no padding bars appear.

    Args:
        im: Source image; must provide ``size``, ``resize`` and ``crop`` the
            way ``PIL.Image.Image`` does.
        maxWidth: Width of the returned image, in pixels (expected integer).
        maxHeight: Height of the returned image, in pixels (expected integer).
        log: When truthy, print which branch of the algorithm was taken.

    Returns:
        The resized image cropped to the box
        ``(dx, dy, maxWidth + dx, maxHeight + dy)``.
    """
    imw, imh = im.size

    sourceIsLandScape = imw > imh
    targetIsLandScape = maxWidth > maxHeight
    if log and sourceIsLandScape == targetIsLandScape:
        print("cropPILImage: same orientation")

    # Compare the aspect ratios by cross-multiplication and scale with integer
    # floor division. Going through a float ratio and int() can truncate to
    # one pixel *less* than the target (e.g. int(49 * (1 / 49)) == 0), which
    # would leave the crop box partly outside the image.
    if imw * maxHeight < maxWidth * imh:
        # Source is relatively taller than the target: match the width and
        # let the height overflow.
        if log:
            print("cropPILImage: source is taller")
        newWidth = maxWidth
        newHeight = int(maxWidth * imh // imw)
    else:
        # Source is relatively wider (or has the same ratio): match the
        # height and let the width overflow.
        if log:
            print("cropPILImage: source is wider")
        newHeight = maxHeight
        newWidth = int(maxHeight * imw // imh)

    im = im.resize((newWidth, newHeight))

    # Crop to the target width and height, trimming both ends evenly.
    imw, imh = im.size
    deltax = (imw - maxWidth) // 2
    deltay = (imh - maxHeight) // 2
    im = im.crop((deltax, deltay, maxWidth + deltax, maxHeight + deltay))
    return im


In [None]:
# `__file__` is assigned by hand here: the absolute current working directory.
__file__ = os.path.abspath('')

# Prepend the directory that contains the working directory, then one and two
# levels above it, to sys.path (the outermost one ends up first) -- presumably
# so the project-local `diffusion` package imported below can be found.
_parent_dir = os.path.dirname(__file__)
for _levels_up in ('', '..', '../..'):
    sys.path.insert(0, os.path.abspath(os.path.join(_parent_dir, _levels_up)))

from diffusion.common.lpw_processor import get_weighted_text_embeddings

In [None]:
# Alternative ControlNet checkpoint (disabled):
# controlnet = ControlNetModel.from_pretrained(
#   "Nacholmo/controlnet-qr-pattern-v2",
#   torch_dtype=torch.float16
# ).to("cuda")

# ControlNet checkpoint used for conditioning, loaded in fp16 onto the GPU.
controlnet = ControlNetModel.from_pretrained(
  "monster-labs/control_v1p_sd15_qrcode_monster",
  torch_dtype=torch.float16
).to("cuda")

# Alternative base model from a local single-file checkpoint (disabled):
# base_pipeline = StableDiffusionImg2ImgPipeline.from_single_file("/home/oleksandr/projects/upwork/esov-api/models/juggernaut_reborn.safetensors", torch_dtype=torch.float16).to("cuda")
# Base SD 1.5 img2img pipeline in fp16 with the safety checker disabled.
base_pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    safety_checker=None,
    torch_dtype=torch.float16
).to("cuda")
# Replace the pipeline's VAE with one loaded from a local checkpoint file.
# This must happen before `base_pipeline.components` is read below so the
# ControlNet pipeline is built with the replacement VAE.
vae = AutoencoderKL.from_single_file("/home/oleksandr/projects/upwork/esov-api/models/vae-ft-mse-840000-ema-pruned.ckpt", torch_dtype=torch.float16).to("cuda")
base_pipeline.vae = vae
# Build the ControlNet img2img pipeline from the base pipeline's components
# plus the ControlNet loaded above.
cn_pipeline = StableDiffusionControlNetImg2ImgPipeline(**base_pipeline.components, controlnet=controlnet).to("cuda")


# Alternative: load the ControlNet pipeline directly from SD 2.1 (disabled):
# cn_pipeline = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
#     "stabilityai/stable-diffusion-2-1",
#     safety_checker=None,
#     torch_dtype=torch.float16,
#     controlnet=controlnet
# ).to("cuda")
# Swap the scheduler for DDIM, configured from the pipeline's current scheduler config.
cn_pipeline.scheduler = DDIMScheduler.from_config(cn_pipeline.scheduler.config)

In [None]:
# Inputs for the experiment, given as absolute paths on the author's machine:
# one image opened directly (`qrimg`) and two folders of image files that are
# listed and loaded further down in this cell.
qrimg = Image.open("/home/oleksandr/projects/upwork/esov-api/tmp/qr/Piece 01.png")
style_fold = "/home/oleksandr/projects/upwork/esov-api/tmp/qr/Reference - General Style"
img_fold = "/home/oleksandr/projects/upwork/esov-api/tmp/qr/Style Scene 001"

def get_sorted_files(folder):
    """Return the numbered entries of ``folder`` in ascending numeric order.

    An entry counts as numbered when the part of its name before the first
    ``"."`` parses as an integer (``"2.png"``, ``"10.final.jpg"``). Entries
    that do not (``".DS_Store"``, ``"notes.txt"``, checkpoint folders) are
    skipped instead of aborting the whole listing with ``ValueError``.

    Args:
        folder: Path of the directory to list.

    Returns:
        A list of entry names (not full paths), ordered by their integer
        prefix; entries with the same number are ordered by name so the
        result does not depend on ``os.listdir`` ordering.
    """
    numbered = []
    for name in os.listdir(folder):
        try:
            index = int(name.split(".")[0])
        except ValueError:
            # Not a numbered file -- ignore it.
            continue
        numbered.append((index, name))
    return [name for _, name in sorted(numbered)]

def _open_rgb(folder, name):
    """Open the file `name` inside `folder` and return it converted to RGB."""
    return Image.open(os.path.join(folder, name)).convert("RGB")


# Style references: loaded as RGB and cropped to 1024x1024.
style_imgs = []
for file in get_sorted_files(style_fold):
    style_img = _open_rgb(style_fold, file)
    style_imgs.append(cropPILImage(style_img, 1024, 1024))

# Images from `img_fold`: loaded as RGB at their original size.
imgs = []
for file in get_sorted_files(img_fold):
    img = _open_rgb(img_fold, file)
    imgs.append(img)

# Image fetched from a URL; used as a control image in a later cell.
testqr = load_image("https://boofcv.org/images/3/35/Example_rendered_qrcode.png")

# Quick sanity check of what was loaded.
print(f"QR Image size: {qrimg.size}")
print(f"Style Image size: {style_imgs[0].size}")
print(f"Image size: {imgs[0].size}")

In [None]:
# Overlay qrimg on imgs[0]: blend the two images with weight 0.7 on qrimg,
# then paste that blend onto a copy of imgs[0] using the inverted grayscale
# qrimg as the paste mask.
base_img = imgs[0]
target_size = base_img.size

qr_img_resized = qrimg.resize(target_size).convert("RGBA")
to_blend = base_img.copy().convert("RGBA")
blended = Image.blend(to_blend, qr_img_resized, 0.7)

paste_mask = ImageOps.invert(qrimg.convert("L")).resize(target_size)
pasted = base_img.copy()
pasted.paste(blended, (0, 0), paste_mask)
pasted

In [None]:
# Working resolution in pixels (width, height) for this cell.
w, h = 1024, 1024

def resize_for_condition_image(input_image: "Image.Image", resolution: int):
    """Resize an image so its shorter side is about ``resolution`` and both sides are multiples of 64.

    The image is converted to RGB and scaled by ``resolution / min(width, height)``.
    Each scaled side is then rounded to the nearest multiple of 64, so the
    aspect ratio is only approximately preserved.

    Args:
        input_image: Source image; must provide ``convert``, ``size`` and
            ``resize`` the way ``PIL.Image.Image`` does.
        resolution: Desired length of the shorter side, in pixels.

    Returns:
        The RGB image resized with LANCZOS resampling.
    """
    input_image = input_image.convert("RGB")
    W, H = input_image.size
    k = float(resolution) / min(H, W)
    # Round to the nearest multiple of 64, but never below 64: a small
    # `resolution` would otherwise round a side down to 0 and make the
    # resize request an empty image.
    H = max(64, int(round(H * k / 64.0)) * 64)
    W = max(64, int(round(W * k / 64.0)) * 64)
    img = input_image.resize((W, H), resample=Image.LANCZOS)
    return img

prompt = "Fantasy painting in the style of Frank Frazetta, a blue rat running in a spice storage room, Comic book cover art, Sword and sorcery"
# prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "humans, swords, monsters"

# Build prompt / negative-prompt embeddings with the project helper.
prompt_embeds, negative_prompt_embeds = get_weighted_text_embeddings(cn_pipeline, prompt, negative_prompt, max_embeddings_multiples=3)

# Fixed seed so the run is repeatable.
generator = torch.Generator(device="cuda").manual_seed(1)
cn_pipeline(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,

    # image=resize_for_condition_image(pasted, w),
    # control_image=resize_for_condition_image(ImageOps.invert(qrimg.convert("L")).resize(imgs[0].size), w),

    image=resize_for_condition_image(imgs[0], w),
    control_image=resize_for_condition_image(testqr, w),
    # control_image=resize_for_condition_image(qrimg, w),

    strength=0.4,
    guidance_scale=7,
    num_inference_steps=60,
    generator=generator,
    # The pipeline's size arguments are `width` / `height`. `w=` / `h=` are
    # not parameters of the call and were silently ignored. With the size set
    # explicitly, the init image and the control image are both brought to
    # w x h, so they always agree (non-square inputs are stretched).
    width=w,
    height=h,

    controlnet_conditioning_scale=4.0,
    control_guidance_start=0.0,
    control_guidance_end=1.0,
).images[0]

In [None]:
# Detach any IP-Adapter currently attached to the pipeline -- presumably so the
# load cell that follows can be re-run from a clean state.
cn_pipeline.unload_ip_adapter()

In [None]:
# Attach the "ip-adapter-plus_sd15" IP-Adapter weights from the "h94/IP-Adapter"
# repository ("models" subfolder), with the image encoder taken from
# "models/image_encoder".
cn_pipeline.load_ip_adapter(
            "h94/IP-Adapter", 
            subfolder="models", 
            weight_name="ip-adapter-plus_sd15.safetensors", 
            image_encoder_folder="models/image_encoder")

In [None]:
# Gaussian-blurred copy of qrimg (radius 24); later cells use it as the
# ControlNet control image. The bare name on the last line displays it.
blur_kernel = ImageFilter.GaussianBlur(radius=24)
filtered = qrimg.filter(blur_kernel)
filtered

In [None]:
# Output size in pixels and a fixed seed so the run is repeatable.
w=1024
h=1024
generator = torch.Generator(device="cuda").manual_seed(1)

prompt = "Fantasy painting in the style of Frank Frazetta, a blue rat running in a spice storage room, Sword and sorcery"
# prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "humans, swords, monsters, (NSFW, nude, naked), bad proportions, undetailed, poorly drawn lines, illustration, 3d render, painting, unrealistic skin, ugly teeth, ugly pupil, (worst quality), (low quality), (normal quality), lowres, signature, watermark, username, cropped"

# Build prompt / negative-prompt embeddings with the project helper.
prompt_embeds, negative_prompt_embeds = get_weighted_text_embeddings(cn_pipeline, prompt, negative_prompt, max_embeddings_multiples=3)

# Weight of the IP-Adapter image prompt.
cn_pipeline.set_ip_adapter_scale(0.7)
cn_pipeline(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,

    # image=resize_for_condition_image(pasted, w),
    # control_image=resize_for_condition_image(ImageOps.invert(qrimg.convert("L")).resize(imgs[0].size), w),

    # The same image serves as IP-Adapter reference and as img2img init image.
    ip_adapter_image=imgs[0],
    image=resize_for_condition_image(imgs[0], w),
    # control_image=resize_for_condition_image(testqr, w),
    control_image=resize_for_condition_image(filtered, w),

    strength=0.99,
    guidance_scale=15,
    num_inference_steps=30,
    generator=generator,
    # The pipeline's size arguments are `width` / `height`. `w=` / `h=` are
    # not parameters of the call and were silently ignored. With the size set
    # explicitly, the init image and the control image are both brought to
    # w x h, so they always agree (non-square inputs are stretched).
    width=w,
    height=h,

    controlnet_conditioning_scale=2.0,
    control_guidance_start=0.0,
    control_guidance_end=1.0,
).images[0]

In [None]:
# Output size in pixels for every generated image.
w=1024
h=1024

prompt = "Fantasy painting in the style of Frank Frazetta, a blue rat running in a spice storage room, Comic book cover art, Sword and sorcery"
# prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "humans, swords, monsters, (NSFW, nude, naked), bad proportions, undetailed, poorly drawn lines, illustration, 3d render, painting, unrealistic skin, ugly teeth, ugly pupil, (worst quality), (low quality), (normal quality), lowres, signature, watermark, username, cropped"

# The embeddings and the adapter scale do not depend on the image, so they are
# set up once instead of on every loop iteration.
prompt_embeds, negative_prompt_embeds = get_weighted_text_embeddings(cn_pipeline, prompt, negative_prompt, max_embeddings_multiples=3)
cn_pipeline.set_ip_adapter_scale(0.7)

imgs_ready = []
for img in imgs:
    # Re-seed for every image so each one is generated from the same seed.
    generator = torch.Generator(device="cuda").manual_seed(1)

    imr = cn_pipeline(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,

        # image=resize_for_condition_image(pasted, w),
        # control_image=resize_for_condition_image(ImageOps.invert(qrimg.convert("L")).resize(imgs[0].size), w),

        # The same image serves as IP-Adapter reference and as img2img init image.
        ip_adapter_image=img,
        image=resize_for_condition_image(img, w),
        # control_image=resize_for_condition_image(testqr, w),
        control_image=resize_for_condition_image(filtered, w),

        strength=0.9,
        guidance_scale=15,
        num_inference_steps=30,
        generator=generator,
        # The pipeline's size arguments are `width` / `height`. `w=` / `h=`
        # are not parameters of the call and were silently ignored. Setting
        # the size explicitly also makes every result w x h, which is what
        # the contact sheet below assumes.
        width=w,
        height=h,

        controlnet_conditioning_scale=2.0,
        control_guidance_start=0.0,
        control_guidance_end=1.0,
    ).images[0]
    imgs_ready.append(imr)

# Create the output folders first: the contact sheet is written into the
# parent of `images_out_dir`, which may not exist yet on a fresh run.
images_out_dir = "/home/oleksandr/projects/upwork/esov-api/tmp/out/m"
os.makedirs(images_out_dir, exist_ok=True)

# Contact sheet: one row per image, source (resized to w x h) on the left and
# the generated result on the right.
vertical_two_grids = Image.new("RGB", (2 * w, h * len(imgs_ready)))
for i, (source_img, result_img) in enumerate(zip(imgs, imgs_ready)):
    vertical_two_grids.paste(source_img.resize((w, h)), (0, h * i))
    vertical_two_grids.paste(result_img, (w, h * i))

vertical_two_grids.save("/home/oleksandr/projects/upwork/esov-api/tmp/out/m1.jpg")

# Also save every generated image on its own, named by its position.
for i, img in enumerate(imgs_ready):
    img.save(f"{images_out_dir}/{i}.jpg")

In [None]:
# Output size in pixels for every generated image.
w=1024
h=1024

prompt = "Fantasy painting in the style of Frank Frazetta"
negative_prompt = "((((humans, swords, monsters, skeletons, naked woman)))) (NSFW, nude, naked), bad proportions, undetailed, poorly drawn lines, illustration, 3d render, painting, unrealistic skin, ugly teeth, ugly pupil, (worst quality), (low quality), (normal quality), lowres, signature, watermark, username, cropped"

# The embeddings and the adapter scale do not depend on the image, so they are
# set up once instead of on every loop iteration.
prompt_embeds, negative_prompt_embeds = get_weighted_text_embeddings(cn_pipeline, prompt, negative_prompt, max_embeddings_multiples=3)
cn_pipeline.set_ip_adapter_scale(0.7)

imgs_ready = []
for img in style_imgs:
    # Re-seed for every image so each one is generated from the same seed.
    generator = torch.Generator(device="cuda").manual_seed(1)

    imr = cn_pipeline(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,

        # image=resize_for_condition_image(pasted, w),
        # control_image=resize_for_condition_image(ImageOps.invert(qrimg.convert("L")).resize(imgs[0].size), w),

        # The same image serves as IP-Adapter reference and as img2img init image.
        ip_adapter_image=img,
        image=resize_for_condition_image(img, w),
        # control_image=resize_for_condition_image(testqr, w),
        control_image=resize_for_condition_image(filtered, w),

        strength=0.9,
        guidance_scale=15,
        num_inference_steps=30,
        generator=generator,
        # The pipeline's size arguments are `width` / `height`. `w=` / `h=`
        # are not parameters of the call and were silently ignored. Setting
        # the size explicitly also makes every result w x h, which is what
        # the contact sheet below assumes.
        width=w,
        height=h,

        controlnet_conditioning_scale=2.0,
        control_guidance_start=0.0,
        control_guidance_end=1.0,
    ).images[0]
    imgs_ready.append(imr)

# Create the output folders first: the contact sheet is written into the
# parent of `images_out_dir`, which may not exist yet on a fresh run.
images_out_dir = "/home/oleksandr/projects/upwork/esov-api/tmp/out/s"
os.makedirs(images_out_dir, exist_ok=True)

# Contact sheet: one row per style image, the style reference on the left and
# the generated result on the right. The height follows the number of style
# results; it used to be taken from the unrelated `imgs` list.
vertical_two_grids = Image.new("RGB", (2 * w, h * len(imgs_ready)))
for i, (style_img_i, result_img) in enumerate(zip(style_imgs, imgs_ready)):
    vertical_two_grids.paste(style_img_i, (0, h * i))
    vertical_two_grids.paste(result_img, (w, h * i))

vertical_two_grids.save("/home/oleksandr/projects/upwork/esov-api/tmp/out/s1.jpg")

# Also save every generated image on its own, named by its position.
for i, img in enumerate(imgs_ready):
    img.save(f"{images_out_dir}/{i}.jpg")