In [None]:
from IPython.utils import capture
from IPython.display import clear_output

print("[1;32mStarting...")

%pip install -U --pre xformers
%pip install -U --pre triton
%pip install diffusers accelerate transformers scipy safetensors
%pip install gradio

clear_output()

print("[1;32mDone.")

In [None]:
import gc
import torch
import numpy as np
import gradio as gr
from diffusers import (
    UnCLIPPipeline,
    StableDiffusionUpscalePipeline,
    DDIMScheduler,
    LMSDiscreteScheduler,
    EulerDiscreteScheduler,
)


def make_pipeline_generator():
    """Build the Karlo (unCLIP) text-to-image pipeline.

    Loads the "kakaobrain/karlo-v1-alpha" checkpoint in float16, moves it to
    the "cuda" device and turns on attention slicing.

    Returns:
        The configured UnCLIPPipeline.
    """
    karlo = UnCLIPPipeline.from_pretrained(
        "kakaobrain/karlo-v1-alpha", torch_dtype=torch.float16
    ).to("cuda")
    karlo.enable_attention_slicing()
    return karlo


def make_pipeline_upscaler(scheduler):
    """Create the Stable-Diffusion x4 upscaler pipeline with the specified scheduler.

    Args:
        scheduler: Scheduler name. "Euler" selects EulerDiscreteScheduler and
            "LMS" selects LMSDiscreteScheduler; any other value (including
            "DDIM") selects DDIMScheduler.

    Returns:
        A StableDiffusionUpscalePipeline loaded in float16, moved to "cuda",
        with xformers memory-efficient attention and attention slicing
        enabled.
    """
    repo_id = "stabilityai/stable-diffusion-x4-upscaler"

    # Name -> class lookup; unknown names fall back to DDIM.
    scheduler_classes = {
        "Euler": EulerDiscreteScheduler,
        "LMS": LMSDiscreteScheduler,
    }
    scheduler_cls = scheduler_classes.get(scheduler, DDIMScheduler)

    # Every scheduler is instantiated from the scheduler config shipped with
    # the upscaler checkpoint.
    scheduler_obj = scheduler_cls.from_pretrained(repo_id, subfolder="scheduler")

    pipe = StableDiffusionUpscalePipeline.from_pretrained(
        repo_id,
        scheduler=scheduler_obj,
        torch_dtype=torch.float16,
    )
    pipe = pipe.to("cuda")
    # Documented public entry point for turning on xformers attention.
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_attention_slicing()
    return pipe


def generate(prompt, n_images, n_prior, n_decoder, n_super_res, cfg_prior, cfg_decoder):
    """Run the Karlo pipeline on a text prompt and return the generated images.

    Args:
        prompt: Text prompt passed to the pipeline.
        n_images: Forwarded as ``num_images_per_prompt``.
        n_prior: Forwarded as ``prior_num_inference_steps``.
        n_decoder: Forwarded as ``decoder_num_inference_steps``.
        n_super_res: Forwarded as ``super_res_num_inference_steps``.
        cfg_prior: Forwarded as ``prior_guidance_scale``.
        cfg_decoder: Forwarded as ``decoder_guidance_scale``.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    karlo = karlo_pipe  # module-level pipeline created during setup

    # Drop cached, unused CUDA memory before the forward pass.
    torch.cuda.empty_cache()

    call_kwargs = {
        "prompt": prompt,
        "num_images_per_prompt": n_images,
        "prior_num_inference_steps": n_prior,
        "decoder_num_inference_steps": n_decoder,
        "super_res_num_inference_steps": n_super_res,
        "prior_guidance_scale": cfg_prior,
        "decoder_guidance_scale": cfg_decoder,
    }
    result = karlo(**call_kwargs)
    return result.images


def upscale(scheduler, prompt, neg_prompt, images, n_steps, cfg):
    """Upscale images using the Stable-Diffusion upscaling model.

    The preloaded module-level ``up_pipe`` is reused for every scheduler.
    Instead of loading a second copy of the upscaler onto the GPU for each
    non-DDIM request, only the pipeline's scheduler object is replaced,
    rebuilt from the current scheduler's config.

    Args:
        scheduler: Scheduler name. "Euler" and "LMS" select the matching
            discrete schedulers; any other value (including "DDIM") selects
            DDIMScheduler.
        prompt: Prompt applied to every image in the batch.
        neg_prompt: Negative prompt applied to every image in the batch.
        images: Sequence of images to upscale.
        n_steps: Forwarded as ``num_inference_steps``.
        cfg: Forwarded as ``guidance_scale``.

    Returns:
        The ``images`` attribute of the pipeline output, or an empty list
        when ``images`` is empty.
    """
    batch_size = len(images)
    if batch_size == 0:
        # Nothing to upscale; avoid calling the pipeline with an empty batch.
        return []

    scheduler_classes = {
        "Euler": EulerDiscreteScheduler,
        "LMS": LMSDiscreteScheduler,
    }
    scheduler_cls = scheduler_classes.get(scheduler, DDIMScheduler)

    pipe = up_pipe
    # NOTE: this mutates the shared pipeline, so the check runs on every call
    # to switch back when a different scheduler is requested later.
    if type(pipe.scheduler) is not scheduler_cls:
        pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)

    # Drop cached, unused CUDA memory before the forward pass.
    torch.cuda.empty_cache()
    return pipe(
        image=images,
        prompt=[prompt] * batch_size,
        negative_prompt=[neg_prompt] * batch_size,
        num_inference_steps=n_steps,
        guidance_scale=cfg,
    ).images


def run(
    prompt,
    n_images,
    up,
    n_prior,
    n_decoder,
    n_super_res,
    cfg_prior,
    cfg_decoder,
    up_prompt,
    up_neg_prompt,
    up_n_steps,
    up_cfg,
    up_scheduler,
    show_original,
    progress=gr.Progress(track_tqdm=True)
):
    """Generate images and optionally upscale them.

    Calls ``generate`` with the Karlo settings. When ``up`` is truthy the
    results are passed through ``upscale``; the upscaler prompt falls back to
    ``prompt`` when ``up_prompt`` is empty.

    Returns:
        The generated images when ``up`` is falsy; otherwise the upscaled
        images, followed by the originals when ``show_original`` is truthy.
    """
    originals = generate(
        prompt=prompt,
        n_images=n_images,
        n_prior=n_prior,
        n_decoder=n_decoder,
        n_super_res=n_super_res,
        cfg_prior=cfg_prior,
        cfg_decoder=cfg_decoder,
    )

    # No upscaling requested: hand back the Karlo output untouched.
    if not up:
        return originals

    upscaled = upscale(
        scheduler=up_scheduler,
        prompt=up_prompt or prompt,
        neg_prompt=up_neg_prompt,
        images=originals,
        n_steps=up_n_steps,
        cfg=up_cfg,
    )

    if not show_original:
        return upscaled

    # Upscaled images first, then the originals they were made from.
    return list(upscaled) + list(originals)

# Set up and download pipelines.
# Both pipelines are created once here and kept as module-level globals;
# generate() reads karlo_pipe and upscale() reads up_pipe.
# The ANSI colour sequence uses an explicit ESC (\033) and is reset (\033[0m)
# so the green/bold style does not bleed into later output.
print("\033[1;32mDownloading models...\033[0m")
karlo_pipe = make_pipeline_generator()
up_pipe = make_pipeline_upscaler("DDIM")
# NOTE: this clears the download logs, including any warnings printed above.
clear_output()
print("\033[1;32mDone.\033[0m")

In [None]:
# Gradio UI: settings in the left column, output gallery in the right one.
with gr.Blocks() as app:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# stable-karlo 🖼️")
            prompt = gr.Textbox(
                label="Prompt (77 words max)",
                interactive=True,
                value="A photo of a shiba inu wearing a stylish red scarf, high quality.",
            )
            # At least one image must be requested; zero is not a meaningful
            # generation request and would hand the upscaler an empty batch.
            n_images = gr.Slider(
                label="Number of images",
                interactive=True,
                value=1,
                minimum=1,
                maximum=8,
                step=1,
            )
            up = gr.Checkbox(
                label="Upscale with Stable-Diffusion", interactive=True, value=True
            )

            # Settings forwarded to generate().
            with gr.Accordion("Karlo Settings", open=False):
                n_prior = gr.Slider(
                    label="Number of prior steps",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=25,
                )
                n_decoder = gr.Slider(
                    label="Number of decoder steps",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=25,
                )
                n_super_res = gr.Slider(
                    label="Number of super res steps",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    step=1,
                    value=7,
                )
                cfg_prior = gr.Slider(
                    label="Prior guidance scale",
                    interactive=True,
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=4.0,
                )
                cfg_decoder = gr.Slider(
                    label="Decoder guidance scale",
                    interactive=True,
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=4.0,
                )

            # Settings forwarded to upscale().
            with gr.Accordion("Stable-Diffusion Settings", open=False):
                up_prompt = gr.Textbox(
                    label="Prompt", placeholder="Leave blank to use Karlo prompt"
                )
                up_neg_prompt = gr.Textbox(label="Negative prompt")
                up_n_steps = gr.Slider(
                    label="Number of steps",
                    interactive=True,
                    minimum=0,
                    maximum=200,
                    step=1,
                    value=35,
                )
                up_cfg = gr.Slider(
                    label="Guidance scale",
                    interactive=True,
                    minimum=1.01,
                    maximum=20.0,
                    step=0.1,
                    value=7.5,
                )
                up_scheduler = gr.Radio(
                    label="Scheduler",
                    choices=["DDIM", "LMS", "Euler"],
                    interactive=True,
                    value="DDIM",
                )
                show_original = gr.Checkbox(
                    label="Show original images", interactive=True, value=False
                )

            btn = gr.Button("Generate")

        with gr.Column(scale=2):
            # `.style(grid=...)` was removed from Gradio components; the
            # column count is a constructor argument in current releases,
            # which is what the unpinned `%pip install gradio` provides.
            gallery = gr.Gallery(label="Output", show_label=False, columns=4)

        # The input order must match the positional parameters of run().
        btn.click(
            fn=run,
            inputs=[
                prompt,
                n_images,
                up,
                n_prior,
                n_decoder,
                n_super_res,
                cfg_prior,
                cfg_decoder,
                up_prompt,
                up_neg_prompt,
                up_n_steps,
                up_cfg,
                up_scheduler,
                show_original,
            ],
            outputs=gallery,
        )

# The queue is enabled before launching; run() declares a gr.Progress
# parameter with track_tqdm=True.
app.queue().launch(quiet=True, height=600)