In [1]:
%%capture
!cd /content/
!git clone https://github.com/amansyayf/Dreambooth_LoRA
!pip install -r "Dreambooth_LoRA/requirements.txt"
!pip install -U --pre triton
!pip install torchinfo

!git clone https://github.com/brian6091/lora --branch v0.0.5 --single-branch
!python -m pip install /content/lora/

In [2]:
!nvidia-smi -L

# Tested with Tesla T4 and A100 GPUs
!pip install xformers==0.0.16rc425

GPU 0: Tesla T4 (UUID: GPU-55c36406-d5a7-a252-d68f-62c16cff72a3)
Collecting xformers==0.0.16rc425
  Downloading xformers-0.0.16rc425-cp310-cp310-manylinux2014_x86_64.whl (50.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyre-extensions==0.0.23 (from xformers==0.0.16rc425)
  Downloading pyre_extensions-0.0.23-py3-none-any.whl (11 kB)
Collecting torch==1.13.1 (from xformers==0.0.16rc425)
  Downloading torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl (887.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect (from pyre-extensions==0.0.23->xformers==0.0.16rc425)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==1.13.1->xformers==0.0.16rc425)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
[2

In [3]:
#@title ## Mount Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
import torch
from diffusers import DiffusionPipeline, StableDiffusionPipeline, DPMSolverMultistepScheduler, AutoencoderKL
from PIL import Image
import os
import json
import random
import string
from lora_diffusion import monkeypatch_lora, tune_lora_scale

device = "cuda"

def image_grid(imgs, rows, cols):
    """
        Makes grid of given images.
    """
    assert len(imgs) == rows*cols
    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols*w, rows*h))
    grid_w, grid_h = grid.size
    for i, img in enumerate(imgs):
        grid.paste(img, box=(i%cols*w, i//cols*h))
    return grid

def get_pipeline(model_name_or_path,
                 vae_name_or_path=None,
                 text_encoder_name_or_path=None,
                 feature_extractor_name_or_path=None,
                 revision="fp16"):
    """
        Loades pipeline of untrained original model.
    """
    scheduler = DPMSolverMultistepScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        num_train_timesteps=1000,
        trained_betas=None,
        prediction_type="epsilon",
        thresholding=False,
        algorithm_type="dpmsolver++",
        solver_type="midpoint",
        lower_order_final=True,
    )

    pipe = DiffusionPipeline.from_pretrained(
        model_name_or_path,
        safety_checker=None,
        revision=revision,
        scheduler=scheduler,
        vae=AutoencoderKL.from_pretrained(
            vae_name_or_path or model_name_or_path,
            subfolder=None if vae_name_or_path else "vae",
            revision=None if vae_name_or_path else revision,
            torch_dtype=torch.float16,
        ),
        feature_extractor=feature_extractor_name_or_path,
        torch_dtype=torch.float16
    ).to("cuda")

    #https://github.com/huggingface/diffusers/issues/1552
    #pipe.enable_attention_slicing()
    pipe.enable_xformers_memory_efficient_attention()
    return pipe

# Monkey patch LoRA pt files
# Returns pipeline
def get_lora_pipeline(model_dir, scale_unet=1.0, scale_text_encoder=1.0):
    """
        Makes grid of inference images.
    """

    pipe = get_pipeline(MODEL_NAME_OR_PATH, vae_name_or_path=VAE_NAME_OR_PATH)

    print('Monkey patching unet pt file')
    monkeypatch_lora(pipe.unet, torch.load(os.path.join(model_dir, "lora_unet.pt")))

    print('Monkey patching text encoder pt file')
    monkeypatch_lora(pipe.text_encoder, torch.load(os.path.join(model_dir, "lora_text_encoder.pt")), target_replace_module=["CLIPAttention"])

    tune_lora_scale(pipe.unet, scale_unet)
    tune_lora_scale(pipe.text_encoder, scale_text_encoder)

    return pipe

def get_config(filename=None,
               save_dir=None,
               prompt=None, negative_prompt=None,
               seeds=None,
               num_samples=4,
               width=512, height=512,
               inference_steps=20,
               guidance_scale=7.5,
               ):
    """
        Creates needed configuration for loading inference grid

    """

    if filename==None:
        num_prompts = len(prompt)
        if seeds==None:
            seeds = []

            for i in range(num_samples):
                seeds.append(i * 1000000)
        else:
            num_samples = len(seeds)

        tag = ''.join(random.choice(string.ascii_letters) for _ in range(8))
        config = {
            "tag": tag,
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "num_prompts": num_prompts,
            "num_samples": num_samples,
            "seeds": seeds,
            "height": height,
            "width": width,
            "inference_steps": inference_steps,
            "guidance_scale": guidance_scale,
        }

        with open(os.path.join(save_dir, "config_"+tag+".json"), "w") as outfile:
            json.dump(config, outfile)
    else:
        f = open(filename)
        config = json.load(f)

    return config

def get_images(pipe, sample_config, device="cuda"):
    """
        Creates inference images

    """
    generator = torch.Generator("cuda")
    with torch.autocast(device):
        num_cfg = len(sample_config['guidance_scale'])
        # Loop in order to use defined seed for each image in a batch
        all_images = []
        for i in range(sample_config['num_samples']):
            for cfg in sample_config['guidance_scale']:
                # Manually generate latent
                seed = sample_config['seeds'][i]
                generator = generator.manual_seed(seed)
                latent = torch.randn(
                    (1, pipe.unet.in_channels, sample_config['height'] // 8, sample_config['width'] // 8),
                    generator = generator,
                    device = device
                )
                images = pipe(sample_config['prompt'],
                    negative_prompt=sample_config['negative_prompt'] * len(sample_config['prompt']),
                    num_inference_steps=int(sample_config['inference_steps']),
                    guidance_scale=cfg,
                    latents=latent.repeat(sample_config['num_prompts'], 1, 1, 1),
                ).images
                all_images.extend(images)

    return all_images

def make_reversed_order(images, rows, cols):
    """
        Changes order of images for grid image

    """
    images = []
    for i in range(rows):
      for j in range(cols):
        images.append(all_images[rows*j+i])
    return images


In [4]:
MODEL_NAME_OR_PATH = "runwayml/stable-diffusion-v1-5"
OUTPUT_DIR = #Where trained LoRA models are located

In [24]:
#@title Specify which models to do inference with
model_list = [
              os.path.join(OUTPUT_DIR,'500'),
              os.path.join(OUTPUT_DIR,'1000'),
              os.path.join(OUTPUT_DIR,'1500'),
              os.path.join(OUTPUT_DIR,'2000'),
              os.path.join(OUTPUT_DIR,'2500'),
              ]

print(model_list)

['/content/gdrive/MyDrive/test/5e4/500', '/content/gdrive/MyDrive/test/5e4/1000', '/content/gdrive/MyDrive/test/5e4/1500', '/content/gdrive/MyDrive/test/5e4/2000', '/content/gdrive/MyDrive/test/5e4/2500']


In [25]:
#@title Generate or load a configuration for inference

config_name = None
#config_name = os.path.join(OUTPUT_DIR, "config_ZMasiqkP.json")

if config_name is None:

    prompt = ["sks penguin", "close-up sks penguin", "sks penguin in front of eiffel tower", "sks penguin riding a bicycle", "sks penguin wearing sunglasses and holding a phone"]
    negative_prompt = [""]
    guidance_scale = [7.5]
    seeds = [2000000]
    # seeds = [10000, 20000, 30000, 40000, 50000]

    config = get_config(save_dir=OUTPUT_DIR,
                        prompt=prompt, negative_prompt=negative_prompt,
                        seeds = seeds,
                        width=512, height=512,
                        inference_steps=50, guidance_scale=guidance_scale
                        )
else:
    config = get_config(filename=config_name)


print(config)

{'tag': 'ydQYRCjP', 'prompt': ['sks penguin', 'close-up sks penguin', 'sks penguin in front of eiffel tower', 'sks penguin riding a bicycle', 'sks penguin wearing sunglasses and holding a phone'], 'negative_prompt': [''], 'num_prompts': 5, 'num_samples': 1, 'seeds': [2000000], 'height': 512, 'width': 512, 'inference_steps': 50, 'guidance_scale': [7.5]}


In [27]:
#@title Make inference

LORA_SCALE_UNET = 1.0
LORA_SCALE_TENC = 1.0

all_images = []

for model in model_list:
    pipe = get_lora_pipeline(model, scale_unet=LORA_SCALE_UNET, scale_text_encoder=LORA_SCALE_TENC)
    images = get_images(pipe, config)
    display(images)
    all_images.extend(images)

    del pipe
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Monkey patching unet pt file
Monkey patching text encoder pt file


  0%|          | 0/50 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Monkey patching unet pt file
Monkey patching text encoder pt file


  0%|          | 0/50 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Monkey patching unet pt file
Monkey patching text encoder pt file


  0%|          | 0/50 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Monkey patching unet pt file
Monkey patching text encoder pt file


  0%|          | 0/50 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Monkey patching unet pt file
Monkey patching text encoder pt file


  0%|          | 0/50 [00:00<?, ?it/s]

[<PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>,
 <PIL.Image.Image image mode=RGB size=512x512>]

In [29]:
images = make_reversed_order(all_images, 5, 5)
grid = image_grid(images, rows=5, cols=5)
grid.save(os.path.join(OUTPUT_DIR, "grid_5e4.jpg"), quality=90, optimize=True)

In [None]:
#@title # Close Colab instance
from google.colab import runtime
runtime.unassign()