# Dense diffusion code

In [1]:
import os
import random
import pickle
import argparse
import pdb
import datetime
import hashlib
from collections import defaultdict

import numpy as np
from PIL import Image
from tqdm.auto import tqdm

import torch
import torch.nn.functional as F
from torchvision import transforms
import diffusers
from diffusers.pipelines.stable_diffusion import StableDiffusionPipeline
from diffusers.pipelines import DiffusionPipeline
from diffusers import DDIMScheduler, LCMScheduler
import transformers
from transformers import CLIPTextModel, CLIPTokenizer

In [2]:
command = "--model LCM --batch_size 1 -s 10 --reg_part 0.3 --idx 5 ".split()

In [3]:
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='LCM', choices=['LCM', 'SD'])
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--idx', type=int, default=[1], nargs="*",
                    help='dense diffusion dataset image mask & caption index')
parser.add_argument('-s', '--num_inference_steps', type=int, default=50)
parser.add_argument('--reg_part', type=float, default=.3)
parser.add_argument('--sreg', type=float, default=.3)
parser.add_argument('--creg', type=float, default=1)
parser.add_argument('--pow_time', type=float, default=5)
parser.add_argument('-w', '--wo_modulation', action=argparse.BooleanOptionalAction, default=False,
                    help='when True, run inference without dense diffusion attention manipulation')
parser.add_argument('--save_attn', action=argparse.BooleanOptionalAction, default=False)
parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--debug', type=str)
args = parser.parse_args(command)

In [4]:
args

Namespace(model='LCM', batch_size=1, idx=[5], num_inference_steps=10, reg_part=0.3, sreg=0.3, creg=1, pow_time=5, wo_modulation=False, save_attn=False, seed=1, debug=None)

In [5]:
## Set hyperparameters
device= "cuda"
num_inference_steps = args.num_inference_steps 
reg_part = args.reg_part if not args.wo_modulation else 0
sreg = args.sreg
creg = args.creg


## Load Model
if args.model == 'LCM':
    pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7")
    pipe.to(device=device, dtype=torch.float16)
    num_inference_steps = num_inference_steps
    lcm_origin_steps = 50
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    pipe.scheduler.set_timesteps(num_inference_steps=num_inference_steps,
                                 original_inference_steps=lcm_origin_steps,
                                 device=device)
else:
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        safety_checker=None,
        variant="fp16",
        cache_dir='./models/diffusers/'
    ).to(device)
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
    pipe.scheduler.set_timesteps(num_inference_steps)


## Set attn modulation variables
num_attn_layers = 32
timesteps = pipe.scheduler.timesteps
sp_sz = pipe.unet.sample_size
bsz = args.batch_size

mod_counts = []

print("=== Experiment Settings ===")
print("- Model:", args.model, "/ N inference steps:", num_inference_steps, "/ Batch size:", bsz)
print("- Regulation part:", reg_part, "/ Self attention regulation:", sreg, "/ Cross attention regulation:", creg, "/ Time regulation:", args.pow_time)
print("Chosen timesteps:", timesteps)


## attention modulation function
def mod_forward(self, hidden_states, encoder_hidden_states=None, attention_mask=None, temb=None):
    global COUNT, treg, sret, creg, sreg_maps, creg_maps, reg_sizes, text_cond, step_store, attn_stores
    STEP = COUNT // 32
    if COUNT % 32 == 0 and STEP > 0:
        attn_stores.append(step_store)
        step_store = {"down_cross": [], "mid_cross": [], "up_cross": [],
                      "down_self": [],  "mid_self": [],  "up_self": []}

    residual = hidden_states 

    if self.spatial_norm is not None:
        hidden_states = self.spatial_norm(hidden_states, temb)

    input_ndim = hidden_states.ndim

    if input_ndim == 4:
        batch_size, channel, height, width = hidden_states.shape
        hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)

    batch_size, sequence_length, _ = (hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape)
    attention_mask = self.prepare_attention_mask(attention_mask, sequence_length, batch_size)

    if self.group_norm is not None:
        hidden_states = self.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

    query = self.to_q(hidden_states)

    sa_ = True if encoder_hidden_states is None else False
    encoder_hidden_states = text_cond if encoder_hidden_states is not None else hidden_states
    if self.norm_cross:
        encoder_hidden_states = self.norm_encoder_hidden_states(encoder_hidden_states)

    key = self.to_k(encoder_hidden_states)
    value = self.to_v(encoder_hidden_states)

    query = self.head_to_batch_dim(query)
    key = self.head_to_batch_dim(key)
    value = self.head_to_batch_dim(value)

    if sa_ == False and args.model == 'LCM':
        key =  key[key.size(0)//2:,  ...]
        value = value[value.size(0)//2:,  ...]

    # modulate attention with dense diffusion
    if (COUNT/num_attn_layers < num_inference_steps*reg_part):
        mod_counts.append(COUNT)
        dtype = query.dtype
        if self.upcast_attention:
            query = query.float()
            key = key.float()

        sim = torch.baddbmm(torch.empty(query.shape[0], query.shape[1], key.shape[1], 
                                        dtype=query.dtype, device=query.device),
                            query, key.transpose(-1, -2), beta=0, alpha=self.scale)
        treg = torch.pow(timesteps[COUNT//num_attn_layers]/1000, args.pow_time)
        reg_map = sreg_maps if sa_ else creg_maps
        w_reg = sreg if sa_ else creg

        # manipulate attention
        batch_idx = int(sim.size(0)/2) if args.model != 'LCM' else 0 # why do we have to apply below operations for latter half of sim???
        min_value = sim[batch_idx:].min(-1)[0].unsqueeze(-1)
        max_value = sim[batch_idx:].max(-1)[0].unsqueeze(-1)  
        mask = reg_map[sim.size(1)].repeat(self.heads,1,1)
        size_reg = reg_sizes[sim.size(1)].repeat(self.heads,1,1)

        sim[batch_idx:] += (mask>0)*size_reg*w_reg*treg*(max_value-sim[batch_idx:])
        sim[batch_idx:] -= ~(mask>0)*size_reg*w_reg*treg*(sim[batch_idx:]-min_value)

        attention_probs = sim.softmax(dim=-1)
        attention_probs = attention_probs.to(dtype)
    else: # get original attention
        attention_probs = self.get_attention_scores(query, key, attention_mask)

    COUNT += 1
    if args.save_attn and (attention_probs.shape[1] <= 32 ** 2): # save attention in each place(up, down, mid) when attention shape is small
        step_store[f"{self.place_in_unet.lower()}_{'self' if sa_ else 'cross'}"].append(attention_probs)

    #################################################        
    hidden_states = torch.bmm(attention_probs, value)
    hidden_states = self.batch_to_head_dim(hidden_states)

    # linear proj
    hidden_states = self.to_out[0](hidden_states)
    # dropout
    hidden_states = self.to_out[1](hidden_states)

    if input_ndim == 4:
        hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)

    if self.residual_connection:
        hidden_states = hidden_states + residual

    hidden_states = hidden_states / self.rescale_output_factor

    return hidden_states


## change call function of attn layers in Unet 
for _module in pipe.unet.modules():
    n = _module.__class__.__name__
    if 'CrossAttn' in n:
        for place in ['Up', 'Down', 'Mid']:
            if place in n:
                curr_place = place

    if n == "Attention":
        _module.__class__.__call__ = mod_forward
        _module.place_in_unet = curr_place


## Load naver-ai/DenseDiffusion dataset
with open('./dataset/valset.pkl', 'rb') as f:
    dataset = pickle.load(f)
layout_img_root = './dataset/valset_layout/'


## Main function which generates modulated image
def generate_index_img(idx):
    global COUNT, treg, sret, creg, sreg_maps, creg_maps, reg_sizes, text_cond, step_store, attn_stores

    layout_img_path = layout_img_root+str(idx)+'.png'
    prompts = [dataset[idx]['textual_condition']] + dataset[idx]['segment_descriptions']
    prompts_idx[idx] = prompts[0]
    ## prepare text condition embeddings
    ############
    text_input = pipe.tokenizer(prompts, padding="max_length", return_length=True, return_overflowing_tokens=False, 
                                max_length=pipe.tokenizer.model_max_length, truncation=True, return_tensors="pt")
    cond_embeddings = pipe.text_encoder(text_input.input_ids.to(device))[0]

    uncond_input = pipe.tokenizer([""]*bsz, padding="max_length", max_length=pipe.tokenizer.model_max_length,
                                  truncation=True, return_tensors="pt")
    uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]

    for i in range(1,len(prompts)):
        wlen = text_input['length'][i] - 2
        widx = text_input['input_ids'][i][1:1+wlen]
        for j in range(77):
            if (text_input['input_ids'][0][j:j+wlen] == widx).sum() == wlen:
                break

    ## set layout image masks
    ############
    layout_img_ = np.asarray(Image.open(layout_img_path).resize([sp_sz*8,sp_sz*8]))[:,:,:3]
    unique, counts = np.unique(np.reshape(layout_img_,(-1,3)), axis=0, return_counts=True)
    sorted_idx = np.argsort(-counts)

    layouts_ = []

    for i in range(len(prompts)-1):
        if (unique[sorted_idx[i]] == [0, 0, 0]).all() or (unique[sorted_idx[i]] == [255, 255, 255]).all():
            layouts_ = [((layout_img_ == unique[sorted_idx[i]]).sum(-1)==3).astype(np.uint8)] + layouts_
        else:
            layouts_.append(((layout_img_ == unique[sorted_idx[i]]).sum(-1)==3).astype(np.uint8))

    layouts = [torch.FloatTensor(l).unsqueeze(0).unsqueeze(0).cuda() for l in layouts_]
    layouts = F.interpolate(torch.cat(layouts),(sp_sz,sp_sz),mode='nearest')

    ############
    print('\n'.join(prompts))
    Image.fromarray(np.concatenate([255*_.squeeze().cpu().numpy() for _ in layouts], 1).astype(np.uint8))

    ###########################
    ###### prep for sreg ###### 
    ###########################
    sreg_maps = {}
    reg_sizes = {}
    for r in range(4):
        res = int(sp_sz/np.power(2,r))
        layouts_s = F.interpolate(layouts,(res, res),mode='nearest')
        layouts_s = (layouts_s.view(layouts_s.size(0),1,-1)*layouts_s.view(layouts_s.size(0),-1,1)).sum(0).unsqueeze(0).repeat(bsz,1,1)
        reg_sizes[np.power(res, 2)] = 1-1.*layouts_s.sum(-1, keepdim=True)/(np.power(res, 2))
        sreg_maps[np.power(res, 2)] = layouts_s


    ###########################
    ###### prep for creg ######
    ###########################
    pww_maps = torch.zeros(1, 77, sp_sz, sp_sz).to(device)
    for i in range(1,len(prompts)):
        wlen = text_input['length'][i] - 2
        widx = text_input['input_ids'][i][1:1+wlen]
        for j in range(77):
            if (text_input['input_ids'][0][j:j+wlen] == widx).sum() == wlen:
                pww_maps[:,j:j+wlen,:,:] = layouts[i-1:i]
                cond_embeddings[0][j:j+wlen] = cond_embeddings[i][1:1+wlen]
                print(prompts[i], i, '-th segment is handled.')
                break

    creg_maps = {}
    for r in range(4):
        res = int(sp_sz/np.power(2,r))
        layout_c = F.interpolate(pww_maps,(res,res),mode='nearest').view(1,77,-1).permute(0,2,1).repeat(bsz,1,1)
        creg_maps[np.power(res, 2)] = layout_c


    ###########################    
    #### prep for text_emb ####
    ###########################
    text_cond = torch.cat([uncond_embeddings, cond_embeddings[:1].repeat(bsz,1,1)])

    ## generate images
    COUNT = 0
    attn_stores = []
    step_store = {"down_cross": [], "mid_cross": [], "up_cross": [],
                  "down_self": [],  "mid_self": [],  "up_self": []}

    with torch.no_grad():
        latents = torch.randn(bsz,4,sp_sz,sp_sz, generator=torch.Generator().manual_seed(args.seed)).to(device) 
        if args.model == 'LCM':
            with torch.autocast('cuda'):
                image = pipe(prompts[:1]*bsz, latents=latents,
                             num_inference_steps=num_inference_steps,
                             lcm_origin_steps=lcm_origin_steps,
                             guidance_scale=8.0).images
        else:
            image = pipe(prompts[:1]*bsz, latents=latents).images

    imgs = [ Image.fromarray(np.asarray(image[i])) for i in range(len(image)) ]
    if imgs[0].size[0] > 512:
        imgs = [ x.resize((512,512)) for x in imgs ]
    
    
    imgs_idx[idx].append(imgs)
    attentions_idx[idx].append(attn_stores)
    if args.debug:
        return 
        
    img = Image.fromarray(np.concatenate([layout_img_.astype(np.uint8)]+[np.asarray(image[i]) for i in range(len(image))], 1))

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Some weights of StableDiffusionSafetyChecker were not initialized from the model checkpoint at /home01/x2519a05/.cache/huggingface/hub/models--SimianLuo--LCM_Dreamshaper_v7/snapshots/4721097975058205c4edcdece2cc574b7dd7bc04/safety_checker and are newly initialized: ['vision_model.vision_model.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


=== Experiment Settings ===
- Model: LCM / N inference steps: 10 / Batch size: 1
- Regulation part: 0.3 / Self attention regulation: 0.3 / Cross attention regulation: 1 / Time regulation: 5
Chosen timesteps: tensor([999, 899, 799, 699, 599, 499, 399, 299, 199,  99], device='cuda:0')


In [None]:
imgs_idx = dict()
attentions_idx = defaultdict(list)
prompts_idx = defaultdict(list)
for i in args.idx:
    print(f"=== Generate image for index {i} ===")
    generate_index_img(i)

In [18]:
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
import cv2
from typing import Optional, Union, Tuple, List, Callable, Dict
from IPython.display import display
from tqdm.notebook import tqdm
import diffusers
import matplotlib.pyplot as plt
import inspect

def text_under_image(image: np.ndarray, text: str, text_color: Tuple[int, int, int] = (0, 0, 0)):
    h, w, c = image.shape
    offset = int(h * .2)
    img = np.ones((h + offset, w, c), dtype=np.uint8) * 255
    font = cv2.FONT_HERSHEY_SIMPLEX
    img[:h] = image
    textsize = cv2.getTextSize(text, font, 1, 2)[0]
    text_x, text_y = (w - textsize[0]) // 2, h + offset - textsize[1] // 2
    cv2.putText(img, text, (text_x, text_y ), font, 1, text_color, 2)
    return img


def view_images(images, num_rows=1, offset_ratio=0.02):
    if type(images) is list:
        num_empty = len(images) % num_rows
    elif images.ndim == 4:
        num_empty = images.shape[0] % num_rows
    else:
        images = [images]
        num_empty = 0

    empty_images = np.ones(images[0].shape, dtype=np.uint8) * 255
    images = [image.astype(np.uint8) for image in images] + [empty_images] * num_empty
    num_items = len(images)

    h, w, c = images[0].shape
    offset = int(h * offset_ratio)
    num_cols = num_items // num_rows
    image_ = np.ones((h * num_rows + offset * (num_rows - 1),
                      w * num_cols + offset * (num_cols - 1), 3), dtype=np.uint8) * 255
    for i in range(num_rows):
        for j in range(num_cols):
            image_[i * (h + offset): i * (h + offset) + h:, j * (w + offset): j * (w + offset) + w] = images[
                i * num_cols + j]

    pil_img = Image.fromarray(image_)
    # display(pil_img)
    return pil_img

In [70]:
def aggregate_attention(attention_store, prompts, res:int, from_where: List[str], is_cross: bool, select: int):
    out = []
    num_pixels = res ** 2
    for location in from_where:
        for item in attention_store[f"{location}_{'cross' if is_cross else 'self'}"]:
            if item.shape[1] == num_pixels:
                cross_maps = item.reshape(1, -1, res, res, item.shape[-1])[0]
                out.append(cross_maps) 
    out = torch.cat(out, dim=0)
    out = out.sum(0) / out.shape[0]
    
    return out.cpu()

In [92]:
def is_common_words(text):
    cws = ['a', 'an', 'the', 'in', 'for', 'of', '.', ',',
          '<|startoftext|>', '<|endoftext|>']
    return (text in cws)

def show_cross_attention(attention_store, prompts, res:int, from_where: List[str], select: int = 0, num_rows=1):
    tokens = pipe.tokenizer.encode(prompts)
    decoder = pipe.tokenizer.decode
    attention_maps = aggregate_attention(attention_store, prompts, res, from_where, True, select)
    images = []
    for i in range(len(tokens)):
        text = decoder(int(tokens[i]))
        if is_common_words(text):
            continue
        image = attention_maps[:, :, i]
        image = 255 * image / image.max()
        image = image.unsqueeze(-1).expand(*image.shape, 3)
        image = image.numpy().astype(np.uint8)
        image = np.array(Image.fromarray(image).resize((256, 256)))
        image = text_under_image(image, decoder(int(tokens[i])))
        images.append(image)
        
    return view_images(np.stack(images, axis=0), num_rows)

In [93]:
def show_self_attention_comp(attention_store, prompts, res:int, from_where: List[str], select: int = 0, num_rows=1, max_com=10):
    attention_maps = aggregate_attention(attention_store, prompts, res, from_where, False, select)
    attention_maps = attention_maps.numpy().reshape((res ** 2, res ** 2)).astype(float)
    u, s, vh = np.linalg.svd(attention_maps - np.mean(attention_maps, axis=1, keepdims=True))
    images = []
    for i in range(max_com):
        image = vh[i].reshape(res, res)
        image = image - image.min()
        image = 255 * image / image.max()
        image = np.repeat(np.expand_dims(image, axis=2), 3, axis=2).astype(np.uint8)
        image = Image.fromarray(image).resize((256, 256))
        image = np.array(image)
        images.append(image)

    return view_images(np.concatenate(images, axis=1), 1)

In [94]:
def get_attention_timesteps(attention_store_timestep, prompts, res, from_where, select, num_rows):
    cross_attns = []
    self_attns = []
    for attention_store in attention_store_timestep:
        cross_attns.append(show_cross_attention(attention_store, prompts, res, from_where, select, num_rows))
        self_attns.append(show_self_attention_comp(attention_store, prompts, res, from_where, select, num_rows))     
    
    return cross_attns, self_attns

# Evaluate metrics

- https://huggingface.co/docs/diffusers/conceptual/evaluation

To calculate CLIP scores, we need to make prompts.  
Dense diffusion validtaion dataset have 20 image & prompts.  
Is it enought amount to report performance?  

In [6]:
imgs_idx = defaultdict(list)
attentions_idx = defaultdict(list)
prompts_idx = dict()
bsz = 2
args.idx = range(20)
seeds = range(1,11)
for seed in seeds:
    args.seed = seed
    print(f"=== Seed: {seed} ===")
    for i in args.idx:
        print(f"=== Generate image for index {i} ===")
        generate_index_img(i)

=== Seed: 1 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 2 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 3 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 4 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 5 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


=== Seed: 6 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 7 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 8 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 9 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Seed: 10 ===
=== Generate image for index 0 ===
There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.
the sky is so wide and blue
a huge tree
a thick branch
a cute monkey
a pink rose
the sky is so wide and blue 1 -th segment is handled.
a huge tree 2 -th segment is handled.
a thick branch 3 -th segment is handled.
a cute monkey 4 -th segment is handled.
a pink rose 5 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 1 ===
A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.
the night sky
a big city
a dog riding a flying bicycle
a yellowish full moon
the night sky 1 -th segment is handled.
a big city 2 -th segment is handled.
a dog riding a flying bicycle 3 -th segment is handled.
a yellowish full moon 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 2 ===
a blue koala is reading a book next to a pile of colorful books in the jungle.
in the jungle
a pile of colorful books
a blue koala
in the jungle 1 -th segment is handled.
a pile of colorful books 2 -th segment is handled.
a blue koala 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 3 ===
there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.
on an exotic planet
a robot made of gold
a robot made of silver
a red butterfly
on an exotic planet 1 -th segment is handled.
a robot made of gold 2 -th segment is handled.
a robot made of silver 3 -th segment is handled.
a red butterfly 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 4 ===
a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.
the rustic wooden house
a rabbit drinking a cup of coffee
a fireplace
the rustic wooden house 1 -th segment is handled.
a rabbit drinking a cup of coffee 2 -th segment is handled.
a fireplace 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 5 ===
A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.
a street on a rainy night
the man is wearing a blue suit
a yellow umbrella
the woman is wearing a white dress
a street on a rainy night 1 -th segment is handled.
the man is wearing a blue suit 2 -th segment is handled.
a yellow umbrella 3 -th segment is handled.
the woman is wearing a white dress 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 6 ===
A picture of a squirrel holding a sign with an apple painting at the desert.
at the desert
a squirrel
a sign with an apple painting
at the desert 1 -th segment is handled.
a squirrel 2 -th segment is handled.
a sign with an apple painting 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 7 ===
A lion is reading a book at the beach.
the beach
a lion
a book
the beach 1 -th segment is handled.
a lion 2 -th segment is handled.
a book 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 8 ===
a black elephant near a lake.

a black elephant
 1 -th segment is handled.
a black elephant 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 9 ===
a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.
on a snowy day
a mouse
a black punching bag
boxing gloves
on a snowy day 1 -th segment is handled.
a mouse 2 -th segment is handled.
a black punching bag 3 -th segment is handled.
boxing gloves 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 10 ===
a painting of a red car in front of a snowy mountain.

a snowy mountain
a red car
 1 -th segment is handled.
a snowy mountain 2 -th segment is handled.
a red car 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 11 ===
a mirror, a white sink and a vase with red flowers in a bathroom with an artificial light.

a mirror
a white sink
a vase with red flowers
 1 -th segment is handled.
a mirror 2 -th segment is handled.
a white sink 3 -th segment is handled.
a vase with red flowers 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 12 ===
A grizzly bear is looking at a huge avocado near a river.
near a river
a grizzly bear
a huge avocado
near a river 1 -th segment is handled.
a grizzly bear 2 -th segment is handled.
a huge avocado 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 13 ===
A picture of a German Shepherd dog and a Husky dog on a sunny day after the snow.
a sunny day after the snow
a German Shepherd dog
a Husky dog
a sunny day after the snow 1 -th segment is handled.
a German Shepherd dog 2 -th segment is handled.
a Husky dog 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 14 ===
A red Ferrari car driving on a gravel road in a forest with rainbow beams in the distance.
forest
a gravel road
rainbow beams
a red Ferrari car
forest 1 -th segment is handled.
a gravel road 2 -th segment is handled.
rainbow beams 3 -th segment is handled.
a red Ferrari car 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 15 ===
A painting of a black horse under a red full moon, in the style of The Starry Night.
in the style of The Starry Night
a black horse
a red full moon
in the style of The Starry Night 1 -th segment is handled.
a black horse 2 -th segment is handled.
a red full moon 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 16 ===
There is a blue bowl on a wooden table, and a picture on the wall in a room with sunlight.
a room with sunlight
a wooden table
a picture on the wall
a blue bowl
a room with sunlight 1 -th segment is handled.
a wooden table 2 -th segment is handled.
a picture on the wall 3 -th segment is handled.
a blue bowl 4 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 17 ===
a picture of a horse, and an astronaut, on the moon.
on the moon
an astronaut
a horse
on the moon 1 -th segment is handled.
an astronaut 2 -th segment is handled.
a horse 3 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 18 ===
a black cat with a red sweater and a blue jeans in the forest.
in the forest
a black cat with a red sweater and a blue jeans
in the forest 1 -th segment is handled.
a black cat with a red sweater and a blue jeans 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

=== Generate image for index 19 ===
A picture of a juggling bear at the beach.
the beach
a juggling bear
the beach 1 -th segment is handled.
a juggling bear 2 -th segment is handled.


  0%|          | 0/10 [00:00<?, ?it/s]

In [21]:
for idx in imgs_idx:
    save_path=f'./outputs/{idx:02}/'
    for seed in range(len(imgs_idx[idx])):
        for i, img_ in enumerate(imgs_idx[idx][seed]):
            img_name = f'{args.model}_{args.num_inference_steps}steps_idx{idx:>02}_reg-ratio{reg_part:.1f}_sreg{sreg}_creg{creg}{args.wo_modulation*"_woModulation"}_seed{seed}_{i}.png'
            img_.save(save_path+img_name)

In [28]:
len(imgs_idx[0])

10

In [29]:
prompts_idx

{0: 'There is a cute monkey on a thick branch who is holding a pink rose. It is on the top of a huge tree, and the sky is so wide and blue.',
 1: 'A painting of a dog riding a flying bicycle, over a big city with a yellowish full moon in the night sky.',
 2: 'a blue koala is reading a book next to a pile of colorful books in the jungle.',
 3: 'there are a robot made of gold and a robot made of silver are standing on an exotic planet with a red butterfly flying around them.',
 4: 'a painting of a rabbit drinking a cup of coffee next to a fireplace in the rustic wooden house.',
 5: 'A painting of a couple holding a yellow umbrella in a street on a rainy night. The woman is wearing a white dress and the man is wearing a blue suit.',
 6: 'A picture of a squirrel holding a sign with an apple painting at the desert.',
 7: 'A lion is reading a book at the beach.',
 8: 'a black elephant near a lake.',
 9: 'a mouse wearing boxing gloves is hitting a black punching bag on a snowy day.',
 10: 'a 

In [40]:
prompts = []
eval_images = []
for idx in imgs_idx:
    save_path=f'./outputs/{idx:02}/'
    for seed in range(len(imgs_idx[idx])):
        for i, img_ in enumerate(imgs_idx[idx][seed]):
            prompts.append(prompts_idx[i])
            eval_images.append(np.array(img_))

In [50]:
concat_images = np.concatenate([eval_images])

In [59]:
np.delete(concat_images.copy(),[142,156,389],0).shape

(398, 512, 512, 3)

In [62]:
del_black_imgs = concat_images.copy()

In [61]:
black_imgs = []
for i, img_ in enumerate(concat_images):
    if img_.mean()==0:
        black_imgs.append(i)

In [None]:
black_imgs.po

In [63]:
for i in black_imgs[::-1]:
    print(i)
    prompts.pop(i)

389
156
142


array([[[[125, 147, 158],
         [124, 146, 157],
         [124, 145, 156],
         ...,
         [122, 147, 161],
         [122, 148, 161],
         [121, 146, 162]],

        [[125, 146, 156],
         [125, 145, 155],
         [124, 146, 156],
         ...,
         [125, 149, 161],
         [125, 147, 160],
         [123, 146, 161]],

        [[124, 146, 157],
         [126, 146, 156],
         [125, 146, 156],
         ...,
         [124, 148, 161],
         [124, 146, 160],
         [123, 147, 161]],

        ...,

        [[148, 147, 137],
         [146, 147, 137],
         [145, 146, 136],
         ...,
         [185, 187, 180],
         [185, 187, 180],
         [187, 187, 181]],

        [[147, 147, 136],
         [146, 146, 137],
         [146, 145, 136],
         ...,
         [185, 186, 180],
         [185, 187, 181],
         [186, 187, 181]],

        [[149, 149, 139],
         [147, 147, 137],
         [145, 146, 137],
         ...,
         [186, 187, 180],
        

In [65]:
del_black_imgs = np.delete(del_black_imgs, black_imgs,0)

In [66]:
from torchmetrics.functional.multimodal import clip_score
from functools import partial

clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")

def calculate_clip_score(images, prompts):
    images_int = (images * 255).astype("uint8")
    clip_score = clip_score_fn(torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts).detach()
    return round(float(clip_score), 4)

sd_clip_score = calculate_clip_score(del_black_imgs, prompts)
print(f"CLIP score: {sd_clip_score}")

CLIP score: 17.7992
