In [28]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import sys

import random

import json

import numpy as np
import pandas as pd

from PIL import Image

from datetime import datetime

import torch
from torchvision.transforms.functional import to_pil_image
from transformers import CLIPImageProcessor, AutoModel

from tqdm.auto import tqdm

In [3]:
# Make the kcg-ml-image-pipeline checkout (expected one directory up, relative to
# the current working directory) importable for the project imports that follow.
_pipeline_root = os.path.abspath('../kcg-ml-image-pipeline/')
sys.path.append(_pipeline_root)

In [4]:
from stable_diffusion import StableDiffusion, CLIPTextEmbedder
from stable_diffusion.utils_image import get_image_data
from worker.image_generation.scripts.stable_diffusion_base_script import StableDiffusionBaseScript

[1;32mINFO: Created a temporary directory at /tmp/tmpownu7pg5[0m
[1;32mINFO: Writing /tmp/tmpownu7pg5/_remote_module_non_scriptable.py[0m


In [22]:
# Where every generated artifact (image / meta / embedding / clip) is written.
OUTPUT_DIR = './generated/temperature/8/'

# CSV of prompts to render. It is read later with pd.read_csv and must provide
# the columns 'positive_prompt' and 'negative_prompt'.
# Alternative (tab-separated) input that was used previously:
#     prompt_path = './generated/prompt/empty.tsv'
prompt_path = './generated/temperature/prompt/2023-11-30-independent-approx-v1-08-environmental.csv'

# Model assets inside the kcg-ml-image-pipeline checkout: the diffusion
# checkpoint and the tokenizer / transformer used by the CLIP text embedder.
model_path = '../kcg-ml-image-pipeline/input/model/sd/v1-5-pruned-emaonly/v1-5-pruned-emaonly.safetensors'
tokenizer_path = '../kcg-ml-image-pipeline/input/model/clip/txt_emb_tokenizer'
transformer_path = '../kcg-ml-image-pipeline/input/model/clip/txt_emb_model'

In [23]:
# Sampler used to build the generation script, and its number of steps.
sampler = "ddim"
sampler_steps = 20

# Guidance strength and output image size; these values are also written to
# every image's metadata file.
cfg_strength = 12
image_width = image_height = 512

In [24]:
# One sub-directory per artifact type. exist_ok=True keeps the cell re-runnable
# when the directories already exist from an earlier (partial) run.
for subdir in ('image', 'meta', 'clip', 'embedding'):
    os.makedirs(os.path.join(OUTPUT_DIR, subdir), exist_ok=True)

# Load the diffusion model and the CLIP text embedder

In [8]:
# Generation script configured with the sampler settings chosen above.
# 'cuda' resolves to the single GPU exposed through CUDA_VISIBLE_DEVICES.
sampler_settings = dict(sampler_name=sampler, n_steps=sampler_steps)
txt2img = StableDiffusionBaseScript(**sampler_settings, force_cpu=False, cuda_device='cuda')

In [None]:
# No pre-built sub-models are supplied (all three are None); the script is given
# the checkpoint path and asked to initialise them itself.
# NOTE(review): exact semantics of force_submodels_init live in
# StableDiffusionBaseScript - confirm there.
txt2img.initialize_latent_diffusion(
    path=model_path,
    force_submodels_init=True,
    autoencoder=None,
    clip_text_embedder=None,
    unet_model=None,
)

  encoder initialization[32m...[DONE][0m[34m	1,611.43ms[0m                                      
  decoder initialization[32m...[DONE][0m[34m	530.85ms[0m                                        
Autoencoder initialization[32m...[DONE][0m[34m	2,150.81ms[0m                                    


In [None]:
# Text embedder that turns prompts into the embeddings consumed by `worker`.
embedder_device = 'cuda'
clip_text_embedder = CLIPTextEmbedder(device=embedder_device)

In [None]:
# Load the tokenizer and transformer weights from the configured paths.
# The trailing ';' suppresses the cell's output repr.
clip_text_embedder.load_submodels(tokenizer_path=tokenizer_path, transformer_path=transformer_path);

In [None]:
def worker(positive_prompt, negative_prompt, seed=-1, cfg_strength=12, image_width=512, image_height=512):
    """Generate one image for a positive/negative prompt pair.

    Both prompts are embedded with the module-level ``clip_text_embedder``; the
    embeddings are handed to the module-level ``txt2img`` script to sample a
    latent, which is then decoded and serialised with ``get_image_data``.

    Args:
        positive_prompt: Prompt text used as the conditioning prompt.
        negative_prompt: Prompt text used as the null/unconditional prompt.
        seed: Seed forwarded to the sampler. ``-1`` means "draw a random seed
            in ``[0, 2**24 - 1]``".
        cfg_strength: Forwarded to the sampler as ``uncond_scale``.
        image_width: Forwarded to the sampler as ``w``.
        image_height: Forwarded to the sampler as ``h``.

    Returns:
        A 5-tuple ``(output_file_hash, img_byte_arr, seed,
        positive_pooler_output, negative_pooler_output)``. The first two come
        from ``get_image_data``, ``seed`` is the seed actually used, and the
        pooler outputs are NumPy arrays copied to the CPU.
    """
    # Resolve the sentinel up front so the seed that was really used is returned.
    seed = random.randint(0, 2 ** 24 - 1) if seed == -1 else seed

    with torch.no_grad():
        # forward_return_all yields three values; the third is not needed here.
        cond_embedding, cond_pooled, _ = clip_text_embedder.forward_return_all(positive_prompt)
        uncond_embedding, uncond_pooled, _ = clip_text_embedder.forward_return_all(negative_prompt)

        # Keep only CPU/NumPy copies of the pooled outputs for saving to disk.
        positive_pooler_output = cond_pooled.detach().cpu().numpy()
        negative_pooler_output = uncond_pooled.detach().cpu().numpy()

        sampled_latent = txt2img.generate_images_latent_from_embeddings(
            batch_size=1,
            embedded_prompt=cond_embedding,
            null_prompt=uncond_embedding,
            uncond_scale=cfg_strength,
            seed=seed,
            w=image_width,
            h=image_height
        )
        decoded_images = txt2img.get_image_from_latent(sampled_latent)

        output_file_hash, img_byte_arr = get_image_data(decoded_images)

    return output_file_hash, img_byte_arr, seed, positive_pooler_output, negative_pooler_output

# Load the prompt dataset

In [25]:
# Prompt table; the generation loop reads its 'positive_prompt' and
# 'negative_prompt' columns. For a tab-separated prompt file use sep='\t'.
prompts = pd.read_csv(prompt_path, sep=',')

# Generate images (resumable: prompt pairs whose outputs already exist are skipped)

In [26]:
def _artifact_paths(image_name):
    """Return the paths of every artifact that belongs to one image file name."""
    return {
        'image': os.path.join(OUTPUT_DIR, 'image', image_name),
        'meta': os.path.join(OUTPUT_DIR, 'meta', image_name.replace('.jpg', '.json')),
        'embedding': os.path.join(OUTPUT_DIR, 'embedding', image_name.replace('.jpg', '.npz')),
        'clip': os.path.join(OUTPUT_DIR, 'clip', image_name.replace('.jpg', '.npy')),
    }


# Resume support: collect the (positive_prompt, negative_prompt) pairs whose
# image, embedding and metadata can all be opened. Entries that fail any check
# have all of their artifacts deleted so the pair is generated again.
done = set()
for fname in os.listdir(os.path.join(OUTPUT_DIR, 'image')):
    paths = _artifact_paths(fname)
    try:
        # Open and immediately close each artifact; only readability is checked.
        with Image.open(paths['image']):
            pass

        embedding_file = np.load(paths['embedding'])
        if hasattr(embedding_file, 'close'):
            embedding_file.close()

        with open(paths['meta']) as meta_file:
            js = json.load(meta_file)

        done.add((js['positive_prompt'], js['negative_prompt']))
    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        print(f'Discarding incomplete output {fname}: {error!r}')
        for path in paths.values():
            # isfile() rather than exists(): never attempt to remove a directory.
            if not os.path.isfile(path):
                continue
            try:
                # Direct removal instead of shelling out to `rm`: portable and
                # immune to spaces or shell metacharacters in the path.
                os.remove(path)
            except OSError as remove_error:
                # Cleanup stays best-effort, as it was with `rm`.
                print(f'Could not remove {path}: {remove_error!r}')
        continue

In [None]:
# Render every prompt pair that does not have a complete set of outputs yet and
# write the image, its metadata and the pooled text embeddings to OUTPUT_DIR.
for positive_prompt, negative_prompt in tqdm(prompts[['positive_prompt', 'negative_prompt']].itertuples(index=False), total=prompts.shape[0]):

    # Already generated in a previous run.
    if (positive_prompt, negative_prompt) in done:
        continue

    # Pass the notebook-level settings explicitly. Relying on worker()'s own
    # defaults would let the metadata below drift from what was really used as
    # soon as one of the configuration values is changed.
    output_file_hash, img_byte_arr, seed, positive_pooler_output, negative_pooler_output = worker(
        positive_prompt,
        negative_prompt,
        seed=-1,
        cfg_strength=cfg_strength,
        image_width=image_width,
        image_height=image_height,
    )

    creation_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    meta = dict(
        positive_prompt=positive_prompt,
        negative_prompt=negative_prompt,
        file_hash=output_file_hash,
        # The seed drawn by worker(); stored so the image can be regenerated.
        seed=seed,
        sampler=sampler,
        sampler_steps=sampler_steps,
        cfg_strength=cfg_strength,
        image_width=image_width,
        image_height=image_height,
        creation_time=creation_time
    )

    # All artifacts of one image share the file hash as their base name.
    with open(os.path.join(OUTPUT_DIR, 'image', f'{output_file_hash}.jpg'), 'wb') as f:
        f.write(img_byte_arr.getbuffer())

    with open(os.path.join(OUTPUT_DIR, 'meta', f'{output_file_hash}.json'), 'wt') as f:
        json.dump(meta, f)

    np.savez(
        os.path.join(OUTPUT_DIR, 'embedding', f'{output_file_hash}.npz'),
        positive_pooler_output=positive_pooler_output,
        negative_pooler_output=negative_pooler_output
    )

  0%|          | 0/1024 [00:00<?, ?it/s]

# Compute CLIP image features for the generated images

In [34]:
# Number of images pushed through the CLIP image encoder per forward pass.
BATCH_SIZE = 16

# Model identifier handed to `from_pretrained` for both the image processor and
# the CLIP model.
MODEL_NAME = 'openai/clip-vit-large-patch14'

In [35]:
# local_files_only=True: both objects are loaded from files already on disk;
# nothing is downloaded.
preprocessor = CLIPImageProcessor.from_pretrained(MODEL_NAME, local_files_only=True)

clip_model = AutoModel.from_pretrained(MODEL_NAME, local_files_only=True)
# Move to the GPU and switch to inference mode.
clip_model = clip_model.cuda()
clip_model = clip_model.eval()

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [82]:
# Images that do not have a saved CLIP feature file yet; only these are encoded.
file_names = [
    image_name
    for image_name in os.listdir(os.path.join(OUTPUT_DIR, 'image'))
    if not os.path.exists(os.path.join(OUTPUT_DIR, 'clip', image_name.replace('.jpg', '.npy')))
]

In [83]:
# Encode the pending images in batches of BATCH_SIZE and store one feature file
# per image under OUTPUT_DIR/clip.
image_dir = os.path.join(OUTPUT_DIR, 'image')
clip_dir = os.path.join(OUTPUT_DIR, 'clip')

for start in tqdm(range(0, len(file_names), BATCH_SIZE)):
    batch_names = file_names[start:start + BATCH_SIZE]

    with torch.no_grad():
        # Preprocess every image on its own, then stack along the batch axis.
        pixel_batches = [
            preprocessor(images=Image.open(os.path.join(image_dir, name)), return_tensors="pt")['pixel_values']
            for name in batch_names
        ]
        pixel_values = torch.concat(pixel_batches, dim=0).to(clip_model.device)

        batch_features = clip_model.get_image_features(pixel_values=pixel_values)
        batch_features = batch_features.detach().cpu().numpy()

    for name, feature in zip(batch_names, batch_features):
        # feature[None, ...] restores a leading axis of size 1 on the saved array.
        np.save(os.path.join(clip_dir, name.replace('.jpg', '.npy')), feature[None, ...])

  0%|          | 0/64 [00:00<?, ?it/s]