In [1]:
import torch
from diffusers import StableDiffusionXLPipeline
from PIL import Image

from ip_adapter import IPAdapterPlusXL
from ip_adapter.custom_pipelines import StableDiffusionXLCustomPipeline

  from .autonotebook import tqdm as notebook_tqdm
2024-08-11 23:32:16.691936: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-11 23:32:16.841012: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-11 23:32:16.871912: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-11 23:32:17.532615: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] 

In [None]:
# Local checkpoint locations. The quoted strings in the trailing comments are the
# alternative values left by the original author (presumably hub id / repo-relative paths).
pretrain_root = "/home/humw/Pretrain"
ip_adapter_root = f"{pretrain_root}/h94/IP-Adapter"

base_model_path = f"{pretrain_root}/RealVisXL_V3.0"  # "stabilityai/stable-diffusion-xl-base-1.0"
image_encoder_path = f"{ip_adapter_root}/models/image_encoder"  # "models/image_encoder"
ip_ckpt = f"{ip_adapter_root}/sdxl_models/ip-adapter-plus-face_sdxl_vit-h.bin"  # "sdxl_models/ip-adapter-plus-face_sdxl_vit-h.bin" (an experimental version)
device = "cuda:1"  # CUDA device index 1

In [None]:
from torchvision import transforms
from pathlib import Path

In [None]:
def load_data(data_dir, image_size=224, resample=2):
    """Load every image in `data_dir` as a square RGB PIL image.

    Files are visited in sorted path order. Anything whose extension is not
    `.jpg`, `.jpeg` or `.png` (compared case-insensitively) is skipped, so
    stray non-image files in the directory do not break loading.

    Args:
        data_dir: Directory to scan (non-recursive).
        image_size: Edge length of the returned square images, in pixels.
        resample: PIL resampling filter passed to `Image.resize`; 2 is bilinear.

    Returns:
        A list of RGB PIL images, each resized to `(image_size, image_size)`.
    """
    allowed_suffixes = {".jpg", ".jpeg", ".png"}
    images = []
    for path in sorted(Path(data_dir).iterdir()):
        if path.suffix.lower() not in allowed_suffixes:
            continue
        # The context manager releases the file handle; `convert` yields an
        # image that no longer depends on the open file.
        with Image.open(path) as handle:
            rgb = handle.convert("RGB")
        images.append(rgb.resize((image_size, image_size), resample))
    return images

In [None]:
def image_grid(imgs, rows, cols):
    """Tile `imgs` row by row into a single `rows` x `cols` RGB image.

    Args:
        imgs: Sequence of PIL images; its length must equal `rows * cols`.
            Every cell is sized after `imgs[0]`.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size `(cols * w, rows * h)`, where `(w, h)` is the
        size of `imgs[0]`.

    Raises:
        AssertionError: If `len(imgs)` differs from `rows * cols`.
    """
    assert len(imgs) == rows * cols, (
        f"expected {rows * cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        # Row-major placement: index i lands in row i // cols, column i % cols.
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

In [None]:
# load SDXL pipeline (float16 weights, `add_watermarker` switched off)
sdxl_load_kwargs = {"torch_dtype": torch.float16, "add_watermarker": False}
pipe = StableDiffusionXLCustomPipeline.from_pretrained(base_model_path, **sdxl_load_kwargs)

In [None]:
# load ip-adapter
# Builds the IP-Adapter model from the SDXL pipeline, the image encoder directory and the
# adapter checkpoint configured above, targeting `device`.
# NOTE(review): num_tokens=16 presumably has to match the "plus" checkpoint in `ip_ckpt` —
# confirm before swapping checkpoints.
ip_model = IPAdapterPlusXL(pipe, image_encoder_path, ip_ckpt, device, num_tokens=16)

## Generate Image conditioned on face images
- You should use a cropped face image (you can adjust the size of the cropped area to get different results)
- Since the background will have an impact on the resulting image, it is better to segment the face first

In [None]:
# case 1: image taken from the FaceOff output directory (identity n000050)
image = Image.open("/home/humw/Codes/FaceOff/output/Exp1/ipadapter/min-VGGFace2_ipadapter_out-512_mid-336_loss-n-mse_alpha6_eps16_num200_pre-test/n000050/0012_01.png")
# `resize` returns a new image and leaves `image` untouched: as the last expression of the
# cell it only renders a 224x224 preview (resample=2 is bilinear); `image` keeps its size.
image.resize((224, 224),resample=2)

In [None]:
# Load every image of identity n000050 from the FaceOff output directory as 224x224 RGB images.
# NOTE(review): the following generate() cell rebinds `images` and conditions on the single
# `image` instead, so this list is currently discarded without being used.
data_dir = "/home/humw/Codes/FaceOff/output/Exp1/ipadapter/min-VGGFace2_ipadapter_out-512_mid-336_loss-n-mse_alpha6_eps16_num200_pre-test/n000050"
images = load_data(data_dir, image_size=224)

In [None]:
# Generate one sample conditioned on the single `image` opened earlier (30 steps, seed 42).
# NOTE(review): this rebinds `images`; any list previously stored under that name is lost.
images = ip_model.generate(pil_image=image, num_samples=1, num_inference_steps=30, seed=42,
        prompt="a photo of person")

In [None]:
# Show the single generated sample as a 1x1 grid.
grid = image_grid(images, rows=1, cols=1)
grid

In [None]:
# case 1: image from the mini-VGGFace2 dataset directory (identity n000050, set_B)
image = Image.open("/home/humw/Datasets/mini-VGGFace2/n000050/set_B/0012_01.png")
# Preview only: `resize` returns a new 224x224 image for display; `image` itself is unchanged.
image.resize((224, 224))

In [None]:
# Two samples conditioned on `image` (30 steps, seed 42), shown side by side.
images = ip_model.generate(
    pil_image=image,
    prompt="a photo of person",
    num_samples=2,
    num_inference_steps=30,
    seed=42,
)
grid = image_grid(images, rows=1, cols=2)
grid

In [None]:
# case 2: face image from the repository's assets folder
image = Image.open("assets/images/ai_face2.png")
# Preview only: `resize` returns a new 224x224 image for display; `image` itself is unchanged.
image.resize((224, 224))

In [None]:
# Two samples conditioned on `image` plus a descriptive text prompt (30 steps, seed 42).
images = ip_model.generate(
    pil_image=image,
    prompt="photo of a beautiful girl wearing casual shirt in a garden",
    num_samples=2,
    num_inference_steps=30,
    seed=42,
)
grid = image_grid(images, rows=1, cols=2)
grid

In [None]:
# case 2 (n000050): condition on one image from the FaceOff output directory and save the results.
# To run the same recipe on the dataset image instead, open
# "/home/humw/Datasets/VGGFace2/n000050/set_B/0012_01.png" and save under
# '.../output/sd_xl_plus/n000050_original'.
import os

image = Image.open("/home/humw/Codes/FaceOff/output/min-VGGFace2_ipadapter_out-512_loss-n-mse_alpha6_eps16_num200_pre-test/n000050/0012_01.png")
# NOTE: `Image.resize` returns a new image. The original cell called
# `image.resize((224, 224))` and discarded the result, so that no-op is dropped here.
# Use `image = image.resize((224, 224))` if the adapter should receive a 224x224 input.

# Fix: condition on the `image` opened above. The original passed `pil_image=images`, but the
# code that built that list is commented out, so `images` was whatever an earlier cell left
# behind and the freshly opened `image` was never used.
images = ip_model.generate(pil_image=image, num_samples=2, num_inference_steps=30, seed=42,
        prompt="a photo of person")

save_prompt_dir = '/home/humw/Codes/FaceOff/target_model/IP-Adapter-main/output/sd_xl_plus/test'
save_path = os.path.join(save_prompt_dir, 'n000050')
os.makedirs(save_path, exist_ok=True)
# A dedicated loop name keeps the reference face in `image` from being overwritten.
for idx, generated in enumerate(images):
    generated.save(os.path.join(save_path, f"ipadapter_{idx:02d}.png"))

# The grid must be the last expression of the cell to be rendered.
grid = image_grid(images, 1, 2)
grid


In [None]:
# case 2 (n000057): run the same generation on the "refiner" and "no refiner" FaceOff outputs
# and save each result set in its own folder.
# To run the same recipe on the dataset image instead, add
# ("/home/humw/Datasets/VGGFace2/n000057/set_B/0012_01.png", 'n000057_original') to `face_cases`.
import os

save_prompt_dir = '/home/humw/Codes/FaceOff/target_model/IP-Adapter-main/output/sd_xl_plus'
face_cases = [
    ("/home/humw/Codes/FaceOff/output/VGGFace2_photomaker_max_out-512_refiner-blur3-min75-inter40_loss-n-mse_alpha6_eps16_num200/n000057/0012_01.png",
     'n000057_refiner'),
    ("/home/humw/Codes/FaceOff/output/VGGFace2_photomaker_max_out-512_loss-n-mse_alpha6_eps16_num200/n000057/0012_01.png",
     'n000057_no_refiner'),
]

grids = []
for face_path, case_name in face_cases:
    image = Image.open(face_path)
    # NOTE: the original called `image.resize((224, 224))` here and discarded the returned
    # image, so the statement had no effect and is dropped. Assign the result back if the
    # adapter should receive a 224x224 input.
    images = ip_model.generate(pil_image=image, num_samples=2, num_inference_steps=30, seed=42,
            prompt="a photo of person")

    save_path = os.path.join(save_prompt_dir, case_name)
    os.makedirs(save_path, exist_ok=True)
    # A dedicated loop name keeps `image` bound to the reference face; later cells read `image`.
    for idx, generated in enumerate(images):
        generated.save(os.path.join(save_path, f"ipadapter_{idx:02d}.png"))

    grid = image_grid(images, 1, 2)
    grids.append(grid)

# Only the last expression of a cell is rendered, so stack the per-case grids vertically
# (one row per case, in `face_cases` order) to show all of them.
image_grid(grids, len(grids), 1)

## Post-Conditioning
Use post-conditioning to generate more diverse images (similar to inpainting, but without drawing a mask)

In [None]:
# generate the image conditioned only on text (control_guidance_start=1.)
# `image` is not reloaded in this cell: it is whatever the most recently executed cell bound
# to that name, so reload the intended face image first if this cell is run on its own.
images = ip_model.generate(
    pil_image=image,
    prompt="photo of a beautiful girl wearing casual shirt in a garden",
    num_samples=2,
    num_inference_steps=30,
    seed=2023,
    control_guidance_start=1.,
)
grid = image_grid(images, rows=1, cols=2)
grid

In [None]:
# add face image condition (control_guidance_start=0.5), same prompt and seed as the text-only run
# `image` is not reloaded in this cell: it is whatever the most recently executed cell bound
# to that name, so reload the intended face image first if this cell is run on its own.
images = ip_model.generate(
    pil_image=image,
    prompt="photo of a beautiful girl wearing casual shirt in a garden",
    num_samples=2,
    num_inference_steps=30,
    seed=2023,
    control_guidance_start=0.5,
)
grid = image_grid(images, rows=1, cols=2)
grid