In [None]:
from ip_adapter_artist.utils.csd_clip import CSD_CLIP
from ip_adapter_artist.utils.ip_adapter import (
    load_ip_adapter,
)
import torch
from transformers import CLIPImageProcessor
from PIL import Image
from diffusers.utils import make_image_grid,load_image
from huggingface_hub import hf_hub_download
from diffusers import StableDiffusionXLPipeline

## Download Models

In [None]:
# Download the CSD-CLIP checkpoint from the Hugging Face Hub; the returned
# value is the path that torch.load reads in the "Load Model" section.
csd_clip_path = hf_hub_download(
    filename="csd_clip.pth",
    repo_id="AisingioroHao0/IP-Adapter-Artist",
)

In [None]:
# Download the IP-Adapter-Artist SDXL weights from the Hugging Face Hub; the
# returned path is handed to load_ip_adapter later in the notebook.
ip_adapter_artist_path = hf_hub_download(
    filename="ip_adapter_artist_sdxl_512.pth",
    repo_id="AisingioroHao0/IP-Adapter-Artist",
)

## Load Model

In [None]:
# The checkpoint is a whole pickled module (it is used directly via .to(),
# .requires_grad_() and .eval()), presumably a CSD_CLIP instance -- which is
# why CSD_CLIP must be importable when unpickling.
# PyTorch >= 2.6 defaults torch.load to weights_only=True, which refuses to
# unpickle arbitrary classes, so the flag is passed explicitly.
# NOTE(security): weights_only=False runs the pickle machinery and can execute
# arbitrary code; only load this file from a source you trust.
csd_clip = torch.load(csd_clip_path, weights_only=False).to("cuda")

# Inference only: freeze every parameter and switch to eval mode.
csd_clip.requires_grad_(False)
csd_clip = csd_clip.eval()

In [None]:
# Build the SDXL base pipeline from the fp16 weight variant, held in
# half precision, then move the whole pipeline onto the GPU.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = pipe.to("cuda")

In [None]:
# Default-configured CLIP image processor; it converts the style image into the
# pixel_values tensor that is fed to csd_clip in the "Process Style Image" section.
image_processor = CLIPImageProcessor()

In [None]:
# Load the downloaded IP-Adapter-Artist weights into the pipeline's UNet.
load_ip_adapter(pipe.unet, ip_adapter_artist_path)

In [None]:
# Per-block IP-Adapter scale: within "up" / "block_0" only the middle of the
# three listed entries is enabled (1.0); the other two are set to 0.0.
scale = {
    "up": {
        "block_0": [0.0, 1.0, 0.0],
    },
}
pipe.set_ip_adapter_scale(scale)

## Process Style Image

In [None]:
# Fetch the example style reference image and show it as the cell output.
image = load_image(
    "https://github.com/aihao2000/IP-Adapter-Artist/blob/main/README.assets/example.jpg?raw=true"
)
image

In [None]:
# Convert the style image into the pixel_values tensor consumed by csd_clip.
pixel_values = image_processor.preprocess(image, return_tensors="pt").pixel_values

# Inference only: no_grad() guarantees no autograd graph is recorded.
# csd_clip returns three values and only the last one is used here. The first
# two get explicit throwaway names because assigning to `_` / `__` would shadow
# IPython's output-history variables for the rest of the session.
with torch.no_grad():
    _unused_first, _unused_second, style_embeds = csd_clip(
        pixel_values.to("cuda", torch.float32)
    )

# Stack an all-zero embedding in front of the style embedding and cast to fp16
# to match the pipeline dtype.
# NOTE(review): presumably the (unconditional, conditional) pair used for
# classifier-free guidance -- confirm against the diffusers IP-Adapter docs.
ip_adapter_image_embeds = torch.stack(
    [torch.zeros_like(style_embeds), style_embeds]
).to("cuda", torch.float16)

## Infer

In [None]:
# Generate one image conditioned on the text prompt and the precomputed style
# embeddings; the generator is seeded so the run is repeatable.
result = pipe(
    prompt="A cat sitting on a table, top hat, best quality, masterpiece",
    negative_prompt="worst quality, low quality, low res, blurry, cropped image, jpeg artifacts, error, ugly, out of frame, deformed, poorly drawn",
    ip_adapter_image_embeds=[ip_adapter_image_embeds],
    guidance_scale=5.0,
    num_inference_steps=30,
    generator=torch.Generator("cuda").manual_seed(42),
).images[0]
result

In [None]:
# Second example: same style embeddings and seed, different text prompt.
# The keyword must be spelled `negative_prompt`; the previous misspelling
# (`negativ_prompt`) meant the negative prompt never reached the pipeline.
result = pipe(
    ip_adapter_image_embeds=[ip_adapter_image_embeds],
    prompt="A house covered with ice and snow.",
    negative_prompt="multi view, worst quality, low quality, low res, blurry, cropped image, jpeg artifacts, error, ugly, out of frame, deformed, poorly drawn",
    generator=torch.Generator("cuda").manual_seed(42),  # fixed seed for repeatability
    num_inference_steps=30,
    guidance_scale=5.0,
).images[0]
result