## Test RefEdit-SD3

In [None]:
# For Editing with RefEdit-SD3
import torch
from diffusers import StableDiffusion3InstructPix2PixPipeline
from diffusers.utils import load_image
import requests
import PIL.Image
import PIL.ImageOps

# Load the RefEdit-SD3 checkpoint in half precision and move it to the GPU.
pipe = StableDiffusion3InstructPix2PixPipeline.from_pretrained(
    "bpathir1/RefEdit-SD3",
    torch_dtype=torch.float16,
).to("cuda")

# Editing instruction and the source picture (resized to 512x512 before editing).
prompt = "Add a flower bunch to the person with a red jacket"
img = load_image("RefEdit/imgs/person_with_red_jacket.jpg")
img = img.resize((512, 512))

# Run the edit and keep the first image of the returned batch.
image = pipe(
    prompt,
    image=img,
    mask_img=None,
    num_inference_steps=50,
    image_guidance_scale=1.5,
    guidance_scale=7.5,
).images[0]

# Persist the edited result next to the input image.
image.save("RefEdit/imgs/edited_image.png")

  from .autonotebook import tqdm as notebook_tqdm
  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)
Fetching 27 files: 100%|██████████| 27/27 [02:41<00:00,  5.99s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00,  8.56it/s]
Loading pipeline components...:  11%|█         | 1/9 [00:00<00:03,  2.14it/s]You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Loading pipeline components...: 100%|██████████| 9/9 [00:02<00:00,  3.59it/s]
100%|██████████| 50/50 [00:07<00:00,  6.77it/s]


## Generate edited images

In [4]:
import os
from tqdm import tqdm

# Create every output folder used by the later cells; folders that already
# exist are left untouched (exist_ok=True).
for folder in ("initial_images", "refedit_sd3", "refedit_sd3/refedit_bench"):
    os.makedirs(folder, exist_ok=True)

In [2]:
# Fetch the "bpathir1/RefEdit-Bench" dataset from the hub with `load_dataset`.
# Later cells read individual examples through rf_bench["test"][i].
from datasets import load_dataset

rf_bench = load_dataset("bpathir1/RefEdit-Bench")

Downloading readme: 100%|██████████| 450/450 [00:00<00:00, 2.20MB/s]
Downloading data: 100%|██████████| 9.14M/9.14M [00:00<00:00, 22.3MB/s]
Generating test split: 100%|██████████| 200/200 [00:00<00:00, 3834.37 examples/s]


In [5]:
# Seed the RNG on the same device the pipeline lives on. The previous
# hard-coded "cuda:1" did not match the pipeline, which was moved to "cuda",
# and breaks outright on single-GPU machines.
generator = torch.Generator(device=pipe.device).manual_seed(3345)

# Iterate over the whole test split instead of assuming exactly 200 rows.
for i in tqdm(range(len(rf_bench["test"]))):
    # Fetch the row once; each lookup decodes the example again.
    item = rf_bench["test"][i]
    img = item["input_image"]
    prompt = item["editing_instruction"]

    # Same sampling settings as the single-image demo cell; the shared
    # generator is advanced from one example to the next.
    image = pipe(
        prompt,
        image=img,
        mask_img=None,
        num_inference_steps=50,
        image_guidance_scale=1.5,
        guidance_scale=7.5,
        generator=generator,
    ).images[0]

    # File name is the dataset index; the scoring cells parse it back.
    image.save(f"refedit_sd3/refedit_bench/{i}.png")

100%|██████████| 50/50 [00:06<00:00,  7.15it/s]
100%|██████████| 50/50 [00:07<00:00,  7.09it/s]
100%|██████████| 50/50 [00:07<00:00,  7.12it/s]
100%|██████████| 50/50 [00:03<00:00, 12.76it/s]
100%|██████████| 50/50 [00:07<00:00,  6.99it/s]
100%|██████████| 50/50 [00:07<00:00,  6.87it/s]
100%|██████████| 50/50 [00:07<00:00,  7.10it/s]
100%|██████████| 50/50 [00:07<00:00,  6.45it/s]
100%|██████████| 50/50 [00:07<00:00,  6.88it/s]
100%|██████████| 50/50 [00:05<00:00,  9.80it/s]
100%|██████████| 50/50 [00:05<00:00,  9.26it/s]]
100%|██████████| 50/50 [00:05<00:00,  8.88it/s]]
100%|██████████| 50/50 [00:06<00:00,  7.87it/s]]
100%|██████████| 50/50 [00:06<00:00,  7.57it/s]]
100%|██████████| 50/50 [00:06<00:00,  7.30it/s]]
100%|██████████| 50/50 [00:07<00:00,  7.13it/s]]
100%|██████████| 50/50 [00:07<00:00,  7.01it/s]]
100%|██████████| 50/50 [00:07<00:00,  6.89it/s]]
100%|██████████| 50/50 [00:06<00:00,  7.21it/s]]
100%|██████████| 50/50 [00:07<00:00,  6.91it/s]]
100%|██████████| 50/50 [00:07<

## Save initial images

In [8]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import cv2

In [9]:
def mask_decode(encoded_mask, image):
    """Rasterize a polygon into a mask the size of `image` and dilate it.

    Args:
        encoded_mask: Polygon coordinates, passed unchanged to
            `ImageDraw.polygon`.
        image: Object whose `.size` gives the mask canvas size
            (a PIL image in this notebook).

    Returns:
        The array returned by `cv2.dilate` for the drawn mask: the polygon is
        drawn with value 1 on a 0 background and grown with a 15x15 kernel.
    """
    # Single-channel canvas, all zeros, same size as the reference image.
    canvas = Image.new('L', image.size, 0)

    # Paint the polygon (border and interior) with value 1.
    ImageDraw.Draw(canvas).polygon(encoded_mask, outline=1, fill=1)

    # Grow the region by one pass of a 15x15 structuring element.
    kernel = np.ones((15, 15), np.uint8)
    return cv2.dilate(np.array(canvas), kernel, iterations=1)

In [None]:
def save_initials(dir_pth):
    """Save every benchmark input image plus its masked version.

    For each example `i` of `rf_bench["test"]` two files are written into
    `dir_pth`: `{i}_initial.png` (the untouched input image) and
    `{i}_masked.jpg` (the input image multiplied by the dilated mask of the
    first polygon stored in the example's `mask` field).

    Args:
        dir_pth: Output directory; created if it does not exist yet.
    """
    os.makedirs(dir_pth, exist_ok=True)

    for i in range(len(rf_bench["test"])):
        item = rf_bench["test"][i]
        image = item["input_image"]
        mask = json.loads(item["mask"])[0]  # only the first polygon is used
        mask_array = mask_decode(mask, image)

        # Force three channels before masking, as prepare_data does for the
        # edited images. A grayscale input would otherwise fail to broadcast
        # against the (H, W, 1) mask, and an RGBA result cannot be written as
        # JPEG.
        rgb_pixels = np.array(image.convert("RGB"))
        masked_original = Image.fromarray(rgb_pixels * mask_array[:, :, None])

        # The original image is saved unmodified; only the masked copy is RGB-forced.
        image.save(os.path.join(dir_pth, f"{i}_initial.png"))
        masked_original.save(os.path.join(dir_pth, f"{i}_masked.jpg"))

save_initials("initial_images") # You only need to run this once to save the initial images

## Prepare the edited images

In [15]:
def prepare_data(ckpt_pth):
    """Write masked copies of the edited images for scoring.

    Reads `{ckpt_pth}/refedit_bench/{i}.png` for every index of
    `rf_bench['test']`, multiplies it by the dilated mask of the example's
    first polygon and stores the result as `{ckpt_pth}/viescore/{i}_masked.jpg`.

    Args:
        ckpt_pth: Root folder of one model's outputs.
    """
    source_dir = f"{ckpt_pth}/refedit_bench"
    target_dir = f"{ckpt_pth}/viescore"
    os.makedirs(target_dir, exist_ok=True)

    n_samples = len(rf_bench['test'])
    for i in range(n_samples):
        # Edited image as three-channel RGB.
        edited_rgb = Image.open(f"{source_dir}/{i}.png").convert("RGB")

        # First polygon of the example, rasterized at the edited image's size.
        polygon = json.loads(rf_bench['test'][i]['mask'])[0]
        keep = mask_decode(polygon, edited_rgb)[:, :, None]

        # Multiply by the mask and save the masked edit.
        Image.fromarray(np.array(edited_rgb) * keep).save(f"{target_dir}/{i}_masked.jpg")

prepare_data("refedit_sd3")

## SC Score

In [16]:
sc_score_pth = "sc_score.txt"

# The prompt contains non-ASCII punctuation (e.g. the typographic apostrophe
# in "don’t"), so decode explicitly as UTF-8 rather than relying on the
# platform's default encoding.
with open(sc_score_pth, 'r', encoding="utf-8") as file:
    sc_score = file.read()

print(sc_score)

You are a professional digital artist. You will have to evaluate the effectiveness of the AI-edited image(s) based on the given rules. You will have to give your output in this way (Keep your reasoning concise and short.):
{
"score" : [...],
"reasoning" : "..."
}

and don’t output anything else.
Two images will be provided: The first being the original image selected from COCO dataset and the second being an AI edited version of the first. The objective is to evaluate how successfully the editing instruction has been executed in the second image. Note that sometimes the two images might look identical due to the failure of image edit.
Both the original image and the edited image are masked images since the image contains multiple objects and we want you to only focus on the intended object.

From a scale 0 to 10:
A score from 0 to 10 will be given based on the success of the editing.
- 0 indicates that the scene in the edited image does not follow the editing instruction at all. 
- 10 

In [None]:
from openai import OpenAI
import base64

import os  # imported here so this cell also runs when executed on its own

# Read the key from the environment instead of pasting it into the notebook,
# where it would be saved and shared together with the file.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # export OPENAI_API_KEY before running
)

# Function to encode the image
def encode_image(image_path):
    """Return the contents of `image_path` as a base64 text string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to `str`.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def get_response(img_pth1, img_pth2, txt):
    """Send a text prompt and two images to `gpt-4o` and return the reply text.

    Args:
        img_pth1: Path of the first image (the original in this notebook).
        img_pth2: Path of the second image (the edited version).
        txt: Prompt text placed before the two images in the user message.

    Returns:
        The `content` of the first choice's message.
    """
    import mimetypes  # local import: keeps this function self-contained

    def _as_data_url(image_path):
        # Label the payload with the file's real type. The previous version
        # always claimed "image/jpeg", even for the .png files the PQ cell sends.
        mime_type, _unused_encoding = mimetypes.guess_type(image_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"  # fall back to the former hard-coded label
        return f"data:{mime_type};base64,{encode_image(image_path)}"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": txt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": _as_data_url(img_pth1)},
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": _as_data_url(img_pth2)},
                    },
                ],
            }
        ],
        temperature=0.0,  # temperature 0 for the scoring calls
    )
    return response.choices[0].message.content

In [None]:
ckpt_pth = "refedit_sd3"
initial_img_dir = "initial_images"

output = {}

# Keep only the generated "<id>.png" files (ignores stray entries such as
# checkpoint folders) and process them in numeric order so runs are repeatable.
imgs = sorted(
    (name for name in os.listdir(f"{ckpt_pth}/refedit_bench") if name.endswith(".png")),
    key=lambda name: int(name.split(".")[0]),
)

assert len(imgs) == 200, f"expected 200 edited images, found {len(imgs)}"

for img_name in tqdm(imgs):

    # Dataset index recovered from the file name. Named img_id so the builtin
    # `id` is not shadowed.
    img_id = int(img_name.split(".")[0])

    masked_initial_img_pth = f"{initial_img_dir}/{img_id}_masked.jpg"
    # masked_initial_img_pth = f"{initial_img_dir}/{img_id}_initial.png" # For the original version of SC score

    masked_edited_img_pth = f"{ckpt_pth}/viescore/{img_id}_masked.jpg"
    # masked_edited_img_pth = f"{ckpt_pth}/refedit_bench/{img_name}" # For the original version of SC score

    # We use the manually created `edit_instruction_single`: the editing
    # instruction rewritten for the masked image. Example: "Add a red hat to
    # the left person" becomes "Add a red hat to the person", since the masked
    # image only shows the left person.
    editing_instruction = rf_bench['test'][img_id]['edit_instruction_single']

    txt_input = f"{sc_score} {editing_instruction}"

    res = get_response(masked_initial_img_pth, masked_edited_img_pth, txt_input)

    try:
        output[img_id] = json.loads(res)
    except (TypeError, json.JSONDecodeError):
        # Keep the raw reply when it is missing or not valid JSON. Only parsing
        # failures are caught, so interrupts and real bugs still surface.
        output[img_id] = res

os.makedirs(f"{ckpt_pth}/viescore", exist_ok=True)
with open(f"{ckpt_pth}/viescore/sc_output.json", 'w') as f: # you might have to manually format the output since sometimes the output is not a valid JSON
    json.dump(output, f)

100%|██████████| 200/200 [08:42<00:00,  2.61s/it]


## PQ Score

In [30]:
pq_score_pth = "pq_score.txt"

# The prompt contains non-ASCII punctuation (e.g. the typographic apostrophe
# in "don’t"), so decode explicitly as UTF-8 rather than relying on the
# platform's default encoding.
with open(pq_score_pth, 'r', encoding="utf-8") as file:
    pq_score = file.read()

print(pq_score)

You are a professional digital artist. You will have to evaluate the effectiveness of the AI-edited image.
Two images will be provided: The first being the original image selected from COCO dataset and the second being an AI edited version of the first. So you may not worry about privacy or confidentiality.

You must focus solely on the technical quality and artifacts in the edited image, and **do not consider whether the context is natural or not**.

Your evaluation should focus on:
- Distortions
- Unusual body parts or proportions
- Unnatural Object Shapes

Rate the edited image on a scale from 0 to 10, where:
- 0 indicates significant AI-artifacts.
- 10 indicates an artifact-free image.

You will have to give your output in this way (Keep your reasoning concise and short.): 
{
"score": ...,
"reasoning": "..."
}

and don’t output anything else.


In [31]:
ckpt_pth = "refedit_sd3"
initial_img_dir = "initial_images"

output = {}

# Keep only the generated "<id>.png" files (ignores stray entries such as
# checkpoint folders) and process them in numeric order so runs are repeatable.
imgs = sorted(
    (name for name in os.listdir(f"{ckpt_pth}/refedit_bench") if name.endswith(".png")),
    key=lambda name: int(name.split(".")[0]),
)

assert len(imgs) == 200, f"expected 200 edited images, found {len(imgs)}"

for img_name in tqdm(imgs):

    # Dataset index recovered from the file name. Named img_id so the builtin
    # `id` is not shadowed.
    img_id = int(img_name.split(".")[0])

    # PQ compares the full (unmasked) original against the full edited image.
    initial_img_pth = f"{initial_img_dir}/{img_id}_initial.png"

    edited_img_pth = f"{ckpt_pth}/refedit_bench/{img_name}"

    txt_input = f"{pq_score}"

    res = get_response(initial_img_pth, edited_img_pth, txt_input)

    try:
        output[img_id] = json.loads(res)
    except (TypeError, json.JSONDecodeError):
        # Keep the raw reply when it is missing or not valid JSON. Only parsing
        # failures are caught, so interrupts and real bugs still surface.
        output[img_id] = res

os.makedirs(f"{ckpt_pth}/viescore", exist_ok=True)
with open(f"{ckpt_pth}/viescore/pq_output.json", 'w') as f: # you might have to manually format the output since sometimes the output is not a valid JSON
    json.dump(output, f)

100%|██████████| 200/200 [11:29<00:00,  3.45s/it]


## VIEScore

In [32]:
def return_vie_score_refedit(pth):
    """Average the saved SC and PQ scores for the easy and hard halves.

    Reads `{pth}/sc_output.json` and `{pth}/pq_output.json`; each must hold
    exactly 200 entries. Entries whose integer key is below 100 are counted as
    "easy", all others as "hard". Per entry, `sc` is the smaller of the two SC
    scores and the overall score is `sqrt(sc * pq)`.

    Args:
        pth: Folder containing the two JSON files.

    Returns:
        A tuple `(easy_vals, hard_vals)` of strings, each listing
        "sc1, sc2, sc, pq, overall" as sums divided by 100, with two decimals.
    """
    with open(f"{pth}/sc_output.json") as sc_file:
        sc_data = json.load(sc_file)

    with open(f"{pth}/pq_output.json") as pq_file:
        pq_data = json.load(pq_file)

    assert len(sc_data) == 200
    assert len(pq_data) == 200

    # Running sums per split, in the order: sc1, sc2, sc, pq, overall.
    totals = {"easy": [0, 0, 0, 0, 0], "hard": [0, 0, 0, 0, 0]}

    for idx in sc_data:
        sc1 = sc_data[idx]["score"][0]
        sc2 = sc_data[idx]["score"][1]
        pq = pq_data[idx]["score"]

        sc = min(sc1, sc2)
        o = np.sqrt(sc * pq)

        split = "easy" if int(idx) < 100 else "hard"
        bucket = totals[split]
        for slot, value in enumerate((sc1, sc2, sc, pq, o)):
            bucket[slot] += value

    def _fmt(sums):
        # Each split is divided by the fixed count of 100 entries.
        return ", ".join(f"{total/100:.2f}" for total in sums)

    return _fmt(totals["easy"]), _fmt(totals["hard"])

In [33]:
# Aggregate the saved SC/PQ outputs of the RefEdit-SD3 run; yields the (easy, hard) score strings.
return_vie_score_refedit("refedit_sd3/viescore")

('6.69, 8.21, 6.21, 5.31, 4.98', '5.41, 8.84, 5.02, 5.42, 4.00')