In [35]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import cv2

In [None]:
def mask_decode(encoded_mask, base_image_path):
    """Rasterise a polygon mask and return it as a dilated 512x512 array.

    The polygon is drawn on a blank canvas that has the size of the image found
    at ``base_image_path`` with every ``"_512"`` removed from the path. The
    canvas is then resized to 512x512 and dilated with a 15x15 kernel.

    Args:
        encoded_mask: Polygon coordinates, passed unchanged to
            ``ImageDraw.Draw.polygon``.
        base_image_path: Path whose ``"_512"``-stripped form points at the
            reference image (presumably the full-resolution original; confirm
            against the dataset layout). Only its size is used.

    Returns:
        A 2-D ``numpy`` array of shape (512, 512) holding 1 inside the dilated
        polygon and 0 elsewhere.
    """
    org_image_path = base_image_path.replace("_512", "")

    # Only the dimensions are needed. Image.open keeps the file handle open
    # until the image is closed, so use a context manager; this function is
    # called once per benchmark image.
    with Image.open(org_image_path) as org_image:
        org_image_size = org_image.size

    # Blank single-channel canvas at the reference image's resolution.
    new_mask = Image.new('L', org_image_size, 0)
    new_draw = ImageDraw.Draw(new_mask)

    # Draw the polygon; interior and outline are both set to 1.
    new_draw.polygon(encoded_mask, outline=1, fill=1)

    # Convert to a numpy array for the OpenCV post-processing below.
    new_mask_array = np.array(new_mask)

    # Bring the mask to the 512x512 resolution of the edited images.
    new_mask_array = cv2.resize(new_mask_array, (512, 512))

    # Grow the mask with a 15x15 kernel so a margin around the polygon is kept.
    new_mask_array = cv2.dilate(new_mask_array, np.ones((15, 15), np.uint8), iterations=1)

    return new_mask_array

In [None]:
def prepare_data(ckpt_pth):
    """Crop edited images to their last 512x512 tile and write masked copies.

    For every file in ``{ckpt_pth}/refedit_final_512/results/{easy_512,hard_512}``
    the bottom-right 512x512 region is saved to
    ``{ckpt_pth}/viescore/results/<split>/<id>.jpg``, and a version multiplied by
    the decoded polygon mask is saved next to it with a ``_masked.jpg`` suffix.
    Masks are read from ``JSON_file.json``.

    Args:
        ckpt_pth: Root directory that contains ``refedit_final_512`` and
            receives the ``viescore`` output tree.

    Raises:
        KeyError: If an edited image has no matching ``image_path`` entry in the
            JSON file. Previously the mask of the preceding image was silently
            reused (or a NameError was raised for the very first image).
    """
    json_pth = "JSON_file.json"

    with open(json_pth) as f:
        data = json.load(f)

    # Build the image_path -> entry-id lookup once instead of scanning ``data``
    # for every image. ``setdefault`` keeps the first entry for a path, which is
    # what the previous scan-and-break loop selected.
    idx_by_image = {}
    for idx in data:
        idx_by_image.setdefault(data[idx]['image_path'], idx)

    epoch_lst = ["results"]

    for epoch_dir in epoch_lst:

        refedit_pth = f"refedit_final_512/{epoch_dir}"
        viescore_pth = f"viescore/{epoch_dir}"
        os.makedirs(f"{ckpt_pth}/{viescore_pth}", exist_ok=True)
        os.makedirs(f"{ckpt_pth}/{viescore_pth}/easy_512", exist_ok=True)
        os.makedirs(f"{ckpt_pth}/{viescore_pth}/hard_512", exist_ok=True)

        # ``split_dir`` rather than ``dir`` so the builtin is not shadowed.
        for split_dir in ["easy_512", "hard_512"]:
            imgs = os.listdir(f"{ckpt_pth}/{refedit_pth}/{split_dir}")
            for img in imgs:
                # The first two "_"-separated fields of the file name identify
                # the benchmark image.
                img_lst = img.split("_")
                initial_img = f"{split_dir}/{img_lst[0]}_{img_lst[1]}.jpg"

                # Resolve the mask before writing anything so that a missing
                # entry fails loudly instead of reusing a stale mask.
                if initial_img not in idx_by_image:
                    raise KeyError(
                        f"no entry with image_path {initial_img!r} in {json_pth} "
                        f"(edited file: {img!r})"
                    )
                mask = data[idx_by_image[initial_img]]['mask']

                masked_img = f"final_benchmark/{initial_img}"

                # read the img and it is the last 512 x 512 img - code works for both PnPInversion evaluation script outputs and huggingface pipeline outputs
                edited_img = f"{split_dir}/{img}"
                edited_img_full_pth = f"{ckpt_pth}/{refedit_pth}/{edited_img}"
                # crop() returns an independent, loaded image, so the source
                # file can be closed as soon as the crop has been taken.
                with Image.open(edited_img_full_pth) as full_image:
                    width, height = full_image.size
                    edited_image = full_image.crop((width - 512, height - 512, width, height))
                edited_image.save(f"{ckpt_pth}/{viescore_pth}/{initial_img}")

                mask_array = mask_decode(mask, masked_img)

                # create masked edited image: the mask is broadcast over the
                # channel axis, zeroing every pixel outside it.
                masked_edited = Image.fromarray(np.array(edited_image) * mask_array[:,:,None])

                masked_edited_save_pth = f"{ckpt_pth}/{viescore_pth}/{initial_img}".replace(".jpg", "_masked.jpg")

                masked_edited.save(masked_edited_save_pth)
    

In [42]:
# Prompt template for the SC score; the editing instruction is appended to it
# for every request further down.
sc_score_pth = "sc_score.txt"

# The prompt contains non-ASCII punctuation (a curly apostrophe), so decode it
# explicitly as UTF-8 instead of relying on the platform's locale default.
with open(sc_score_pth, 'r', encoding="utf-8") as file:
    sc_score = file.read()

print(sc_score)

You are a professional digital artist. You will have to evaluate the effectiveness of the AI-edited image(s) based on the given rules. You will have to give your output in this way (Keep your reasoning concise and short.):
{
"score" : [...],
"reasoning" : "..."
}

and don’t output anything else.
Two images will be provided: The first being the original image selected from COCO dataset and the second being an AI edited version of the first. The objective is to evaluate how successfully the editing instruction has been executed in the second image. Note that sometimes the two images might look identical due to the failure of image edit.
Both the original image and the edited image are masked images since the image contains multiple objects and we want you to only focus on the intended object.

From a scale 0 to 10:
A score from 0 to 10 will be given based on the success of the editing.
- 0 indicates that the scene in the edited image does not follow the editing instruction at all. 
- 10 

In [None]:
from openai import OpenAI
import base64
# Take the key from the environment so that no credential is stored in (or
# committed with) the notebook. Export OPENAI_API_KEY before starting the kernel.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)

# Function to encode the image
def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64 text string.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def get_response(img_pth1, img_pth2, txt):
    """Send a text prompt plus two images to ``gpt-4o`` and return the reply.

    Args:
        img_pth1: Path of the first image attached to the message.
        img_pth2: Path of the second image attached to the message.
        txt: Prompt text, placed before the two images in the message.

    Returns:
        The ``content`` of the first choice of the chat completion.
    """
    # Both images are embedded inline as base64 JPEG data URLs, in call order.
    image_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encode_image(pth)}"},
        }
        for pth in (img_pth1, img_pth2)
    ]
    text_part = {
        "type": "text",
        "text": txt,
    }
    user_message = {
        "role": "user",
        "content": [text_part, *image_parts],
    }

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[user_message],
        temperature=0.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [44]:
from tqdm import tqdm

In [None]:
# Score every (masked original, masked edit) pair with the SC prompt and
# collect the replies in ``{ckpt_pth}/sc_output.json``.
ckpt_pth = "EDITED_IMAGES"
masked_initial_img_dir = "MASKED_ORIGINALS"
json_pth = "JSON_file.json"

with open(json_pth) as f:
    data = json.load(f)

# image_path -> entry id, so the benchmark entry can be found from a file name.
name_to_idx = {data[idx]['image_path']: idx for idx in data}

output = {}

# ``split_dir`` rather than ``dir``: a notebook-level ``dir`` would shadow the
# builtin for every later cell.
for split_dir in ["easy_512", "hard_512"]:
    imgs = os.listdir(f"{ckpt_pth}/{split_dir}")

    # The folder is expected to hold 200 files, counting both the plain edited
    # images and their ``_masked`` counterparts.
    assert len(imgs) == 200

    for img_name in tqdm(imgs):
        # Masked files are reached through their unmasked sibling below.
        if "_masked" in img_name:
            continue

        masked_initial_img_pth = f"{masked_initial_img_dir}/{split_dir}/{img_name}"

        masked_edited_img_pth = f"{ckpt_pth}/{split_dir}/{img_name.replace('.jpg', '_masked.jpg')}"

        img_id = name_to_idx[f"{split_dir}/{img_name}"]

        # We use the manually created edit_ins_single (editing instruction just
        # for the masked image) as the editing instruction. Example: "Add a red
        # hat to the left person" is changed to "Add a red hat to the person"
        # since the masked image is only for the left person.
        editing_instruction = data[img_id]['edit_ins_single']

        txt_input = f"{sc_score} {editing_instruction}"

        res = get_response(masked_initial_img_pth, masked_edited_img_pth, txt_input)

        try:
            output[img_id] = json.loads(res)
        except (ValueError, TypeError):
            # Reply was not valid JSON (ValueError) or was empty/None
            # (TypeError): keep the raw reply for later inspection. A bare
            # ``except`` would also have swallowed KeyboardInterrupt.
            output[img_id] = res

with open(f"{ckpt_pth}/sc_output.json", 'w') as f:
    json.dump(output, f)

100%|██████████| 200/200 [03:55<00:00,  1.18s/it]
100%|██████████| 200/200 [03:55<00:00,  1.18s/it]


In [48]:
# Prompt template for the PQ score; it is sent unchanged with every request
# further down.
pq_score_pth = "pq_score.txt"

# The prompt contains non-ASCII punctuation (a curly apostrophe), so decode it
# explicitly as UTF-8 instead of relying on the platform's locale default.
with open(pq_score_pth, 'r', encoding="utf-8") as file:
    pq_score = file.read()

print(pq_score)

You are a professional digital artist. You will have to evaluate the effectiveness of the AI-edited image.
Two images will be provided: The first being the original image selected from COCO dataset and the second being an AI edited version of the first. So you may not worry about privacy or confidentiality.

You must focus solely on the technical quality and artifacts in the edited image, and **do not consider whether the context is natural or not**.

Your evaluation should focus on:
- Distortions
- Unusual body parts or proportions
- Unnatural Object Shapes

Rate the edited image on a scale from 0 to 10, where:
- 0 indicates significant AI-artifacts.
- 10 indicates an artifact-free image.

You will have to give your output in this way (Keep your reasoning concise and short.): 
{
"score": ...,
"reasoning": "..."
}

and don’t output anything else.


In [None]:
# Score every (original, edited) pair with the PQ prompt and collect the
# replies in ``{ckpt_pth}/pq_output.json``.
ckpt_pth = "EDITED_IMAGES"
initial_img_dir = "ORIGINALS"
json_pth = "JSON_file.json"

with open(json_pth) as f:
    data = json.load(f)

# image_path -> entry id, so the benchmark entry can be found from a file name.
name_to_idx = {data[idx]['image_path']: idx for idx in data}

output = {}

# ``split_dir`` rather than ``dir``: a notebook-level ``dir`` would shadow the
# builtin for every later cell.
for split_dir in ["easy_512", "hard_512"]:
    imgs = os.listdir(f"{ckpt_pth}/{split_dir}")

    # The folder is expected to hold 200 files, counting both the plain edited
    # images and their ``_masked`` counterparts.
    assert len(imgs) == 200

    for img_name in tqdm(imgs):
        # This score is computed on the unmasked images only.
        if "_masked" in img_name:
            continue

        initial_img_pth = f"{initial_img_dir}/{split_dir}/{img_name}"

        edited_img_pth = f"{ckpt_pth}/{split_dir}/{img_name}"

        img_id = name_to_idx[f"{split_dir}/{img_name}"]

        # The PQ prompt is sent as-is; no editing instruction is appended.
        txt_input = f"{pq_score}"

        res = get_response(initial_img_pth, edited_img_pth, txt_input)

        try:
            output[img_id] = json.loads(res)
        except (ValueError, TypeError):
            # Reply was not valid JSON (ValueError) or was empty/None
            # (TypeError): keep the raw reply for later inspection. A bare
            # ``except`` would also have swallowed KeyboardInterrupt.
            output[img_id] = res

with open(f"{ckpt_pth}/pq_output.json", 'w') as f:
    json.dump(output, f)

100%|██████████| 200/200 [04:18<00:00,  1.29s/it]
100%|██████████| 200/200 [04:23<00:00,  1.32s/it]


In [None]:
import os, json
import numpy as np

def _coerce_vie_entry(entry, idx, source):
    """Return ``entry`` as a dict, recovering JSON embedded in a raw reply string."""
    if isinstance(entry, dict):
        return entry
    if isinstance(entry, str):
        # The scoring cells store the raw reply when it was not pure JSON
        # (e.g. wrapped in a markdown code fence); try the outermost {...} span.
        start, end = entry.find("{"), entry.rfind("}")
        if start != -1 and end > start:
            try:
                parsed = json.loads(entry[start:end + 1])
            except ValueError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
    raise ValueError(
        f"{source}: entry {idx!r} is not a parsable score response: {entry!r}"
    )


def return_vie_score_refedit(pth):
    """Average the SC / PQ / overall scores stored under ``pth``.

    Reads ``{pth}/sc_output.json`` and ``{pth}/pq_output.json``. Each must hold
    exactly 200 entries keyed by an integer-like id. Entries whose id is below
    100 are accumulated as "easy", the rest as "hard", and each group's sums
    are divided by 100.

    For every entry ``sc1`` and ``sc2`` are the first two values of the SC
    ``score`` list, ``sc = min(sc1, sc2)``, ``pq`` is the PQ ``score`` and the
    overall score is ``sqrt(sc * pq)``.

    Entries that were saved as raw reply strings (the scoring cells do this when
    the reply could not be parsed as JSON) are parsed here if they contain a
    JSON object.

    Args:
        pth: Directory containing the two output JSON files.

    Returns:
        A tuple ``(easy_vals, hard_vals)``. Each is a string of five averages
        formatted with two decimals and joined by ``", "``, in the order
        sc1, sc2, sc, pq, overall.

    Raises:
        AssertionError: If either file does not contain exactly 200 entries.
        ValueError: If an entry is neither a dict nor a string containing a
            JSON object. Previously this surfaced as an opaque TypeError.
    """
    sc_pth = f"{pth}/sc_output.json"
    pq_pth = f"{pth}/pq_output.json"

    with open(sc_pth) as f:
        sc_data = json.load(f)

    with open(pq_pth) as f:
        pq_data = json.load(f)

    assert len(sc_data) == 200
    assert len(pq_data) == 200

    sc1_easy = 0
    sc2_easy = 0
    sc_easy = 0
    pq_easy = 0
    o_easy = 0

    sc1_hard = 0
    sc2_hard = 0
    sc_hard = 0
    pq_hard = 0
    o_hard = 0

    for idx in sc_data:
        sc_entry = _coerce_vie_entry(sc_data[idx], idx, sc_pth)
        pq_entry = _coerce_vie_entry(pq_data[idx], idx, pq_pth)

        sc1 = sc_entry["score"][0]
        sc2 = sc_entry["score"][1]

        pq = pq_entry["score"]

        # The weaker of the two SC sub-scores is used as the SC score.
        sc = min(sc1, sc2)

        # Overall score: geometric mean of SC and PQ.
        o = np.sqrt(sc * pq)

        # Ids below 100 go to the easy bucket, the rest to the hard bucket.
        if int(idx) < 100:
            sc1_easy += sc1
            sc2_easy += sc2
            sc_easy += sc
            pq_easy += pq
            o_easy += o
        else:
            sc1_hard += sc1
            sc2_hard += sc2
            sc_hard += sc
            pq_hard += pq
            o_hard += o

    # return avg results (each bucket's sums are divided by 100)
    easy_vals = f"{sc1_easy/100:.2f}, {sc2_easy/100:.2f}, {sc_easy/100:.2f}, {pq_easy/100:.2f}, {o_easy/100:.2f}"
    hard_vals = f"{sc1_hard/100:.2f}, {sc2_hard/100:.2f}, {sc_hard/100:.2f}, {pq_hard/100:.2f}, {o_hard/100:.2f}"

    return easy_vals , hard_vals