In [25]:
import pandas as pd
import random
from itertools import cycle

In [26]:
# Define the models, prompts, picture numbers, and annotators used throughout the notebook
models = [
    'DALL-E3',
    'StableCascade',
    'SDXL',
]
prompts = [
    'wrestling in arena',
    'physician examining patient',
    'person jogging',
    'people eating pizza',
    'old couple in sauna',
    'mother or father holding baby',
    'five people sunbathing on beach',
    'five people playing volleyball',
    'couple hugging',
    'athlete performing salto',
]
# Zero-padded picture numbers '01' through '08'
pic_nums = [str(number).zfill(2) for number in range(1, 9)]
annotators = [
    'Annotator1',
    'Annotator2',
    'Annotator3',
    'Annotator4',
]

In [27]:
# Build the image pool: one lower-cased file name per (model, prompt, picture number)
# combination, iterating models first, then prompts, then picture numbers
image_pool = [
    f"{model_name.lower()}_{prompt_text.lower()}_{number}.jpg"
    for model_name in models
    for prompt_text in prompts
    for number in pic_nums
]

# Report the pool size as a sanity check
print(f"Total amount of images in the pool: {len(image_pool)}")

Total amount of images in the pool: 240


In [28]:
# Shuffle the image pool in place to randomize the order.
# NOTE(review): no random.seed(...) call is visible in this notebook, so this
# shuffle and every later random draw presumably differ between runs — confirm
# whether the assignment needs to be reproducible.
random.shuffle(image_pool)

In [29]:
# Pick the images that will be annotated twice: for every (model, prompt) pair draw
# 2 images at random, i.e. 3 models * 10 prompts * 2 images = 60 images in total
double_annot_images = []
for model in models:
    for prompt in prompts:
        pair_prefix = f"{model.lower()}_{prompt.lower()}_"
        candidates = [name for name in image_pool if name.startswith(pair_prefix)]
        # two distinct images of this (model, prompt) pair
        double_annot_images += random.sample(candidates, 2)

# Randomize the order of the selected images
random.shuffle(double_annot_images)

# Report totals per model and per prompt to verify that the selection is balanced.
# File names look like "<model>_<prompt>_<number>.jpg", so splitting on "_" yields
# the model at index 0 and the prompt at index 1.
double_annot_images_df = pd.DataFrame({'image': double_annot_images})
double_annot_images_df['model'] = double_annot_images_df['image'].map(lambda name: name.split('_')[0])
double_prompt_counts = double_annot_images_df['image'].map(lambda name: name.split('_')[1]).value_counts()
print(f"Total amount of double annotation images: {len(double_annot_images)}")
print(f"Total amount of double annotation images per model: {double_annot_images_df['model'].value_counts()}")
print(f"Total amount of double annotation images per prompt: {double_prompt_counts}")

Total amount of double annotation images: 60
Total amount of double annotation images per model: stablecascade    20
dall-e3          20
sdxl             20
Name: model, dtype: int64
Total amount of double annotation images per prompt: couple hugging                     6
physician examining patient        6
mother or father holding baby      6
athlete performing salto           6
five people playing volleyball     6
five people sunbathing on beach    6
person jogging                     6
old couple in sauna                6
people eating pizza                6
wrestling in arena                 6
Name: image, dtype: int64


In [30]:
# Prepare the double-annotation assignment: start every annotator with an empty list
# and create two variants ("_0" and "_1") of each double-annotation image
double_annotator_images_variants = dict((name, []) for name in annotators)

double_annot_images_variants = []
for image in double_annot_images:
    stem = image[:-4]  # file name without the ".jpg" extension
    double_annot_images_variants.extend(f"{stem}_{variant}.jpg" for variant in range(2))

# Randomize the order in which the variants will be handed out
random.shuffle(double_annot_images_variants)

In [31]:
# Helper function to count how many images from each model an annotator has
def count_model_images(annotator, model):
    """Return how many of `annotator`'s double-annotation variants belong to `model`.

    Reads the module-level `double_annotator_images_variants` dict and counts the
    file names that start with the lower-cased model name followed by "_".
    """
    model_prefix = f"{model.lower()}_"
    return sum(image.startswith(model_prefix) for image in double_annotator_images_variants[annotator])


def _try_assign_variants(images, annotator_names, quota):
    """Greedily hand out one model's variants round-robin over the annotators.

    A variant goes to the next annotator in the cycle who holds fewer than `quota`
    variants and does not already hold the other variant of the same image.

    Returns a dict {annotator: [variants]} on success, or None when some variant
    cannot be placed with any annotator (the greedy order ran into a dead end).
    """
    assigned = {name: [] for name in annotator_names}
    annotator_cycle = cycle(annotator_names)
    for image in images:
        image_base = image[:-6]  # strip the trailing "_<variant>.jpg"
        # One full pass over the annotators is enough: nothing changes between
        # attempts, so if nobody is eligible now nobody ever will be.
        for _ in range(len(annotator_names)):
            annotator = next(annotator_cycle)
            has_room = len(assigned[annotator]) < quota
            holds_other_variant = any(other.startswith(image_base) for other in assigned[annotator])
            if has_room and not holds_other_variant:
                assigned[annotator].append(image)
                break
        else:
            return None
    return assigned


# Upper bound on reshuffle-and-retry rounds per model before giving up
MAX_ASSIGNMENT_ATTEMPTS = 100

# Assign double annotation images ensuring balanced distribution
for model in models:
    images = [image for image in double_annot_images_variants if image.startswith(f"{model.lower()}_")]
    if not images:
        continue
    if not annotators:
        raise ValueError("Cannot assign double annotation images: the annotator list is empty.")
    # Equal share per annotator, rounded up (40 variants / 4 annotators = 10)
    quota = -(-len(images) // len(annotators))

    # The first attempt uses the current order. The greedy pass can dead-end, for
    # example when the only annotator with room already holds the other variant
    # of the last image; in that case reshuffle this model's variants and retry.
    for _ in range(MAX_ASSIGNMENT_ATTEMPTS):
        assignment = _try_assign_variants(images, annotators, quota)
        if assignment is not None:
            break
        random.shuffle(images)
    else:
        raise RuntimeError(
            f"Could not find a balanced double-annotation assignment for model {model} "
            f"after {MAX_ASSIGNMENT_ATTEMPTS} attempts."
        )

    # Commit only after the whole model succeeded, so a failure never leaves the
    # shared structures half-filled.
    for annotator, assigned_images in assignment.items():
        double_annotator_images_variants[annotator].extend(assigned_images)
    for image in images:
        double_annot_images_variants.remove(image)

In [32]:
# Print the amount of double-annotated images per annotator to check, each annotator should have 30 images
for annotator in annotators:
    print(f"Annotator {annotator} has {len(double_annotator_images_variants[annotator])} double-annotated images.")

# Print the amount of double-annotated images per annotator per model to check, each annotator should have 10 images per model.
# The models come from the `models` list, so a newly added model is reported automatically.
for annotator in annotators:
    assigned_images = double_annotator_images_variants[annotator]
    for model in models:
        model_prefix = f"{model.lower()}_"
        model_count = sum(image.startswith(model_prefix) for image in assigned_images)
        print(f"Annotator {annotator} has {model_count} {model} images.")

Annotator Annotator1 has 30 double-annotated images.
Annotator Annotator2 has 30 double-annotated images.
Annotator Annotator3 has 30 double-annotated images.
Annotator Annotator4 has 30 double-annotated images.
Annotator Annotator1 has 10 DALL-E3 images.
Annotator Annotator1 has 10 StableCascade images.
Annotator Annotator1 has 10 SDXL images.
Annotator Annotator2 has 10 DALL-E3 images.
Annotator Annotator2 has 10 StableCascade images.
Annotator Annotator2 has 10 SDXL images.
Annotator Annotator3 has 10 DALL-E3 images.
Annotator Annotator3 has 10 StableCascade images.
Annotator Annotator3 has 10 SDXL images.
Annotator Annotator4 has 10 DALL-E3 images.
Annotator Annotator4 has 10 StableCascade images.
Annotator Annotator4 has 10 SDXL images.


In [33]:
# Drop every image that was selected for double annotation from the pool;
# whatever remains will be annotated by a single annotator
double_annot_lookup = set(double_annot_images)
image_pool = [image for image in image_pool if image not in double_annot_lookup]
# Report how many images are left for single annotation
print(f"Number of images remaining in the pool  for single annotation: {len(image_pool)}")

Number of images remaining in the pool  for single annotation: 180


In [34]:
# Assign the remaining images to annotators for single annotation.
# Every annotator receives an equal share of each model's remaining images
# (with 60 images per model and 4 annotators that is 15 images per model each).
single_annots = {annotator: [] for annotator in annotators}

# Work out each model's per-annotator share up front, before the pool is drained
single_quota = {}
for model in models:
    model_prefix = f"{model.lower()}_"
    available = sum(image.startswith(model_prefix) for image in image_pool)
    single_quota[model] = available // len(annotators)
    if single_quota[model] == 0:
        # Typically means this cell was run again after it already emptied the pool
        raise ValueError(
            f"Not enough images of model {model} left in the pool for single annotation "
            f"({available} available for {len(annotators)} annotators); re-run the cells that build the pool."
        )

for annotator in annotators:
    for model in models:
        model_prefix = f"{model.lower()}_"
        model_images = [image for image in image_pool if image.startswith(model_prefix)]
        random_images = random.sample(model_images, single_quota[model])
        single_annots[annotator].extend(random_images)
        # Remove the drawn images so no other annotator can receive them
        drawn = set(random_images)
        image_pool = [image for image in image_pool if image not in drawn]

In [35]:
# Report the single-annotation counts per model, per prompt, and per annotator
# (and their pairwise combinations) to check that the distribution is balanced
single_rows = []
for annotator_name, assigned_images in single_annots.items():
    single_rows.extend((annotator_name, image_name) for image_name in assigned_images)
single_annots_df = pd.DataFrame(single_rows, columns=['annotator', 'image'])

# File names look like "<model>_<prompt>_<number>.jpg"
single_annots_df['model'] = single_annots_df['image'].map(lambda name: name.split('_')[0])
# Prompt of every row, kept as a separate Series (still named 'image') for counting and grouping
single_prompts = single_annots_df['image'].map(lambda name: name.split('_')[1])

print(f"Total amount of single annotation images: {len(single_annots_df)}")
print(f"Total amount of single annotation images per model: {single_annots_df['model'].value_counts()}")
print(f"Total amount of single annotation images per annotator: {single_annots_df['annotator'].value_counts()}")
print(f"Total amount of single annotation images per prompt: {single_prompts.value_counts()}")
print(f"Total amount of single annotation images per model and prompt: {single_annots_df.groupby(['model', single_prompts]).size()}")
print(f"Total amount of single annotation images per model and annotator: {single_annots_df.groupby(['model', 'annotator']).size()}")
print(f"Total amount of single annotation images per prompt and annotator: {single_annots_df.groupby([single_prompts, 'annotator']).size()}")

Total amount of single annotation images: 180
Total amount of single annotation images per model: dall-e3          60
stablecascade    60
sdxl             60
Name: model, dtype: int64
Total amount of single annotation images per annotator: Annotator1    45
Annotator2    45
Annotator3    45
Annotator4    45
Name: annotator, dtype: int64
Total amount of single annotation images per prompt: athlete performing salto           18
person jogging                     18
five people sunbathing on beach    18
mother or father holding baby      18
people eating pizza                18
couple hugging                     18
physician examining patient        18
five people playing volleyball     18
wrestling in arena                 18
old couple in sauna                18
Name: image, dtype: int64
Total amount of single annotation images per model and prompt: model          image                          
dall-e3        athlete performing salto           6
               couple hugging            

In [36]:
# Add the double annotation images to the final list.
# Every variant name ends in "_<i>.jpg"; dropping those last 6 characters and
# re-adding ".jpg" recovers the file name of the underlying image.
for annotator, images in double_annotator_images_variants.items():
    for image in images:
        original_name = f"{image[:-6]}.jpg"
        # Skip names that are already present so that re-running this cell does
        # not append the same double-annotation images a second time.
        if original_name not in single_annots[annotator]:
            single_annots[annotator].append(original_name)

In [37]:
# Report the final counts per model, per prompt, and per annotator
# (and their pairwise combinations) to check that the distribution is balanced
final_rows = []
for annotator_name, assigned_images in single_annots.items():
    final_rows.extend((annotator_name, image_name) for image_name in assigned_images)
final_images_df = pd.DataFrame(final_rows, columns=['annotator', 'image'])

# File names look like "<model>_<prompt>_<number>.jpg"
final_images_df['model'] = final_images_df['image'].map(lambda name: name.split('_')[0])
# Prompt of every row, kept outside the frame so the saved columns stay unchanged
final_prompts = final_images_df['image'].map(lambda name: name.split('_')[1])

print(f"Total amount of images: {len(final_images_df)}")
print(f"Total amount of images per model: {final_images_df['model'].value_counts()}")
print(f"Total amount of images per annotator: {final_images_df['annotator'].value_counts()}")
print(f"Total amount of images per prompt: {final_prompts.value_counts()}")
print(f"Total amount of images per model and prompt: {final_images_df.groupby(['model', final_prompts]).size()}")
print(f"Total amount of images per model and annotator: {final_images_df.groupby(['model', 'annotator']).size()}")
print(f"Total amount of images per prompt and annotator: {final_images_df.groupby([final_prompts, 'annotator']).size()}")

Total amount of images: 300
Total amount of images per model: dall-e3          100
stablecascade    100
sdxl             100
Name: model, dtype: int64
Total amount of images per annotator: Annotator1    75
Annotator2    75
Annotator3    75
Annotator4    75
Name: annotator, dtype: int64
Total amount of images per prompt: athlete performing salto           30
person jogging                     30
five people sunbathing on beach    30
mother or father holding baby      30
people eating pizza                30
couple hugging                     30
physician examining patient        30
five people playing volleyball     30
wrestling in arena                 30
old couple in sauna                30
Name: image, dtype: int64
Total amount of images per model and prompt: model          image                          
dall-e3        athlete performing salto           10
               couple hugging                     10
               five people playing volleyball     10
               five p

In [38]:
# Write the final annotator/image assignment to disk as CSV (without the index column)
output_csv = 'final_images.csv'
final_images_df.to_csv(output_csv, index=False)
print(f"Final list of images saved to '{output_csv}' file.")

Final list of images saved to 'final_images.csv' file.
