# Image project example

Let's walk through a toy example of using `🐥 pollo`!

In [1]:
# Autoreload extension for interactive development
%load_ext autoreload
%autoreload 2

# Standard library imports
import json
import os
from pprint import pprint
from dataclasses import dataclass
import glob

# Local application/library specific imports
from pollo.utils.hit import HitMaker
import pollo.utils.io as io_utils
from pollo.requester import Requester

# Put media in `data/media`

We generated the images with DALL-E 2: 10 images for each of the text prompts "realistic chicken" and "cartoon chicken", all resized to 256x256.

# Create and save HITs

In [2]:
@dataclass
class ImageHitMaker(HitMaker):
    """`HitMaker` subclass used by this image example; it sets two dataclass fields."""

    # Shows up as settings["expected_seconds_per_query_example"] in the default HIT printed below.
    expected_seconds_per_query_example: int = 20
    # NOTE(review): presumably the base URL where the pollo interface is hosted — confirm against `HitMaker`.
    endpoint: str = "https://pollo.ethanweber.me"

# Instantiate the maker, ask it for its default HIT, and display the result.
hit_maker = ImageHitMaker()
hit = hit_maker.get_default_hit()
pprint(hit)

{'settings': {'expected_seconds_per_query_example': 20,
              'image_height': 300,
              'image_max_width': 500,
              'min_seconds_per_query_example': 1}}


In [21]:
# Local folder holding the media and the public URL prefix it is served under.
# NOTE(review): `hit_maker.endpoint` points at pollo.ethanweber.me while the media
# is served from mturk.ethanweber.me — confirm that both hosts are intended.
MEDIA_ROOT = "data/media"
MEDIA_BASE_URL = "https://mturk.ethanweber.me/media"
HITS_DIR = "data/hits"


def media_url(filename):
    """Return the public URL of a media file stored under `MEDIA_ROOT`.

    Args:
        filename: Path to a file inside `MEDIA_ROOT`, e.g.
            "data/media/cartoon-chicken/cartoon-chicken-000.png".

    Returns:
        `MEDIA_BASE_URL` followed by the file's path relative to `MEDIA_ROOT`,
        always joined with forward slashes.
    """
    # relpath replaces the hand-counted `find(...) + 11` slice; normalising the
    # separator keeps the URL valid even when the OS uses backslashes.
    relative_path = os.path.relpath(filename, MEDIA_ROOT).replace(os.sep, "/")
    return f"{MEDIA_BASE_URL}/{relative_path}"


cc_filenames = sorted(glob.glob(os.path.join(MEDIA_ROOT, "cartoon-chicken", "*.png")))
rc_filenames = sorted(glob.glob(os.path.join(MEDIA_ROOT, "realistic-chicken", "*.png")))
cc_urls = [media_url(filename) for filename in cc_filenames]
rc_urls = [media_url(filename) for filename in rc_filenames]

pprint(cc_urls)

# Images are paired by position, so both folders must hold the same number of
# files; fail loudly instead of raising IndexError or silently dropping images.
if len(cc_urls) != len(rc_urls):
    raise ValueError(
        f"Expected the same number of cartoon and realistic images, "
        f"got {len(cc_urls)} and {len(rc_urls)}."
    )

# create a HIT: one query example per (cartoon, realistic) image pair
query_examples = [
    {"choices": [cc_url, rc_url]}
    for cc_url, rc_url in zip(cc_urls, rc_urls)
]
hit = hit_maker.get_default_hit()
hit["query_examples"] = query_examples
config_name = "hit-000000"
# Make sure the output folder exists so the cell also works on a fresh checkout.
os.makedirs(HITS_DIR, exist_ok=True)
io_utils.write_to_json(os.path.join(HITS_DIR, config_name + ".json"), hit)

['https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-000.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-001.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-002.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-003.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-004.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-005.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-006.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-007.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-008.png',
 'https://mturk.ethanweber.me/media/cartoon-chicken/cartoon-chicken-009.png']


### Launch some HITs and monitor progress

In [None]:
# use_sandbox = False
# DATABASE_FILENAME = "mturk_database.pkl"
# CREDS_FILENAME = "mturk_creds.json"

# requester = Requester(use_sandbox=use_sandbox,
#                       database_filename=DATABASE_FILENAME,
#                       creds_filename=CREDS_FILENAME)
# requester.show_account_balance()

In [None]:
# Knobs for the MTurk task; everything below is derived from these values.
lifetime_hours = 1
per_hit_minutes = 15
cost_per_hit = '0.5'  # kept as a string, exactly as it is passed on as 'Reward'
days_until_autoapprove = 2
estimated_time_in_minutes = 10
task_attributes = {
    # Number of assignments per task.
    'MaxAssignments': 1,
    # How long the task will be available on MTurk
    'LifetimeInSeconds': 60 * 60 * lifetime_hours,
    # How long Workers have to complete each item
    'AssignmentDurationInSeconds': 60 * per_hit_minutes,
    # The reward you will offer Workers for each response
    'Reward': cost_per_hit,
    # Worker-facing text: this notebook builds HITs that compare images, so the
    # wording says "image" (it previously said "video") and the keyword typo
    # "computer vison" is corrected.
    'Title': 'Choosing which image looks better. (~{} min)'.format(estimated_time_in_minutes),
    'Keywords': 'image, choice, computer vision, selection',
    'Description': 'Choose which of the two images looks better.',
    # Delay, in seconds, before submitted work is approved automatically.
    'AutoApprovalDelayInSeconds': days_until_autoapprove * (60*60*24)
}
# NOTE(review): `requester` is not created by any uncommented cell in this
# notebook (the `Requester(...)` construction is commented out), so it must be
# defined before this cell is run.
requester.set_task_attributes(task_attributes)

In [None]:
# existing_external_urls = set(external_urls)

In [None]:
# existing_external_urls

In [None]:
# Build one interface URL per HIT config file found in HIT_FOLDER.
# NOTE(review): HIT_FOLDER and ENDPOINT are not defined in any cell shown in this
# notebook — presumably "data/hits" and `hit_maker.endpoint`; define them before
# running this cell.
external_urls = []
for filename in sorted(os.listdir(HIT_FOLDER)):
    config_name, extension = os.path.splitext(filename)
    # Only JSON configs describe HITs; skip stray files such as editor or OS metadata.
    if extension != ".json":
        continue
    # URLs always use "/" regardless of the OS, so join explicitly rather than
    # with os.path.join; rstrip avoids a double slash if ENDPOINT ends with one.
    external_url = "/".join([ENDPOINT.rstrip("/"), "interface", config_name])
    external_urls.append(external_url)

In [None]:
# NOTE(review): presumably creates one HIT per external URL and returns the new
# HIT ids — confirm against `Requester`. With a non-sandbox requester this would
# publish real, paid tasks.
hit_ids = requester.submit_hits_from_external_urls(external_urls)

In [None]:
# NOTE(review): presumably pulls the latest worker responses into the requester's
# local database — confirm against `Requester`. Re-run to refresh before reading.
requester.update_database_with_responses()

In [None]:
# Read the stored responses back and display how many entries there are.
responses = requester.get_responses_from_database()
len(responses)

In [None]:
# Average minutes per HIT; the division by 60 assumes the helper returns
# seconds — TODO confirm. The method name is spelled `resonses` as called here.
requester.get_mean_time_from_resonses(responses) / 60

In [None]:
# NOTE(review): presumably writes each response to its own file on disk; the
# destination is decided inside `Requester` — confirm where the files land.
requester.save_all_responses_to_files(responses)

In [None]:
# process all the results
#
# For every saved response file, record which method the worker picked for each
# query example, grouped by the baseline encoded in the HIT's config name.

from collections import defaultdict

# NOTE(review): absolute, machine-specific path that belongs to a different
# (video) user study — point this at this project's responses folder before use.
responses_path = "/home/ethanweber/nerfiller-user-study/static/data/responses"
responses_filenames = sorted(os.listdir(responses_path))

# Substring that precedes "/<dataset>-<method><4-char extension>" in a choice URL.
media_dir_marker = "nerfiller_videos_occluder"

# baseline name -> chosen method per answered example, and the matching dataset names
baseline_to_choice = defaultdict(list)
baseline_to_dataset_name = defaultdict(list)

for response_filename in responses_filenames:
    # Use a context manager so each file handle is closed right after parsing.
    with open(os.path.join(responses_path, response_filename)) as response_file:
        response = json.load(response_file)

    # These values are constant for a response, so compute them once per file.
    global_config_name = response["GLOBAL_CONFIG_NAME"]
    # Drops the first 4 characters and everything from "-0000" onwards
    # (presumably a "hit-" prefix and a numeric suffix — TODO confirm).
    baseline_name = global_config_name[4:global_config_name.find("-0000")]
    query_examples = response["GLOBAL_CONFIG"]["QUERY_EXAMPLES"]

    for example_idx, raw_answer in enumerate(response["QUERY_EXAMPLES_RESPONSES"]):
        answer_idx = int(raw_answer)
        choice = query_examples[example_idx]["choices"][answer_idx]
        # Dataset name sits between the marker's trailing "/" and the next "-".
        dataset_idx_s = choice.find(media_dir_marker) + len(media_dir_marker) + 1
        dataset_idx_e = choice.find("-", dataset_idx_s)
        dataset_name = choice[dataset_idx_s:dataset_idx_e]
        # Everything after that "-" minus the last 4 characters (the file extension).
        chosen_method = choice[dataset_idx_e + 1:-4]
        baseline_to_choice[baseline_name].append(chosen_method)
        baseline_to_dataset_name[baseline_name].append(dataset_name)

In [None]:
# Datasets split into two groups; every dataset belongs to exactly one of them.
scene_dataset_names = [
    "billiards",
    "office",
    "backpack",
    "drawing",
    "norway",
]
object_dataset_names = [
    "dumptruck",
    "boot",
    "bear",
    "cat",
    "turtle",
]
# `dataset_names` was referenced but never defined, which raises NameError on a
# fresh kernel. Default to all datasets; assign `scene_dataset_names` or
# `object_dataset_names` instead to restrict the analysis to one group.
dataset_names = scene_dataset_names + object_dataset_names
dataset_filter = set(dataset_names)

# Method whose selection rate is reported against each baseline.
PREFERRED_METHOD = "none-grid-prior-du"
BASELINE_NAMES = [
    "individual-lama-individual-inpaint-once",
    "individual-sd-image-individual-inpaint-once",
    "none-individual-inpaint-du",
]


def preference_rate(baseline_name, dataset_filter, preferred_method=PREFERRED_METHOD):
    """Return the fraction of answers for `baseline_name` that picked `preferred_method`.

    Args:
        baseline_name: Key into `baseline_to_choice` / `baseline_to_dataset_name`.
        dataset_filter: Collection of dataset names to include; answers for any
            other dataset are ignored.
        preferred_method: Method name counted as a "win".

    Returns:
        wins / answers over the filtered answers, or NaN when no answer passes
        the filter (previously this case raised ZeroDivisionError).
    """
    choices = baseline_to_choice.get(baseline_name, [])
    datasets = baseline_to_dataset_name.get(baseline_name, [])
    kept = [
        choice
        for choice, dataset_name in zip(choices, datasets)
        if dataset_name in dataset_filter
    ]
    if not kept:
        return float("nan")
    return sum(choice == preferred_method for choice in kept) / len(kept)


# One line per baseline: "<baseline name> <rate>".
for baseline_name in BASELINE_NAMES:
    print(baseline_name, preference_rate(baseline_name, dataset_filter))

In [None]:
# worker_ids = []
# for hit_id in responses.keys():
#     status = requester.client.get_hit(HITId=hit_id)
#     listed_responses = requester.client.list_assignments_for_hit(HITId=hit_id)
#     worker_id = listed_responses["Assignments"][0]["WorkerId"]
#     print()
#     print("worker_id:", worker_id)
#     print()
#     worker_ids.append(worker_id)
#     assert len(responses[hit_id])