# Image project example

Let's walk through a toy example of using `🐥 pollo`!

In [1]:
# Autoreload extension for interactive development
%load_ext autoreload
%autoreload 2

# Standard library imports
import json
import os
from pprint import pprint
from dataclasses import dataclass
import glob
from IPython.core.display import HTML
from IPython.display import display

# Local application/library specific imports
from pollo.utils.hit import HitMaker
import pollo.utils.io as io_utils
from pollo.requester import Requester

# Put media in `data/media`

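We generated the images with DALL-E 2: 10 images for each of the text prompts "realistic chicken" and "cartoon chicken", all resized to 256x256. The cells below expect them as `.png` files under `data/media/realistic-chicken/` and `data/media/cartoon-chicken/`.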

# Create and save HITs

In [2]:
@dataclass
class ImageHitMaker(HitMaker):
    """HitMaker variant used for the chicken image-comparison example.

    Declares two fields on top of ``HitMaker``:

    Attributes:
        expected_seconds_per_query_example: Value reported under
            ``settings`` in the default HIT (presumably the time budget per
            query example, in seconds -- confirm against ``HitMaker``).
        endpoint: Base URL of the server. The notebook prefixes it to the
            ``/media/...`` and ``/hits-interface/...`` URLs it builds.
    """

    expected_seconds_per_query_example: int = 20
    # Local server, fine for previewing HITs in a browser. MTurk rejects
    # ExternalURL values that are not HTTPS, so switch to an HTTPS address
    # (for example a tunnel like the one commented out below) before submitting.
    endpoint: str = "http://localhost:40110"
    # endpoint: str = "https://191c-128-32-162-182.ngrok-free.app"

# Instantiate the maker with its default field values and print the HIT
# skeleton it produces, to see which settings are available.
hit_maker = ImageHitMaker()
hit = hit_maker.get_default_hit()
pprint(hit)

{'settings': {'expected_seconds_per_query_example': 20,
              'image_height': 300,
              'image_max_width': 500,
              'min_seconds_per_query_example': 1,
              'task': 'nerfiller_tasks',
              'task_question': 'What image looks better?'}}


In [3]:
# HTML fragment stored in the HIT settings and rendered as the task instructions.
# Every <p> is explicitly closed: the previous version left the first paragraph
# open and followed it with a stray empty <p></p>.
instructions_div = """
<p>Instructions: Given two images of chickens, identify which image looks more realistic. Judge based on quality.</p>
<p>Description of the options:</p>
<ul>
    <li><strong>Image 0</strong>: The first option looks better.</li>
    <li><strong>Image 1</strong>: The second option looks better.</li>
</ul>
<p>There are quality checks and failure to be consistent in your responses will lead to the worker not being paid and their data will be excluded from our study.</p>
<h3>Examples that you'll encounter with suggested responses</h3>
<p>You must click the buttons below to see the preferred responses and in order to proceed.</p>
"""

# Render the instructions HTML inline so the markup can be checked visually.
print("Instructions:\n\n")

display(HTML(instructions_div))

# NOTE(review): only this heading is printed; nothing in this cell renders an
# example template after it.
print("Example template:\n\n")

Instructions:




Example template:




In [4]:
# Root of the media files; the path below it is appended to `<endpoint>/media/`.
MEDIA_DIR = "data/media"
# How many leading image pairs are reused as worked examples with a suggested answer.
NUM_EXAMPLES = 3


def media_url(filename):
    """Map a file located under ``MEDIA_DIR`` to its URL on the HIT server.

    Args:
        filename: Path of a media file below ``MEDIA_DIR``.

    Returns:
        ``<hit_maker.endpoint>/media/<path relative to MEDIA_DIR>``, always
        written with forward slashes.
    """
    # relpath replaces the former `find('data/media') + 11` slice, which
    # silently produced a wrong path whenever the prefix was not found.
    # Normalising the separator keeps the URL valid on Windows too.
    relative_path = os.path.relpath(filename, MEDIA_DIR).replace(os.sep, "/")
    return f"{hit_maker.endpoint}/media/{relative_path}"


# Sorting makes the cartoon/realistic pairing deterministic across runs.
cc_filenames = sorted(glob.glob("data/media/cartoon-chicken/*.png"))
rc_filenames = sorted(glob.glob("data/media/realistic-chicken/*.png"))
cc_urls = [media_url(filename) for filename in cc_filenames]
rc_urls = [media_url(filename) for filename in rc_filenames]

pprint(cc_urls)

# Images are paired by sorted position, so a count mismatch would either drop
# images silently or fail with an opaque IndexError; report it explicitly.
if len(cc_urls) != len(rc_urls):
    raise ValueError(
        "Expected the same number of cartoon and realistic images, "
        f"got {len(cc_urls)} and {len(rc_urls)}."
    )
image_pairs = list(zip(cc_urls, rc_urls))

# One query per pair: choice 0 is the cartoon image, choice 1 the realistic one.
query_examples = [
    {
        "template_name": "ImageExampleTemplate",
        "choices": [cartoon_url, realistic_url],
    }
    for cartoon_url, realistic_url in image_pairs
]

# The first NUM_EXAMPLES pairs are also shown as worked examples. Slicing
# yields fewer examples instead of raising if fewer pairs are available.
examples = [
    {
        "template_name": "ImageExampleTemplate",
        "choices": [cartoon_url, realistic_url],
        # The realistic image is always choice 1, hence the fixed answer.
        "answer": "1",
        # Wording now says "Image" to match the option labels in the instructions.
        # NOTE(review): the "pink region" remark looks copied from another task;
        # confirm it actually describes these chicken images.
        "description": "Image 0 is fuzzy in the pink region, which is unrealistic and not desired. Image 1 is sharper and more realistic.",
    }
    for cartoon_url, realistic_url in image_pairs[:NUM_EXAMPLES]
]

# Start from a fresh default HIT, attach the instructions and examples, and
# write the result to data/hits/<config_name>.json.
hit = hit_maker.get_default_hit()
hit["settings"]["instructions_div"] = instructions_div
hit["EXAMPLES"] = examples
hit["QUERY_EXAMPLES"] = query_examples
hit["GT_HIDDEN_EXAMPLES"] = []
config_name = "hit-000000"
io_utils.write_to_json(os.path.join("data/hits", config_name + ".json"), hit)

['http://localhost:40110/media/cartoon-chicken/cartoon-chicken-000.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-001.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-002.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-003.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-004.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-005.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-006.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-007.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-008.png',
 'http://localhost:40110/media/cartoon-chicken/cartoon-chicken-009.png']


# View the HITs

In [5]:
# Announce the HIT, then list its interface URL once per entry to submit.
print("Created HIT:")
hit_interface_url = f"{hit_maker.endpoint}/hits-interface/{config_name}"
# All five entries point at the same HIT config, so the URL is simply repeated.
external_urls = [hit_interface_url for _ in range(5)]
print(*external_urls, sep="\n")

Created HIT:
http://localhost:40110/hits-interface/hit-000000
http://localhost:40110/hits-interface/hit-000000
http://localhost:40110/hits-interface/hit-000000
http://localhost:40110/hits-interface/hit-000000
http://localhost:40110/hits-interface/hit-000000


### Launch some HITs and monitor progress

In [6]:
# Passed to Requester as `use_sandbox`; presumably selects the MTurk sandbox
# rather than the live marketplace -- confirm in Requester before setting False.
use_sandbox = True
# File names handed to Requester for its local database and its credentials.
# Credentials are read from a file, so no secrets appear in the notebook.
DATABASE_FILENAME = "mturk_database.pkl"
CREDS_FILENAME = "mturk_creds.json"

requester = Requester(use_sandbox=use_sandbox,
                      database_filename=DATABASE_FILENAME,
                      creds_filename=CREDS_FILENAME)
# Displays the account balance; also a quick check that the requester is usable.
requester.show_account_balance()

10000.00


In [7]:
# Conversion factors so the durations below can be stated in natural units.
SECONDS_PER_MINUTE = 60
SECONDS_PER_HOUR = 60 * SECONDS_PER_MINUTE
SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR

# Tunable knobs for the HITs submitted by this notebook.
lifetime_hours = 1
per_hit_minutes = 15
# Kept as a string because it is passed through unchanged as the reward value.
cost_per_hit = '0.5'
days_until_autoapprove = 2
estimated_time_in_minutes = 10

# Title, keywords and description refer to images (this is the image example,
# not a video task), and the "vison" typo in the keywords is fixed.
task_attributes = {
    # Number of assignments per task.
    'MaxAssignments': 1,
    # How long the task will be available on MTurk
    'LifetimeInSeconds': SECONDS_PER_HOUR * lifetime_hours,
    # How long Workers have to complete each item
    'AssignmentDurationInSeconds': SECONDS_PER_MINUTE * per_hit_minutes,
    # The reward you will offer Workers for each response
    'Reward': cost_per_hit,
    'Title': f'Choosing which image looks better. (~{estimated_time_in_minutes} min)',
    'Keywords': 'image, choice, computer vision, selection',
    'Description': 'Choose which of the two images looks better.',
    # Delay before a submitted assignment is approved automatically.
    'AutoApprovalDelayInSeconds': days_until_autoapprove * SECONDS_PER_DAY,
}
# Hand the attributes to the requester (presumably applied to HITs it submits
# afterwards -- confirm in Requester).
requester.set_task_attributes(task_attributes)

In [8]:
# NOTE: MTurk only accepts HTTPS values for ExternalURL. With the default
# http://localhost endpoint this call fails with ParameterValidationError, as
# the captured output shows. Point `endpoint` at an HTTPS address and rebuild
# `external_urls` before running this cell.
hit_ids = requester.submit_hits_from_external_urls(external_urls)

  0%|          | 0/5 [00:00<?, ?it/s]


ClientError: An error occurred (ParameterValidationError) when calling the CreateHIT operation: The value "http://localhost:40110/hits-interface/hit-000000" is not valid for the parameter ExternalURL. This URL must use the HTTPS protocol.. (1706730928798 s)

In [None]:
# Presumably pulls worker responses into the requester's local database;
# re-run to refresh while HITs are in progress -- confirm in Requester.
requester.update_database_with_responses()

In [None]:
# Read the responses back from the requester's database and show how many
# there are (the bare last expression is displayed as the cell output).
responses = requester.get_responses_from_database()
len(responses)

In [None]:
# Write the fetched responses to files; the destination is decided by
# Requester and is not visible from this notebook.
requester.save_all_responses_to_files(responses)