Skip to content

Commit

Permalink
1.2.2.1 supports displaying label suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
Jiaxin-Pei committed Mar 10, 2024
1 parent b71ed96 commit 1986f56
Show file tree
Hide file tree
Showing 10 changed files with 1,238 additions and 2 deletions.
Binary file added docs/img/label_suggestions.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
64 changes: 64 additions & 0 deletions docs/productivity.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,67 @@ for this function
"users": []
},
```

## Label suggestions
Starting from 1.2.2.1, Potato supports displaying label suggestions to improve the productivity of annotators. Currently we
support two types of label suggestions: `prefill` and `highlight`. `prefill` automatically
pre-selects the suggested labels or prefills the text inputs for the annotators, while `highlight` only
highlights the text of the suggested labels. `highlight` can only be used for `multiselect` and `radio` schemes;
`prefill` can also be used with textboxes.

There are two steps to set up label suggestions for your annotation tasks:

### Step 1: modify your configuration file
Label suggestions are defined for each scheme. In your configuration file, simply add
a field named `label_suggestions` to the annotation schemes that should show suggestions. You can use different suggestion
types for different schemes.
``` yaml
{
"annotation_type": "multiselect",
"name": "sentiment",
"description": "What kind of sentiment does the given text hold?",
"labels": [
"positive", "neutral", "negative",
],

# If true, numbers [1-len(labels)] will be bound to each
# label. Annotations with more than 10 labels are not supported with this
# simple keybinding and will need to use the full item specification
# to bind all labels to keys.
"sequential_key_binding": True,

# how to display the suggestions, currently supported:
# "highlight": highlight the suggested labels with color
# "prefill": directly pre-select the suggested labels or prefill the suggested content
# otherwise this feature is turned off
"label_suggestions":"highlight"
},
{
"annotation_type": "text",
"name": "explanation",
"description": "Why do you think so?",
# if you want to use multi-line textbox, turn on the text area and set the desired rows and cols of the textbox
"textarea": {
"on": True,
"rows": 2,
"cols": 40
},
# how to display the suggestions, currently supported:
# "highlight": highlight the suggested labels with color
# "prefill": directly pre-select the suggested labels or prefill the suggested content
# otherwise this feature is turned off
"label_suggestions": "prefill"
},
```

### Step 2: prepare your data
For each line of your input data, you can add a field named `label_suggestions`. `label_suggestions` defines
a mapping from the scheme name to the suggested label(s); each value can be a single string or a list of strings. For example:
``` yaml
{"id":"1","text":"Good Job!","label_suggestions": {"sentiment": "positive", "explanation": "Because I think "}}
{"id":"2","text":"Great work!","label_suggestions": {"sentiment": "positive", "explanation": "Because I think "}}
```

You can check out our [example project](https://github.com/davidjurgens/potato/tree/master/project-hub/label_suggestions) in the project hub to see how to set up label suggestions.

![Alt text](img/label_suggestions.png)
48 changes: 46 additions & 2 deletions potato/flask_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2193,8 +2193,32 @@ def annotate_page(username=None, action=None):
#
# NOTE: this code is probably going to break the span annotation's
# understanding of the instance. Need to check this...
updated_text, schema_labels_to_highlight = text, set()
if "keyword_highlights_file" in config:
updated_text, schema_labels_to_highlight, schema_content_to_prefill = text, set(), []

#prepare label suggestions
suggestions = instance['label_suggestions']
for scheme in config['annotation_schemes']:
if scheme['name'] not in suggestions:
continue
suggested_labels = suggestions[scheme['name']]
if type(suggested_labels) == str:
suggested_labels = [suggested_labels]
elif type(suggested_labels) == list:
suggested_labels = suggested_labels
else:
print("WARNING: Unsupported suggested label type %s, please check your input data" % type(s))
continue

if scheme.get('label_suggestions') == 'highlight':
for s in suggested_labels:
schema_labels_to_highlight.add((scheme['name'], s))
elif scheme.get('label_suggestions') == 'prefill':
for s in suggested_labels:
schema_content_to_prefill.append({'name':scheme['name'], 'label':s})
else:
print('WARNING: the style of suggested labels is not defined, please check your configuration file.')

if "keyword_highlights_file" in config and len(schema_labels_to_highlight) == 0:
updated_text, schema_labels_to_highlight = post_process(config, text)

# Fill in the kwargs that the user wanted us to include when rendering the page
Expand Down Expand Up @@ -2261,10 +2285,30 @@ def annotate_page(username=None, action=None):
if label_elem:
label_elem["style"] = "background-color: %s" % c


# If the user has annotated this before, walk the DOM and fill out what they
# did
annotations = get_annotations_for_user_on(username, instance_id)

# convert the label suggestions into annotations for front-end rendering
if annotations == None and schema_content_to_prefill:
scheme_dict = {}
annotations = defaultdict(dict)
for it in config['annotation_schemes']:
if it['annotation_type'] in ['radio', 'multiselect']:
it['label2value'] = {(l if type(l) == str else l['name']):str(i+1) for i,l in enumerate(it['labels'])}
scheme_dict[it['name']] = it
for s in schema_content_to_prefill:
if scheme_dict[s['name']]['annotation_type'] in ['radio', 'multiselect']:
annotations[s['name']][s['label']] = scheme_dict[s['name']]['label2value'][s['label']]
elif scheme_dict[s['name']]['annotation_type'] in ['text']:
if "labels" not in scheme_dict[s['name']]:
annotations[s['name']]['text_box'] = s['label']
else:
print('WARNING: label suggestions not supported for annotation_type %s, please submit a github issue to get support'%scheme_dict[s['name']]['annotation_type'])
#print(schema_content_to_prefill, annotations)


if annotations is not None:
# Reset the state
for schema, labels in annotations.items():
Expand Down
2 changes: 2 additions & 0 deletions potato/server_utils/cli_utlis.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
'politeness_rating': 'https://github.com/davidjurgens/potato/raw/master/project-hub/politeness_rating.zip',
'offensiveness': 'https://github.com/davidjurgens/potato/raw/master/project-hub/offensiveness.zip',
'text_rewriting': 'https://github.com/davidjurgens/potato/raw/master/project-hub/text_rewriting.zip',
'prolific_api_example': 'https://github.com/davidjurgens/potato/raw/master/project-hub/prolific_api_example.zip',
'label_suggestions': 'https://github.com/davidjurgens/potato/raw/master/project-hub/label_suggestions.zip',
}

# get a specific project from the hub
Expand Down
9 changes: 9 additions & 0 deletions potato/server_utils/prolific_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ def add_new_user(self, user):
self.session_status_dict[status].append(user['SESSION_ID'])

'''
prolific = ProlificStudy(token = 'yRB91_ngkHclqd36bhXCGWwl5fqU4iVlXX-2i61cfNoh7Tpvh4tH8R6IAxEBsYrkMnyc4X8tEpmmJhHXiHiRkFZYIm_Jr-pCoXFqyrIHX30qUuT5RMcIc7rG',
study_id='651ca114a0a3dc560dd00c2a', saving_dir='../')
prolific.list_all_studies()
start_time = time.time()
print(prolific.update_submission_status())
end_time = time.time()
execution_time = end_time - start_time
print(execution_time)
prolific = ProlificStudy(token = 'yRB91_ngkHclqd36bhXCGWwl5fqU4iVlXX-2i61cfNoh7Tpvh4tH8R6IAxEBsYrkMnyc4X8tEpmmJhHXiHiRkFZYIm_Jr-pCoXFqyrIHX30qUuT5RMcIc7rG',
study_id='6498cf2053b6c5b98075f52c', saving_dir='../')
Expand Down
Binary file added project-hub/label_suggestions.zip
Binary file not shown.
110 changes: 110 additions & 0 deletions project-hub/label_suggestions/configs/sentiment-analysis-preload.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"port": 9001,

"server_name": "potato annotator",

"annotation_task_name": "Sentiment Analysis",

# Potato will write the annotation file for all annotations to this
# directory, as well as per-annotator output files and state information
# necessary to restart annotation.
"output_annotation_dir": "annotation_output/",

# The output format for the all-annotator data. Allowed formats are:
# * jsonl
# * json (same output as jsonl)
# * csv
# * tsv
#
"output_annotation_format": "tsv",

# If annotators are using a codebook, this will be linked at the top to the
# instance for easy access
"annotation_codebook_url": "",

"data_files": [
"data_files/toy-example-with-suggested-labels.json"
],

"item_properties": {
"id_key": "id",
"text_key": "text",
"context_key": "context"
},


"user_config": {

"allow_all_users": False,

"authorized_users": ["pedropei@umich.edu"],
},

# How many seconds you want the annotators to spend on each instance; after
# that, an alert will be sent every alert_time_each_instance seconds.
"alert_time_each_instance": 10000000,


"annotation_schemes": [
{
"annotation_type": "multiselect",
"name": "sentiment",
"description": "What kind of sentiment does the given text hold?",
"labels": [
"positive", "neutral", "negative",
],

# If true, numbers [1-len(labels)] will be bound to each
# label. Annotations with more than 10 labels are not supported with this
# simple keybinding and will need to use the full item specification
# to bind all labels to keys.
"sequential_key_binding": True,

# how to display the suggestions, currently supported:
# "highlight": highlight the suggested labels with color
# "prefill": directly pre-select the suggested labels or prefill the suggested content
# otherwise this feature is turned off
"label_suggestions":"highlight"
},
{
"annotation_type": "text",
"name": "explanation",
"description": "Why do you think so?",
# if you want to use multi-line textbox, turn on the text area and set the desired rows and cols of the textbox
"textarea": {
"on": True,
"rows": 2,
"cols": 40
},
# how to display the suggestions, currently supported:
# "highlight": highlight the suggested labels with color
# "prefill": directly pre-select the suggested labels or prefill the suggested content
# otherwise this feature is turned off
"label_suggestions": "prefill"
},
],

# The html that changes the visualization for your task. Change this file
# to influence the layout and description of your task. This is not a full
# HTML page, just the piece that lays out your task's pieces.
# You may use the templates in our lib; if you want to use your own template,
# please replace the string with a path to the template
"html_layout": "default",
"surveyflow_html_layout": "fixed_keybinding",

# The core UI files for Potato. You should not need to change these normally.
#
# Exceptions to this might include:
# 1) You want to add custom CSS/fonts to style your task
# 2) Your layout requires additional JS/assets to render
# 3) You want to support additional keybinding magic
#
# if you want to use your own template,
# please replace the string as a path to the template
"base_html_template": "default",
"header_file": "default",

# This is where the actual HTML files will be generated
"site_dir": "default"

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id":"1","text":"<div name=\"instance_text\" data-toggle=\"tooltip\" data-html=\"true\" data-placement=\"top\" data-original-title=\"test tool tip\">Tom: Isn't this awesome?!</div>","label_suggestions": {"sentiment": "positive", "explanation": "Because I think "}}
{"id":"2","text":"<div name=\"instance_text\" data-toggle=\"tooltip\" data-html=\"true\" data-placement=\"top\" data-original-title=\"test tool tip\">Tom: Isn't this awesome?!</div>","label_suggestions": {"sentiment": "negative"}}
{"id":"3","text":"<div name=\"instance_text\" data-toggle=\"tooltip\" data-html=\"true\" data-placement=\"top\" data-original-title=\"test tool tip\">Tom: Isn't this awesome?!</div>","label_suggestions": {"sentiment": "positive"}}
Loading

0 comments on commit 1986f56

Please sign in to comment.