-
Notifications
You must be signed in to change notification settings - Fork 58
/
utils.py
108 lines (94 loc) · 3.99 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
""" .. include::../docs/utils.md
"""
import logging
from lxml import etree
from collections import defaultdict
logger = logging.getLogger(__name__)
# Tag names whose elements are collected as labels of the nearest enclosing output (control) tag.
_LABEL_TAGS = {'Label', 'Choice'}
# Tag names that are never treated as output (control) tags, even when they carry both `name` and `toName`.
_NOT_CONTROL_TAGS = {'Filter', }
def parse_config(config_string):
    """Parse a given Label Studio labeling configuration and return a structured version of the configuration.

    Useful for formatting results for predicted annotations and determining the type(s) of ML models that might
    be relevant to the labeling project.

    Parameters
    ----------
    config_string: str
        Label configuration XML as a string. A falsy value (``None``, ``""``) yields an empty dict.

    Returns
    -------
    dict
        Structured config keyed by the ``name`` of every output (control) tag, with the form:
        ```json
        {
            "<ControlTag>.name": {
                "type": "ControlTag",
                "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"],
                "conditionals": {"type": "tag", "name": "<whenTagName value>"},
                "inputs": [
                    {"type": "ObjectTag1", "value": "<ObjectTag1>.value"},
                    {"type": "ObjectTag2", "value": "<ObjectTag2>.value"}
                ],
                "labels": ["Label1", "Label2", "Label3"],
                "labels_attrs": {"Label1": {"value": "Label1"}}
            }
        }
        ```
        * ``to_name`` is the ``toName`` attribute split on ``","`` (entries are not whitespace-trimmed).
        * ``conditionals`` is present only when ``perRegion="true"`` and one of ``whenTagName``,
          ``whenLabelValue`` or ``whenChoiceValue`` is set; its ``type`` is ``"tag"``, ``"label"`` or
          ``"choice"`` respectively.
        * ``inputs[*].value`` is the object tag's ``value`` attribute with leading ``$`` characters removed.
        * ``labels`` are taken from ``alias`` if it exists, else ``value``.
        * ``labels_attrs`` maps each label to a dict of all XML attributes of its ``Label``/``Choice`` tag.

    Raises
    ------
    Exception
        Whatever ``etree.fromstring`` raises for an unparsable ``config_string`` is propagated unchanged.
    """
    if not config_string:
        return {}

    # (XML attribute, conditional type) pairs, checked in priority order for perRegion tags.
    conditional_attrs = (
        ('whenTagName', 'tag'),
        ('whenLabelValue', 'label'),
        ('whenChoiceValue', 'choice'),
    )

    def _is_input_tag(tag):
        # Object (input) tags carry both a name and a value.
        return tag.attrib.get('name') and tag.attrib.get('value')

    def _is_output_tag(tag):
        # Control (output) tags carry a name and a toName, unless explicitly excluded.
        return tag.attrib.get('name') and tag.attrib.get('toName') and tag.tag not in _NOT_CONTROL_TAGS

    def _get_parent_output_tag_name(tag, outputs):
        # Find parental <Choices> tag for nested tags like <Choices><View><View><Choice>...
        parent = tag.getparent()
        while parent is not None:
            name = parent.attrib.get('name')
            if name in outputs:
                return name
            parent = parent.getparent()
        return None

    xml_tree = etree.fromstring(config_string)

    inputs, outputs, labels = {}, {}, defaultdict(dict)
    # iter() walks the tree in document order, so an output tag is always registered
    # in `outputs` before any of its nested Label/Choice descendants is visited.
    for tag in xml_tree.iter():
        if _is_output_tag(tag):
            tag_info = {'type': tag.tag, 'to_name': tag.attrib['toName'].split(',')}
            # Grab conditionals if any: the first non-empty when* attribute wins.
            if tag.attrib.get('perRegion') == 'true':
                for attr_name, conditional_type in conditional_attrs:
                    attr_value = tag.attrib.get(attr_name)
                    if attr_value:
                        tag_info['conditionals'] = {'type': conditional_type, 'name': attr_value}
                        break
            outputs[tag.attrib['name']] = tag_info
        elif _is_input_tag(tag):
            inputs[tag.attrib['name']] = {'type': tag.tag, 'value': tag.attrib['value'].lstrip('$')}

        if tag.tag not in _LABEL_TAGS:
            continue
        parent_name = _get_parent_output_tag_name(tag, outputs)
        if parent_name is None:
            # Label/Choice outside of any known output tag: nothing to attach it to.
            continue
        actual_value = tag.attrib.get('alias') or tag.attrib.get('value')
        if actual_value:
            labels[parent_name][actual_value] = dict(tag.attrib)
        else:
            logger.debug(
                'Inspecting tag %s... found no "value" or "alias" attributes.',
                etree.tostring(tag, encoding='unicode').strip()[:50])

    for output_tag, tag_info in outputs.items():
        tag_info['inputs'] = []
        for input_tag_name in tag_info['to_name']:
            if input_tag_name not in inputs:
                logger.warning(
                    'to_name=%s is specified for output tag name=%s, '
                    'but we can\'t find it among input tags',
                    input_tag_name, output_tag)
                continue
            tag_info['inputs'].append(inputs[input_tag_name])
        # `labels` is a defaultdict, so output tags without labels get an empty dict / list here.
        output_labels = labels[output_tag]
        tag_info['labels'] = list(output_labels)
        tag_info['labels_attrs'] = output_labels
    return outputs