# data_manager.py
""" # Data Manager module for the Label Studio SDK
The classes in this module can be used to filter, order, and select items in
`label_studio_sdk.project.Project.get_tasks`. The module also provides an enumeration of all column names
available in the Data Manager for tasks, and other helpers.

See the [client](client.html), [project](project.html) or [utils](utils.html) modules for other operations you
might want to perform.

Example:

```python
from datetime import datetime

from label_studio_sdk.data_manager import Filters, Column, Operator, Type

filters = Filters.create(Filters.OR, [
    Filters.item(
        Column.id,
        Operator.GREATER,
        Type.Number,
        Filters.value(42)
    ),
    Filters.item(
        Column.completed_at,
        Operator.IN,
        Type.Datetime,
        Filters.value(
            datetime(2021, 11, 1),
            datetime.now()
        )
    )
])
tasks = project.get_tasks(filters=filters)
```
"""
from datetime import datetime, timezone
# strftime pattern used for datetime filter values; the trailing "Z" marks the value as UTC.
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class Filters:
    """
    Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and'),
            see `Filters.OR` and `Filters.AND`
        items: list
            What to filter, use the `Filters.item()` method to build each entry

        Returns
        -------
        dict
            Dictionary with the keys `conjunction` and `items` holding the given parameters

        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            Dictionary with the keys `filter` (the column name prefixed with `filter:`),
            `operator`, `type` and `value`

        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Timezone-aware datetimes are converted to UTC before formatting, because the
        output always ends with the UTC designator `Z`. Naive datetimes are formatted
        as they are.

        Parameters
        ----------
        dt
            datetime instance

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        Raises
        ------
        AssertionError
            If `dt` is not a datetime instance

        """
        # Explicit check instead of an `assert` statement: asserts are removed when
        # Python runs with -O. AssertionError is kept so the exception type callers
        # see does not change.
        if not isinstance(dt, datetime):
            raise AssertionError("dt must be datetime type")
        # An aware, non-UTC value would otherwise be labelled "Z" while still carrying
        # its local wall-clock time, i.e. it would denote the wrong instant.
        if dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | boolean
            value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            The value itself (datetimes are converted with `Filters.datetime()`), or a
            dictionary with the keys `min` and `max` when `maximum` is passed

        """
        if isinstance(value, datetime):
            value = cls.datetime(value)

        # Compare against None rather than truthiness so that 0 is a valid maximum.
        if maximum is None:
            return value

        if isinstance(maximum, datetime):
            maximum = cls.datetime(maximum)
        return {"min": value, "max": maximum}
class Operator:
    """Operator names accepted by `Filters.item()` when building a filter."""

    # Equality comparisons
    EQUAL = "equal"
    NOT_EQUAL = "not_equal"

    # Ordering comparisons
    LESS = "less"
    GREATER = "greater"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER_OR_EQUAL = "greater_or_equal"

    # Range operators: used with a minimum/maximum pair built by `Filters.value()`
    IN = "in"
    NOT_IN = "not_in"

    # List membership
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"

    # Emptiness check
    EMPTY = "empty"

    # Text matching
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"
class Type:
    """Column data types accepted by `Filters.item()` as the `column_type` argument."""

    Number = "Number"
    Datetime = "Datetime"
    Boolean = "Boolean"
    String = "String"
    List = "List"

    Unknown = "Unknown"
    """Unknown is explicitly converted to string format."""
class Column:
    """Names of the Data Manager columns in the Label Studio UI that a filter can target."""

    # --- Task identity and origin ---
    id = "tasks:id"
    """Task ID"""
    inner_id = "tasks:inner_id"
    """Task Inner ID, it starts from 1 for all projects"""
    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""
    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""

    # --- Predictions ---
    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""
    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""
    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""

    # --- Upload and timestamps ---
    file_upload = "tasks:file_upload"
    """Name of the file uploaded to create the tasks"""
    created_at = "tasks:created_at"
    """Time the task was created at"""
    updated_at = "tasks:updated_at"
    """Time the task was updated at (e.g. new annotation was created, review added, etc)"""

    # --- Annotation activity ---
    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""
    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""
    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""
    completed_at = "tasks:completed_at"
    """Time when a task was fully annotated"""

    # --- Agreement and review ---
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""
    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""
    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""
    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""

    # --- Comments ---
    comments = "tasks:comments"
    """Number of comments in a task"""
    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Create a filter name for the task data field

        Parameters
        ----------
        task_field: str
            Name of the field inside the task data, e.g. `'my_field'`

        Returns
        -------
        str
            Filter name for task data: `task_field` prefixed with `tasks:data.`

        """
        prefix = "tasks:data."
        return prefix + task_field
def _test():
    """Self-check: build an OR filter from two items and compare it with the expected payload."""
    # First item: tasks whose ID is greater than 42.
    id_filter = Filters.item(
        Column.id, Operator.GREATER, Type.Number, Filters.value(42)
    )

    # Second item: tasks completed within a date range (minimum and maximum).
    completed_range = Filters.value(
        datetime(2021, 11, 1),
        datetime(2021, 11, 5),
    )
    completed_filter = Filters.item(
        Column.completed_at, Operator.IN, Type.Datetime, completed_range
    )

    filters = Filters.create(Filters.OR, [id_filter, completed_filter])

    expected = {
        "conjunction": "or",
        "items": [
            {
                "filter": "filter:tasks:id",
                "operator": "greater",
                "type": "Number",
                "value": 42,
            },
            {
                "filter": "filter:tasks:completed_at",
                "operator": "in",
                "type": "Datetime",
                "value": {
                    "min": "2021-11-01T00:00:00.000000Z",
                    "max": "2021-11-05T00:00:00.000000Z",
                },
            },
        ],
    }
    assert filters == expected