/
cropobject.py
1619 lines (1330 loc) · 60.5 KB
/
cropobject.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""This module implements a Python representation of the CropObject,
the basic unit of annotation. See the :class:`CropObject` documentation."""
from __future__ import print_function, unicode_literals, division
from builtins import zip
from builtins import map
from builtins import str
from builtins import range
from builtins import object
import copy
import itertools
import logging
from typing import Any, Optional, List, Union, Tuple
import numpy
from muscima.utils import compute_connected_components
# Module metadata.
__version__ = "1.0"
__author__ = "Jan Hajic jr."

CROPOBJECT_MASK_ORDER = 'C'
#: The CropObject mask uses this numpy ordering when flattening the data.
#: ('C' is numpy's row-major order: rows are laid out one after another.)
##############################################################################
class CropObject(object):
"""One annotated object.
The CropObject represents one instance of an annotation. It implements
the following attributes:
* ``objid``: the unique number of the given annotation instance in the set
of annotations encoded in the containing `CropObjectList`.
* ``uid``: the global unique identifier of the annotation instance. String.
See :meth:`CropObject.parse_uid` method for format details.
* ``clsname``: the name of the label that was given to the annotation
(this is the human-readable string such as ``notehead-full``).
* ``top``: the vertical dimension (row) of the upper left corner pixel.
* ``left``: the horizontal dimension (column) of the upper left corner pixel.
* ``bottom``: the vertical dimension (row) of the lower right corner pixel + 1,
so that you can index the corresponding image rows using
``img[c.top:c.bottom]``.
* ``right``: the horizontal dimension (column) of the lower right corner pixel + 1,
so that you can index the corresponding image columns using
``img[:, c.left:c.right]``.
* ``width``: the number of columns that the CropObject spans.
* ``height``: the number of rows that the CropObject spans.
* ``mask``: a binary (0/1) numpy array that denotes the area within the
CropObject's bounding box (specified by ``top``, ``left``, ``height``
and ``width``) that the CropObject actually occupies. If the mask is
``None``, the object is understood to occupy the entire bounding box.
* ``data``: a dictionary that can be empty, or can contain anything. It is
generated from the optional ``<Data>`` element of a CropObject.
Constructing a simple CropObject that consists of the "b"-like flat music
notation symbol (never mind the ``uid`` for now):
>>> top = 10
>>> left = 15
>>> height = 10
>>> width = 4
>>> mask = numpy.array([[1, 1, 0, 0],
... [1, 0, 0, 0],
... [1, 0, 0, 0],
... [1, 0, 0, 0],
... [1, 0, 1, 1],
... [1, 1, 1, 1],
... [1, 0, 0, 1],
... [1, 0, 1, 1],
... [1, 1, 1, 0],
... [0, 1, 0, 0]])
>>> clsname = 'flat'
>>> uid = 'MUSCIMA++_1.0___muscima.cropobject.CropObject.doctest___0'
>>> c = CropObject(objid=0, clsname=clsname,
... top=top, left=left, height=height, width=width,
... inlinks=[], outlinks=[],
... mask=mask,
... uid=uid)
CropObjects can also form graphs, using the following attributes:
* ``outlinks``: Outgoing edges. A list of integers; it is assumed they are
valid ``objid`` within the same global/doc namespace.
* ``inlinks``: Incoming edges. A list of integers; it is assumed they are
valid ``objid`` within the same global/doc namespace.
So far, CropObject graphs do not support multiple relationship types.
**Unique identification**
The ``uid`` serves to identify the CropObject uniquely,
at least within the MUSCIMA dataset system. (We anticipate further
versions of the dataset, and need to plan for that.)
To uniquely identify a CropObject, we need three "levels":
* The "global", **dataset-level identification**: which dataset is this
CropObject coming from? (For this dataset: ``MUSCIMA++_1.0``)
* The "local", **document-level identification**: which document
(within the given dataset) is this CropObject coming from?
For MUSCIMA++ 1.0, this will usually be a string like
``CVC-MUSCIMA_W-35_N-08_D-ideal``, derived from the filename
under which the CropObjectList containing the given CropObject
is stored.
* The **within-document identification**, which is identical
to the ``objid``.
These three components are joined together into one string by
a delimiter: ``___``
The full ``uid`` of a CropObject then might look like this::
MUSCIMA-pp_1.0___CVC-MUSCIMA_W-35_N-08_D-ideal___611
You will need to use UIDs whenever you are combining CropObjects
from different documents, and/or datasets. (If you are really combining
datasets, make sure you know what you are doing -- some annotation
instructions may change between versions, so objects of the same class
might not exactly correspond to each other...) The dataset and document
names are available through appropriate instance attributes:
>>> c.doc
'muscima.cropobject.CropObject.doctest'
>>> c.dataset
'MUSCIMA++_1.0'
If you supply no ``uid`` at initialization time, a default UID will
be used:
>>> c.default_uid
'MUSCIMA_DEFAULT_DATASET_PLACEHOLDER___default-document___0'
(Don't abuse the default, though! It's intended just for transitioning
documents without UIDs to those that have them.)
On the other hand, the ``objid`` is a field intended to uniquely identify
a CropObject within the scope of one CropObject list (one annotation
document).
.. caution::
The scope of unique identification within MUSCIMA++ is only within
a ``<CropObjectList>``. Don't use ``objid`` to mix CropObjects from
multiple files!
**CropObjects and images**
CropObjects and images are not tightly bound. This is because the same
object can apply to multiple images: in the case of the CVC-MUSCIMA dataset,
for example, the same CropObjects are present both in the full image
and in the staff-less image. The limitation here is that CropObjects
are based on exact pixels, so in order to retain validity, the images
must correspond to each other exactly, as "layers".
Because CropObjects do not correspond to any given image, there is
no facility in the data format to link them to a specific one. You have to
take care of matching CropObject annotations to the right images by yourself.
The ``CropObject`` class implements some interactions with images.
To recover the area corresponding to a CropObject `c`, use:
>>> if c.mask is not None: crop = img[c.top:c.bottom, c.left:c.right] * c.mask #doctest: +SKIP
>>> if c.mask is None: crop = img[c.top:c.bottom, c.left:c.right] #doctest: +SKIP
Because this is clunky, we have implemented the following to get the crop:
>>> crop = c.project_to(img) #doctest: +SKIP
And to get the CropObject projected onto the entire image:
>>> crop = c.project_on(img) #doctest: +SKIP
Above, note the multiplicative role of the mask: while we typically would
expect the mask to be binary, in principle, this is not strictly necessary.
You could supply a different mask interpretation, such as probabilistic.
However, we strongly advise not to misuse this feature unless you have
a really good reason; remember that the CropObject is supposed to represent
an annotation of a given image. (One possible use for a non-binary mask
that we can envision is aggregating multiple annotations of the same
image.)
For visualization, there is a more sophisticated method that renders
the CropObject as a transparent colored rectangle over
an RGB image. (NOTE: this really changes the input image!)
>>> c_obj.render(img) #doctest: +SKIP
>>> plt.imshow(img); plt.show() #doctest: +SKIP
However, `CropObject.render()` currently does not support rendering
the mask.
**Disambiguating class names**
Since the class names are present
through the ``clsname`` attribute (``<MLClassName>`` element),
matching the list is no longer necessary for general understanding
of the file. The MLClassList file serves as a disambiguation tool:
there may be multiple annotation projects that use the same names
but maybe define them differently and use different guidelines,
and their respective MLClassLists allow you to interpret the symbol
names correctly, in light of the corresponding set of definitions.
.. note::
In MUSCIMarker, the MLClassList is currently necessary to define
how CropObjects are displayed: their color. (All noteheads are red,
all barlines are green, etc.) The other function, matching names
to ``clsid``, has been superseded by the ``clsname`` CropObject
attribute.
**Merging CropObjects**
To merge a list of CropObjects into a new one, you need to:
* Compute the new object's bounding box: ``cropobjects_merge_bbox()``
* Compute the new object's mask: ``cropobjects_merge_mask()``
* Determine the clsid and objid of the new object.
Since objid and clsid of merges may depend on external settings
and generally cannot be reliably determined from the merged
objects themselves (e.g. the merge of a notehead and a stem
should be a new note symbol), you need to supply them externally.
However, the bounding box and mask can be determined. The bounding
box is computed simply as the smallest bounding box that
encompasses all the CropObjects, and the mask is an OR operation
over the individual masks (or None, if the CropObjects don't
have masks). Note that the merge cannot deal with a situation
where only some of the objects have a mask.
**Implementation notes on the mask**
The mask is a numpy array that will be saved using run-length encoding.
The numpy array is first flattened, then runs of successive 0's and 1's
are encoded as e.g. ``0:10`` for a run of 10 zeros.
How much space does this take?
Objects tend to be relatively convex, so after flattening, we can expect
more or less two runs per row (flattening is done in ``C`` order). Because
each run takes (approximately) 5 characters, each mask takes roughly ``5 * n_rows``
bytes to encode. This makes it efficient for objects wider than 5 pixels, with
a compression ratio approximately ``n_cols / 5``.
(Also, the numpy array needs to be made C-contiguous for that, which
explains the ``order='C'`` hack in ``set_mask()``.)
"""
def __init__(self,
             objid,  # type: int
             clsname,  # type: str
             top,  # type: int
             left,  # type: int
             width,  # type: int
             height,  # type: int
             outlinks=None,  # type: Optional[List[int]]
             inlinks=None,  # type: Optional[List[int]]
             mask=None,  # type: Optional[numpy.ndarray]
             uid=None,  # type: Optional[str]
             data=None
             ):
    """Build a CropObject from its identifier, label, geometry and
    optional mask, graph links, ``uid`` and ``data`` dictionary.

    The position is stored as ``x`` (row of the top edge) and ``y``
    (column of the left edge). Position and size are snapped to integer
    bounds before the mask is attached. When ``inlinks``, ``outlinks``
    or ``data`` are not supplied, fresh empty containers are created;
    when ``uid`` is not supplied, :attr:`default_uid` is used.
    """
    # Identity and label.
    self.objid = objid
    self.clsname = clsname

    # Geometry: x is the row (top), y is the column (left).
    self.x = top
    self.y = left
    self.width = width
    self.height = height
    self.to_integer_bounds()

    # The mask presupposes integer bounds and is expressed relative
    # to the CropObject's bounding box, not to the whole image.
    self.mask = None
    self.set_mask(mask)

    # Graph structure; a new list per instance when none was given.
    self.inlinks = [] if inlinks is None else inlinks
    self.outlinks = [] if outlinks is None else outlinks

    # Unique identification: fall back to the default UID.
    self.set_uid(self.default_uid if uid is None else uid)

    self.is_selected = False

    # Free-form payload dictionary.
    self.data = dict() if data is None else data
##########################################################################
# Dealing with unique identification of a CropObject, also across
# anticipated dataset versions.
#
# A UID is built from three fields joined by UID_DELIMITER:
# dataset namespace, document namespace, and the numeric objid.
UID_DELIMITER = '___'
#: Delimits the CropObject UID fields (global, document namespaces, objid)
UID_DEFAULT_DATASET_NAMESPACE = 'MUSCIMA_DEFAULT_DATASET_PLACEHOLDER'
#: Default dataset name for CropObjects.
UID_DEFAULT_DOCUMENT_NAMESPACE = 'default-document'
#: Default document name for CropObjects.
@property
def default_uid(self):
    # type: () -> str
    """The ``uid`` this CropObject gets when none is supplied at
    initialization: the default dataset and document namespaces
    followed by the object's ``objid``, joined by ``UID_DELIMITER``.

    >>> c.default_uid   # doctest: +SKIP
    'MUSCIMA_DEFAULT_DATASET_PLACEHOLDER___default-document___0'

    """
    fields = (self.UID_DEFAULT_DATASET_NAMESPACE,
              self.UID_DEFAULT_DOCUMENT_NAMESPACE,
              str(self.objid))
    return self.UID_DELIMITER.join(fields)
def parse_uid(self):
    # type: () -> Tuple[str, str, int]
    """Split this CropObject's ``uid`` into its three components and
    check that the numeric part agrees with ``objid``.

    The components are the global (dataset) namespace, the document
    namespace (i.e. the CropObjectList name -- usually one per image)
    and the numeric ID of the CropObject within its CropObjectList.
    The numeric ID must equal ``objid``, which serves as the
    "technical" integer identifier (usable e.g. for indexing within
    the MUSCIMarker annotation app).

    The actual string parsing is delegated to :meth:`_parse_uid`
    (see there for the format); this method adds the consistency
    check. The delimiter is expected to be ``___``
    (kept as ``CropObject.UID_DELIMITER``).

    :returns: ``(global_name, document_name, object_id)``.

    :raises ValueError: if the number in the ``uid`` differs from
        ``objid``.
    """
    global_name, document_name, parsed_id = self._parse_uid(self.uid)

    # A missing uid yields no number; the objid stands in for it.
    object_id = self.objid if parsed_id is None else parsed_id

    if object_id != self.objid:
        raise ValueError('Got CropObject with different numeric ID'
                         ' in UID and technical objid. UID record:'
                         ' {0}, objid: {1}'.format(object_id, self.objid))
    return global_name, document_name, object_id
@staticmethod
def _parse_uid(uid):
    # type: (Optional[str]) -> Tuple[str, str, Optional[int]]
    """Break a UID string down into the global namespace, the document
    namespace (i.e. CropObjectList name -- usually per image), and
    the numeric ID of the CropObject within one CropObjectList.

    The delimiter is expected to be ``___``
    (kept as ``CropObject.UID_DELIMITER``)

    >>> CropObject._parse_uid('MUSCIMA++_1.0___CVC-MUSCIMA_W-05_N-19_D-ideal___424')
    ('MUSCIMA++_1.0', 'CVC-MUSCIMA_W-05_N-19_D-ideal', 424)

    :returns: ``global_namespace, document_namespace, objid`` triplet.
        The namespaces are strings, ``objid`` is an integer. If ``uid``
        is ``None``, the default namespaces are returned together with
        ``None`` as ``objid``; the caller is expected to fill it in
        from the CropObject instance.

    :raises ValueError: if the string does not split into exactly
        three fields, or the last field is not an integer.
    """
    if uid is None:
        return (CropObject.UID_DEFAULT_DATASET_NAMESPACE,
                CropObject.UID_DEFAULT_DOCUMENT_NAMESPACE,
                None)

    fields = uid.split(CropObject.UID_DELIMITER)
    # Exactly three fields are required; unpacking enforces that.
    global_name, document_name, numid_str = fields
    return global_name, document_name, int(numid_str)
@staticmethod
def build_uid(global_name, document_name, numid):
    # type: (Any, Any, Any) -> str
    """Assemble a UID string: the three components are converted
    with ``str()`` and joined by ``CropObject.UID_DELIMITER``."""
    fields = [str(component)
              for component in (global_name, document_name, numid)]
    return CropObject.UID_DELIMITER.join(fields)
def set_uid(self, uid):
    # type: (str) -> None
    """Assign ``uid`` to the CropObject and refresh the cached dataset
    namespace, document namespace and instance number derived from it.

    Always go through this method instead of assigning to
    ``cropobject.uid`` directly: the derived fields have to be updated
    and :meth:`parse_uid` performs an integrity check against
    ``objid``. See the :class:`CropObject` class documentation for
    information on how ``uid`` attributes work.

    Do **NOT** use this function, unless you know what you are doing!
    You could mess up the integrity of your copy of the dataset, and
    you'd have to download it again...
    """
    self.uid = uid
    dataset_name, document_name, instance_number = self.parse_uid()
    self._dataset_namespace = dataset_name
    self._document_namespace = document_name
    self._instance = instance_number
def set_doc(self, docname):
    # type: (str) -> None
    """Replace the document namespace in the UID with ``docname``,
    keeping the dataset namespace and the instance number."""
    fields = (self._dataset_namespace, docname, str(self._instance))
    self.set_uid(self.UID_DELIMITER.join(fields))
def set_dataset(self, dataset_name):
    # type: (str) -> None
    """Replace the dataset namespace in the UID with ``dataset_name``,
    keeping the document namespace and the instance number."""
    fields = (dataset_name, self._document_namespace, str(self._instance))
    self.set_uid(self.UID_DELIMITER.join(fields))
def set_mask(self, mask):
    # type: (Optional[numpy.ndarray]) -> None
    """Attach ``mask`` to the CropObject after compatibility checks.

    ``None`` clears the mask. Otherwise the mask shape must equal the
    ``(height, width)`` of the integer-rounded bounding box, and the
    stored mask is a ``uint8`` copy of the input.

    :raises ValueError: if the mask shape does not match the bounding
        box.
    """
    if mask is None:
        self.mask = None
        return

    # Check dimension against the integer bounding box.
    top, left, bottom, right = self.bbox_to_integer_bounds(self.top,
                                                           self.left,
                                                           self.bottom,
                                                           self.right)
    expected_shape = (bottom - top, right - left)
    if mask.shape != expected_shape:
        raise ValueError('Mask shape {0} does not correspond'
                         ' to integer shape {1} of CropObject.'
                         ''.format(mask.shape, expected_shape))

    if str(mask.dtype) != 'uint8':
        logging.debug('CropObject.set_mask(): Supplied non-integer mask'
                      ' with dtype={0}'.format(mask.dtype))

    self.mask = mask.astype('uint8')
def set_objid(self, objid):
    # type: (int) -> None
    """Assign a new ``objid`` and rewrite the UID so that its numeric
    part matches.

    Do NOT use this unless you know what you're doing: a changed
    objid should be (1) checked against objid conflicts within the
    document and (2) reflected in the outlinks and inlinks of the
    objects that refer to this one. Neither is done here.
    """
    self.objid = objid
    # Keep the UID's numeric field in step with the new objid.
    self._sync_objid_to_uid()
def _sync_objid_to_uid(self):
    # type: () -> None
    """Rebuild the UID from its current namespaces and the current
    ``objid``, discarding the number previously stored in the UID."""
    dataset_name, document_name, _ = self._parse_uid(self.uid)
    self.set_uid(self.build_uid(dataset_name, document_name, self.objid))
@property
def dataset(self):
    # type: () -> str
    """Name of the dataset this CropObject comes from (the first
    field of its ``uid``). For bookkeeping."""
    # Populated by ``set_uid()``, which ``__init__`` always calls.
    return self._dataset_namespace
@property
def doc(self):
    # type: () -> str
    """Name of the document, within the dataset, that this CropObject
    comes from (the second field of its ``uid``).

    This is important when working with CropObjects
    from multiple CropObjectList files, especially for properly
    constructing CropObject graphs, because ``inlinks`` and
    ``outlinks`` use the numeric ``objids``, which point to
    CropObjects within the same document.

    ``objid`` of each CropObject has to be unique within a document.
    """
    # Populated by ``set_uid()``, which ``__init__`` always calls.
    return self._document_namespace
@property
def top(self):
    # type: () -> int
    """Row index of the bounding box's upper edge (stored as ``x``)."""
    return self.x
@property
def bottom(self):
    # type: () -> int
    """Row index one past the bounding box's lowest row, so that
    ``img[c.top:c.bottom]`` selects exactly the object's rows."""
    first_row = self.x
    return first_row + self.height
@property
def left(self):
    # type: () -> int
    """Column index of the bounding box's left edge (stored as ``y``)."""
    return self.y
@property
def right(self):
    # type: () -> int
    """Column index one past the bounding box's rightmost column, so
    that ``img[:, c.left:c.right]`` selects exactly the object's
    columns."""
    first_column = self.y
    return first_column + self.width
@property
def bounding_box(self):
    # type: () -> Tuple[int, int, int, int]
    """The CropObject's coordinates as a
    ``(top, left, bottom, right)`` tuple; ``bottom`` and ``right``
    are exclusive."""
    return (self.top, self.left, self.bottom, self.right)
@property
def middle(self):
    # type: () -> Tuple[int, int]
    """Integer coordinates of the bounding box's center, as a
    ``(m_vert, m_horz)`` tuple.

    Half of each extent is computed with floor division, so odd
    extents round down.
    """
    half_height = (self.bottom - self.top) // 2
    half_width = (self.right - self.left) // 2
    return int(self.top + half_height), int(self.left + half_width)
@property
def is_empty(self):
    # type: () -> bool
    """Whether the CropObject consists of zero pixels.

    Emptiness is judged from the mask: the object is empty when the
    mask sums to zero. A CropObject without a mask is taken to fill
    its bounding box and is therefore never empty."""
    if self.mask is None:
        return False
    pixel_total = self.mask.sum()
    return pixel_total == 0
@property
def outlink_uids(self):
    # type: () -> List[str]
    """UIDs of the outlink targets, built from this object's dataset
    and document names and each ``objid`` in ``outlinks``."""
    return [self.build_uid(self.dataset, self.doc, target_objid)
            for target_objid in self.outlinks]
@property
def inlink_uids(self):
    # type: () -> List[str]
    """UIDs of the inlink sources, built from this object's dataset
    and document names and each ``objid`` in ``inlinks``."""
    return [self.build_uid(self.dataset, self.doc, source_objid)
            for source_objid in self.inlinks]
@staticmethod
def bbox_to_integer_bounds(ftop, fleft, fbottom, fright):
    # type: (float, float, float, float) -> Tuple[int, int, int, int]
    """Snap a bounding box outwards to integer coordinates so that no
    area is lost: top and left are rounded down, bottom and right
    are rounded up.

    Returns the rounded-off ``(top, left, bottom, right)`` as
    integers. Each bound that had to be moved is reported through
    ``logging.debug``.

    >>> CropObject.bbox_to_integer_bounds(44.2, 18.9, 55.1, 92.99)
    (44, 18, 56, 93)
    >>> CropObject.bbox_to_integer_bounds(44, 18, 56, 92.99)
    (44, 18, 56, 93)

    """
    logging.debug('bbox_to_integer_bounds: inputs {0}'.format((ftop, fleft, fbottom, fright)))

    def round_down(value):
        # Strip the fractional part.
        return value - (value % 1.0)

    def round_up(value):
        # Strip the fractional part, then step up if there was one.
        fraction = value % 1.0
        lowered = value - fraction
        if fraction != 0:
            lowered += 1.0
        return lowered

    top = round_down(ftop)
    left = round_down(fleft)
    bottom = round_up(fbottom)
    right = round_up(fright)

    if top != ftop:
        logging.debug('bbox_to_integer_bounds: rounded top by {0}'.format(top - ftop))
    if left != fleft:
        logging.debug('bbox_to_integer_bounds: rounded left by {0}'.format(left - fleft))
    if bottom != fbottom:
        logging.debug('bbox_to_integer_bounds: rounded bottom by {0}'.format(bottom - fbottom))
    if right != fright:
        logging.debug('bbox_to_integer_bounds: rounded right by {0}'.format(right - fright))

    return int(top), int(left), int(bottom), int(right)
def to_integer_bounds(self):
    # type: () -> None
    """Snap the CropObject's position and size to integers, in place.

    (This is important whenever you want to use a mask, and reasonable
    whenever you do not need sub-pixel resolution...)
    """
    # The current bounding box is read in full before anything is
    # overwritten, since bottom/right are derived from x/y.
    top, left, bottom, right = self.bbox_to_integer_bounds(*self.bounding_box)

    self.x, self.y = top, left
    self.height = bottom - top
    self.width = right - left
def project_to(self, img):
    # type: (numpy.ndarray) -> numpy.ndarray
    """Return the *crop* of the input image corresponding to the
    CropObject's bounding box, multiplied by the mask (if any).
    Assumes zeros are background.

    The input image is never modified. Images with extra trailing
    axes (e.g. ``(rows, columns, channels)``) are supported: the
    2-D mask is applied to every channel.

    :param img: Image array whose first two axes are rows and columns.

    :returns: A new array holding the masked crop. If the CropObject
        has no mask, this is the plain copy of the bounding box area.
    """
    # Make a copy! We don't want to modify the original image by the mask.
    # Copy forced by the "* 1" part.
    crop = img[self.top:self.bottom, self.left:self.right] * 1
    if self.mask is not None:
        mask = self.mask
        if crop.ndim > mask.ndim:
            # Append singleton axes so the mask broadcasts over the
            # channel axes instead of being aligned to the trailing
            # axes, which either fails or masks the wrong dimensions.
            extra_axes = (1,) * (crop.ndim - mask.ndim)
            mask = mask.reshape(mask.shape + extra_axes)
        crop *= mask
    return crop
def project_on(self, img):
    # type: (numpy.ndarray) -> numpy.ndarray
    """Return an array of the same shape and dtype as ``img`` that is
    zero everywhere except inside the CropObject's bounding box,
    where it holds the result of :meth:`project_to`.

    In effect this reconstructs the symbol as an indicator function
    over the pixels of the annotated image."""
    canvas = numpy.zeros(img.shape, img.dtype)
    canvas[self.top:self.bottom, self.left:self.right] = self.project_to(img)
    return canvas
def render(self, img, alpha=0.3, rgb=(1.0, 0.0, 0.0)):
    # type: (numpy.ndarray, float, Tuple[float,float,float]) -> numpy.ndarray
    """Blend a rectangle of the given color into ``img`` over the
    CropObject's bounding box. Might help visualization.

    The image is modified in place and also returned. The blended
    value is ``(pixel + alpha * color) / (1 + alpha)``. The
    CropObject's mask is not used.

    :param img: A three-channel image (3-D numpy array,
        with the last dimension being 3).

    :param alpha: Weight of the color relative to the image.

    :param rgb: The rectangle's color, one value per channel."""
    color = numpy.array(rgb)
    logging.debug('Rendering object {0}, clsname {1}, t/b/l/r: {2}'
                  ''.format(self.objid, self.clsname,
                            (self.top, self.bottom, self.left, self.right)))

    # Solid block of the requested color, sized like the bounding box.
    overlay_shape = (self.height, self.width, 3)
    overlay = numpy.ones(overlay_shape) * color

    region = (slice(self.top, self.bottom), slice(self.left, self.right))
    crop = img[region]
    logging.debug('Shape: {0}. Got crop. Crop shape: {1}, img shape: {2}'
                  ''.format(overlay_shape, crop.shape, img.shape))

    img[region] = (crop + alpha * overlay) / (1 + alpha)
    return img
def overlaps(self, bounding_box_or_cropobject):
    # type: (Union[Tuple[int,int,int,int],CropObject]) -> bool
    """Tell whether this CropObject's bounding box shares at least one
    pixel with the given bounding box (or with the bounding box of
    the given CropObject).

    >>> c = CropObject(0, 'test', 10, 100, height=20, width=10)
    >>> c.bounding_box
    (10, 100, 30, 110)
    >>> c.overlaps((10, 100, 30, 110))  # Exact match
    True
    >>> c.overlaps((0, 100, 8, 110))    # Row mismatch
    False
    >>> c.overlaps((10, 0, 30, 89))     # Column mismatch
    False
    >>> c.overlaps((0, 0, 8, 89))       # Total mismatch
    False
    >>> c.overlaps((9, 99, 31, 111))    # Encompasses CropObject
    True
    >>> c.overlaps((11, 101, 29, 109))  # Within CropObject
    True
    >>> c.overlaps((9, 101, 31, 109))   # Encompass horz., within vert.
    True
    >>> c.overlaps((11, 99, 29, 111))   # Encompasses vert., within horz.
    True
    >>> c.overlaps((11, 101, 31, 111))  # Corner within: top left
    True
    >>> c.overlaps((11, 99, 31, 109))   # Corner within: top right
    True
    >>> c.overlaps((9, 101, 29, 111))   # Corner within: bottom left
    True
    >>> c.overlaps((9, 99, 29, 109))    # Corner within: bottom right
    True

    """
    other = bounding_box_or_cropobject
    if isinstance(other, CropObject):
        other = other.bounding_box
    t, l, b, r = other

    # Bottom and right are exclusive (fencepost), so the intervals
    # intersect only under a strict "less than": boxes whose edges
    # merely coincide are touching, not overlapping.
    if not (max(t, self.top) < min(b, self.bottom)):
        return False
    if not (max(l, self.left) < min(r, self.right)):
        return False
    return True
def contains(self, bounding_box_or_cropobject):
    # type: (Union[Tuple[int,int,int,int],CropObject]) -> bool
    """Tell whether this CropObject's bounding box entirely contains
    the other bounding box (or the bounding box of the other
    CropObject). Boxes with coinciding edges count as contained."""
    other = bounding_box_or_cropobject
    if isinstance(other, CropObject):
        other = other.bounding_box
    t, l, b, r = other

    rows_inside = self.top <= t <= b <= self.bottom
    if not rows_inside:
        return False
    columns_inside = self.left <= l <= r <= self.right
    if not columns_inside:
        return False
    return True
def bbox_intersection(self, bounding_box):
    # type: (Tuple[int,int, int, int]) -> Optional[Tuple[int,int, int, int]]
    """Intersect this CropObject's bounding box with ``bounding_box``
    and return the result in the CropObject's own coordinate frame
    (so: 0,0 is the CropObject's upper left corner).

    If the intersection is empty, returns None.

    >>> c = CropObject(0, 'test', 10, 100, height=20, width=10)
    >>> c.bounding_box
    (10, 100, 30, 110)
    >>> other_bbox = 20, 100, 40, 105
    >>> c.bbox_intersection(other_bbox)
    (10, 0, 20, 5)
    >>> containing_bbox = 4, 55, 44, 115
    >>> c.bbox_intersection(containing_bbox)
    (0, 0, 20, 10)
    >>> contained_bbox = 12, 102, 22, 108
    >>> c.bbox_intersection(contained_bbox)
    (2, 2, 12, 8)
    >>> non_overlapping_bbox = 0, 0, 3, 3
    >>> c.bbox_intersection(non_overlapping_bbox) is None
    True

    """
    t, l, b, r = bounding_box

    # Intersection in image coordinates.
    out_top = max(t, self.top)
    out_bottom = min(b, self.bottom)
    out_left = max(l, self.left)
    out_right = min(r, self.right)

    has_area = (out_top < out_bottom) and (out_left < out_right)
    if not has_area:
        return None

    # Shift into coordinates relative to this object's corner.
    return (out_top - self.top,
            out_left - self.left,
            out_bottom - self.top,
            out_right - self.left)
def crop_to_mask(self):
    # type: () -> None
    """Shrink the CropObject, in place, to the smallest bounding box
    that still contains all of its nonzero mask pixels.

    Nothing happens when there is no mask, or when the mask is all
    zeros: in the latter case the ``is_empty`` check should be invoked
    anyway in any situation where you care whether the object is
    empty or not (e.g. delete it after trimming).

    >>> mask = numpy.zeros((20, 10))
    >>> mask[5:15, 3:8] = 1
    >>> c = CropObject(0, 'test', 10, 100, width=10, height=20, mask=mask)
    >>> c.bounding_box
    (10, 100, 30, 110)
    >>> c.crop_to_mask()
    >>> c.bounding_box
    (15, 103, 25, 108)
    >>> c.height, c.width
    (10, 5)

    Assumes integer bounds, which is ensured during CropObject initialization.
    """
    if self.mask is None or self.is_empty:
        return

    mask = self.mask
    n_rows = mask.shape[0]
    n_columns = mask.shape[1]

    def first_occupied(count, is_occupied):
        # Smallest index in range(count) accepted by is_occupied, or -1.
        return next((idx for idx in range(count) if is_occupied(idx)), -1)

    # The object is not empty, so each scan finds an occupied line.
    # Each value is the number of blank lines to trim from that side.
    trim_top = first_occupied(
        n_rows, lambda i: mask[i, :].sum() != 0)
    trim_left = first_occupied(
        n_columns, lambda j: mask[:, j].sum() != 0)
    trim_bottom = first_occupied(
        n_rows, lambda k: mask[-(k + 1), :].sum() != 0)
    trim_right = first_occupied(
        n_columns, lambda m: mask[:, -(m + 1)].sum() != 0)

    logging.debug('Cropobject.crop: Trimming top={0}, left={1},'
                  'bottom={2}, right={3}'
                  ''.format(trim_top, trim_left, trim_bottom, trim_right))

    # Trimmed mask, cut relative to the current bounding box.
    # The "* 1" forces a copy.
    new_mask = mask[trim_top:self.height - trim_bottom,
                    trim_left:self.width - trim_right] * 1
    logging.debug('Cropobject.crop: Old mask shape {0}, new mask shape {1}'
                  ''.format(mask.shape, new_mask.shape))

    # New bounding box relative to the image. All four edges are
    # computed before any attribute is changed, because bottom and
    # right are derived from x/y and height/width.
    new_top = self.top + trim_top
    new_left = self.left + trim_left
    new_bottom = self.bottom - trim_bottom
    new_right = self.right - trim_right

    self.x = new_top
    self.y = new_left
    self.height = new_bottom - new_top
    self.width = new_right - new_left
    self.set_mask(new_mask)
def __str__(self):
    """Serialize this CropObject as an XML ``<CropObject>`` element.

    See the documentation of :mod:`muscima.io` for the format. The
    ``<Inlinks>`` and ``<Outlinks>`` elements are written only when the
    respective list is non-empty, and ``<Data>`` only when
    ``encode_data()`` returns something other than ``None``.
    """
    xml = [
        # The uid is carried by the xml:id attribute of the root element.
        '<CropObject xml:id="{0}">'.format(self.uid),
        '\t<Id>{0}</Id>'.format(self.objid),
        '\t<ClassName>{0}</ClassName>'.format(self.clsname),
        '\t<Top>{0}</Top>'.format(self.top),
        '\t<Left>{0}</Left>'.format(self.left),
        '\t<Width>{0}</Width>'.format(self.width),
        '\t<Height>{0}</Height>'.format(self.height),
        '\t<Mask>{0}</Mask>'.format(self.encode_mask(self.mask)),
    ]

    # Links are serialized as whitespace-separated values.
    if len(self.inlinks) > 0:
        xml.append('\t<Inlinks>{0}</Inlinks>'.format(
            ' '.join(str(link) for link in self.inlinks)))
    if len(self.outlinks) > 0:
        xml.append('\t<Outlinks>{0}</Outlinks>'.format(
            ' '.join(str(link) for link in self.outlinks)))

    encoded_data = self.encode_data(self.data)
    if encoded_data is not None:
        xml.append('\t<Data>\n{0}\n\t</Data>'.format(encoded_data))

    xml.append('</CropObject>')
    return '\n'.join(xml)
def encode_mask(self, mask, compress=False, mode='rle'):
    # type: (numpy.ndarray, bool, str) -> str
    """Encode a binary array ``mask`` as a string, compliant
    with the CropObject format specification in :mod:`muscima.io`.

    :param mask: The mask array to encode. It is handed unchanged
        to the encoder selected by ``mode``.

    :param compress: Forwarded to the selected encoder.

    :param mode: Which encoding to use: ``'rle'`` (default) delegates
        to ``encode_mask_rle()``, ``'bitmap'`` delegates to
        ``encode_mask_bitmap()``.

    :returns: The string produced by the selected encoder.

    :raises ValueError: If ``mode`` is neither ``'rle'`` nor ``'bitmap'``.
    """
    if mode == 'rle':
        return self.encode_mask_rle(mask, compress=compress)
    if mode == 'bitmap':
        return self.encode_mask_bitmap(mask, compress=compress)
    # Fail loudly: silently returning None here would be formatted
    # as the text "None" by callers that embed the result in XML,
    # which is indistinguishable from an object without a mask.
    raise ValueError('Unsupported mask encoding mode: {0!r}'
                     ' (expected \'rle\' or \'bitmap\')'.format(mode))
def encode_data(self, data):
    # type: (Optional[dict]) -> Optional[str]
    """Encode the ``data`` dictionary as a block of ``<DataItem>``
    XML lines, one per key.

    Each item records its key, a type label and the value as text.
    The type label is chosen as follows:

    * ``int`` and ``float`` values are labeled ``'int'`` / ``'float'``,
    * lists are labeled ``'list[int]'``, ``'list[float]'`` or
      ``'list[str]'`` according to the type of their *first* element
      (an empty list is ``'list[str]'``) and are written as
      space-separated items,
    * everything else is labeled ``'str'`` and formatted as-is.

    :param data: The dictionary to encode. This argument is what gets
        encoded (not ``self.data``), so the method can serialize any
        data dictionary.

    :returns: The ``<DataItem>`` lines joined by newlines, or ``None``
        if ``data`` is ``None`` or empty.
    """
    if data is None or len(data) == 0:
        return None

    lines = []
    for key, value in data.items():
        type_label = 'str'
        text = value
        if isinstance(value, int):
            type_label = 'int'
            text = str(value)
        elif isinstance(value, float):
            type_label = 'float'
            text = str(value)
        elif isinstance(value, list):
            type_label = 'list[str]'
            # Only the first element is inspected to pick the item type.
            if len(value) > 0:
                if isinstance(value[0], int):
                    type_label = 'list[int]'
                elif isinstance(value[0], float):
                    type_label = 'list[float]'
            text = ' '.join([str(item) for item in value])

        lines.append('\t\t<DataItem key="{0}" type="{1}">{2}</DataItem>'
                     ''.format(key, type_label, text))
    return '\n'.join(lines)
def data_display_text(self):
    """Render ``self.data`` as human-readable text, one ``key: value``
    pair per line.

    :returns: The formatted text, or the placeholder ``'[No data]'``
        when ``self.data`` is ``None`` or empty.
    """
    payload = self.data
    if payload is None or len(payload) == 0:
        return '[No data]'
    return '\n'.join('{0}: {1}'.format(key, value)
                     for key, value in payload.items())
@staticmethod
def encode_mask_bitmap(mask, compress=False):
    # type: (numpy.ndarray, bool) -> str
    """Encode the mask array as a plain whitespace-separated bitmap.

    The mask is flattened in ``CROPOBJECT_MASK_ORDER`` order and each
    element is written out via ``str()``, separated by single spaces.
    The original shape is not stored in the string, so decoding needs
    it to be supplied separately.

    :param mask: The mask array, or ``None``.

    :param compress: Accepted for interface compatibility; compression
        is NOT IMPLEMENTED and the flag has no effect.

    :returns: The encoded string, or the string ``'None'`` if ``mask``
        is ``None``.
    """
    if mask is None:
        return 'None'
    flat_pixels = mask.flatten(order=CROPOBJECT_MASK_ORDER)
    return ' '.join(str(pixel) for pixel in flat_pixels)
@staticmethod
def encode_mask_rle(mask, compress=False):
    # type: (numpy.ndarray, bool) -> str
    """Encode the mask array with Run-Length Encoding.

    Instead of spelling out the bitmap ``0 0 1 1 1 0 0 0 1 1``, RLE
    writes it as ``0:2 1:3 0:3 1:2`` -- space-separated ``value:length``
    runs -- which is much more compact.

    Rows get no special treatment: the mask is flattened in
    ``CROPOBJECT_MASK_ORDER`` order and the flat sequence is encoded.

    The first run is seeded as a run of zeros with length 0. As
    a consequence the output always begins with a ``0:`` run; a mask
    whose first element is nonzero starts with ``0:0``.

    :param mask: The mask array, or ``None``.

    :param compress: Accepted for interface compatibility; not used.

    :returns: The encoded string, or the string ``'None'`` if ``mask``
        is ``None``.
    """
    if mask is None:
        return 'None'

    runs = []
    run_value, run_length = 0, 0
    for pixel in mask.flatten(order=CROPOBJECT_MASK_ORDER):
        if pixel == run_value:
            run_length += 1
            continue
        # The value changed: close the finished run and open a new one.
        runs.append('{0}:{1}'.format(run_value, run_length))
        run_value, run_length = pixel, 1

    # The last run is still open when the loop ends.
    runs.append('{0}:{1}'.format(run_value, run_length))
    return ' '.join(runs)
def decode_mask(self, mask_string, shape):
    # type: (str, Tuple[Any, ...]) -> Optional[numpy.ndarray]
    """Decode a CropObject mask string into an array of the given shape.

    The encoding is detected with ``_determine_mask_mode()`` and the
    string is then passed, together with ``shape``, to the matching
    decoder (``decode_mask_rle()`` or ``decode_mask_bitmap()``).

    :returns: Whatever the selected decoder returns; ``None`` if the
        detected mode is neither ``'rle'`` nor ``'bitmap'``.
    """
    detected = self._determine_mask_mode(mask_string)
    if detected == 'bitmap':
        return self.decode_mask_bitmap(mask_string, shape=shape)
    if detected == 'rle':
        return self.decode_mask_rle(mask_string, shape=shape)
    return None
def _determine_mask_mode(self, mask_string):
    # type: (str) -> str
    """Guess which encoding a mask string uses.

    The string is taken to be RLE when it is at least three characters
    long and a colon occurs among its first three characters (e.g. it
    starts with ``'0:'`` or ``'1:'``). Anything else, including strings
    shorter than three characters, is treated as a bitmap.

    :returns: ``'rle'`` or ``'bitmap'``.
    """
    looks_like_rle = len(mask_string) >= 3 and ':' in mask_string[:3]
    return 'rle' if looks_like_rle else 'bitmap'
@staticmethod
def decode_mask_bitmap(mask_string, shape):
# type: (str, Tuple[Any, ...]) -> Optional[numpy.ndarray]