/
_opLazyCC.py
513 lines (433 loc) · 19.3 KB
/
_opLazyCC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#!/usr/bin/env python
# coding: utf-8
# author: Markus Döring
import numpy as np
import vigra
import logging
from collections import defaultdict
from functools import partial, wraps
#from itertools import count as InfiniteLabelIterator
from _tools import InfiniteLabelIterator
from lazyflow.operator import Operator, InputSlot, OutputSlot
from lazyflow.rtype import SubRegion
from lazyflow.operators import OpCompressedCache, OpReorderAxes
from lazyflow.request import Request, RequestPool
from lazyflow.request import RequestLock as ReqLock
# the lazyflow lock seems to have deadlock issues sometimes
from threading import Lock as HardLock
from threading import Condition
from _mockup import UnionFindArray
#from lazycc import UnionFindArray
# logging.basicConfig()
logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
_LABEL_TYPE = np.uint32
def threadsafe(method):
    """Decorator: serialize calls to *method* through ``self._lock``.

    The returned wrapper holds the instance's ``_lock`` for the duration
    of the call, so decorated methods of the same object never run
    concurrently.
    """
    @wraps(method)
    def _locked_call(self, *args, **kwargs):
        self._lock.acquire()
        try:
            return method(self, *args, **kwargs)
        finally:
            self._lock.release()
    return _locked_call
class _LabelManager(object):
    """Bookkeeping for concurrent finalization of chunk labels.

    Worker requests register themselves for a ticket, claim ('check
    out') the labels they are going to finalize, and can wait until
    other workers - whose claimed labels overlap their own region of
    interest - have finished and unregistered.
    """

    def __init__(self):
        self._lock = Condition()
        # per chunk: {ticket -> array of local labels claimed by ticket}
        self._managedLabels = defaultdict(dict)
        self._iterator = InfiniteLabelIterator(1, dtype=int)
        self._registered = set()

    @threadsafe
    def register(self):
        """Obtain a fresh ticket; call before doing anything."""
        ticket = next(self._iterator)
        self._registered.add(ticket)
        return ticket

    @threadsafe
    def unregister(self, n):
        """Return ticket *n*; call when done with everything.

        Wakes up every worker blocked in waitFor().
        """
        self._registered.remove(n)
        self._lock.notify_all()

    @threadsafe
    def waitFor(self, others):
        """Block until none of the tickets in *others* is registered."""
        pending = set(others) & self._registered
        while pending:
            self._lock.wait()
            pending &= self._registered

    @threadsafe
    def checkoutLabels(self, chunkIndex, labels, n):
        """Claim the subset of *labels* that _really_ needs to be
        globalized by ticket *n*.

        @return (labels not claimed by anyone else, set of tickets that
                 already claimed the remainder)
        """
        blockers = set()
        claimed = self._managedLabels[chunkIndex]
        for ticket, ticketLabels in claimed.items():
            overlap = np.intersect1d(labels, ticketLabels)
            if overlap.size > 0:
                labels = np.setdiff1d(labels, overlap)
                blockers.add(ticket)
        if labels.size > 0:
            claimed[n] = labels
        return labels, blockers
# locking decorator that locks per chunk
def _chunksynchronized(method):
@wraps(method)
def synchronizedmethod(self, chunkIndex, *args, **kwargs):
with self._chunk_locks[chunkIndex]:
return method(self, chunkIndex, *args, **kwargs)
return synchronizedmethod
# general approach
# ================
#
# There are 3 kinds of labels that we need to consider throughout the operator:
# * local labels: The output of the chunk wise labelVolume calls. These are
# stored in self._cache, a compressed VigraArray.
# aka 'local'
# * global indices: The mapping of local labels to unique global indices.
#   The actual implementation is hidden in self.localToGlobal().
# aka 'global'
# * global labels: The final labels that are communicated to the outside
# world. These must be contiguous, i.e. if global label j appears in the
# output, then, for every global label i<j, i also appears in the output.
# The actual implementation is hidden in self.globalToFinal().
# aka 'final'
#
class OpLazyCC(Operator):
    """Lazy chunk-wise connected components labeling.

    The volume is partitioned into chunks (spatial shape: ChunkShape,
    padded with singleton t/c axes). Chunks are labeled on demand with
    vigra, and components crossing chunk borders are merged through a
    union-find structure. See the 'general approach' comment above for
    the three label domains in use (local / global / final).
    """

    # input data (usually segmented), in 'txyzc' order
    Input = InputSlot()

    # the spatial shape of one chunk, in 'xyz' order
    ChunkShape = InputSlot()

    # the labeled output, internally cached
    Output = OutputSlot()

    ### INTERNALS -- DO NOT USE ###
    _Input = OutputSlot()
    _Output = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpLazyCC, self).__init__(*args, **kwargs)
        self._lock = HardLock()

        # reordering operators - we want to handle txyzc inside this operator
        self._opIn = OpReorderAxes(parent=self)
        self._opIn.AxisOrder.setValue('txyzc')
        self._opIn.Input.connect(self.Input)
        self._Input.connect(self._opIn.Output)

        self._opOut = OpReorderAxes(parent=self)
        self._opOut.Input.connect(self._Output)
        self.Output.connect(self._opOut.Output)

    def setupOutputs(self):
        self.Output.meta.assignFrom(self.Input.meta)
        self.Output.meta.dtype = _LABEL_TYPE
        self._Output.meta.assignFrom(self._Input.meta)
        self._Output.meta.dtype = _LABEL_TYPE
        assert self.Input.meta.dtype in [np.uint8, np.uint32, np.uint64],\
            "Cannot label data type {}".format(self.Input.meta.dtype)
        self._setDefaultInternals()

        # go back to original order
        self._opOut.AxisOrder.setValue(self.Input.meta.getAxisKeys())

    def execute(self, slot, subindex, roi, result):
        if slot is self._Output:
            # finalize every label touching the roi, then copy the final
            # labels into 'result'
            othersToWaitFor = set()
            chunks = self._roiToChunkIndex(roi)
            for chunk in chunks:
                othersToWaitFor |= self.growRegion(chunk)
            self._manager.waitFor(othersToWaitFor)
            self._mapArray(roi, result)
        else:
            raise ValueError("Request to invalid slot {}".format(str(slot)))

    def propagateDirty(self, slot, subindex, roi):
        # TODO: this is already correct, but may be over-zealous
        # we would have to label each chunk that was set dirty and check
        # for changed labels. Therefore, we would have to check if the
        # dirty region is 'small enough', etc etc.
        self._setDefaultInternals()
        self.Output.setDirty(slice(None))

    def growRegion(self, chunkIndex):
        """Grow the requested region such that all labels inside that
        region are final.

        @param chunkIndex the index of the chunk to finalize
        @return set of tickets of other workers that are finalizing
                labels we depend on (hand to self._manager.waitFor)
        """
        ticket = self._manager.register()
        othersToWaitFor = set()

        # we want to finalize every label in our first chunk
        localLabels = np.arange(1, self._numIndices[chunkIndex]+1)
        localLabels = localLabels.astype(_LABEL_TYPE)
        chunksToProcess = {chunkIndex: localLabels}

        while chunksToProcess:
            currentChunk, localLabels = chunksToProcess.popitem()

            # label this chunk
            # FIX: label the chunk just popped from the queue (was
            # 'chunkIndex', which only worked because neighbours are
            # labeled before being queued below)
            self._label(currentChunk)

            # get the labels in use by this chunk
            # NOTE(review): the popped 'localLabels' is discarded here and
            # *all* labels of the chunk are finalized - presumably a
            # deliberate over-approximation; verify against the checkout
            # logic in _LabelManager
            localLabels = np.arange(1, self._numIndices[currentChunk]+1)
            localLabels = localLabels.astype(_LABEL_TYPE)

            # tell the label manager that we are about to finalize some labels
            actualLabels, others = self._manager.checkoutLabels(currentChunk,
                                                                localLabels,
                                                                ticket)
            othersToWaitFor |= others

            # now we have got a list of local labels for this chunk, which no
            # other process is going to finalize

            # start merging adjacent regions
            otherChunks = self._generateNeighbours(currentChunk)
            for other in otherChunks:
                self._label(other)
                a, b = self._orderPair(currentChunk, other)
                # FIX: 'me' marks the position of the *current* chunk in
                # the ordered pair (comparing against 'chunkIndex' swapped
                # myLabels/otherLabels for queued neighbour chunks)
                me = 0 if a == currentChunk else 1
                res = self._merge(a, b)
                myLabels, otherLabels = res[me], res[1-me]

                # determine which objects from this chunk continue in the
                # neighbouring chunk
                extendingLabels = [b for a, b in zip(myLabels, otherLabels)
                                   if a in actualLabels]
                extendingLabels = np.unique(extendingLabels
                                            ).astype(_LABEL_TYPE)

                # add the neighbour to our processing queue only if it actually
                # shares objects
                if extendingLabels.size > 0:
                    if other in chunksToProcess:
                        extendingLabels = np.union1d(chunksToProcess[other],
                                                     extendingLabels)
                    chunksToProcess[other] = extendingLabels

        self._manager.unregister(ticket)
        return othersToWaitFor

    # label a chunk and store information
    @_chunksynchronized
    def _label(self, chunkIndex):
        """Run connected components on one chunk; no-op if already done.

        Writes the local labeling into self._cache and reserves one
        global index per local label in the union-find structure.
        """
        if self._numIndices[chunkIndex] >= 0:
            # this chunk is already labeled
            return

        # get the raw data
        roi = self._chunkIndexToRoi(chunkIndex)
        inputChunk = self._Input.get(roi).wait()
        inputChunk = vigra.taggedView(inputChunk, axistags='txyzc')
        inputChunk = inputChunk.withAxes(*'xyz')

        # label the raw data
        labeled = vigra.analysis.labelVolumeWithBackground(inputChunk)
        labeled = vigra.taggedView(labeled, axistags='xyz').withAxes(*'txyzc')
        del inputChunk

        #TODO this could be more efficiently combined with merging
        # store the labeled data in cache
        self._cache[roi.toSlice()] = labeled

        # update the labeling information
        numLabels = labeled.max()  # we ignore 0 here
        self._numIndices[chunkIndex] = numLabels
        if numLabels > 0:
            with self._lock:
                # get 1 label that determines the offset
                offset = self._uf.makeNewIndex()
                # the offset is such that label 1 in the local chunk maps to
                # 'offset' in the global context
                self._globalLabelOffset[chunkIndex] = offset - 1

                # get n-1 more labels
                for i in range(numLabels-1):
                    self._uf.makeNewIndex()

    # merge the labels of two adjacent chunks
    # the chunks have to be ordered lexicographically, e.g. by self._orderPair
    @_chunksynchronized
    def _merge(self, chunkA, chunkB):
        """Union the global indices of components touching across the
        chunkA/chunkB border.

        @return 2-tuple of arrays of corresponding *local* labels on the
                touching hyperplanes (empty arrays if already merged or
                nothing touches)
        """
        if chunkB in self._mergeMap[chunkA]:
            return (np.zeros((0,), dtype=_LABEL_TYPE),)*2
        self._mergeMap[chunkA].append(chunkB)

        hyperplane_roi_a, hyperplane_roi_b = \
            self._chunkIndexToHyperplane(chunkA, chunkB)
        hyperplane_index_a = hyperplane_roi_a.toSlice()
        hyperplane_index_b = hyperplane_roi_b.toSlice()
        label_hyperplane_a = self._cache[hyperplane_index_a]
        label_hyperplane_b = self._cache[hyperplane_index_b]

        # see if we have border labels at all
        adjacent_bool_inds = np.logical_and(label_hyperplane_a > 0,
                                            label_hyperplane_b > 0)
        if not np.any(adjacent_bool_inds):
            return (np.zeros((0,), dtype=_LABEL_TYPE),)*2

        # check if the labels do actually belong to the same component
        hyperplane_a = self._Input[hyperplane_index_a].wait()
        hyperplane_b = self._Input[hyperplane_index_b].wait()
        adjacent_bool_inds = np.logical_and(adjacent_bool_inds,
                                            hyperplane_a == hyperplane_b)

        # union find manipulations are critical
        with self._lock:
            map_a = self.localToGlobal(chunkA, mapping=True)
            map_b = self.localToGlobal(chunkB, mapping=True)
            labels_a = map_a[label_hyperplane_a[adjacent_bool_inds]]
            labels_b = map_b[label_hyperplane_b[adjacent_bool_inds]]
            for a, b in zip(labels_a, labels_b):
                self._uf.makeUnion(a, b)

        correspondingLabelsA = label_hyperplane_a[adjacent_bool_inds]
        correspondingLabelsB = label_hyperplane_b[adjacent_bool_inds]
        return correspondingLabelsA, correspondingLabelsB

    def _mapArray(self, roi, result, global_labels=True):
        """Get a rectangular region with final global labels.

        @param roi region of interest
        @param result array of shape roi.stop - roi.start, will be filled
        """
        # TODO perhaps with pixeloperator?
        assert np.all(roi.stop - roi.start == result.shape)
        indices = self._roiToChunkIndex(roi)
        for idx in indices:
            newroi = self._chunkIndexToRoi(idx)
            newroi.stop = np.minimum(newroi.stop, roi.stop)
            newroi.start = np.maximum(newroi.start, roi.start)
            self._mapChunk(idx)
            chunk = self._cache[newroi.toSlice()]
            newroi.start -= roi.start
            newroi.stop -= roi.start
            s = newroi.toSlice()
            result[s] = chunk

    @_chunksynchronized
    def _mapChunk(self, chunkIndex):
        """Replace a chunk's cached local labels by final labels,
        in-place in self._cache; no-op if the chunk is already final."""
        if self._isFinal[chunkIndex]:
            return

        newroi = self._chunkIndexToRoi(chunkIndex)
        s = newroi.toSlice()
        chunk = self._cache[s]
        labels = self.localToGlobal(chunkIndex, mapping=True)
        self.globalToFinal(chunkIndex[0], chunkIndex[4], labels)
        self._cache[s] = labels[chunk]

        self._isFinal[chunkIndex] = True

    def localToGlobal(self, chunkIndex, mapping=True, update=True):
        """Map local labels of one chunk to global indices.

        Returns an array of global labels in use by this chunk if
        'mapping' is False, a mapping of local labels to global labels
        otherwise. If update is set to False, the labels will correspond
        to the originally assigned global labels, otherwise you will get
        the most recent results of UnionFind.
        """
        offset = self._globalLabelOffset[chunkIndex]
        numLabels = self._numIndices[chunkIndex]
        labels = np.arange(1, numLabels+1, dtype=_LABEL_TYPE) + offset

        if update:
            # list comprehension instead of map(): np.asarray(map(...))
            # silently builds a 0-d object array on python 3
            labels = np.asarray([self._uf.findIndex(x) for x in labels],
                                dtype=_LABEL_TYPE)

        if not mapping:
            return labels
        else:
            # we got 'numLabels' real labels, and one label '0', so our
            # output has to have numLabels+1 elements
            out = np.zeros((numLabels+1,), dtype=_LABEL_TYPE)
            out[1:] = labels
            return out

    # map an array of global indices to final labels
    # after calling this function, the labels passed in may not be used with
    # UnionFind.makeUnion any more!
    @threadsafe
    def globalToFinal(self, t, c, labels):
        """Rewrite *labels* (global indices) to contiguous final labels,
        in-place, per (t, c) slice."""
        d = self._globalToFinal[(t, c)]
        labeler = self._labelIterators[(t, c)]
        for k in np.unique(labels):
            l = self._uf.findIndex(k)
            if l == 0:
                continue

            # adding a global label is critical
            if l not in d:
                # next(labeler) instead of labeler.next() - equivalent on
                # python 2, required on python 3
                nextLabel = next(labeler)
                d[l] = nextLabel
            labels[labels == l] = d[l]

    ##########################################################################
    ##################### HELPER METHODS #####################################
    ##########################################################################

    # create roi object from chunk index
    def _chunkIndexToRoi(self, index):
        """Return the SubRegion covered by chunk *index* (clipped to the
        volume shape at the upper border)."""
        shape = self._shape
        start = self._chunkShape * np.asarray(index)
        stop = self._chunkShape * (np.asarray(index) + 1)
        stop = np.where(stop > shape, shape, stop)
        roi = SubRegion(self.Input,
                        start=tuple(start), stop=tuple(stop))
        return roi

    # create a list of chunk indices needed for a particular roi
    def _roiToChunkIndex(self, roi):
        """Return the list of chunk indices intersecting *roi*."""
        cs = self._chunkShape
        start = np.asarray(roi.start)
        stop = np.asarray(roi.stop)
        # explicit floor division - '/' on integer arrays yields floats
        # on python 3 and would break range() below
        start_cs = start // cs
        stop_cs = stop // cs
        # add one if division was not even
        stop_cs += np.where(stop % cs, 1, 0)
        chunks = []
        #TODO do this smarter, perhaps?
        for t in range(start_cs[0], stop_cs[0]):
            for x in range(start_cs[1], stop_cs[1]):
                for y in range(start_cs[2], stop_cs[2]):
                    for z in range(start_cs[3], stop_cs[3]):
                        for c in range(start_cs[4], stop_cs[4]):
                            chunks.append((t, x, y, z, c))
        return chunks

    # compute the adjacent hyperplanes of two chunks (1 pix wide)
    # @return 2-tuple of roi's for the respective chunk
    def _chunkIndexToHyperplane(self, chunkA, chunkB):
        """Return the 1-pixel-wide border rois of two adjacent chunks,
        in the order the chunks were passed in."""
        rev = False
        assert chunkA[0] == chunkB[0] and chunkA[4] == chunkB[4],\
            "these chunks are not spatially adjacent"

        # just iterate over spatial axes
        for i in range(1, 4):
            if chunkA[i] > chunkB[i]:
                rev = True
                chunkA, chunkB = chunkB, chunkA

            if chunkA[i] < chunkB[i]:
                roiA = self._chunkIndexToRoi(chunkA)
                roiB = self._chunkIndexToRoi(chunkB)
                start = np.asarray(roiA.start)
                start[i] = roiA.stop[i] - 1
                roiA.start = tuple(start)
                stop = np.asarray(roiB.stop)
                stop[i] = roiB.start[i] + 1
                roiB.stop = tuple(stop)
        if rev:
            return roiB, roiA
        else:
            return roiA, roiB

    # generate a list of adjacent chunks
    def _generateNeighbours(self, chunkIndex):
        """Return the indices of all spatially adjacent chunks (6-ish
        neighbourhood over x/y/z; t and c are never crossed)."""
        n = []
        # builtin int instead of deprecated np.int alias (identical)
        idx = np.asarray(chunkIndex, dtype=int)
        # only spatial neighbours are considered
        for i in range(1, 4):
            if idx[i] > 0:
                new = idx.copy()
                new[i] -= 1
                n.append(tuple(new))
            if idx[i]+1 < self._chunkArrayShape[i]:
                new = idx.copy()
                new[i] += 1
                n.append(tuple(new))
        return n

    # fills attributes with standard values, call on each setupOutputs
    def _setDefaultInternals(self):
        """Reset all internal state (caches, union-find, bookkeeping)."""
        # chunk array shape calculation
        #TODO change here when removing OpReorder
        shape = self._Input.meta.shape
        chunkShape = (1,) + self.ChunkShape.value + (1,)
        assert len(shape) == len(chunkShape),\
            "Encountered an invalid chunkShape"
        f = lambda i: shape[i]//chunkShape[i] + (1 if shape[i] % chunkShape[i]
                                                 else 0)
        self._chunkArrayShape = tuple(map(f, range(len(shape))))
        self._chunkShape = np.asarray(chunkShape, dtype=int)
        self._shape = shape

        # manager object
        self._manager = _LabelManager()

        ### local labels ###
        # cache for local labels
        self._cache = vigra.ChunkedArrayCompressed(shape, dtype=_LABEL_TYPE)

        ### global indices ###
        # offset (global labels - local labels) per chunk
        self._globalLabelOffset = np.ones(self._chunkArrayShape,
                                          dtype=_LABEL_TYPE)
        # keep track of number of indices in chunk (-1 == not labeled yet)
        self._numIndices = -np.ones(self._chunkArrayShape, dtype=np.int32)

        # union find data structure, tells us for every global index to which
        # label it belongs
        self._uf = UnionFindArray(_LABEL_TYPE(1))

        ### global labels ###
        # keep track of assigned global labels
        gen = partial(InfiniteLabelIterator, 1, dtype=_LABEL_TYPE)
        self._labelIterators = defaultdict(gen)
        self._globalToFinal = defaultdict(dict)
        self._isFinal = np.zeros(self._chunkArrayShape, dtype=bool)

        ### algorithmic ###
        # keep track of merged regions
        self._mergeMap = defaultdict(list)

        # locks that keep threads from changing a specific chunk
        self._chunk_locks = defaultdict(HardLock)

    # order a pair of chunk indices lexicographically
    # (ret[0] is top-left-in-front-of of ret[1])
    @staticmethod
    def _orderPair(tupA, tupB):
        """Order two chunk indices lexicographically.

        @raise ValueError if both indices are identical
        """
        for a, b in zip(tupA, tupB):
            if a < b:
                return tupA, tupB
            if a > b:
                return tupB, tupA
        # (unreachable 'return' that followed this raise was removed)
        raise ValueError("tupA={} and tupB={} are the same".format(tupA, tupB))