In [2]:
import h5py
import re
import tensorflow as tf
from tensorflow import gfile
import numpy as np


# train.define_data_input

In [3]:
label_volumes = 'validation1:third_party/neuroproof_examples/validation_sample/groundtruth.h5:stack'

label_volume_map = {}
for vol in label_volumes.split(','):
    volname, path, dataset = vol.split(':')
    label_volume_map[volname] = h5py.File(path)[dataset]

label_volume_map


{'validation1': <HDF5 dataset "stack": shape (520, 520, 520), type "<i8">}

## inputs.load_patch_coordinates

In [4]:
coordinates_file_pattern = 'gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-*-of-00025.gz'


In [5]:
def create_filename_queue(coordinates_file_pattern, shuffle=True):
    """Creates a queue for reading coordinates from coordinate file.

    Args:
    coordinates_file_pattern: File pattern for TFRecords of
                              input examples of the form of a glob
                              pattern or path@shards.
    shuffle: Whether to shuffle the coordinate file list. Note that the expanded
             coordinates_file_pattern is not guaranteed to be sorted
             alphabetically.

    Returns:
    Tensorflow queue with coordinate filenames
    """
    m = re.search(r'@(\d{1,})', coordinates_file_pattern)
    
    if m:
        num_shards = int(m.group(1))
        coord_file_list = [
            re.sub(r'@(\d{1,})', '-%.5d-of-%.5d' % (i, num_shards), 
                   coordinates_file_pattern)
        for i in range(num_shards)]
    
    else:
        coord_file_list = gfile.Glob(coordinates_file_pattern)

    return tf.train.string_input_producer(coord_file_list, shuffle=shuffle)


In [6]:
coord_file_list = gfile.Glob(coordinates_file_pattern)
coord_file_list[-5:]


['gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-00020-of-00025.gz',
 'gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-00021-of-00025.gz',
 'gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-00022-of-00025.gz',
 'gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-00023-of-00025.gz',
 'gs://ffn-flyem-fib25/validation_sample/fib_flyem_validation1_label_lom24_24_24_part14_wbbox_coords-00024-of-00025.gz']

In [7]:
filename_queue = create_filename_queue(coordinates_file_pattern, shuffle=True)
filename_queue


<tensorflow.python.ops.data_flow_ops.FIFOQueue at 0x7f8bcba44a90>

In [8]:
def load_patch_coordinates_from_filename_queue(filename_queue):
    """Loads coordinates and volume names from filename queue.

    Args:
    filename_queue: Tensorflow queue created from create_filename_queue()

    Returns:
    Tuple of coordinates (shape `[1, 3]`) and volume name (shape `[1]`) tensors.
    """
    
    record_options = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    keys, protos = tf.TFRecordReader(options=record_options).read(filename_queue)
    examples = tf.parse_single_example(protos, features=dict(
        center=tf.FixedLenFeature(shape=[1, 3], dtype=tf.int64),
        label_volume_name=tf.FixedLenFeature(shape=[1], dtype=tf.string),
    ))
    coord = examples['center']
    volname = examples['label_volume_name']
    
    return coord, volname


In [9]:
record_options = tf.python_io.TFRecordOptions(
    tf.python_io.TFRecordCompressionType.GZIP)

keys, protos = tf.TFRecordReader(options=record_options).read(filename_queue)
print('keys: ' + str(keys) + '\n')
print('protos: ' + str(protos) + '\n')

examples = tf.parse_single_example(protos, features=dict(
    center=tf.FixedLenFeature(shape=[1, 3], dtype=tf.int64),
    label_volume_name=tf.FixedLenFeature(shape=[1], dtype=tf.string),
))

print('examples: ' + str(examples) + '\n')

coord = examples['center']
volname = examples['label_volume_name']

print('coord: ' + str(coord) + '\n')
print('volname: ' + str(volname) + '\n')


keys: Tensor("ReaderReadV2:0", shape=(), dtype=string)

protos: Tensor("ReaderReadV2:1", shape=(), dtype=string)

examples: {'center': <tf.Tensor 'ParseSingleExample/Squeeze_center:0' shape=(1, 3) dtype=int64>, 'label_volume_name': <tf.Tensor 'ParseSingleExample/Squeeze_label_volume_name:0' shape=(1,) dtype=string>}

coord: Tensor("ParseSingleExample/Squeeze_center:0", shape=(1, 3), dtype=int64)

volname: Tensor("ParseSingleExample/Squeeze_label_volume_name:0", shape=(1,), dtype=string)



In [10]:
load_patch_coordinates_from_filename_queue(filename_queue)


(<tf.Tensor 'ParseSingleExample_1/Squeeze_center:0' shape=(1, 3) dtype=int64>,
 <tf.Tensor 'ParseSingleExample_1/Squeeze_label_volume_name:0' shape=(1,) dtype=string>)

In [11]:
def load_patch_coordinates(coordinates_file_pattern,
                           shuffle=True,
                           scope='load_patch_coordinates'):
    """Loads coordinates and volume names from tables of VolumeStoreInputExamples.

    Args:
    coordinates_file_pattern: File pattern for TFRecords of
                              input examples of the form of a glob
                              pattern or path@shards.
    shuffle: Whether to shuffle the coordinate file list. Note that the expanded
             coordinates_file_pattern is not guaranteed to be sorted
             alphabetically.
    scope: Passed to name_scope.

    Returns:
    Tuple of coordinates (shape `[1, 3]`) and volume name (shape `[1]`) tensors.
    """
    
    with tf.name_scope(scope):
        filename_queue = create_filename_queue(
        coordinates_file_pattern, shuffle=shuffle)
        
    return load_patch_coordinates_from_filename_queue(filename_queue)


In [12]:
coord, volname = load_patch_coordinates(coordinates_file_pattern)
coord, volname


(<tf.Tensor 'ParseSingleExample_2/Squeeze_center:0' shape=(1, 3) dtype=int64>,
 <tf.Tensor 'ParseSingleExample_2/Squeeze_label_volume_name:0' shape=(1,) dtype=string>)

In [18]:
coordin = np.array([[128, 128, 128]])
coordin.shape

(1, 3)

In [None]:
with tf.Session() as sess:
    print(sess.run(coord[0, 0:5]))


## inputs.load_from_numpylike

In [None]:
def load_from_numpylike(coordinates, volume_names, shape, volume_map,
                        name=None):
    """TensorFlow Python op that loads data from Numpy-like volumes.

    The volume object must support Numpy-like indexing, as well as shape, ndim,
    and dtype properties.  The volume can be 3d or 4d.

    Args:
    coordinates: tensor of shape [1, 3] containing XYZ coordinates of the
        center of the subvolume to load.
    volume_names: tensor of shape [1] containing names of volumes to load data
        from.
    shape: a 3-sequence giving the XYZ shape of the data to load.
    volume_map: a dictionary mapping volume names to volume objects.  See above
        for API requirements of the Numpy-like volume objects.
    name: the op name.

    Returns:
    Tensor result of reading data of shape [1] + shape[::-1] + [num_channels]
    from given center coordinate and volume name.  Dtype matches input volumes.

    Raises:
    ValueError: if volumes in volume_map have inconsistent dtypes or number of
    channels.
    """
    
    def _num_channels(volume):
        if volume.ndim == 3:
            return 1
        return volume.shape[0]

  # Validate that all volumes are compatible.
    volumes = iter(volume_map.values())
    first_vol = next(volumes)
    dtype = first_vol.dtype
    num_channels = _num_channels(first_vol)
    for volume in volumes:
        if volume.dtype != dtype:
            raise ValueError('All volumes should have same dtype.')
        if _num_channels(volume) != num_channels:
            raise ValueError('All volumes should have same number of channels.')

    start_offset = (np.array(shape) - 1) // 2
    def _load_from_numpylike(coord, volname):
        """Load from coord and volname, handling 3d or 4d volumes."""
        volume = volume_map[volname.decode('ascii')]
        # Get data, including all channels if volume is 4d.
        starts = np.array(coord) - start_offset
        slc = bounding_box.BoundingBox(start=starts, size=shape).ToSlice()
        if volume.ndim == 4:
            slc = np.index_exp[:] + slc
        data = volume[slc]

        # If 4d, move channels to back.  Otherwise, just add flat channels dim.
        if data.ndim == 4:
            data = np.rollaxis(data, 0, start=4)
        else:
            data = np.expand_dims(data, 4)

        # Add flat batch dim and return.
        data = np.expand_dims(data, 0)
        return data

    with tf.name_scope(name, 'LoadFromNumpyLike',
                       [coordinates, volume_names]) as scope:
        # For historical reasons these have extra flat dims.
        coordinates = tf.squeeze(coordinates, axis=0)
        volume_names = tf.squeeze(volume_names, axis=0)

        loaded = tf.py_func(
            _load_from_numpylike, [coordinates, volume_names], [dtype],
            name=scope)[0]
        
    loaded.set_shape([1] + list(shape[::-1]) + [num_channels])
    return loaded


In [None]:
def _num_channels(volume):
    if volume.ndim == 3:
        return 1
    return volume.shape[0]

# Validate that all volumes are compatible.
volumes = iter(label_volume_map.values())
first_vol = next(volumes)
dtype = first_vol.dtype
num_channels = _num_channels(first_vol)
for volume in volumes:
    if volume.dtype != dtype:
        raise ValueError('All volumes should have same dtype.')
    if _num_channels(volume) != num_channels:
        raise ValueError('All volumes should have same number of channels.')


In [None]:
print(volumes)
print(first_vol)
print(dtype)
print(num_channels)


In [None]:
label_size = np.array([49, 49, 49])


In [None]:
start_offset = (np.array(label_size) - 1) // 2

def _load_from_numpylike(coord, volname):
    """Load from coord and volname, handling 3d or 4d volumes."""
    volume = volume_map[volname.decode('ascii')]
    # Get data, including all channels if volume is 4d.
    starts = np.array(coord) - start_offset
    slc = bounding_box.BoundingBox(start=starts, size=shape).ToSlice()
    if volume.ndim == 4:
        slc = np.index_exp[:] + slc
    data = volume[slc]

    # If 4d, move channels to back.  Otherwise, just add flat channels dim.
    if data.ndim == 4:
        data = np.rollaxis(data, 0, start=4)
    else:
        data = np.expand_dims(data, 4)

    # Add flat batch dim and return.
    data = np.expand_dims(data, 0)
    return data

with tf.name_scope(None, 'LoadFromNumpyLike',
                   [coord, volname]) as scope:
    # For historical reasons these have extra flat dims.
    coordinates = tf.squeeze(coord, axis=0)
    volume_names = tf.squeeze(volname, axis=0)

    loaded = tf.py_func(
        _load_from_numpylike, [coordinates, volume_names], [dtype],
        name=scope)[0]

loaded.set_shape([1] + list(label_size[::-1]) + [num_channels])
loaded


In [None]:
from ffn.utils import bounding_box

# If our coordinates are given as: 
coord = np.array([[128, 128, 128]])

starts = coord - start_offset
slc = bounding_box.BoundingBox(start=starts, size=label_size)# .ToSlice()
slc

In [None]:
labels = load_from_numpylike(
  coord, volname, label_size, label_volume_map)


labels


# train.get_batch

In [None]:
import six
import numpy as np


In [None]:
def _batch(iterable):
    for batch_vals in iterable:
        yield zip(*batch_vals)
        

In [None]:
def get_example():
    while True:
        for i in range(27): # offset duplicates
#             print(i, "it is i")
            predicted = np.full((1, 49, 49, 49, 1), 0)
            patches = np.full((1, 49, 49, 49, 1), 1)
            labels = np.full((1, 49, 49, 49, 1), 2)
            yield predicted, patches, labels
    

In [None]:
print(next(get_example())[0].shape) # seed array
print(next(get_example())[1].shape) # image array
print(next(get_example())[2].shape) # label array

In [None]:
six.moves.zip(*[get_example() for _ in range(batch_size)])

In [None]:
_batch(six.moves.zip(*[get_example() for _ in range(batch_size)]))

In [None]:
seeds, patches, labels = next(_batch(six.moves.zip(*[get_example() for _ in range(batch_size)])))

len(seeds)


In [None]:
def get_batch():
    batch_size = 8
    step = 0

    for seeds, patches, labels in _batch(six.moves.zip(*[get_example() for _ in range(batch_size)])): 

        batched_seeds = np.concatenate(seeds)
        print("seeds shape", len(seeds))

        yield (batched_seeds, np.concatenate(patches), np.concatenate(labels))
        
        for i in range(batch_size):
            print("batched_seeds[i, ...]", i,batched_seeds[i, ...].shape)
            seeds[i][:] = batched_seeds[i, ...]
            

In [None]:
seed, patches, labels = next(get_batch())
    

In [None]:
print(seed.shape)
print(patches.shape)
print(labels.shape)


In [None]:
tmp = []
for i, j, k in get_batch():
    print(i.shape, j.shape, k.shape)
#     tmp.append([i.shape, j.shape, k.shape])

# logit

In [None]:
from scipy.special import logit


In [None]:
p = 0.95

seed = np.full((1, 49, 49, 49, 1), 0.05)
seed[:, 24, 24, 24, :] = p
seed[:, 24, 24, 24, :]
logit_seed = logit(seed)
logit_seed[:, 24, 24, 24, :]
np.where(logit_seed == logit(0.95))
