In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [2]:
# Labels table: its 'file' and 'label' columns are read further down.
labels_csv_path = '../data/labels.csv'
labels_df = pd.read_csv(labels_csv_path)

In [3]:
import skimage.io as sio
import os
import os.path as op
import fnmatch

In [4]:
# Read every image named in the labels table, in table order, then stack the
# per-image arrays into a single array.
cells_dir = '../data/cells/'
cell_images = [
    sio.imread(op.join(cells_dir, image_name))
    for image_name in labels_df['file']
]

img_data = np.array(cell_images)

In [5]:
# Sanity check of the stacked array. write_tfrecords reads each image's axes
# as (rows, cols, depth), so the full array is (n_images, rows, cols, depth).
img_data.shape

(844, 256, 256, 3)

In [6]:
# Label column of the labels table; each entry is later passed through int()
# when it is written into a TFRecord.
labels_data = labels_df['label']

In [7]:
# One-hot encode the labels: row i gets a single 1 in column labels_data[i].
# NOTE(review): the column count is the number of distinct labels, so this
# assumes the labels are the integers 0..K-1 -- TODO confirm.
n_examples = labels_data.shape[0]
n_classes = len(np.unique(labels_data))
one_hot_labels = np.zeros((n_examples, n_classes))
one_hot_labels[np.arange(n_examples), labels_data] = 1

In [8]:
import os.path as op
# Output locations of the serialized train / test splits.
tfrecords_dir = '../data'
tfrecords_train_file = op.join(tfrecords_dir, 'cells_train.tfrecords')
tfrecords_test_file = op.join(tfrecords_dir, 'cells_test.tfrecords')

In [9]:
idx = np.arange(img_data.shape[0])

# The examples MUST be randomly shuffled before they are written to TFRecords:
# records are stored in the order given, and without an up-front shuffle one
# cannot make good use of TensorFlow's out-of-core shuffling.
#
# A dedicated, seeded generator makes the train/test split identical on every
# "Restart & Run All" and leaves numpy's global random state untouched.
split_seed = 0
np.random.RandomState(split_seed).shuffle(idx)

prop_train = 0.8
# Compute the cut point once so both slices are guaranteed to use the same one.
n_train = int(prop_train * idx.shape[0])

train_idx = idx[:n_train]
test_idx = idx[n_train:]

In [10]:
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def _bytes_feature(value):
    """Wrap a single bytes object in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

    

def write_tfrecords(img_data, labels_data, fname, idx):
    """Serialize the selected examples into a TFRecord file.

    One ``tf.train.Example`` is written per entry of ``idx``, in the order
    given. Each example carries the int64 features ``image/height``,
    ``image/width``, ``image/depth`` and ``label``, plus the bytes feature
    ``image/raw`` holding the image's raw buffer. The image dtype is not
    stored, so whoever decodes ``image/raw`` has to know it.

    Parameters
    ----------
    img_data : indexable of arrays
        ``img_data[i]`` must be an array with at least three axes, read as
        (rows, cols, depth).
    labels_data : indexable
        ``labels_data[i]`` must be convertible with ``int()``.
    fname : str
        Path of the TFRecord file to create.
    idx : iterable
        Keys used to index both ``img_data`` and ``labels_data``.
    """
    # NOTE(review): tf.python_io is the TF 1.x namespace; on TF 2.x the same
    # writer is exposed as tf.io.TFRecordWriter.
    #
    # The context manager closes the writer even when an example fails part
    # way through, instead of leaking the open file handle.
    with tf.python_io.TFRecordWriter(fname) as writer:
        # iterate over each example
        # wrap with tqdm for a progress bar
        for example_idx in tqdm(idx):
            image = img_data[example_idx]
            label = labels_data[example_idx]
            rows = image.shape[0]
            cols = image.shape[1]
            depth = image.shape[2]
            # tobytes() returns the same bytes as the deprecated tostring(),
            # which has been removed from recent NumPy releases.
            image_raw = image.tobytes()
            # construct the Example proto object
            example = tf.train.Example(
                # Example contains a Features proto object
                features=tf.train.Features(feature={
                    # Features contains a map of string to Feature proto objects
                    'image/height': _int64_feature(rows),
                    'image/width': _int64_feature(cols),
                    'image/depth': _int64_feature(depth),
                    'label': _int64_feature(int(label)),
                    'image/raw': _bytes_feature(image_raw)}))

            # use the proto object to serialize the example to a string
            serialized = example.SerializeToString()
            # write the serialized object to disk
            writer.write(serialized)


In [11]:
# Serialize the training split.
write_tfrecords(
    img_data=img_data,
    labels_data=labels_data,
    fname=tfrecords_train_file,
    idx=train_idx,
)

100%|██████████| 675/675 [00:01<00:00, 382.94it/s]


In [12]:
# Serialize the held-out test split.
write_tfrecords(
    img_data=img_data,
    labels_data=labels_data,
    fname=tfrecords_test_file,
    idx=test_idx,
)

100%|██████████| 169/169 [00:00<00:00, 501.93it/s]
