In [2]:
import tensorflow as tf

In [3]:
# Show the installed TensorFlow version string.
tf.__version__

'1.15.2'

In [5]:
# Turn on eager execution, then display whether it is active.
tf.enable_eager_execution()
tf.executing_eagerly()

True

In [24]:
# NOTE(review): eager execution is enabled in an earlier cell; presumably
# `sess.run` cannot evaluate eager tensors, and the execution counts in this
# notebook are out of order — confirm this Session is still needed.
sess = tf.Session()

In [8]:
import numpy as np

# tf.Example is a protobuf message

In [9]:
# The following functions can be used to convert a value to a type compatible
# with tf.Example.

def _bytes_feature(value):
    """Returns a tf.train.Feature holding a bytes_list built from a string / byte.

    Args:
        value: a ``bytes`` object, a ``str`` (encoded as UTF-8 before being
            stored), or a tensor of the type produced by ``tf.constant``
            (its ``.numpy()`` value is stored).

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` contains the single value.
    """
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    if isinstance(value, str):
        # BytesList only accepts bytes on Python 3, so text has to be encoded first.
        value = value.encode('utf-8')
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Wraps a single float / double in a tf.train.Feature holding a float_list."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wraps a single bool / enum / int / uint in a tf.train.Feature holding an int64_list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

In [10]:
# Exercise every helper once and print the resulting Feature protos in order:
# two bytes features, one float feature, two int64 features.
for demo_feature in (
    _bytes_feature(b'test_string'),
    _bytes_feature(u'test_bytes'.encode('utf-8')),
    _float_feature(np.exp(1)),
    _int64_feature(True),
    _int64_feature(1),
):
    print(demo_feature)

bytes_list {
  value: "test_string"
}

bytes_list {
  value: "test_bytes"
}

float_list {
  value: 2.7182817459106445
}

int64_list {
  value: 1
}

int64_list {
  value: 1
}



In [11]:
# Build one float Feature, print its text form, then print its serialized bytes.
feature = _float_feature(np.exp(1))
print(feature)
print(feature.SerializeToString())

float_list {
  value: 2.7182817459106445
}

b'\x12\x06\n\x04T\xf8-@'


In [12]:
# The number of observations in the dataset.
n_observations = int(1e4)

# Dedicated, seeded generator: re-running this cell (or the whole notebook)
# reproduces exactly the same synthetic dataset, and NumPy's global random
# state is left untouched.
rng = np.random.RandomState(42)

# Boolean feature, encoded as False or True.
feature0 = rng.choice([False, True], n_observations)

# Integer feature, random from 0 to 4.
feature1 = rng.randint(0, 5, n_observations)

# String feature: feature1 is used as an index into the label table, so both
# features always describe the same category.
strings = np.array([b'cat', b'dog', b'chicken', b'horse', b'goat'])
feature2 = strings[feature1]

# Float feature, from a standard normal distribution
feature3 = rng.randn(n_observations)

In [13]:
def serialize_example(feature0, feature1, feature2, feature3):
    """
    Builds a tf.Example message from the four feature values and returns it
    serialized to a byte string, ready to be written to a file.
    """
    # Each value is wrapped by the helper matching its tf.Example-compatible
    # type: int64 for feature0 / feature1, bytes for feature2, float for feature3.
    example_proto = tf.train.Example(
        features=tf.train.Features(
            feature={
                'feature0': _int64_feature(feature0),
                'feature1': _int64_feature(feature1),
                'feature2': _bytes_feature(feature2),
                'feature3': _float_feature(feature3),
            }
        )
    )
    return example_proto.SerializeToString()

In [14]:
# This is an example observation from the dataset.

# NOTE(review): `example_observation` is not read anywhere in the visible
# cells — confirm it is still needed.
example_observation = []

# Serialize one hand-written observation; the bare name on the last line
# displays the resulting byte string.
serialized_example = serialize_example(False, 4, b'goat', 0.9876)
serialized_example

b'\nR\n\x11\n\x08feature0\x12\x05\x1a\x03\n\x01\x00\n\x11\n\x08feature1\x12\x05\x1a\x03\n\x01\x04\n\x14\n\x08feature2\x12\x08\n\x06\n\x04goat\n\x14\n\x08feature3\x12\x08\x12\x06\n\x04[\xd3|?'

In [15]:
# Parse the serialized bytes back into a tf.train.Example message and display it.
example_proto = tf.train.Example.FromString(serialized_example)
example_proto

features {
  feature {
    key: "feature0"
    value {
      int64_list {
        value: 0
      }
    }
  }
  feature {
    key: "feature1"
    value {
      int64_list {
        value: 4
      }
    }
  }
  feature {
    key: "feature2"
    value {
      bytes_list {
        value: "goat"
      }
    }
  }
  feature {
    key: "feature3"
    value {
      float_list {
        value: 0.9876000285148621
      }
    }
  }
}

# TFRecord

In [16]:
# Build a dataset from the single `feature1` array and display its repr.
tf.data.Dataset.from_tensor_slices(feature1)

<DatasetV1Adapter shapes: (), types: tf.int64>

In [17]:
# Slice the four feature arrays together; each dataset element is unpacked
# later as an (f0, f1, f2, f3) tuple.
features_dataset = tf.data.Dataset.from_tensor_slices((feature0, feature1, feature2, feature3))

In [35]:
def f(features_dataset):
    """Print the four feature values of the first element of `features_dataset`.

    Each value is emitted with `tf.print`; the function has no return statement.

    NOTE(review): `f` is defined a second time further down in this notebook
    with a different body, and that later definition replaces this one once
    its cell has run — consider giving the two functions distinct names.
    """
    # Use `take(1)` to only pull one example from the dataset.
    for f0,f1,f2,f3 in features_dataset.take(1):
        tf.print(f0)
        tf.print(f1)
        tf.print(f2)
        tf.print(f3)

In [36]:
# Wrap `f` with tf.function; `g` is the callable used in the following cells.
g = tf.function(f)

In [37]:
# Call the wrapped function on the dataset; the values shown are emitted by
# the tf.print calls inside `f`.
g(features_dataset)

0
3
horse
0.19993429155434039


In [106]:
# NOTE(review): the `f` defined with tf.print has no return statement, so
# g(...) presumably evaluates to None here; the recorded output below does not
# look like it came from this code — confirm and re-run on a fresh kernel.
sess.run(g(features_dataset))

Tensor("args_1:0", shape=(), dtype=bool)
Tensor("args_2:0", shape=(), dtype=int64)
Tensor("args_3:0", shape=(), dtype=string)
Tensor("args_4:0", shape=(), dtype=float64)


array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]], dtype=float32)

In [29]:
def tf_serialize_example(f0,f1,f2,f3):
    """Run `serialize_example` on four tensors through `tf.py_function`.

    The wrapped call is declared to return `tf.string`, and its result is
    reshaped to a scalar before being returned.
    """
    serialized = tf.py_function(
        func=serialize_example,   # the plain-Python serializer defined above
        inp=(f0, f1, f2, f3),     # forwarded to it positionally
        Tout=tf.string,           # declared return type
    )
    scalar_shape = ()
    return tf.reshape(serialized, scalar_shape)

In [74]:
def f(features_dataset):
    """Serialize the first element of `features_dataset`.

    Pulls a single (f0, f1, f2, f3) element, prints the four values, and
    returns the result of `tf_serialize_example` for them.
    """
    # `take(1)` limits the dataset to its first example only.
    first_only = features_dataset.take(1)
    f0, f1, f2, f3 = next(iter(first_only))
    print(f0, f1, f2, f3)
    serialized = tf_serialize_example(f0, f1, f2, f3)
    return serialized

In [75]:
# Re-wrap the redefined `f` with tf.function, replacing the previous `g`.
g = tf.function(f)

In [76]:
# Call the wrapped function; it returns the serialized first example as a
# scalar string tensor.
g(features_dataset)

Tensor("IteratorGetNext:0", shape=(), dtype=bool) Tensor("IteratorGetNext:1", shape=(), dtype=int64) Tensor("IteratorGetNext:2", shape=(), dtype=string) Tensor("IteratorGetNext:3", shape=(), dtype=float64)


<tf.Tensor: id=297, shape=(), dtype=string, numpy=b'\nS\n\x11\n\x08feature0\x12\x05\x1a\x03\n\x01\x00\n\x11\n\x08feature1\x12\x05\x1a\x03\n\x01\x03\n\x15\n\x08feature2\x12\t\n\x07\n\x05horse\n\x14\n\x08feature3\x12\x08\x12\x06\n\x04\x93\xbbL>'>

In [79]:
# Apply `tf_serialize_example` to every element of the dataset, then display
# the resulting dataset's repr.
serialized_features_dataset = features_dataset.map(tf_serialize_example)
serialized_features_dataset

<DatasetV1Adapter shapes: (), types: tf.string>

In [80]:
def generator():
    """Yield the `serialize_example` result for every element of the module-level `features_dataset`."""
    yield from (serialize_example(*row) for row in features_dataset)

# Load TFRecord

### tf.data.Dataset.from_tensor_slices()

In [9]:
# A 10x2 float32 tensor of ones, sliced along its first axis into a dataset
# whose elements have shape (2,).
tsr = tf.ones([10, 2], tf.float32)

dataset = tf.data.Dataset.from_tensor_slices(tsr)

In [10]:
# Display the dataset's repr (element shapes and types).
dataset

<DatasetV1Adapter shapes: (2,), types: tf.float32>

In [8]:
# NOTE(review): `dataset` holds float32 rows here, while the recorded traceback
# shows TFRecordDataset requesting dtype string, so this cell raises
# ValueError; presumably a dataset of TFRecord file names was intended — confirm.
dataset = dataset.flat_map(tf.data.TFRecordDataset)

ValueError: in converted code:
    relative to /opt/conda/lib/python3.7/site-packages/tensorflow_core/python:

    data/ops/readers.py:336 __init__
        filenames, compression_type, buffer_size, num_parallel_reads)
    data/ops/readers.py:296 __init__
        filenames = _create_or_validate_filenames_dataset(filenames)
    data/ops/readers.py:56 _create_or_validate_filenames_dataset
        filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
    framework/ops.py:1184 convert_to_tensor
        return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
    framework/ops.py:1242 convert_to_tensor_v2
        as_ref=False)
    framework/ops.py:1273 internal_convert_to_tensor
        (dtype.name, value.dtype.name, value))

    ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=(2,) dtype=float32>


# create TFRecord

In [5]:
# Norm of a 2x2 matrix of ones, evaluated through the session
# (the recorded result is 2.0).
mat = tf.ones([2, 2], tf.float32)
res = tf.norm(mat)

sess.run(res)

2.0

### TFRecordWriter
- [tf.io.TFRecordWriter](https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/io/TFRecordWriter)
- Class tf.compat.v1.io.TFRecordWriter
- Class tf.compat.v1.python_io.TFRecordWriter
- Class tf.compat.v2.io.TFRecordWriter
- Class tf.python_io.TFRecordWriter

In [None]:
# Skeleton of the TFRecord writing pattern: open a writer, write the records
# of the dataset one at a time, and close the writer when done. The `with`
# block closes the writer even if writing raises.
# NOTE(review): `output_filename` must already be defined when this cell runs;
# in the visible cells it is only assigned further down — confirm the order.
with tf.compat.v1.python_io.TFRecordWriter(output_filename) as writer: # tf.compat.v1.io.TFRecordWriter
    # Write each record of the dataset you have here, one at a time.
    pass

In [16]:
# Ten 2x2 float32 matrices filled with ones.
data = np.ones(shape=(10, 2, 2), dtype=np.float32)

In [17]:
# Display the whole array.
data

array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]], dtype=float32)

In [18]:
# Convert the NumPy array into a TensorFlow tensor.
tf_data = tf.convert_to_tensor(data)

In [19]:
# Display the tensor's static shape.
tf_data.shape

TensorShape([Dimension(10), Dimension(2), Dimension(2)])

In [None]:
output_filename = '10x2x2.tfrecord'

# Write every 2x2 matrix of `data` as one tf.train.Example record.
# A tensor has no SerializeToString(); only protocol buffer messages do, so
# each matrix is wrapped in an Example first. The NumPy array `data` is
# iterated (rather than `tf_data`) so the cell works the same in eager and
# graph mode. The `with` block closes the writer even if writing raises.
with tf.compat.v1.python_io.TFRecordWriter(output_filename) as writer:
    for example in data:
        try:
            # FloatList is flat, so the matrix is stored flattened in
            # row-major order; a reader has to reshape it back to (2, 2).
            matrix_feature = tf.train.Feature(
                float_list=tf.train.FloatList(value=example.ravel().tolist()))
            tf_example = tf.train.Example(
                features=tf.train.Features(feature={'data': matrix_feature}))
            writer.write(tf_example.SerializeToString())
        except ValueError:
            # Best effort: skip records that cannot be converted.
            tf.logging.warning('Invalid example: %s, ignoring.', example)

In [None]:
# Convert every (image, label) pair into a tf.Example and append it to one
# TFRecord file.
# NOTE(review): `os`, `examples`, `image_dir`, `label_dir` and
# `dict_to_tf_example` are not defined in the visible cells — confirm they are
# provided before this cell runs.
# The `with` block closes the writer even when an error other than ValueError
# aborts the loop.
with tf.compat.v1.python_io.TFRecordWriter(output_filename) as writer:
    for example in examples:
        image_path = os.path.join(image_dir, example + '.jpg')
        label_path = os.path.join(label_dir, example + '.png')

        try:
            tf_example = dict_to_tf_example(image_path, label_path)
            writer.write(tf_example.SerializeToString())
        except ValueError:
            # Best effort: log and skip examples that cannot be converted.
            tf.logging.warning('Invalid example: %s, ignoring.', example)