
Practical Coding in TensorFlow 2.0
- https://towardsdatascience.com/practical-coding-in-tensorflow-2-0-fafd2d3863f6

In [36]:
import tensorflow as tf
tf.__version__

'2.1.0'

## Datasets in TF2

In [37]:
import numpy as np
np.random.seed(0)

data = np.random.randn(256, 8, 8, 3)
print(data.shape)
dataset = tf.data.Dataset.from_tensor_slices(data)
print(dataset)

(256, 8, 8, 3)
<TensorSliceDataset shapes: (8, 8, 3), types: tf.float64>


In [38]:
for i, batch in enumerate(dataset):
    if i == 255 or i == 256:
        print(i, batch.shape)

255 (8, 8, 3)


In [39]:
for i, batch in dataset.enumerate():
    if i == 255 or i == 256:
        print(i, batch.shape)
        print(i.numpy(), batch.shape)

tf.Tensor(255, shape=(), dtype=int64) (8, 8, 3)
255 (8, 8, 3)


In [40]:
for i, batch in dataset.repeat(3).enumerate():
    if i == 255 or i == 256:
        print(i.numpy(), batch.shape)

255 (8, 8, 3)
256 (8, 8, 3)


In [41]:
for i, batch in dataset.repeat(3).enumerate():
    if i == 255 or i == 256:
        print(i.numpy(), batch.shape)

255 (8, 8, 3)
256 (8, 8, 3)


In [42]:
# for i, batch in dataset.repeat().enumerate():
#     if i == 255 or i == 256:
#         print(i.numpy(), batch.shape)

In [43]:
for batch in dataset.take(3):
    print(batch.shape)


(8, 8, 3)
(8, 8, 3)
(8, 8, 3)


In [44]:
dataset = dataset.batch(16)

for batch in dataset.take(3):
    print(batch.shape)

(16, 8, 8, 3)
(16, 8, 8, 3)
(16, 8, 8, 3)


In [45]:
dataset = tf.data.Dataset.from_tensor_slices(np.arange(19))

for batch in dataset.batch(5):
    print(batch)

tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)
tf.Tensor([10 11 12 13 14], shape=(5,), dtype=int64)
tf.Tensor([15 16 17 18], shape=(4,), dtype=int64)


In [46]:
for batch in dataset.shuffle(5).batch(5):
    print(batch)

tf.Tensor([4 2 0 1 7], shape=(5,), dtype=int64)
tf.Tensor([ 9  3 11 10  6], shape=(5,), dtype=int64)
tf.Tensor([ 8 13 14 12 15], shape=(5,), dtype=int64)
tf.Tensor([17 18 16  5], shape=(4,), dtype=int64)


In [47]:
for batch in dataset.batch(5).shuffle(5):
    print(batch)

tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
tf.Tensor([5 6 7 8 9], shape=(5,), dtype=int64)
tf.Tensor([10 11 12 13 14], shape=(5,), dtype=int64)
tf.Tensor([15 16 17 18], shape=(4,), dtype=int64)


In [48]:
def transform(data):
    mean = tf.reduce_mean(data)
    return  data - mean

for batch in dataset.shuffle(5).batch(5):
    print(batch)

for batch in dataset.shuffle(5).batch(5).map(transform):
    print(batch)

tf.Tensor([0 5 4 2 3], shape=(5,), dtype=int64)
tf.Tensor([ 6  8 11  9 13], shape=(5,), dtype=int64)
tf.Tensor([12  7  1 15 16], shape=(5,), dtype=int64)
tf.Tensor([17 18 10 14], shape=(4,), dtype=int64)
tf.Tensor([ 1  2 -1  3 -2], shape=(5,), dtype=int64)
tf.Tensor([-4  3  4  1  0], shape=(5,), dtype=int64)
tf.Tensor([ 0 -3 -1  3  4], shape=(5,), dtype=int64)
tf.Tensor([ 0  3  4 -4], shape=(4,), dtype=int64)


In [49]:
for batch in dataset.batch(1):
    print(batch)


tf.Tensor([0], shape=(1,), dtype=int64)
tf.Tensor([1], shape=(1,), dtype=int64)
tf.Tensor([2], shape=(1,), dtype=int64)
tf.Tensor([3], shape=(1,), dtype=int64)
tf.Tensor([4], shape=(1,), dtype=int64)
tf.Tensor([5], shape=(1,), dtype=int64)
tf.Tensor([6], shape=(1,), dtype=int64)
tf.Tensor([7], shape=(1,), dtype=int64)
tf.Tensor([8], shape=(1,), dtype=int64)
tf.Tensor([9], shape=(1,), dtype=int64)
tf.Tensor([10], shape=(1,), dtype=int64)
tf.Tensor([11], shape=(1,), dtype=int64)
tf.Tensor([12], shape=(1,), dtype=int64)
tf.Tensor([13], shape=(1,), dtype=int64)
tf.Tensor([14], shape=(1,), dtype=int64)
tf.Tensor([15], shape=(1,), dtype=int64)
tf.Tensor([16], shape=(1,), dtype=int64)
tf.Tensor([17], shape=(1,), dtype=int64)
tf.Tensor([18], shape=(1,), dtype=int64)


In [50]:
dataset.shuffle(5).batch(5).prefetch(buffer_size= tf.data.experimental.AUTOTUNE)

<PrefetchDataset shapes: (None,), types: tf.int64>

## Making TFRecord file for images

In [51]:
print("test")

test


In [59]:
imgs = (np.random.randn(256, 8, 8, 3) * 255).astype(np.uint8)
acts = np.random.randn(256, 4).astype(np.float32)

In [71]:
print("imgs: ", imgs.shape)
print("acts: ", acts.shape)

imgs:  (256, 8, 8, 3)
acts:  (256, 4)


In [61]:
def _bytes_feature(value):
  
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [62]:
def serialize_example(image, action):
    image = tf.image.encode_png(image)
    feature = {
        'image': _bytes_feature(image),
        'action' : _bytes_feature(tf.io.serialize_tensor(action)),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature)).SerializeToString()

In [64]:
with tf.io.TFRecordWriter('test.tfrecord') as writer:
    for xi, ai in zip(imgs, acts):
        example = serialize_example(xi, ai)
        writer.write(example)

## Read TFRecord File

In [68]:
def parse_image_function (example_proto):
    image_feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'action' : tf.io.FixedLenFeature([], tf.string),
    }
    
    return tf.io.parse_single_example(example_proto, image_feature_description)

def decode_image_function(record):
    record['image'] = tf.cast(tf.image.decode_image(record['image']), tf.float32) / 255.
    record['action'] = tf.io.parse_tensor(record['action'], out_type=tf.float32)
    return record['image'], record['action']

In [74]:
dataset = tf.data.TFRecordDataset('test.tfrecord')
dataset = dataset.map(parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.map(decode_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)


In [76]:
for img, act in dataset.batch(64):
    print(img.shape, act.shape)

(64, 8, 8, 3) (64, 4)
(64, 8, 8, 3) (64, 4)
(64, 8, 8, 3) (64, 4)
(64, 8, 8, 3) (64, 4)


In [73]:
for img, act in dataset.batch(16).take(3):
    print(img.shape, act.shape)

(16, 8, 8, 3) (16, 4)
(16, 8, 8, 3) (16, 4)
(16, 8, 8, 3) (16, 4)
