## Import Packages, Environment Setting

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from colorama import Fore, Style

tfds.disable_progress_bar()

def prepend_tab(target):
    return f'{"":4}{target}'.replace('\n', f'\n{"":4}')

## Tensorflow Predefined Datasets

In [2]:
print(f'{Fore.RED}tfds.list_builders\n- List predefined dataset{Style.RESET_ALL}')
print(f'{tfds.list_builders()}')
print(f'{Fore.RED}\ntfds.load\n- Load dataset{Style.RESET_ALL}')
iris, info = tfds.load('iris', split='train', shuffle_files=True, with_info=True)
print(f'{Fore.MAGENTA}{"":4}DatasetInfo{Style.RESET_ALL}')
print(f'{prepend_tab(info)}')

[31mtfds.list_builders
- List predefined dataset[0m
['abstract_reasoning', 'aflw2k3d', 'amazon_us_reviews', 'bair_robot_pushing_small', 'bigearthnet', 'binarized_mnist', 'binary_alpha_digits', 'caltech101', 'caltech_birds2010', 'caltech_birds2011', 'cats_vs_dogs', 'celeb_a', 'celeb_a_hq', 'chexpert', 'cifar10', 'cifar100', 'cifar10_corrupted', 'clevr', 'cnn_dailymail', 'coco', 'coco2014', 'coil100', 'colorectal_histology', 'colorectal_histology_large', 'curated_breast_imaging_ddsm', 'cycle_gan', 'deep_weeds', 'definite_pronoun_resolution', 'diabetic_retinopathy_detection', 'downsampled_imagenet', 'dsprites', 'dtd', 'dummy_dataset_shared_generator', 'dummy_mnist', 'emnist', 'eurosat', 'fashion_mnist', 'flores', 'food101', 'gap', 'glue', 'groove', 'higgs', 'horses_or_humans', 'image_label_folder', 'imagenet2012', 'imagenet2012_corrupted', 'imdb_reviews', 'iris', 'kitti', 'kmnist', 'lfw', 'lm1b', 'lsun', 'mnist', 'mnist_corrupted', 'moving_mnist', 'multi_nli', 'nsynth', 'omniglot', 'ope

## Tensorflow DatasetInfo

In [3]:
print(f'{Fore.RED}info.name\n- Name of dataset{Style.RESET_ALL}')
print(f'{prepend_tab(info.name)}\n')
print(f'{Fore.RED}info.description\n- Description of dataset{Style.RESET_ALL}')
print(f'{prepend_tab(info.description)}')

[31minfo.name
- Name of dataset[0m
    iris

[31minfo.description
- Description of dataset[0m
    This is perhaps the best known database to be found in the pattern recognition
    literature. Fisher's paper is a classic in the field and is referenced
    frequently to this day. (See Duda & Hart, for example.) The data set contains
    3 classes of 50 instances each, where each class refers to a type of iris
    plant. One class is linearly separable from the other 2; the latter are NOT
    linearly separable from each other.
    


## Tensorflow Dataset Declaration

In [4]:
print(f'{Fore.RED}tf.data.Dataset.range')
print(f'- Create a dataset with scalar from 1 to n.{Style.RESET_ALL}')
dataset = tf.data.Dataset.range(3)
print(f'{prepend_tab([element.numpy() for element in dataset])}\n')
print(f'{Fore.RED}tf.data.Dataset.from_tensor_slices')
print(f'- Create a dataset from a tensor (the first axis will be the index for dataset)')
dataset = tf.data.Dataset.from_tensor_slices(tf.convert_to_tensor([[0, 1, 2], [3, 4, 5]]))
print(f'{Style.RESET_ALL}{prepend_tab([element.numpy() for element in dataset])}\n')

[31mtf.data.Dataset.range
- Create a dataset with scalar from 1 to n.[0m
    [0, 1, 2]

[31mtf.data.Dataset.from_tensor_slices
- Create a dataset from a tensor (the first axis will be the index for dataset)
[0m    [array([0, 1, 2]), array([3, 4, 5])]



## Tensorflow Dataset Operations

In [5]:
def negative_transform(x):
    x['negative_features'] = -1 * x['features']
    return x
iris_shuffled = iris.shuffle(10)
iris_example = iris_shuffled.take(10)
iris_batches = iris_example.batch(5)
print(f'{Fore.RED}dataset.batch\n- Create batches of samples with given batch size{Style.RESET_ALL}')
for i, batch in enumerate(iris_batches):
    print(f'{"":4}{Fore.MAGENTA}Features in batch {i + 1}{Style.RESET_ALL}')
    print(prepend_tab(batch['features'].numpy()))
    print(f'{"":4}{Fore.MAGENTA}Labels in batch {i + 1}{Style.RESET_ALL}')
    print(prepend_tab(batch['label'].numpy()))
print(f'{Fore.RED}\ndataset.filter\n- Filter based on specific condition{Style.RESET_ALL}')
iris_filtered = iris_example.filter(lambda x: tf.reduce_sum(x['features']) > 12.)
print(f'{"":4}{Fore.MAGENTA}Number of examples{Style.RESET_ALL}')
print(f'{"":4}{len(list(iris_filtered))}')
print(f'{Fore.RED}\ndataset.map\n- Map functions to every element in the dataset{Style.RESET_ALL}')
iris_modified = iris.take(3).map(negative_transform)
for element in iris_modified:
    print(f'{"":4}{element["negative_features"]}')
print(f'{Fore.RED}\niter\n- Get the iterators of the datasets{Style.RESET_ALL}')
print(f'{"":4}{iter(iris)}')
print(f'{Fore.RED}\ntf.data.DatasetSpec\n- Get the dataset specification{Style.RESET_ALL}')
print(f'{"":4}{tf.data.DatasetSpec(iris)}')

[31mdataset.batch
- Create batches of samples with given batch size[0m
    [35mFeatures in batch 1[0m
    [[6.5 3.2 5.1 2. ]
     [7.7 2.6 6.9 2.3]
     [6.2 2.2 4.5 1.5]
     [4.9 3.1 1.5 0.1]
     [6.3 3.3 4.7 1.6]]
    [35mLabels in batch 1[0m
    [2 2 1 0 1]
    [35mFeatures in batch 2[0m
    [[6.  2.9 4.5 1.5]
     [5.8 2.7 3.9 1.2]
     [6.8 2.8 4.8 1.4]
     [5.4 3.4 1.5 0.4]
     [5.6 2.9 3.6 1.3]]
    [35mLabels in batch 2[0m
    [1 1 1 0 1]
[31m
dataset.filter
- Filter based on specific condition[0m
    [35mNumber of examples[0m
    7
[31m
dataset.map
- Map functions to every element in the dataset[0m
    [-6.1 -2.8 -4.7 -1.2]
    [-5.7 -3.8 -1.7 -0.3]
    [-7.7 -2.6 -6.9 -2.3]
[31m
iter
- Get the iterators of the datasets[0m
    <tensorflow.python.data.ops.iterator_ops.OwnedIterator object at 0x00000168CCA6C988>
[31m
tf.data.DatasetSpec
- Get the dataset specification[0m
    DatasetSpec(<_OptionsDataset shapes: {features: (4,), label: ()}, types: {feature