In [1]:
# Deep-learning stack: import TensorFlow, clear the Keras backend session,
# and report the TensorFlow version in use.
import tensorflow as tf
import tensorflow_docs as tfdocs
tf.keras.backend.clear_session()
print('Tensorflow version: %s' % str(tf.__version__))

import sys
import os
# On macOS only: set KMP_DUPLICATE_LIB_OK so that multiple OpenMP instances
# are tolerated (see the message printed below).
if sys.platform.startswith('darwin'):
    print('Enable multiple instances of OpenMP on macOS')
    os.environ['KMP_DUPLICATE_LIB_OK']='True'
print()

# Visualization tooling; the IPython magic below selects matplotlib's inline backend.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import IPython
print('Prepared data visualization')
print()

# Data-manipulation and filesystem helpers used by the later cells.
import numpy as np
import pandas as pd
import pathlib
import shutil
print('Prepared data manipulation, numpy version: ', str(np.__version__))

Tensorflow version: 2.0.0
Enable multiple instances of OpenMP on macOS

Prepared data visualization

Prepared data manipulation, numpy version:  1.19.1


In [2]:
print('Process images')
# Fetch (and untar) the flower photo archive through the Keras download helper,
# then wrap the returned location in a Path for the globbing done in later cells.
flowers_dir = tf.keras.utils.get_file(
    fname='flower_photos',
    origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True,
)
img_root = pathlib.Path(flowers_dir)
print('Load flower images to ', img_root)

Process images
Load flower images to  /Users/hzhou/.keras/datasets/flower_photos


In [3]:
import random

print('Setup list of all flower images')
# Every entry one level below a sub-directory of img_root, stored as a string path.
images = list(map(str, img_root.glob('*/*')))
print('Count of images: {}'.format(len(images)))
print()

print('Shuffle images')
# In-place shuffle using the global `random` generator (no seed is set here).
random.shuffle(images)

Setup list of all flower images
Count of images: 3670

Shuffle images


In [4]:
print('Load license info of images')
# Maps the text before ' CC-BY ' on each LICENSE.txt line to the attribution
# text after it, with the final ' - '-separated field removed.
image_licenses = {}
with open(img_root/'LICENSE.txt', encoding='utf-8') as license_file:
    for entry in license_file:
        fields = entry.split(' CC-BY ')
        # Only lines containing exactly one ' CC-BY ' separator are recorded.
        if len(fields) != 2:
            continue
        photo_key, credit = fields
        credit_fields = credit.split(' - ')
        image_licenses[photo_key] = ' - '.join(credit_fields[:-1])
print('Count of license info: {}'.format(len(image_licenses)))

Load license info of images
Count of license info: 3670


In [5]:
# Show three randomly chosen images, each preceded by its attribution line
# (or a fallback message when the license table has no entry for it).
for n in range(3):
    image_path = random.choice(images)
    # License entries are looked up by the path relative to the dataset root.
    image_ref = str(pathlib.Path(image_path).relative_to(img_root))
    caption = (
        'Photo ' + image_licenses[image_ref]
        if image_ref in image_licenses
        else 'No license info for ' + image_path
    )
    print(caption)
    IPython.display.display(IPython.display.Image(image_path))

Photo by Ramón Portellano


<IPython.core.display.Image object>

Photo by Maggie McCain


<IPython.core.display.Image object>

Photo by Sean McMenemy


<IPython.core.display.Image object>

In [6]:
print('Prepare image labels')
# Class names are the sub-directory names under img_root, in sorted order.
flower_types = [item.name for item in img_root.glob('*/') if item.is_dir()]
flower_types.sort()
# Each class name is assigned its position in the sorted list as integer label.
flower_type_indexes = {name: index for index, name in enumerate(flower_types)}
print('Flower type indexes: ', flower_type_indexes, sep='')
# The label of an image is the index of its parent directory's name.
image_labels = [
    flower_type_indexes[pathlib.Path(path).parent.name]
    for path in images
]

Prepare image labels
Flower type indexes: {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}


In [26]:
print('Prepare image dataset')

def preprocess_image(image, size=[192,192]):
    """Turn encoded JPEG data into a fixed-size, rescaled image tensor.

    Args:
        image: encoded JPEG contents, as accepted by `tf.image.decode_jpeg`.
        size: target size handed to `tf.image.resize`; defaults to [192, 192].

    Returns:
        The decoded 3-channel image, resized to `size` and divided by 255.0
        (intended to bring pixel values into the [0, 1] range).
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    resized = tf.image.resize(decoded, size)
    return resized / 255.0

def load_and_preprocess_image(path):
    """Read the file at `path` and run its contents through `preprocess_image`.

    Args:
        path: location of the image file, as accepted by `tf.io.read_file`.

    Returns:
        Whatever `preprocess_image` returns for the file's contents
        (using its default target size).
    """
    return preprocess_image(tf.io.read_file(path))

def load_and_preprocess_from_path_label(path, label):
    """Map a (path, label) pair to an (image, label) pair.

    The image is produced by `load_and_preprocess_image(path)`; the label is
    passed through unchanged.
    """
    image = load_and_preprocess_image(path)
    return image, label

def generate_image_dataset(image_paths, image_labels, repeat_cnt = None, batch_size = 32):
    """Build a shuffled, repeated and batched dataset of (image, label) pairs.

    Args:
        image_paths: sequence of image file paths (one element per image).
        image_labels: sequence of labels; must have the same length as
            `image_paths`.
        repeat_cnt: forwarded to `Dataset.repeat()`. With the default `None`
            the dataset repeats indefinitely (per the tf.data documentation).
        batch_size: number of elements per batch, forwarded to `Dataset.batch()`.

    Returns:
        A `tf.data.Dataset` yielding batches of (image, label). When
        `image_paths` is empty or the two inputs differ in length, a message
        is printed and an empty dataset (`tf.data.Dataset.range(0)`) is
        returned instead.
    """
    image_count = len(image_paths)
    if image_count == 0:
        print('No image input')
        return tf.data.Dataset.range(0)
    if len(image_labels) != image_count:
        print('Sizes of images and labels are not match')
        return tf.data.Dataset.range(0)
    # Build the dataset from the function's own arguments. The previous version
    # read the notebook-level `images` list here, so the length checks above
    # validated one list while the data came from another.
    image_path_ds = tf.data.Dataset.from_tensor_slices((image_paths, image_labels))
    image_ds = image_path_ds.map(load_and_preprocess_from_path_label)
    # Shuffle buffer spans the whole input so every element can be drawn.
    image_ds = image_ds.shuffle(buffer_size=image_count).repeat(repeat_cnt).batch(batch_size)
    return image_ds

# Build the dataset with the default repeat count and batch size, then leave it
# as the cell's last expression so its repr is displayed.
image_ds = generate_image_dataset(image_paths=images, image_labels=image_labels)
image_ds

Prepare image dataset


<BatchDataset shapes: ((None, 192, 192, 3), (None,)), types: (tf.float32, tf.int32)>

In [25]:
# Inspect the value of the AUTOTUNE constant; it is shown as this cell's output.
tf.data.experimental.AUTOTUNE

-1