* Data pipeline for the image ETL

In [1557]:
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
tf.enable_eager_execution()
print(tf.VERSION)
AUTOTUNE = tf.contrib.data.AUTOTUNE

1.11.0


In [1558]:
import pathlib
data_root = pathlib.Path('./data/train/')
print(data_root)
for item in data_root.iterdir():
  print(item)

data/train
data/train/barramundi
data/train/snapper
data/train/bream


In [1559]:
import random
all_image_paths = list(data_root.glob('*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)

image_count = len(all_image_paths)
image_count


40

In [1560]:
label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
label_names

['barramundi', 'bream', 'snapper']

In [1561]:
label_to_index = dict((name, index) for index,name in enumerate(label_names))
label_to_index

{'barramundi': 0, 'bream': 1, 'snapper': 2}

In [1562]:
all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                    for path in all_image_paths]

print("First 10 labels indices: ", all_image_labels[:6])

First 10 labels indices:  [0, 2, 1, 2, 1, 2]


In [1563]:
# import IPython.display as display
# for n in range(2):
#   image_path = random.choice(all_image_paths)
#   display.display(display.Image(image_path))
# #   print(caption_image(image_path))
#   print()

The preprocessing include a resize action. Currently we are using squash method - ignore the ration and turn it to square. Please check [this article](https://forums.fast.ai/t/impact-of-image-resizing-on-model-training-time-and-performance/1980) to see the comparison amoung the different methods of resizing

In [1564]:
def preprocess_image(image):
  image = tf.image.decode_jpeg(image, channels=3)
  image = tf.image.resize_images(image, [128, 128])
  image /= 255.0  # normalize to [0,1] range

  return image

In [1565]:
def load_and_preprocess_image(path):
  image = tf.read_file(path)
  return preprocess_image(image)

Take a look what happen after the image processing.

In [1566]:
# import matplotlib.pyplot as plt

# image_path = all_image_paths[1]
# label = all_image_labels[1]

# load_and_preprocess_image(image_path)

# plt.imshow(load_and_preprocess_image(image_path))
# plt.grid(False)
# plt.title(label_names[label].title())
# plt.show()

Start preparing the dataset

In [1567]:
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)

In [1568]:
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
image_ds
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))
label_ds

<TensorSliceDataset shapes: (), types: tf.int64>

In [1569]:
# image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
# BATCH_SIZE = 32

# # Setting a shuffle buffer size as large as the dataset ensures that the data is
# # completely shuffled.
# ds = image_label_ds.shuffle(buffer_size=image_count)
# ds = ds.repeat()
# ds = ds.batch(BATCH_SIZE)
# # `prefetch` lets the dataset fetch batches, in the background while the model is training.
# ds = ds.prefetch(buffer_size=AUTOTUNE)
# ds

In [1570]:
train_images = []
train_labels = []
for image_path in all_image_paths:
    train_images.append(load_and_preprocess_image(image_path))
for label_item in tf.cast(all_image_labels, tf.int64):
    train_labels.append(label_item)
ds=tf.data.Dataset.from_tensor_slices((train_images, train_labels)).batch(4).repeat(10)
    

Now Make the model ready

In [1571]:
# BATCH_SIZE = 32

# # Setting a shuffle buffer size as large as the dataset ensures that the data is
# # completely shuffled.
# ds = image_label_ds.shuffle(buffer_size=image_count)
# ds = ds.repeat()
# ds = ds.batch(BATCH_SIZE)
# # `prefetch` lets the dataset fetch batches, in the background while the model is training.
# ds = ds.prefetch(buffer_size=AUTOTUNE)
# ds

In [1572]:
# from tensorflow import keras
# model = keras.Sequential([
#     keras.layers.Flatten(input_shape=(128, 128, 3)),
#     keras.layers.Dense(128, activation=tf.nn.relu),
#     keras.layers.Dense(3, activation=tf.nn.softmax)
# ])

IMG_SIZE = 32
model = keras.Sequential()
model.add(keras.layers.Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(96, kernel_size=(3,3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(3, activation = 'softmax'))

In [1573]:
model.compile(optimizer=tf.train.AdamOptimizer(), 
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

In [1574]:
model.fit(ds, epochs=50, steps_per_epoch=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x12515e790>

In [1575]:
test_loss, test_acc = model.evaluate(ds, steps=1)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

Test loss: 1.2113207578659058
Test accuracy: 0.0
