<small>
Copyright (c) 2017 Andrew Glassner

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
</small>



# Deep Learning From Basics to Practice
## by Andrew Glassner, https://dlbasics.com, http://glassner.com
------
## Chapter 23: Keras
### Notebook 12: Synthetic Data

In [None]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.constraints import maxnorm
from keras.optimizers import Adam, SGD, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping
import numpy as np
import matplotlib.pyplot as plt

from keras.utils import np_utils
import numpy as np

# Seed NumPy's global random generator so repeated runs draw the same numbers.
random_seed = 42
np.random.seed(random_seed)

# The synthetic images are square, so width and height share one size.
image_size = 64
image_height = image_size
image_width = image_size
number_of_classes = 5

# Use (rows, columns, channels) ordering for every image tensor.
from keras import backend as keras_backend
keras_backend.set_image_data_format('channels_last')

In [None]:
# Build the File_Helper used below for saving and loading files.

save_files = True

import os
import sys
import inspect

# Locate the directory holding this code, then put its parent on the
# import path so that DLBasics_Utilities can be imported from there.
current_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0, os.path.dirname(current_dir))

from DLBasics_Utilities import File_Helper
file_helper = File_Helper(save_files)

In [None]:
# Load MNIST so we can show a block of transformed versions of one digit.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add a trailing channel axis and convert to the backend's float type.
X_train = keras_backend.cast_to_floatx(X_train.reshape(X_train.shape[0], 28, 28, 1))
X_test = keras_backend.cast_to_floatx(X_test.reshape(X_test.shape[0], 28, 28, 1))

# Keep only the image at index 5 (and its label), repeated 8 times.
X_train = np.reshape([X_train[5]] * 8, (8, 28, 28, 1))
y_train = [y_train[5]] * 8

image_generator = ImageDataGenerator(rotation_range=100, horizontal_flip=True)
image_generator.fit(X_train)

# One batch is all we need: 8 randomly transformed copies of the same digit.
X_batch, y_batch = next(image_generator.flow(X_train, y_train, batch_size=8, seed=42))
for i, transformed in enumerate(X_batch[:8]):
    plt.subplot(2, 4, i+1)
    plt.imshow(transformed.reshape(28, 28), cmap='gray')
    plt.xticks([],[])
    plt.yticks([],[])
plt.tight_layout()
file_helper.save_figure('MNIST-2-IDG')
plt.show()

In [None]:
def plot_accuracy_and_loss(history, plot_title, filename):
    """Plot train/validation accuracy and loss side by side, then save and show.

    Args:
        history: object whose ``history`` attribute is a dict mapping metric
            names to per-epoch lists. It must contain 'loss' and 'val_loss',
            plus accuracy under either 'acc'/'val_acc' or
            'accuracy'/'val_accuracy'.
        plot_title: text placed in front of ', Accuracy' and ', Loss' in the
            two panel titles.
        filename: name handed to ``file_helper.save_figure``.

    Raises:
        KeyError: if none of the expected metric names is present.
    """
    metrics = history.history

    # The accuracy metric is recorded as 'acc' by some Keras releases and as
    # 'accuracy' by others, so accept whichever one this run produced.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    # (history key, y-axis label, title suffix, legend position) per panel.
    panels = (
        (acc_key, 'accuracy', ', Accuracy', 'lower left'),
        ('loss', 'loss', ', Loss', 'upper left'),
    )
    xs = range(len(metrics[acc_key]))

    plt.figure(figsize=(10,3))
    for position, (key, y_label, title_suffix, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(xs, metrics[key], label='train')
        # Validation curves live under the same name with a 'val_' prefix.
        plt.plot(xs, metrics['val_' + key], label='test')
        plt.legend(loc=legend_loc)
        plt.xlabel('epochs')
        plt.ylabel(y_label)
        plt.title(plot_title + title_suffix)

    file_helper.save_figure(filename)
    plt.show()

In [None]:
# Make synthetic data. Use random numbers to move the
# points around a little so they're all different.
# For some reason I thought of this as "wubbling."
import numpy as np
from numpy.random import randint, uniform
from keras.preprocessing.image import img_to_array
import cv2
import math
    
def makeSyntheticImage():
    """Draw one randomly chosen, randomly jittered shape on a black canvas.

    The class is picked with ``randint(0, number_of_classes)``:
    0 = circle, 1 = plus sign, 2 = three vertical lines, 3 = Z, and any
    other value = U. Every coordinate is passed through ``wub`` so that no
    two images are identical.

    The line-based shapes use literal pixel coordinates (10..60) laid out
    for the 64-pixel canvas; only the circle is derived from ``image_size``.

    Returns:
        (sample, img_type): ``sample`` is an array of shape
        (image_size, image_size, 1) holding the first colour channel
        divided by 255.0; ``img_type`` is the integer class drawn.
    """
    white = (255, 255, 255)
    thickness = 2

    # Create a black image
    half_size = int(image_size/2.0)
    img = np.zeros((image_size, image_size, 3), np.uint8)
    img_type = randint(0, number_of_classes)

    # NOTE: the order of the uniform()/wub() calls below determines which
    # random numbers each coordinate receives, so keep it stable.
    if img_type == 0: # circle
        # Truncate the radius to an int *before* jittering it: wub() feeds
        # its argument to randint, which is documented for integer bounds.
        radius = int(half_size * uniform(.6, .9))
        center = (wub(half_size), wub(half_size))
        cv2.circle(img, center, int(wub(radius)), white, thickness)
    elif img_type == 1: # plus sign
        cv2.line(img, (wub(32), wub(10)), (wub(32), wub(54)), white, thickness)
        # NOTE(review): this bar ends at x=60 while the vertical one spans
        # 10..54 -- confirm the asymmetry is intended.
        cv2.line(img, (wub(10), wub(32)), (wub(60), wub(32)), white, thickness)
    elif img_type == 2: # three  lines
        for column in (15, 33, 51):
            cv2.line(img, (wub(column), wub(10)), (wub(column), wub(54)),
                     white, thickness)
    elif img_type == 3: # Z
        # The two inner corners are jittered once and shared by adjacent
        # strokes so the strokes stay connected.
        x1 = wub(54)
        y1 = wub(10)
        x2 = wub(10)
        y2 = wub(54)
        cv2.line(img, (wub(10), wub(10)), (x1, y1), white, thickness)
        cv2.line(img, (x1, y1), (x2, y2), white, thickness)
        cv2.line(img, (x2, y2), (wub(54), wub(54)), white, thickness)
    else: # U
        # Same idea as the Z: the two bottom corners are shared.
        x1 = wub(10)
        y1 = wub(54)
        x2 = wub(54)
        y2 = wub(54)
        cv2.line(img, (wub(10), wub(10)), (x1, y1), white, thickness)
        cv2.line(img, (x1, y1), (x2, y2), white, thickness)
        cv2.line(img, (x2, y2), (wub(54), wub(10)), white, thickness)

    # All three channels hold the same drawing, so keep just the first one
    # (as a trailing axis of length 1) and scale it by 1/255.
    sample = img_to_array(img)[:, :, :1] / 255.0
    return (sample, img_type)

# A "wubble" is a small symmetrical wobble: nudge a value by a random
# whole number of up to 5 in either direction.
def wub(p):
    """Return a random integer drawn from p-5 through p+5, inclusive."""
    spread = 5
    lowest = p - spread
    highest = p + spread
    # randint excludes its upper bound, so add 1 to make it reachable.
    return randint(lowest, highest + 1)

In [None]:
# Show a grid of random synthetic images
np.random.seed(5)
num_rows = 5
num_columns = 10
plt.figure(figsize=(10,6))
# Subplots are numbered row by row starting at 1, so one flat loop over
# every cell visits them in the same order as a nested row/column loop.
for index in range(num_rows * num_columns):
    plt.subplot(num_rows, num_columns, 1 + index)
    (img, label) = makeSyntheticImage()
    # Drop the trailing channel axis for imshow; use image_size rather than
    # a literal so this keeps working if the canvas size is changed.
    img = img.reshape(image_size, image_size)
    plt.imshow(img, cmap=plt.get_cmap('gray'))
    plt.xticks([],[])
    plt.yticks([],[])
plt.tight_layout()
file_helper.save_figure('synthetic-demo')
plt.show()

In [None]:
# build the ImageDataGenerator
# adapted from
#   https://www.kaggle.com/hexietufts/ultrasound-nerve-segmentation/easy-to-use-keras-imagedatagenerator
    
import numpy as np
import os
from numpy.random import randint, uniform
import cv2
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from keras.preprocessing.image import Iterator, NumpyArrayIterator

class SyntheticImageGenerator(ImageDataGenerator):
    """ImageDataGenerator whose flow() hands back a SyntheticIterator.

    The only behaviour changed relative to the base class is ``flow``.
    """

    def __init__(self, batch_size=1, shuffle=True, seed=None):
        """Create the generator.

        Args:
            batch_size: stored on the instance as ``self.batch_size``.
            shuffle: accepted so existing callers keep working; not used here.
            seed: accepted so existing callers keep working; not used here.
        """
        self.batch_size = batch_size
        # Initialise the base class with its own defaults. These arguments
        # must NOT be forwarded positionally: ImageDataGenerator's leading
        # parameters are normalization switches, not batch/shuffle settings.
        super(SyntheticImageGenerator, self).__init__()

    # this is why we're here - override the old flow()
    def flow(self, X, y=None, batch_size=32, shuffle=True, seed=None,
             save_to_dir=None, save_prefix='', save_format='jpeg'):
        """Return a SyntheticIterator built from the given arguments.

        Every argument is forwarded unchanged, together with this generator
        itself, to the SyntheticIterator constructor.
        """
        return SyntheticIterator(
            X, y, self,
            batch_size=batch_size, shuffle=shuffle, seed=seed,
            save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)
    
class SyntheticIterator(NumpyArrayIterator):
    """Batch iterator that draws a fresh synthetic image for every sample.

    The contents of X and y are never read. X supplies the number of samples
    and the per-sample shape; y (when given) supplies the per-label shape.
    """

    def __init__(self, X, y, image_data_generator,
                 batch_size=32, shuffle=False, seed=None,
                 save_to_dir=None, save_prefix='', save_format='jpeg'):
        """Store the template arrays and the save options.

        Args:
            X: array-like whose first axis is the sample count and whose
                remaining axes give the shape of one sample.
            y: array-like with the same length as X, or None. When None,
                labels are shaped (number_of_classes,).
            image_data_generator: the generator that created this iterator;
                it is stored but not used by ``next``.
            batch_size, shuffle, seed: forwarded to the parent initializer.
            save_to_dir: if truthy, every generated image is also written
                into this directory.
            save_prefix, save_format: used to build the saved file names.

        Raises:
            Exception: if y is given and its length differs from X's.
        """
        # flow() allows y=None, so only compare lengths when y was supplied.
        if y is not None and len(X) != len(y):
            raise Exception('X (images tensor) and y (labels) '
                            'should have the same length. '
                            'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))
        # Coerce to arrays so .shape is available even for plain lists.
        self.X = np.asarray(X)
        self.y = None if y is None else np.asarray(y)
        self.image_data_generator = image_data_generator
        self.save_to_dir = save_to_dir
        self.save_prefix = save_prefix
        self.save_format = save_format
        # Starting the super() lookup at NumpyArrayIterator skips that
        # class's own __init__ and runs the next initializer in the MRO
        # (presumably keras' Iterator -- TODO confirm).
        super(NumpyArrayIterator, self).__init__(self.X.shape[0], batch_size, shuffle, seed)

    def next(self):
        """Return one (batch_x, batch_y) pair of newly drawn images and labels.

        batch_y holds one-hot rows produced with np_utils.to_categorical.
        """
        # for python 2.x.
        # Keeps under lock only the mechanism which advances
        # the indexing of each batch
        # see http://anandology.com/blog/using-iterators-and-generators/
        # NOTE(review): this unpacking assumes index_generator yields
        # (index_array, current_index, current_batch_size) -- confirm
        # against the installed Keras version.
        with self.lock:
            index_array, current_index, current_batch_size = next(self.index_generator)
        # The transformation of images is not under thread lock so it can be done in parallel
        if self.y is None:
            label_shape = (number_of_classes,)
        else:
            label_shape = tuple(self.y.shape[1:])
        batch_x = np.zeros((current_batch_size,) + tuple(self.X.shape[1:]))
        batch_y = np.zeros((current_batch_size,) + label_shape)
        # The index values are irrelevant (nothing is read from X or y);
        # index_array only says how many samples to produce.
        for i, _ in enumerate(index_array):
            (sample, label) = self._drawImage()
            batch_x[i] = sample
            batch_y[i] = np_utils.to_categorical([label], number_of_classes)

        if self.save_to_dir:
            for i in range(current_batch_size):
                img = array_to_img(batch_x[i], scale=True)
                # A random number in the file name makes collisions between
                # files with the same index unlikely.
                fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
                                                                  index=current_index + i,
                                                                  hash=np.random.randint(10000),
                                                                  format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))
        return batch_x, batch_y

    def _drawImage(self):
        """Return one (sample, label) pair from makeSyntheticImage."""
        return makeSyntheticImage()

In [None]:
# Build a fixed dataset so that we have something to test against when
# fitting, which gives us a validation accuracy and loss.
def make_dataset(number_of_images):
    """Return (X, y) holding number_of_images synthetic images and labels.

    X has shape (number_of_images, image_height, image_width, 1) and y is a
    uint8 vector of integer class labels, one per image.
    """
    labels = np.zeros(shape=(number_of_images), dtype='uint8')
    images = np.zeros(shape=(number_of_images, image_height, image_width, 1))
    # Fill both arrays in step, one freshly drawn image per slot.
    for slot in range(number_of_images):
        images[slot], labels[slot] = makeSyntheticImage()
    return (images, labels)

In [None]:
# A little routine to set up and run the learning process
def generator_run_and_report(model, plot_title, filename, epochs, batch_size, verbosity, steps_per_epoch):
    """Train model on generated synthetic images, plot the curves, return the history.

    Args:
        model: compiled model exposing ``fit_generator``.
        plot_title: title prefix passed to plot_accuracy_and_loss.
        filename: figure name passed to plot_accuracy_and_loss.
        epochs: number of training epochs.
        batch_size: images per training batch; the validation set holds
            10 * batch_size images.
        verbosity: passed to fit_generator as ``verbose``.
        steps_per_epoch: batches drawn from the generator per epoch.

    Returns:
        The history object returned by ``model.fit_generator``.
    """
    np.random.seed(random_seed)

    # make validation data
    (X_test, y_test) = make_dataset(10*batch_size)
    y_test = np_utils.to_categorical(y_test, number_of_classes)

    # The iterator writes every generated image into this directory, so make
    # sure it exists before training starts; otherwise the first save would
    # fail on a clean checkout.
    flow_dir = "flowdir"
    if not os.path.isdir(flow_dir):
        os.makedirs(flow_dir)

    datagen = SyntheticImageGenerator()
    # The training arrays are placeholders: the iterator only uses them for
    # their sizes and shapes and draws new images for every batch.
    dummy_X_train = np.zeros(shape=(batch_size, image_height, image_width, 1))
    dummy_y_train = np.zeros(shape=(batch_size, number_of_classes))
    history = model.fit_generator(datagen.flow(dummy_X_train, dummy_y_train, batch_size=batch_size,
                                               save_to_dir=flow_dir),
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=epochs, verbose=verbosity,
                                  validation_data=(X_test, y_test)
                                 )
    plot_accuracy_and_loss(history, plot_title, filename)
    return history

In [None]:
# Assemble, compile, and return our little CNN.
def make_model():
    """Return a compiled Sequential model: one conv layer, flatten, softmax.

    The input is a single-channel image of image_height by image_width and
    the output has number_of_classes units.
    """
    convolution = Conv2D(32, (3, 3), activation='relu', padding='same',
                         input_shape=(image_height, image_width, 1))
    classifier = Dense(number_of_classes, activation='softmax')

    model = Sequential()
    for layer in (convolution, Flatten(), classifier):
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [None]:
# Reseed before building the model so every run starts from the same state.
np.random.seed(random_seed)

model = make_model()

# steps_per_epoch / batch_size apparently has to be an integer (perhaps a
# power of 2?), otherwise we get a warning.
history = generator_run_and_report(
    model=model,
    plot_title='Synthetic CNN',
    filename='Synthetic-CNN',
    epochs=100,
    batch_size=128,
    verbosity=1,
    steps_per_epoch=256)