# Data augmentation notebook

----------------------
This notebook outlines the primary steps for using data augmentation as part of the Keras workflow. The later parts of the notebook mostly
consist of checks and plots.

To do for later use:

- Test different augmentation strategies
- Include the augmentation into the general workflow

In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from matplotlib import pyplot as plt
from numpy import expand_dims
from skimage import io

In [None]:
# Load the images and create a train / test / validation split.
# Create categorical data for the masks (basically follow the semantic_segmentation notebook).

In [None]:
# Convert both image arrays to float32 and divide by 255.
# NOTE(review): assumes the inputs hold 8-bit pixel values (0-255), so the
# result lies in [0, 1] -- confirm against the loading step.
PIXEL_MAX = 255.0
X_train = X_train.astype("float32") / PIXEL_MAX
X_test = X_test.astype("float32") / PIXEL_MAX

In [None]:
# Data augmentation to improve the model.
# The settings are collected in a dict first so they are easy to inspect and tweak.
augmentation_options = {
    "rotation_range": 15,  # too much rotation may hurt accuracy, especially for small datasets
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "zoom_range": 0.1,
    "vertical_flip": False,
    "horizontal_flip": True,
    "fill_mode": "reflect",
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Iterator over augmented batches built from the training images and labels.
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)  # images to generate in a batch

In [None]:
# Sanity check that the augmentation pipeline works: pull one batch and
# inspect its shapes.
# The built-in next() is used instead of the iterator's .next() method because
# it only relies on the iterator protocol; the .next() method is not available
# on the iterators of every Keras release.
x = next(train_generator)
images, labels = x
print(images.shape)  # Images
print(labels.shape)  # Labels
print(images.shape[0])  # size of the first axis of the image batch

In [None]:
# Draw a fresh augmented batch and display its first image with its label.
# Built-in next() only needs the iterator protocol, so it does not depend on
# the iterator exposing a .next() method.
x = next(train_generator)
image = x[0][0]
# Title is the index of the largest entry of the first label vector
# (presumably one-hot encoded labels -- confirm against the label preparation).
title = np.argmax(x[1][0])
plt.figure(figsize=(1.5, 1.5))
plt.suptitle(title, fontsize=12)
plt.imshow(image)
plt.show()

In [None]:
# When training from a generator, the number of samples processed in each
# epoch is batch_size * steps_per_epoch. steps_per_epoch should therefore
# typically equal the number of unique samples in our dataset divided by the
# batch size.

batch_size = 32  # Match this to the batch_size from the generator
num_train_samples = len(X_train)
# Floor division: a trailing partial batch is not counted as a step.
steps_per_epoch = num_train_samples // batch_size
samples_per_epoch = steps_per_epoch * batch_size

print("Steps per epoch = ", steps_per_epoch)
print("Total data per epoch = ", samples_per_epoch)

In [None]:
# Train using model.fit (older versions of Keras use model.fit_generator).
#
# Disabled variant that also draws the validation data from a generator
# (kept as comments instead of a bare string literal, which would be a
# pointless no-op expression statement):
#
# history1 = model1.fit(
#         train_generator,
#         steps_per_epoch = steps_per_epoch,
#         epochs = 50,
#         validation_data = validation_generator,  # validation data generator
#         validation_steps=val_steps_per_epoch)

# Fit on the augmented batches from train_generator; validation uses the
# (X_test, y_test) arrays directly, without a generator.
history = model1.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=50,
    validation_data=(X_test, y_test),
)

In [None]:
# Accuracy with and without augmentation, keyed by the number of images.
import pandas as pd

# NOTE(review): the values look like accuracy percentages recorded from
# earlier runs -- confirm their source.
without_aug = {1000: 36.4, 2000: 45.2, 5000: 51.7, 10000: 58.4, 25000: 69.4, 50000: 77.3}
with_aug = {1000: 44, 2000: 48.4, 5000: 54.7, 10000: 60.8, 25000: 70.7, 50000: 78.4}

# Each dict becomes one row; transposing turns the dict keys into the index
# and the two series into columns.
df = pd.DataFrame([without_aug, with_aug])
df = df.T
df.reset_index(inplace=True)  # move the image counts from the index into a column

df.columns = ['num_images', 'without_aug', 'with_aug']
# head must be *called*: printing the bare attribute shows the bound-method
# repr instead of a preview of the first rows.
print(df.head())

df.plot(x='num_images', y=['without_aug', 'with_aug'], kind='line')

In [None]:
#####################################################################
# Plot the training and validation loss and accuracy at each epoch.
# If validation loss is lower than training loss this could be because we are
# applying regularization (Dropout) during training which won't be applied
# during validation.
# Also, training loss is measured during each epoch while validation is done
# after the epoch.

def _plot_history_curves(epochs, train_values, val_values,
                         train_label, val_label, title, ylabel):
    """Plot one training curve and one validation curve against the epochs."""
    plt.plot(epochs, train_values, 'y', label=train_label)
    plt.plot(epochs, val_values, 'r', label=val_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


# Loss curves; the x axis is numbered from 1, one entry per recorded loss value.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
_plot_history_curves(epochs, loss, val_loss,
                     'Training loss', 'Validation loss',
                     'Training and validation loss', 'Loss')

# Accuracy curves, drawn over the same epoch axis.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
_plot_history_curves(epochs, acc, val_acc,
                     'Training acc', 'Validation acc',
                     'Training and validation accuracy', 'Accuracy')

In [None]:
# Alternative settings

# Construct an instance of the ImageDataGenerator class and pass the
# augmentation parameters through the constructor.
datagen = ImageDataGenerator(
    rotation_range=45,  # degree range for random rotations
    # Shift given in pixels. NOTE(review): the Keras docs describe a 1-D list
    # here as a set of values to sample from rather than a min/max range -- confirm.
    width_shift_range=[-20, 20],
    height_shift_range=0.2,  # a float below 1 is a shift given as a fraction of the height
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=(0.5, 1.5),  # values less than 1 darken, greater than 1 brighten
    fill_mode='constant',
)


# Once data is augmented, you can use it to fit a model via model.fit_generator (old Keras) or model.fit (newer versions)