## NN performance of a new DataGenerator, considering both the classical Keras augmentation algorithm and albumentations.

**Keras needs to be installed first** (see https://github.com/tensorflow/models/issues/4668):

    conda install python=3.6
    pip install tensorflow
    pip install keras

In [None]:
import sys
sys.path.insert(0, '../') #to load FileDataGenerator

In [None]:
from FileDataGenerator import FileDataGen 
import numpy as np
import skimage.io
import os
from keras.preprocessing.image import ImageDataGenerator #In order to compare the new class
import time
import matplotlib.pyplot as plt

In [None]:
from keras import Input, optimizers, Model
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

In [None]:
def get_model(img_shape):
    """Build and compile a small CNN with a single sigmoid output.

    The network stacks four Conv2D(3x3, relu) + MaxPooling2D(2) stages with
    32, 64, 128 and 128 filters, followed by Dropout(0.5), Flatten, a
    512-unit relu Dense layer and a 1-unit sigmoid output layer.

    Args:
        img_shape: Shape handed to ``Input(shape=...)``; the notebook calls
            this with ``[150, 150, 3]``.

    Returns:
        The Keras ``Model``, compiled with RMSprop (lr=1e-4),
        binary cross-entropy loss and the ``'acc'`` metric.
    """
    inputs = Input(shape=img_shape)

    # Layers are created in the same order as a hand-unrolled version would,
    # so explicit names (Conv2D_1..4 / Maxpool_1..4) stay stable.
    features = inputs
    for stage, n_filters in enumerate((32, 64, 128, 128), start=1):
        features = Conv2D(filters=n_filters,
                          kernel_size=3,
                          activation='relu',
                          name='Conv2D_{}'.format(stage))(features)
        features = MaxPooling2D(pool_size=2,
                                name='Maxpool_{}'.format(stage))(features)

    features = Dropout(rate=0.5)(features)
    features = Flatten(name='Flatten')(features)
    features = Dense(units=512, activation='relu', name='Dense')(features)

    prediction = Dense(units=1, activation='sigmoid', name='Output')(features)

    model = Model(inputs, prediction)
    model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model
    

In [None]:
def plot_graphs(history):
    """Plot training vs. validation accuracy and loss, one figure each.

    Args:
        history: Mapping with the keys ``'acc'``, ``'val_acc'``, ``'loss'``
            and ``'val_loss'``, each holding one value per epoch (the
            notebook passes ``hist.history``).

    Returns:
        None. The figures are displayed with ``plt.show()``.
    """
    # Read every series up front so a missing key fails before any drawing.
    train_acc, valid_acc = history['acc'], history['val_acc']
    train_loss, valid_loss = history['loss'], history['val_loss']

    # Epochs are numbered from 1 on the x axis.
    x_axis = range(1, len(train_acc) + 1)

    panels = (
        (train_acc, 'Training acc', valid_acc, 'Validation acc',
         'Training and validation acc'),
        (train_loss, 'Training loss', valid_loss, 'Validation loss',
         'Training and validation loss'),
    )

    for position, panel in enumerate(panels):
        train_values, train_label, valid_values, valid_label, title = panel
        if position:
            # The first panel uses the current figure; later ones get a new one.
            plt.figure()
        plt.plot(x_axis, train_values, 'bo', label=train_label)
        plt.plot(x_axis, valid_values, 'b', label=valid_label)
        plt.title(title)
        plt.legend()

    plt.show()

In [None]:
# Root folders of the training and validation splits. Both are read with
# Read_Directory and flow_from_directory, which treat each sub-folder as a class.
# NOTE: machine-specific absolute paths -- adjust before running elsewhere.
DB_Train_Path = '/Users/dfreire/Dropbox/Datasets/small_dataset/train'
DB_Val_Path = '/Users/dfreire/Dropbox/Datasets/small_dataset/validation'

### Prepare Data

In [None]:
def Read_Directory(path):
    """Collect file paths and class labels from a one-folder-per-class tree.

    Every sub-directory of ``path`` is treated as a class; every entry inside
    it contributes one sample labelled with the sub-directory name.

    Args:
        path: Directory whose immediate sub-directories are the classes.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays of equal length:
        ``data`` holds the joined file paths and ``labels`` the matching
        class-folder names. Classes and files are returned in sorted order.

    Raises:
        OSError: If ``path`` (or a class folder) cannot be listed.
    """
    data = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for class_ in sorted(os.listdir(path)):
        class_dir = os.path.join(path, class_)
        if not os.path.isdir(class_dir):
            # Stray files beside the class folders (e.g. macOS .DS_Store)
            # are not classes and would make os.listdir(class_dir) fail.
            continue
        # List each class folder once so paths and labels cannot diverge.
        file_names = sorted(os.listdir(class_dir))
        data.extend(os.path.join(class_dir, name) for name in file_names)
        labels.extend([class_] * len(file_names))

    return np.array(data), np.array(labels)

In [None]:
# Build the (file path, class label) arrays for both splits; these feed the
# FileDataGen.flow_from_filelist calls further down.
train_data, train_labels = Read_Directory(DB_Train_Path)
val_data, val_labels = Read_Directory(DB_Val_Path)

In [None]:
# Sanity check: report how many files were found in each split.
print('Training samples: {}'.format(len(train_data)))
print('Validation samples: {}'.format(len(val_data)))

### Classical Keras ImageDataGenerator

In [None]:
# Baseline: stock Keras generators reading directly from the class folders.
# Only the training stream is augmented; both streams use rescale=1/255.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagene = train_datagen.flow_from_directory(
    DB_Train_Path,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation stream: no augmentation arguments, rescale only.
val_datagen = ImageDataGenerator(rescale=1./255)
val_datagene = val_datagen.flow_from_directory(
    DB_Val_Path,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
# NOTE(review): epochs/steps_per_epoch are cut down to 2/5 here (the trailing
# comments keep the 100/100 values), so timing and curves from this cell are
# not comparable with cells that run 100 x 100 -- confirm before comparing.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=2,  # 100,
    steps_per_epoch=5,  # 100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)

### New FileDataGen --> Reads images from a list of image files (and labels) instead of from a directory path

In [None]:
#### Augmentation -> Classical augmentation (same as above)

In [None]:
# Training generator: FileDataGen fed from the file/label arrays, using the
# same classical augmentation arguments as the ImageDataGenerator baseline.
train_datagen = FileDataGen(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagene = train_datagen.flow_from_filelist(
    train_data,
    train_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation generator: rescale only, no augmentation arguments.
val_datagen = FileDataGen(rescale=1./255)

# Must be bound to val_datagene: binding it to train_datagene would replace
# the training generator, so the fit call would train on validation data.
val_datagene = val_datagen.flow_from_filelist(
    val_data,
    val_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=100,
    steps_per_epoch=100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)

In [None]:
#### Augmentation -> Albumentations --> ShiftScaleRotate

In [None]:
# Training generator: FileDataGen with aug_mode='ShiftScaleRotate'.
train_datagen = FileDataGen(
    rescale=1./255,
    aug_mode='ShiftScaleRotate',
)
train_datagene = train_datagen.flow_from_filelist(
    train_data,
    train_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation generator: rescale only, no aug_mode.
val_datagen = FileDataGen(rescale=1./255)

# Must be bound to val_datagene: binding it to train_datagene would replace
# the training generator, so the fit call would train on validation data.
val_datagene = val_datagen.flow_from_filelist(
    val_data,
    val_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=100,
    steps_per_epoch=100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)

In [None]:
#### Augmentation -> Albumentations --> IAAPerspective

In [None]:
# Training generator: FileDataGen with aug_mode='IAAPerspective'.
train_datagen = FileDataGen(
    rescale=1./255,
    aug_mode='IAAPerspective',
)
train_datagene = train_datagen.flow_from_filelist(
    train_data,
    train_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation generator: rescale only, no aug_mode.
val_datagen = FileDataGen(rescale=1./255)

# Must be bound to val_datagene: binding it to train_datagene would replace
# the training generator, so the fit call would train on validation data.
val_datagene = val_datagen.flow_from_filelist(
    val_data,
    val_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=100,
    steps_per_epoch=100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)

In [None]:
#### Augmentation -> Albumentations --> MediumAug

In [None]:
# Training generator: FileDataGen with aug_mode='MediumAug'.
train_datagen = FileDataGen(
    rescale=1./255,
    aug_mode='MediumAug',
)
train_datagene = train_datagen.flow_from_filelist(
    train_data,
    train_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation generator: rescale only, no aug_mode.
val_datagen = FileDataGen(rescale=1./255)

# Must be bound to val_datagene: binding it to train_datagene would replace
# the training generator, so the fit call would train on validation data.
val_datagene = val_datagen.flow_from_filelist(
    val_data,
    val_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=100,
    steps_per_epoch=100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)

In [None]:
#### Augmentation -> Albumentations --> StrongAug

In [None]:
# Training generator: FileDataGen with aug_mode='StrongAug'.
train_datagen = FileDataGen(
    rescale=1./255,
    aug_mode='StrongAug',
)
train_datagene = train_datagen.flow_from_filelist(
    train_data,
    train_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Validation generator: rescale only, no aug_mode.
val_datagen = FileDataGen(rescale=1./255)

# Must be bound to val_datagene: binding it to train_datagene would replace
# the training generator, so the fit call would train on validation data.
val_datagene = val_datagen.flow_from_filelist(
    val_data,
    val_labels,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

In [None]:
# Train a fresh model on train_datagene, validate on val_datagene, and
# report the wall-clock time of the whole run.
start_time = time.time()
model = get_model([150, 150, 3])
hist = model.fit_generator(
    train_datagene,
    epochs=100,
    steps_per_epoch=100,
    validation_data=val_datagene,
    validation_steps=50,
)
print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Show the accuracy and loss curves recorded by the preceding fit call.
plot_graphs(hist.history)