# Sample Script - Data Augmentation

In [2]:
import keras
import os
import numpy as np

#from sklearn.preprocessing import LabelEncoder
#from keras.utils import to_categorical

from sklearn.preprocessing import LabelBinarizer

from keras.preprocessing.image import ImageDataGenerator

from MyUtils.Dataset import DatasetLoader
from MyUtils.ResnetBuilder import ResnetBuilder

from keras.optimizers import Adam

Using TensorFlow backend.


In [3]:
# Dataset location. The original machine-specific path stays as the default;
# set TINY_IMAGENET_ROOT_DIR to run the notebook on another machine.
ROOT_DIR = os.environ.get('TINY_IMAGENET_ROOT_DIR',
                          '/Users/abalaji/mydata/DataScience/TinyImageNet')
IMAGE_ROOT_DIR = os.path.join(ROOT_DIR, 'tiny-imagenet-200')

# Directory that contains the MyUtils folder (and its SupportFiles sub-folder).
# Override with TINY_IMAGENET_UTILS_ROOT_DIR.
UTILS_ROOT_DIR = os.environ.get(
    'TINY_IMAGENET_UTILS_ROOT_DIR',
    '/Users/abalaji/Documents/GitProjects/Projects/Images_ML/TinyImageNet')
MY_UTILS_DIR = os.path.join(UTILS_ROOT_DIR, 'MyUtils')
SUPPORT_FILES_DIR = os.path.join(MY_UTILS_DIR, 'SupportFiles')

# Image height/width/channels are not configured here; they are read from the
# shape of the loaded training array further down.

## for training: how many classes to load for both the training and validation sets
NUM_CLASSES_TO_TRAIN_AND_VAL = 4

# Hyperparameters
batch_size = 50
epochs = 50

In [13]:
## Build the loader and tell it where its support files live.
dataLoader = DatasetLoader(IMAGE_ROOT_DIR)
dataLoader.set_support_files_dir(SUPPORT_FILES_DIR)

## Every class id together with its name.
class_id_arr, class_names_arr = dataLoader.get_class_info()

## Training and validation images are loaded for the same leading
## NUM_CLASSES_TO_TRAIN_AND_VAL class ids, without resizing.
training_images, training_label_ids = dataLoader.load_training_images_for_a_list_classes(
    class_id_arr[:NUM_CLASSES_TO_TRAIN_AND_VAL],
    resize_flag=False,
)
val_images, val_label_ids = dataLoader.load_validation_images_for_a_list_classes(
    class_id_arr[:NUM_CLASSES_TO_TRAIN_AND_VAL],
    resize_flag=False,
)

# Shuffle images and labels with one shared permutation so they stay aligned.
# The fixed seed makes the ordering repeatable.
np.random.seed(123)
shuffle_index = np.random.permutation(len(training_label_ids))
training_images, training_label_ids = (
    training_images[shuffle_index],
    training_label_ids[shuffle_index],
)

In [11]:
# Fit the binarizer on the training label ids only, then apply that same
# fitted encoder to both splits so their columns line up.
# NOTE(review): scikit-learn's LabelBinarizer emits a single column for a
# two-class problem -- confirm the class count stays above 2.
lb = LabelBinarizer()
training_lb = lb.fit(training_label_ids)

training_labels, val_labels = (
    training_lb.transform(label_ids)
    for label_ids in (training_label_ids, val_label_ids)
)


In [12]:
# Peek at the first three encoded training labels.
training_labels[:3]

array([[0, 0, 0, 1],
       [0, 0, 0, 1],
       [1, 0, 0, 0]])

## Set up the image data generator (augmentation)

In [16]:
# Tip: run `ImageDataGenerator?` in a scratch cell to browse the available
# augmentation options; the lookup is not part of the analysis itself.

In [17]:
# Random augmentations applied to training batches.
augmentation_options = dict(
    rotation_range=18,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)
aug = ImageDataGenerator(**augmentation_options)

In [14]:
# Read the image dimensions from axes 1-3 of the loaded training array.
# Assumes axis 0 indexes the samples -- TODO confirm against DatasetLoader.
image_array_shape = training_images.shape
img_height = image_array_shape[1]
img_width = image_array_shape[2]
num_channel = image_array_shape[3]


In [15]:

# One output unit per class that was loaded.
num_classes = NUM_CLASSES_TO_TRAIN_AND_VAL

# The shape is passed as (num_channels, rows, cols).
# NOTE(review): presumably ResnetBuilder reorders this for the backend; the
# captured model summary shows a (None, 64, 64, 3) input -- confirm.
input_shape = (num_channel, img_height, img_width)

model = ResnetBuilder.build_resnet_50(input_shape, num_classes)

In [17]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv2d_54 (Conv2D)              (None, 32, 32, 64)   9472        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_50 (BatchNo (None, 32, 32, 64)   256         conv2d_54[0][0]                  
__________________________________________________________________________________________________
activation_50 (Activation)      (None, 32, 32, 64)   0           batch_normalization_50[0][0]     
__________________________________________________________________________________________________
max_poolin

In [18]:
from keras.utils import plot_model

In [11]:
# Tip: run `plot_model?` in a scratch cell to see the plotting options; the
# lookup is not part of the analysis itself.

In [19]:
# Write a diagram of the network, including layer shapes, to res_net.png.
plot_model(
    model,
    show_shapes=True,
    to_file='res_net.png',
)

In [20]:
## Callback class used to save the best model
from keras.callbacks import ModelCheckpoint

# Loss function, optimizer and reported metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# With save_best_only=True the file is only written when the monitored
# validation accuracy improves.
checkpoint = ModelCheckpoint(
    'tiny_best_model.hdf5',
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# Callback list intended for the training call.
callbacks = [checkpoint]

## Train the network

In [21]:
# Short aliases for the arrays handed to the training call.
x_train, y_train = training_images, training_labels
x_val, y_val = val_images, val_labels

In [23]:
# Tip: run `model.fit_generator?` in a scratch cell to see the training
# options; the lookup is not part of the analysis itself.

In [22]:
# Train on augmented batches drawn from the training set; the validation
# arrays are passed as-is. H holds the value returned by fit_generator.
H = model.fit_generator(
    # Use the configured batch_size instead of repeating the literal 50.
    aug.flow(x_train, y_train, batch_size=batch_size),
    # Whole batches per epoch; a trailing partial batch is not counted.
    steps_per_epoch=len(y_train) // batch_size,
    validation_data=(x_val, y_val),
    epochs=epochs,
    # Pass the callback list so the ModelCheckpoint actually runs; without it
    # the best model is never written to disk.
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
 2/40 [>.............................] - ETA: 1:09 - loss: 4.7578 - acc: 0.5000

KeyboardInterrupt: 