In [1]:
""" NASNet training script. Validation of the script.
    
    First large-scale experiment with cutout.
    Preliminary step towards applying it to
    NASNet architectures to try to achieve SOTA results.

    Adapted from keras example cifar10_cnn.py
    Train NASNet-CIFAR on the CIFAR10 small images dataset.
"""

from __future__ import print_function

import os

import numpy as np
import keras.backend as K
from keras.callbacks import CSVLogger
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
from keras.datasets import cifar10
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

from cutout_eraser import get_random_eraser
from nasnet import NASNetCIFAR, preprocess_input

Using TensorFlow backend.


In [2]:
def e_swish_2(x):
    """Return the element-wise maximum of ``x*sigmoid(x)`` and ``x*(2 - sigmoid(x))``.

    Since sigmoid(x) lies strictly between 0 and 1, the second term is the
    larger one for positive x and the first term is the larger one for
    negative x.

    # Arguments
        x: backend tensor the activation is applied to.

    # Returns
        Tensor produced by ``K.maximum`` over the two branches.
    """
    gate = K.sigmoid(x)
    lower_branch = x * gate
    upper_branch = x * (2 - gate)
    return K.maximum(lower_branch, upper_branch)

In [3]:
# Callbacks for the model
logs_dir = "logs/"
# CSVLogger and ModelCheckpoint both write into logs_dir; create it up front
# so a fresh checkout does not fail as soon as training starts.
if not os.path.isdir(logs_dir):
    os.makedirs(logs_dir)

weights_file = logs_dir+'NASNet-CIFAR-10.h5'

# Scale the learning rate by sqrt(0.5) after 5 epochs without improvement of
# the callback's default monitored quantity, never going below 0.5e-5.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.5), cooldown=0, patience=5, min_lr=0.5e-5)

# Append the per-epoch metrics to a CSV file.
csv_logger = CSVLogger(logs_dir+'NASNet-CIFAR-10.csv')

# Keep only the weights of the epoch with the highest 'val_predictions_acc'.
# NOTE(review): this name assumes the main output layer of the model is called
# 'predictions' -- verify against the nasnet module.
model_checkpoint = ModelCheckpoint(weights_file, monitor='val_predictions_acc', save_best_only=True,
                                   save_weights_only=True, mode='max')

In [4]:
# Input geometry: CIFAR10 images are 32x32 RGB.
img_rows = img_cols = 32
img_channels = 3

# Training configuration
nb_classes = 10
batch_size = 128
data_augmentation = True
# Short run for now (original author note: "200", "should be 600").
nb_epoch = 10

In [5]:
# Data retrieval and preprocessing
# CIFAR10 is delivered shuffled and already split into train and test sets.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Turn the integer class vectors into binary class matrices.
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)

# Cast the images to float32, then run them through the NASNet input preprocessing.
X_train = preprocess_input(X_train.astype('float32'))
X_test = preprocess_input(X_test.astype('float32'))

In [6]:
# Instantiate the model
# The auxiliary branch has to be enabled during training for NASNet to train correctly.
model = NASNetCIFAR(
    input_shape=(img_rows, img_cols, img_channels),
    act="relu",
    use_auxiliary_branch=True,
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
stem_conv1 (Conv2D)             (None, 32, 32, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn1 (BatchNormalization)   (None, 32, 32, 32)   128         stem_conv1[0][0]                 
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 32)   0           stem_bn1[0][0]                   
__________________________________________________________________________________________________
normal_con

In [7]:
# Build the optimizer and compile the model.
# One loss per model output; the second one contributes with weight 0.4
# (presumably the auxiliary head -- confirm against the output order of the model).
optimizer = Adam(lr=1e-3, clipnorm=5)
model.compile(
    optimizer=optimizer,
    loss=['categorical_crossentropy', 'categorical_crossentropy'],
    loss_weights=[1.0, 0.4],
    metrics=['accuracy'],
)

In [8]:
# Data augmentation: cutout + shifts + horizontal flips.
#
# Cutout settings (with 50% probability, erase a square patch):
#   - p = 0.5           -> erase with 50% probability
#   - s_l = s_h = 0.5   -> fixed patch size
#   - r_1 = r_2 = 1     -> fixed aspect ratio of 1, i.e. squares, not rectangles
#   - v_l, v_h          -> range of the fill value; taken from the data itself
#                          because X_train has already gone through
#                          preprocess_input and is no longer in [0, 255]
# NOTE(review): the original note claims 16*16 patches out of 32*32. If the
# eraser interprets s_l/s_h as a fraction of the image AREA, 0.5 yields roughly
# 22*22 patches and 0.25 would give 16*16 -- confirm against cutout_eraser.
eraser = get_random_eraser(p=0.5, s_l=0.5, s_h=0.5, r_1=1, r_2=1,
                           v_l=float(X_train.min()), v_h=float(X_train.max()))

print('Using real-time data augmentation.')
# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    fill_mode="constant",  # fill the points outside the input boundaries with a constant
    rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.125,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.125,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False,  # no vertical flips
    preprocessing_function=eraser  # apply cutout to every generated image
)

# datagen.fit(X_train) is intentionally not called: it only computes the
# statistics needed for featurewise centering / std normalization / ZCA
# whitening, all of which are disabled above, and it would allocate another
# full copy of the training set.

Using real-time data augmentation.


In [9]:
# wrap the ImageDataGenerator to yield two label batches [y, y] for each input batch X
def image_generator(image_datagenerator, batch_size, x=None, y=None):
    """Yield ``(X, [y, y])`` batches forever for a two-output model.

    Wraps ``image_datagenerator.flow(...)`` and duplicates the label batch so
    that each of the model's two outputs receives the same targets.

    # Arguments
        image_datagenerator: object exposing ``flow(x, y, batch_size=...)``
            that returns an iterator of ``(X, y)`` batches (e.g. a Keras
            ``ImageDataGenerator``).
        batch_size: number of samples per generated batch.
        x: input samples; defaults to the module-level ``X_train``.
        y: targets; defaults to the module-level ``Y_train``.

    # Yields
        Tuple ``(X_batch, [y_batch, y_batch])``.
    """
    # Fall back to the notebook-level training data so existing calls of the
    # form image_generator(datagen, batch_size) keep working.
    if x is None:
        x = X_train
    if y is None:
        y = Y_train

    # Use the generator that was passed in, not whatever global happens to exist.
    iterator = image_datagenerator.flow(x, y, batch_size=batch_size)

    while True:
        X_batch, y_batch = next(iterator)  # get the next batch
        yield X_batch, [y_batch, y_batch]  # duplicate the labels for each batch

In [10]:
# Train on the batches produced by image_generator (augmented, labels duplicated).
# Validation runs directly on the test arrays, with the labels duplicated the
# same way to match the model's two outputs.
model.fit_generator(
    image_generator(datagen, batch_size),
    steps_per_epoch=X_train.shape[0] // batch_size,
    epochs=nb_epoch,
    verbose=1,
    callbacks=[lr_reducer, csv_logger, model_checkpoint],
    validation_data=(X_test, [Y_test, Y_test]),
)

Epoch 1/10


MemoryError: 

In [None]:
# Evaluate on the test set (one copy of the labels per model output) and
# report every returned score next to its metric name.
scores = model.evaluate(X_test, [Y_test, Y_test], batch_size=batch_size)
for metric_name, score in zip(model.metrics_names, scores):
    print("%s : %0.4f" % (metric_name, score))

In [None]:
# Save the weights of the model as it stands after training into logs_dir.
# NOTE(review): the file name has no extension, unlike the '.h5' checkpoint
# file -- confirm this is intended.
model.save_weights(logs_dir+'nasnet_cutout_first_trial')