In [0]:
import keras

from keras.datasets import cifar10
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Concatenate, GlobalAveragePooling2D
from keras.optimizers import Adam, SGD
from keras import regularizers, initializers
from keras.layers.advanced_activations import LeakyReLU, ReLU, Softmax
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.merge import concatenate
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from keras import backend as k

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

k.tensorflow_backend.set_session(tf.Session(config=config))

import os

import pandas as pd
import numpy as np

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image
from matplotlib.pyplot import imshow, imsave
import imageio

Using TensorFlow backend.


In [0]:
batch_size = 128
num_classes = 10
epochs = 100

In [0]:
def conv_bn_relu(input, block_no):
  ch_in = 16*(2**block_no)
  c1 = Conv2D(ch_in, (3,3),
              kernel_regularizer=regularizers.l2(5e-4),  # weight decay = 5e-4
              kernel_initializer='glorot_uniform',
              padding='same', 
              name='begin_block{}_conv1'.format(block_no),
              use_bias=False)(input)
  c1 = BatchNormalization(name='begin_block{}_norm1'.format(block_no))(c1)
  c1 = ReLU()(c1)
  
  return c1

# Custom ResBlock
def add_resblock(input, dropout_rate = None, layers=2, block_no=1, first_block=False, final_block=False):
  ch_in = input.shape[3]*2
  temp = input
  
  for l in range(layers):
    c1 = Conv2D(int(ch_in*(2**l)),
                (3,3),
                kernel_regularizer=regularizers.l2(0.001),
#                 kernel_initializer='glorot_uniform',
                use_bias=False,
                padding='same', 
                name = 'res_block{}_conv{}'.format(block_no, l+1))(temp)
    if dropout_rate!=None:
      c1 = Dropout(dropout_rate)(c1)
      
    c1 = BatchNormalization(name = 'res_block{}_BN{}'.format(block_no, l+1))(c1)
        
    c1 = ReLU(name = 'res_block{}_relu{}'.format(block_no, l+1))(c1)
    
    temp = c1

    
  concat = Concatenate(axis=-1)([input,temp])


  if not final_block:

    tr_layer = Conv2D(int(ch_in), (1,1),
                      kernel_regularizer=regularizers.l2(0.001),
                      use_bias=False,
                      padding='same', 
                      name = 'res_block{}_transition'.format(block_no))(concat)
    tr_layer = BatchNormalization(name = 'res_block_transition1x1{}_BN'.format(block_no))(tr_layer)
    tr_layer = ReLU(name = 'res_block_transition1x1{}_relu'.format(block_no))(tr_layer)
    
    
    return MaxPooling2D(pool_size=(2, 2))(tr_layer)
  else:
    return concat

## Conv -> B1 -> B2 -> B3 -> B4 -> output

In [0]:
input = Input(shape=(32, 32, 3,))

First_Conv2D = conv_bn_relu(input, block_no=1)
# second_Conv2D = conv_bn_relu(First_Conv2D, block_no=2)
# first_maxpool = MaxPooling2D(pool_size=(2, 2), name = 'startmaxpool')(First_Conv2D)

block1 = add_resblock(First_Conv2D, layers=2, block_no=1, first_block=True)

block2 = add_resblock(block1, layers=2, block_no=2)

block3 = add_resblock(block2, layers=2, block_no=3)

block4 = add_resblock(block3, layers=2, block_no=4, final_block=True)

reduce_ch = Conv2D(num_classes, (1,1), name='number_of_classes', use_bias=False)(block4)
avg_pool = GlobalAveragePooling2D()(reduce_ch)
output = Softmax()(avg_pool)

model = Model(inputs=[input], outputs=[output])
model.summary()











Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
begin_block1_conv1 (Conv2D)     (None, 32, 32, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
begin_block1_norm1 (BatchNormal (None, 32, 32, 32)   128         begin_block1_conv1[0][0]         
__________________________________________________________________________________________________
re_lu_1 (ReLU)                  (None, 32, 32, 32)   0           begin_block1_norm1[0][0]         
__________________________________________________________________________________

In [0]:
import tensorflow as tf
import numpy as np
import os

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [0]:
import keras as k
y_train = k.utils.to_categorical(y_train, num_classes)
y_test = k.utils.to_categorical(y_test, num_classes)

In [0]:
from keras.callbacks import *

class CyclicLR(Callback):

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn == None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        
        if new_base_lr != None:
            self.base_lr = new_base_lr
        if new_max_lr != None:
            self.max_lr = new_max_lr
        if new_step_size != None:
            self.step_size = new_step_size
        self.clr_iterations = 0.
        
    def clr(self):
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(self.clr_iterations)
        
    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):
        
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        
        K.set_value(self.model.optimizer.lr, self.clr())

In [0]:
clr = CyclicLR(base_lr=0.01, max_lr=0.1,step_size=780.)

## clr with simple augmentations .. .trial one


max acc : 89% obtained...


need tweaking with augmentations and lr scheduler.

In [0]:
#SDG 
sgd = SGD(lr=0.01, momentum = 0.9,nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
    )
datagen.fit(x_train)





In [0]:
#fit
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch = len(x_train) / 128, epochs=epochs, validation_data=(x_test, y_test),callbacks=[clr])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epo

<keras.callbacks.History at 0x7f6f2dbbfcc0>

In [0]:
from keras.datasets import cifar10
import keras.callbacks as callbacks
import keras.utils.np_utils as kutils
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler

## Random crop, normalization and padding 4 pix. 
### Do not re run... does not work

In [0]:
def random_crop(x, random_crop_size = (32,32), sync_seed=None):
    np.random.seed(sync_seed)
    w, h = x.shape[1], x.shape[2]
    rangew = (w - random_crop_size[0]) // 2
    rangeh = (h - random_crop_size[1]) // 2
    offsetw = 0 if rangew == 0 else np.random.randint(rangew)
    offseth = 0 if rangeh == 0 else np.random.randint(rangeh)
    return x[:, offsetw:offsetw+random_crop_size[0], offseth:offseth+random_crop_size[1]]

def pad(x, pad=4):
    return np.pad(x, ((0,0), (pad,pad),(pad,pad),(0,0)), mode='reflect')

In [0]:
batch_size = 64  ## 128 produces OOM
nb_epoch = 130
img_rows, img_cols = 32, 32

(trainX, trainY), (testX, testY) = cifar10.load_data()

trainX = pad(trainX)
testX = pad(testX)

trainX = trainX.astype('float32')
trainX = (trainX - [0.4914, 0.4822, 0.4465]) / [0.2023, 0.1994, 0.2010]
testX = testX.astype('float32')
testX = (testX - [0.4914, 0.4822, 0.4465]) / [0.2023, 0.1994, 0.2010]


trainY = kutils.to_categorical(trainY)
testY = kutils.to_categorical(testY)

generator = ImageDataGenerator(zca_epsilon=0,
                               rescale = 1./255,
                               horizontal_flip=True,
                               fill_mode='reflect',)

generator.fit(trainX, seed=0, augment=True)

test_generator = ImageDataGenerator(zca_epsilon=0,
                                    rescale = 1./255,
                                    horizontal_flip=True,
                                    fill_mode='reflect')

test_generator.fit(testX, seed=0, augment=True)

In [0]:
sgd = SGD(lr=0.001, momentum = 0.9,nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

In [0]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
begin_block1_conv1 (Conv2D)     (None, 32, 32, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
begin_block1_norm1 (BatchNormal (None, 32, 32, 32)   128         begin_block1_conv1[0][0]         
__________________________________________________________________________________________________
re_lu_1 (ReLU)                  (None, 32, 32, 32)   0           begin_block1_norm1[0][0]         
____________________________________________________________________________________________

In [0]:
for i in range(0, nb_epoch):
    print('Epoch = ' + str(i+1))
    for x_batch, y_batch in generator.flow(trainX, trainY, batch_size=len(trainX)):
        for testx_batch, testy_batch in test_generator.flow(testX, testY, batch_size=len(testX)):
            x_batch = random_crop(x_batch)
            testx_batch = random_crop(testx_batch)
            model.fit(x_batch, y_batch, nb_epoch=1, callbacks=[clr], validation_data=(testx_batch, testy_batch))
            break
        break

scores = model.evaluate_generator(test_generator.flow(testX, testY, nb_epoch), (testX.shape[0] / batch_size + 1))
print("Accuracy = %f" % (100 * scores[1]))

Epoch = 1
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  import sys




Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 2
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 3
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 4
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 5
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 6
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 7
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 8
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 9
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 10
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 11
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 12
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 13
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 14
Train on 50000 samples, validate on 10000 samples
Epoch 1/1
Epoch = 15
Train on 500

#### Not working.. .stopped further epochs

Saturates between 75 to 82%

## SGD Scheduler with other image augmentations

In [0]:
from keras.callbacks import Callback
import keras.backend as K
import numpy as np

class SGDRScheduler(Callback):
    
    def __init__(self,
                 min_lr,
                 max_lr,
                 steps_per_epoch,
                 lr_decay=1,
                 cycle_length=4,
                 mult_factor=1):

        self.min_lr = min_lr
        self.max_lr = max_lr
        self.lr_decay = lr_decay

        self.batch_since_restart = 0
        self.next_restart = cycle_length

        self.steps_per_epoch = steps_per_epoch

        self.cycle_length = cycle_length
        self.mult_factor = mult_factor

        self.history = {}

    def clr(self):
        '''Calculate the learning rate.'''
        fraction_to_restart = self.batch_since_restart / (self.steps_per_epoch * self.cycle_length)
        lr = self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + np.cos(fraction_to_restart * np.pi))
        return lr

    def on_train_begin(self, logs={}):
        '''Initialize the learning rate to the minimum value at the start of training.'''
        logs = logs or {}
        K.set_value(self.model.optimizer.lr, self.max_lr)

    def on_batch_end(self, batch, logs={}):
        '''Record previous batch statistics and update the learning rate.'''
        logs = logs or {}
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        self.batch_since_restart += 1
        K.set_value(self.model.optimizer.lr, self.clr())

    def on_epoch_end(self, epoch, logs={}):
        '''Check for end of current cycle, apply restarts when necessary.'''
        if epoch + 1 == self.next_restart:
            self.batch_since_restart = 0
            self.cycle_length = np.ceil(self.cycle_length * self.mult_factor)
            self.next_restart += self.cycle_length
            self.max_lr *= self.lr_decay
            self.best_weights = self.model.get_weights()

    def on_train_end(self, logs={}):
        '''Set weights to the values from the end of the most recent cycle for best performance.'''
        self.model.set_weights(self.best_weights)

In [0]:
import imgaug as ia
from imgaug import augmenters as iaa
import numpy as np

def other_augs(batches):
  shape_seq = iaa.Sequential([
    iaa.GaussianBlur(sigma=(0, 0.15)), # ex: 0.6
    iaa.Fliplr(0.5), # horizontally flip 50% of all images
    iaa.Sometimes(0.10, iaa.CropAndPad(percent=(0, .20))),
    iaa.Sometimes(0.5,iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, # translate by -20 to +20 percent (per axis)
            rotate=(-10, 10), # rotate by -10 to +10 degrees
            shear=(-10, 10) # shear by -10 to +10 degrees
        )),
  ])
    
  while True:
    batch_x, batch_y = next(batches)
    shape_augmented = np.zeros((batch_x.shape[0], batch_x.shape[1], batch_x.shape[2], batch_x.shape[3])) 
    shape_augmented = shape_seq.augment_images(batch_x)
    yield (shape_augmented, batch_y)

In [0]:
clr = SGDRScheduler(0.01,
                 0.1,
                 390,
                 lr_decay=0.09,
                 cycle_length=4,
                 mult_factor=1,
                   )

#SDG 
sgd = SGD(lr=0.01, momentum = 0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])


In [0]:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)
datagen.fit(x_train)

In [0]:
#fit
model.fit_generator(other_augs(datagen.flow(x_train, y_train, batch_size=batch_size)),
                    steps_per_epoch = len(x_train) / 128, epochs=epochs, validation_data=other_augs(datagen.flow(x_test, y_test,batch_size=batch_size)),validation_steps=len(x_test)/128,callbacks=[clr])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epo

<keras.callbacks.History at 0x7f81bba19fd0>

In [0]:
print("Max Acc after 100 epochs : ",max(model.history.history['val_acc']))

Max Acc after 100 epochs :  0.8969


In [0]:
model.save_weights("fi100.h5")

In [0]:
#continue training for another 100 epochs after saving the model weights
model.fit_generator(other_augs(datagen.flow(x_train, y_train, batch_size=batch_size)),
                    steps_per_epoch = len(x_train) / 128, epochs=epochs, validation_data=other_augs(datagen.flow(x_test, y_test,batch_size=batch_size)),validation_steps=len(x_test)/128,callbacks=[clr])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f81a8667780>

In [0]:
print("Max Acc after 200 epochs : ",max(model.history.history['val_acc']))

Max Acc after 200 epochs :  0.9221


##  The following worked after trying multiple strategies before.
1. SDG Learning rate schedule
2. Gaussian blue : 15% of the images randomly
3. Horizontal flip : 50% of the images randomly
4. Weight decay : 5e-4
5. Batch size : 128
6. Random crop and padding : 20% of the images
7. scaling(80%-120%), rotation(+-10), shear and translation on both x&y axis


saving model after every 100 epochs

## FINAL VAL ACC: 0.9221

In [0]:
model.save_weights("final_92_valacc_model.h5")

In [0]:
from google.colab import files
files.download("final_92_valacc_model.h5")