# Cats and Dogs Redux

We implement genetic algorithm to optimize a classification model that labels images of dogs and cats.
The model is a "light-weight" version of the  VGG16 architecture: 

1) Fewer blocks

2) Replaced regular convolution layer by **Dilated convolution** layer [https://arxiv.org/abs/1511.07122]

3) Use of **swish** activation function on the Fully connected layer [ https://arxiv.org/abs/1710.05941]

In [1]:
# Required libraries
from keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from keras.utils import plot_model
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from operator import itemgetter
import numpy as np
import cv2
import glob
import matplotlib.pylab as plt
import hashlib
import random
from PIL import Image
import seaborn
import pickle
#set random seed
np.random.seed(72)


Using TensorFlow backend.


In [2]:
from utils import build_dataset

def swish(x):
    '''
    Keras implementation of swish https://arxiv.org/abs/1710.05941
    '''
    return (K.sigmoid(x) * x)

get_custom_objects().update({'swish': Activation(swish)})

train_data, validation_data = build_dataset(5000, 0.25)


Set size Training: 3750 | Validation: 1250


In [3]:
class Configs():
    
    def __init__(self):
        self.total_nparams = 500000
        self.population_sz = 20
        self.image_specs = [32, 3]
    
    def list_activations(self):
        return ["relu", "elu", swish]

    
    
class ClassifierDesign():
    
    def __init__(self, max_nparams, units_flatten, list_activations, max_cls_depth=10, lo_dropout=0.4, hi_dropout=1.0):
        self.max_nparams = max_nparams
        self.units_flatten = units_flatten
        self.list_activations = list_activations
        self.max_cls_depth = max_cls_depth
        self.hi_dropout = hi_dropout
        self.lo_dropout = lo_dropout
        self.design = list()
        self.build()
    
    
    def pick_num_units(self, current_nparams, units_prev_layer):
        '''
        Randomly pick number of units in a range so that the # of parameters of the model is below max_n_params
        
        before adding a layer (nl1: number of units of layer1)
        [nl1] - [nl2] - [nl3] - [output]
        n_params_before = nl1 * nl2 + nl2 * nl3 + nl3*output

        add nl4: [nl1] - [[nl2] - [nl3] > [nl4] < [output]
        n_params_after = nl1 * nl2 + nl2 * nl3 + nl3*nl4 + nl4*output
                       = n_params_init -nl3*output + nl3 * nl4 +nl4*output
        We want n_params_after <= max_nparams
        n_params_init -nl3*output + nl3 * nl4 +nl4*output <= max_n_params
        nl4 <= (max_n_params - n_params_init+nl3*output)/(nl3+output)
        '''
        low_units = 1
        high_units = int( (self.max_nparams - current_nparams + units_prev_layer)*1./(units_prev_layer + 1) )
        if high_units <= 1:
            return  1
        else:
            return np.random.randint(low=low_units, high=high_units)
        

        
    def build(self):
        '''
        build a classifier design with attributes randomly selected
        return a dict()
        '''
        cls_depth = np.random.choice( np.arange(0, self.max_cls_depth) )
        print("cls depth ", cls_depth)
        current_nparams = self.units_flatten
        units_prev_layer = self.units_flatten
        for layer_id in range(cls_depth - 1): #-1 for the output generated outside the loop
            if (self.max_nparams - current_nparams) < 1:
                #reaching the max number of parameters
                break
            else:
                layer_activ = np.random.choice( self.list_activations )
                layer_dropout = np.random.choice( np.arange(self.lo_dropout, self.hi_dropout, 0.1) )
                #pick num_units between 
                layer_units = self.pick_num_units(current_nparams, units_prev_layer)
                #Number of parameters when adding this layer
                current_nparams = current_nparams + units_prev_layer*(layer_units-1) + layer_units
                self.units_prev_layer = layer_units
                #attach new layer
                self.design.append({"units": layer_units, "activation": layer_activ, "dropout": layer_dropout})
            
        #attach output layer
        self.design.append({"units": 1, "activation": "sigmoid", "dropout": None})
        
        

class ConvNet():
    
    def __init__(self, img_sz, img_ch):
        self.img_sz = img_sz
        self.img_ch = img_ch
        
        
    def initialize(self):
        self.model = Sequential()

        
    def add_feature_extractor(self):
        #image size = (32, 32, 3)
        self.model.add(Conv2D(32, (3, 3), padding="same", activation='relu', input_shape=(self.img_sz, self.img_sz, self.img_ch)))
        self.model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        #image size = (16, 16, 32)
        self.model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
        self.model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        #image size = (16, 16, 64)
        self.model.add(Conv2D(128, (3, 3), padding="same", dilation_rate=(2, 2), activation='relu'))
        self.model.add(Conv2D(128, (3, 3), padding="valid", activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        #image size = (7, 7, 128)
        self.model.add(Flatten(name="flatten") )        

        
    def add_classifier(self, cls_design):
        for layer in cls_design[:-1]:
            layer_activ = layer["activation"]
            layer_dropout = layer["dropout"]
            layer_units = layer["units"]
            
            self.model.add( Dense(layer_units, activation=layer_activ) )
            if layer_dropout:
                self.model.add( Dropout(layer_dropout) )

        #Add output layer
        self.model.add(Dense(1, activation='sigmoid'))



        
class GeneticAlgo():
    
    def __init__(self):
        self.history = {"scores": list(), "cls_designs": list(), "accuracy": list()}
        self.configs = Configs() 
        

    def design2model(self, specimen):
        cvnet = ConvNet(self.configs.image_specs[0], self.configs.image_specs[1])
        cvnet.initialize()
        #attach feature extractor : convolutional neural net
        cvnet.add_feature_extractor()
        #Attach NeuralNet to Classifier
        cvnet.add_classifier(specimen["cls_design"])
        specimen["model"] =  cvnet.model
        specimen["score"] = None
        return specimen
    
        
    def build_random_population(self):
        population = list()
        while (len(population) < self.configs.population_sz):
            cvnet = ConvNet(self.configs.image_specs[0], self.configs.image_specs[1])
            #Initialize sequential model
            cvnet.initialize()
            #attach feature extractor : convolutional neural net
            cvnet.add_feature_extractor()
            n_params_feat_extractor = cvnet.model.count_params()
            cls_max_nparams = self.configs.total_nparams - n_params_feat_extractor
            units_flatten = cvnet.model.get_layer(name="flatten").output_shape[1]
            #Generate random design for classifier
            cls = ClassifierDesign(cls_max_nparams, units_flatten, self.configs.list_activations())
            #Attach NeuralNet to Classifier
            cvnet.add_classifier(cls.design)
            specimen = {"model": cvnet.model, "cls_design": cls.design, "score": None}
            if specimen["cls_design"] not in (list( map(lambda x: x["cls_design"], population) ) ):
                #avoid duplicate designs
                population.append(specimen)
        return population
    
    
    def sorted_specimen(self, population):
        '''
        sort specimen by score value (validation loss) from smaller to largest
        '''
        return sorted(population, key=itemgetter('score'), reverse=False)
    
    ####################
    #Genetic operators = selection, mutation, breeding
    ####################
    def keep_top_specimen(self, n_top, sorted_population):
        print("Top specimen score: {}".format(list(map(lambda x: x["score"], sorted_population))))
        return sorted_population[0:n_top]
    

    # randomly pick simple specimen
    def keep_simple_specimen(self, n_top, n_random_pick, sorted_population):
        idx = np.arange(n_top, len(sorted_population))
        picks = np.random.choice(idx, n_random_pick)
        return [sorted_population[i] for i in picks]

    
    def breed(self, parents):
        """
        Generate a children from 2 parents
        parents is a list
        """

        #index of parent with most hidden layers
        deepest_parent = np.argmax( list(map(lambda x: len(x["cls_design"]), parents)))
        shallowest_parent = (not( bool(deepest_parent) )) * 1 #index of parent with les layers
        #Initialize child
        child_design = []   
        
        for layer_id in range( len(parents[ deepest_parent ]["cls_design"]) -1): #-1 = take out output layer
            layer = parents[ deepest_parent ]["cls_design"][layer_id]
            
            if layer_id > (len(parents[shallowest_parent]["cls_design"]) - 1):
                #shallowest parent does not have this layer level
                #==> randomly decide to pick layer of deepest or stop the breed
                if np.random.choice([0, 1]):
                    child_design.append(layer)
                break

            else:
                child_layer = dict()
                for key in layer.keys():
                    pick_gene_from = np.random.choice([deepest_parent, shallowest_parent])
                    child_layer[key] = parents[pick_gene_from]["cls_design"][layer_id][key]
                child_design.append(child_layer)
        
        #append the output layer
        child_design.append({"units": 1, "activation": "sigmoid", "dropout": None})
        child = {"model": None, "cls_design": child_design, "score": None}
        return self.design2model(child)

        
    def mutate(self, specimen):
        """Randomly mutate one part of the network.
            Args:
            network (dict): The network parameters to mutate
        """
        #pick_layer to mutate
        pick_layer = np.random.choice( np.arange( 0, len(specimen["cls_design"]) - 2, 1 ) ) #skip the last layer
        pick_key = np.random.choice( list( specimen["cls_design"][0].keys() ) )
        new_specimen["cls_design"] = specimen["cls_design"]
        
        if pick_key == "units":
            range_mutation = [i/10 for i in range(5, 16, 1) if i!=10]
            mutation_factor = 1
            while mutation_factor == 1: 
                #insure that it is a mutation not a cloning: value is changed
                #allows +/- 50% mutation in units
                mutation_factor = np.random.choice( range_mutation  )
                mutation = int(specimen["cls_design"][pick_layer][pick_key] * mutation_factor)
            mutation = np.clip( mutation, 1, None ) #units cannot be below 1
            new_specimen["cls_design"][pick_layer]["units"] = mutation
            #reinitialize model and score for the new specimen
            new_specimen = self.design2model( new_specimen )
            return new_specimen

        elif pick_key == "dropout":
            mutation_factor = 1
            range_mutation = [i/10 for i in range(5, 16, 1) if i!=10]
            mutation = specimen["cls_design"][pick_layer][pick_key]
            while mutation == specimen["cls_design"][pick_layer][pick_key]:
                mutation_factor = np.random.choice(  range_mutation )
                mutation = specimen["cls_design"][pick_layer][pick_key] * mutation_factor
                mutation = np.clip( mutation, 0.4, 1 )
            new_specimen["cls_design"][pick_layer]["dropout"] = mutation
            new_specimen = self.design2model( new_specimen)
            return new_specimen

        else:
            old_gene =  specimen["cls_design"][pick_layer]["activation"]
            mutation = old_gene
            while mutation == old_gene:
                mutation = np.random.choice(self.configs.list_activations())
            new_specimen["cls_design"][pick_layer]["activation"] = mutation
            new_specimen = self.design2model( new_specimen )
            return new_specimen


In [4]:
def batch_gen(batch_sz, data, resize):
    indexes = np.arange(0, len(data), 1)
    start = 0
    while 1:
        x_batch = np.zeros( (batch_sz, resize, resize, 3), dtype=np.float32)
        y_batch = np.zeros( (batch_sz, 1), dtype=np.float32)

        for row in range(batch_sz):
            sample = data[ indexes[row + start] ]
            img = cv2.imread(sample[0], cv2.IMREAD_COLOR)
            img = cv2.resize(img, (resize, resize), interpolation=cv2.INTER_CUBIC)
            #############
            # Add a few lines for data augmentation with affine distorsions
            ######
            img_arr = np.asarray(img)
            x_batch[row] = (img_arr[:,:,:]-128.)/ 128.
            y_batch[row, 0] = sample[1]
        start += batch_sz
        if start + batch_sz > len(indexes):
            np.random.shuffle(indexes)
            start = 0
        yield x_batch, y_batch


def train_specimen(train_data, validation_data, model, batch_sz=128, n_epochs=10, lr=0.000001, patience=5):
    configs = Configs()
    adam = Adam(lr=lr)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    steps_per_epoch = len(train_data) // batch_sz
    validation_steps = len(validation_data) // batch_sz
    early_stop = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

    list_callbacks = [early_stop]
    resize_img = configs.image_specs[0]
    model.fit_generator(batch_gen(batch_sz, train_data, resize_img), validation_data=batch_gen(batch_sz, validation_data, resize_img),\
                    steps_per_epoch=steps_per_epoch, epochs=n_epochs, validation_steps = validation_steps, verbose=True,
                    callbacks=list_callbacks)
    val_loss, val_acc = model.evaluate_generator(batch_gen(batch_sz, validation_data, resize_img), validation_steps)
    return val_loss, val_acc
    

from time import time    
genalgo = GeneticAlgo()
#initialize population
population = genalgo.build_random_population()
keep_top = 5
keep_simple = 5
n_mutants = 5
n_breeds = 5
n_epochs = 30
patience = 5
num_evolution = 100 

start_time = time()
for i in range(num_evolution):
    #Train each specimen
    print("**********************************")
    print("Evolution {}".format(i))
    print("**********************************")
    if i != 0:
        new_population = list()
        sorted_pop = genalgo.sorted_specimen(population)
        # Keep top specimens
        top_specimens = genalgo.keep_top_specimen( keep_top, sorted_pop )
        print(top_specimens)
        new_population += top_specimens
        print("Top population next gene: {}".format( list(map(lambda x: x["score"], new_population)) ) )
        #keep simple specimens
        simple_specimens = genalgo.keep_simple_specimen(keep_top, keep_simple, sorted_pop)
        new_population += simple_specimens
        
        #Generate breeds from top specimens
        n_breeded = 0
        while n_breeded < n_breeds:
            #randomly pick 2 parents in the previous population
            parents = np.random.choice( population, 2 )
            #only breed parent with more than 1 layer  (dimiss parent with flatten->output)
            if 1 not in list(map(lambda x: len(x["cls_design"]), parents) ):
                specimen = genalgo.breed(parents)
                new_population.append( specimen )
                n_breeded += 1
            
        #Generate mutation from top specimens
        n_mutated = 0
        while n_mutated < n_mutants:
            pick_specimen = np.random.choice( population )
            if len( pick_specimen["cls_design"]) > 1:
                specimen = genalgo.mutate(pick_specimen)
                new_population.append( specimen )
                n_mutated += 1
        
        population = new_population
    
    for idx, specimen in enumerate(population):
        #{"model": cvnet.model, "cls_design": cls.design, "score": None}
        print("----------")
        print(specimen["cls_design"])
        print()
        specimen["model"].summary()
        #train and update score
        if specimen["score"]:
            print("score: {:.2f}".format(specimen["score"]))
        else:
            specimen["score"], val_acc = train_specimen(train_data, validation_data, specimen["model"], batch_sz=128, 
                                           n_epochs=n_epochs, lr=0.0001, patience=patience)
            print("score: {} | Accuracy: {}".format(specimen["score"], val_acc) )
        print()
    # update history:
    ls_scores = list( map(lambda x: x["score"], population) )
    genalgo.history["scores"].append( ls_scores )
    genalgo.history["cls_designs"].append( list( map(lambda x: x["cls_design"], population) ) )
    print("-------------------------")
    print("Average Validation loss for Generation {}: {}".format(i, np.mean( ls_scores )))
    #plot
    x = list( map(lambda x: x["score"], population))
    plt.hist(x, bins=20)
    plt.show()

stop_time = time()
print("Duration: {}".format(stop_time - start_time))

cls depth  8
cls depth  2
cls depth  3
cls depth  1
cls depth  9
cls depth  4
cls depth  4
cls depth  4
cls depth  6
cls depth  7
cls depth  9
cls depth  6
cls depth  0
cls depth  6
cls depth  2
cls depth  1
cls depth  4
cls depth  0
cls depth  2
cls depth  0
cls depth  6
cls depth  3
cls depth  7
cls depth  7
**********************************
Evolution 0
**********************************
----------
[{'activation': 'elu', 'dropout': 0.59999999999999998, 'units': 42}, {'activation': 'relu', 'dropout': 0.89999999999999991, 'units': 95}, {'activation': 'elu', 'dropout': 0.79999999999999993, 'units': 47}, {'activation': <function swish at 0x7f64f9136ae8>, 'dropout': 0.69999999999999996, 'units': 1}, {'activation': 'relu', 'dropout': 0.79999999999999993, 'units': 1}, {'activation': 'elu', 'dropout': 0.40000000000000002, 'units': 2}, {'activation': <function swish at 0x7f64f9136ae8>, 'dropout': 0.89999999999999991, 'units': 1}, {'activation': 'sigmoid', 'dropout': None, 'units': 1}]

_____

NameError: name 'ls_acc' is not defined

In [None]:
##########################
# Save population: last generation
# I tried dumping list in a pickle file but that does not work: TypeError: can't pickle _thread.lock objects
# also tried hdf5
##########################

#Save each model in a separate hdf file and 
pop = list()
for i in range(len(population)):
    m = population[i]["model"]
    m.save("specimen"+str(i)+".hdf5")
    pop.append({"model_name": "specimen"+str(i)+".hdf5", "cls_design": population[i]["cls_design"],
                          "score": population[i]["score"]})

with open('population.p', 'wb') as handle:
    pickle.dump(pop, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('history.p', 'wb') as handle:
    pickle.dump(genalgo.history, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
##################
# My best model
##################

def binary_classifier(img_sz, img_ch):
    model = Sequential()
    #image size = (32, 32, 3)
    model.add(Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=(img_sz, img_sz, img_ch)))
    model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #image size = (16, 16, 32)
    model.add(Conv2D(64, (3, 3), activation='relu', padding="same"))
    model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #image size = (16, 16, 64)
    model.add(Conv2D(128, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
    model.add(Conv2D(128, (3, 3), padding="valid", activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #image size = (7, 7, 128)
    model.add(Flatten())
    model.add(Dense(64, activation=swish))
    #model.add(Dense(512, activation=swish_activation))
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    return model


In [None]:
# Parameters
batch_sz = 128
img_sz = 32
n_epochs = 30
patience = 2
lr = 0.0001

my_model = binary_classifier(img_sz, 3)
adam = Adam(lr=lr)
my_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
steps_per_epoch = len(train_data) // batch_sz
validation_steps = len(validation_data) // batch_sz
early_stop = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
list_callbacks = [early_stop]
my_model.summary()
my_model.fit_generator(batch_gen(batch_sz, train_data, img_sz), validation_data=batch_gen(batch_sz, validation_data, img_sz),\
                    steps_per_epoch=steps_per_epoch, epochs=n_epochs, validation_steps = validation_steps, verbose=True,
                    callbacks=list_callbacks)


val_loss, val_acc = my_model.evaluate_generator(batch_gen(batch_sz, validation_data, img_sz), validation_steps)
print(val_loss, val_acc)

In [None]:
#best model : lowest loss
best_specimen = sorted(population, key=itemgetter("score"), reverse=False )[0]
m = best_specimen["model"]
steps_per_epoch = len(train_data) // batch_sz
validation_steps = len(validation_data) // batch_sz
early_stop = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
list_callbacks = [early_stop]
m.summary()
m.fit_generator(batch_gen(batch_sz, train_data, img_sz), validation_data=batch_gen(batch_sz, validation_data, img_sz),\
                    steps_per_epoch=steps_per_epoch, epochs=n_epochs, validation_steps = validation_steps, verbose=True,
                    callbacks=list_callbacks)


val_loss, val_acc = m.evaluate_generator(batch_gen(batch_sz, validation_data, img_sz), validation_steps)
print(val_loss, val_acc)