# Generating CNN architectures automatically with genetic algorithms

In [41]:
import random
import json
import time
import pprint

import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

import tensorflow as tf
from tensorflow.keras.layers import Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical

print(tf.__version__)

2.8.0


# ResNets

This project is based on ResNets. Using this kind of convolutional neural network allows us to build very deep networks while avoiding vanishing-gradient and overfitting issues.

In [42]:
def skip_layer_encode():
    """
    Build the textual gene of a random skip layer.

    Two filter counts are drawn independently, each a power of two
    between 32 (2**5) and 512 (2**9).

    Returns:
        str: Gene in the form "<f1>_<f2>", e.g. "64_256".
    """
    filter_counts = [2 ** random.randint(5, 9) for _ in range(2)]
    return "_".join(str(count) for count in filter_counts)

def pooling_layer_encode():
    """
    Build the textual gene of a random pooling layer.

    Returns:
        str: "max" or "mean", each chosen with probability 0.5.
    """
    return "max" if random.random() < 0.5 else "mean"

In [43]:
def random_layer():
    """
    Build the gene of a random layer.

    A skip layer and a pooling layer are equally likely; the actual gene
    is produced by the matching encoder.

    Returns:
        str: Either a skip-layer gene ("<f1>_<f2>") or a pooling gene
        ("max" / "mean").
    """
    encoder = skip_layer_encode if random.random() < 0.5 else pooling_layer_encode
    return encoder()

## Skip Layer

### What is a Skip Layer?

In [44]:
def skip_layer(X, f1, f2, kernel = (3,3), stride = (1,1)):
    """
    Residual ("skip") block: two convolutions on the main path plus a
    1x1-convolution shortcut, added together and passed through ReLU.

    Args:
        X: Input tensor. BatchNormalization is applied on axis 3, so a
            4-D channels-last tensor is assumed -- TODO confirm.
        f1 (int): Number of filters of the first convolution.
        f2 (int): Number of filters of the second convolution and of the
            shortcut convolution.
        kernel (tuple): Kernel size of the two main-path convolutions.
        stride (tuple): Downsampling stride of the block. It is applied once
            on the main path (first convolution) and once on the shortcut.

    Returns:
        Output tensor of the block, with f2 channels.
    """
    # First convolution: the only main-path layer that applies `stride`
    layer = Conv2D(f1, kernel_size=kernel, strides=stride, padding="same")(X)
    layer = BatchNormalization(axis=3)(layer)
    layer = Activation("relu")(layer)

    # Second convolution: always stride 1. Striding here as well would
    # downsample the main path twice while the shortcut is downsampled once,
    # making the shapes incompatible in Add() for any stride > 1.
    layer = Conv2D(f2, kernel_size=kernel, strides=(1, 1), padding="same")(layer)
    layer = BatchNormalization(axis=3)(layer)

    # Shortcut: 1x1 convolution so the input matches the main path in both
    # channel count (f2) and spatial size (same stride as the first conv)
    shortcut = Conv2D(f2, kernel_size=(1,1), strides=stride, padding="same")(X)

    # Merge shortcut and main path
    outputs = Add()([shortcut, layer])
    outputs = Activation("relu")(outputs)

    return outputs

## Pooling Layer

In [45]:
def pooling_layer(X, pooling_type, kernel = (2,2), stride = (2,2)):
    """
    Apply a 2-D pooling layer to X.

    Args:
        X: Input tensor.
        pooling_type (str): "max" for MaxPooling2D or "mean" for
            AveragePooling2D. Any other value raises KeyError.
        kernel (tuple): Pooling window size.
        stride (tuple): Pooling stride.

    Returns:
        The pooled tensor ("same" padding is used).
    """
    layer_classes = {"max": MaxPooling2D, "mean": AveragePooling2D}
    pooling = layer_classes[pooling_type](pool_size=kernel, strides=stride, padding="same")
    return pooling(X)

# Genetic Algorithm

## The CNN class (individual)

In [46]:
# This class represents each individual of our population
class CNN:
    """
    One individual of the population: a CNN architecture described by a
    list of genes (encoded layers) together with its evaluation metrics.

    `genes` is the single source of truth for the architecture. `encoding`
    and `num_genes` are derived from it, so they can never disagree with the
    gene list after it is modified (e.g. by mutation).
    """

    def __init__(self, encoding:str, input_shape:tuple, output_shape:int) -> None:

        """
        Class constructor

        Args:
            encoding (str): Encoding representation of the CNN: genes joined
                by "-", where each gene is "max", "mean" or "<f1>_<f2>"
            input_shape (tuple): Input shape of the CNN, passed as-is to
                keras Input (presumably (height, width, channels))
            output_shape (int): Number of classes of the CNN
        """

        # Genetic algorithm stuff
        self.genes = encoding.split("-") # List of genes (cnn layers)
        self.fitness = 0.0               # Adaptation value

        # Convolutional Neural Network stuff
        self.input_shape = input_shape   # Input shape handed to keras Input
        self.output_shape = output_shape # Output shape (number of classes)
        self.accuracy = 0.0              # Accuracy of the model
        self.process_time = 0.0          # Time of the model to make an inference
        self.training_time = 0.0         # Time taken to train

    @property
    def encoding(self) -> str:
        """Encoded representation of the CNN, always rebuilt from `genes`."""
        return "-".join(self.genes)

    @encoding.setter
    def encoding(self, value:str) -> None:
        # Assigning an encoding replaces the gene list so both stay in sync
        self.genes = value.split("-")

    @property
    def num_genes(self) -> int:
        """Depth of the CNN: number of genes currently in `genes`."""
        return len(self.genes)

    def generate_model(self) -> "tf.keras.Model":
        """
        Generates a Keras model from the genes

        Every gene becomes a pooling layer ("max" / "mean") or a skip layer
        ("<f1>_<f2>"); the stack is closed with global max pooling, a
        128-unit dense layer and a softmax classifier.

        Returns:
            model: tensorflow.keras.Model
        """
        inputs = Input(shape=self.input_shape)
        outputs = inputs

        for gene in self.genes:
            if gene in ("mean", "max"):
                outputs = pooling_layer(outputs, gene)
            else:
                # Skip layer
                f1, f2 = gene.split("_")
                outputs = skip_layer(outputs, int(f1), int(f2))

        outputs = GlobalMaxPooling2D()(outputs)
        outputs = Dense(128, activation="relu")(outputs)
        outputs = Dense(self.output_shape, activation="softmax")(outputs)

        return Model(inputs = inputs, outputs = outputs)

    def compute_fitness(self):
        """
        Updates `fitness` as accuracy divided by process time.

        An individual whose process time has not been measured yet
        (process_time == 0) gets fitness 0.0 instead of raising
        ZeroDivisionError.
        """
        if self.process_time > 0:
            self.fitness = self.accuracy / self.process_time
        else:
            self.fitness = 0.0

    def __str__(self) -> str:

        """
        String representation of the object.
        """

        return f"""Model encoding: {self.encoding}, \nModel Accuracy: {self.accuracy},\nModel process time: {self.process_time}"""

    def get_info(self) -> dict:

        """
        Returns a dict with the information of the current individual.

        Returns:
            dict: Dictionary containing class important attributes.
        """

        return {
            "encoding" : self.encoding,
            "depth": self.num_genes,
            "fitness" : self.fitness,
            "accuracy" : self.accuracy,
            "process time": self.process_time,
            "training time": self.training_time,
        }

## The Population class

In [47]:
class Population:
    """
    Group of CNN individuals handled by the genetic algorithm, plus a
    reference to the best individual found and the mean adaptation value.
    """

    def __init__(self, n_individuals, min_genes, max_genes):
        """
        Args:
            n_individuals (int): Number of individuals created by `initialize`
            min_genes (int): Minimum depth of a randomly generated individual
            max_genes (int): Maximum depth of a randomly generated individual
        """
        self.n_individuals = n_individuals
        self.min_genes = min_genes
        self.max_genes = max_genes
        self.individuals = None       # List of CNN objects, set by `initialize`
        self.best_individual = None
        self.mean_adaptation = 0.0

    def initialize(self, input_shape, output_shape):
        """
        Fills the population with `n_individuals` random individuals.

        Args:
            input_shape (tuple): Input shape given to every CNN
            output_shape (int): Number of output classes
        """
        self.individuals = [
            self.generate_individual(input_shape, output_shape)
            for _ in range(self.n_individuals)
        ]

    def generate_individual(self, input_shape, output_shape):
        """
        Creates a CNN with a random depth between `min_genes` and `max_genes`
        (both inclusive) made of random layers.

        Args:
            input_shape (tuple): Shape of the input (width, height, n_channels)
            output_shape (int): Number of output classes

        Returns:
            CNN: Randomly initialized instance of the CNN class
        """
        depth = random.randint(self.min_genes, self.max_genes)
        layers = "-".join(random_layer() for _ in range(depth))

        return CNN(layers, input_shape, output_shape)

    def compute_average_fitness(self):
        """
        Stores in `mean_adaptation` the mean accuracy of the individuals.

        The value is 0.0 when the population is empty or not initialized.
        """
        # NOTE(review): the average is taken over `accuracy`, not over
        # `fitness`, as in the original code -- confirm this is intended.
        if not self.individuals:
            self.mean_adaptation = 0.0
            return

        total = sum(individual.accuracy for individual in self.individuals)
        # Divide by the population size: the plain sum is not an average
        self.mean_adaptation = total / len(self.individuals)

    def print(self):
        """Prints a table with one row per individual."""
        individuals_info = [
            [individual.num_genes, individual.encoding, round(individual.accuracy, 4),
             round(individual.process_time, 4), round(individual.fitness, 4)]
            for individual in self.individuals
        ]

        print(tabulate(individuals_info,
        headers=["CNN Depth", "Encoding", "Accuracy", "Process time", "Fitness"],
        numalign="center", stralign="left"))

## The GeneticAlgorithm class

In [48]:
class GeneticAlgorithm:
    """
    Evolves a population of encoded CNN architectures with tournament
    selection, one-point crossover, mutation and elitism. Every evaluated
    architecture is cached in `saved_cnns` so it is trained only once.
    """

    def __init__(self, population_size, min_genes, max_genes, fitness_func, mutation_rate,
                crossover_rate, num_generations, saved_cnns, training_params):
        """
        Args:
            population_size (int): Number of individuals per generation
            min_genes (int): Minimum depth of the random initial individuals
            max_genes (int): Maximum depth of the random initial individuals
            fitness_func: Stored as-is; no method of this class reads it
            mutation_rate (float): Per-gene probability of mutating
            crossover_rate (float): Per-individual probability of being
                selected for crossover
            num_generations (int): Number of generations run by `main_loop`
            saved_cnns (dict): Cache of evaluated architectures, keyed by
                encoding, holding the dict produced by `CNN.get_info`
            training_params (dict): Must contain "X_train", "X_test",
                "y_train_cat", "y_test_cat", "epochs" and "batch_size"
        """
        self.population_size = population_size
        self.min_genes = min_genes
        self.max_genes = max_genes
        self.fitness_func = fitness_func
        self.num_generations = num_generations

        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate

        self.population = Population(population_size, min_genes, max_genes)
        # Shapes are taken from the first training sample / first label vector
        self.input_shape = training_params["X_train"][0].shape
        self.output_shape = training_params["y_train_cat"][0].shape[0]

        self.saved_cnns = saved_cnns
        self.training_params = training_params

    def save_architectures(self):
        """Writes the cache of evaluated architectures to a JSON file."""
        # Directly from dictionary
        with open('evaluated_architectures.json', 'w') as outfile:
            json.dump(self.saved_cnns, outfile, indent=4)

    def evaluate_population(self):
        """
        Trains and evaluates every individual of the current population.

        Architectures already present in `saved_cnns` are not trained again;
        their stored metrics are copied back onto the individual. The best
        individual seen so far (highest accuracy) is tracked in
        `population.best_individual`.
        """

        # Get training params
        X_train = self.training_params["X_train"]
        X_test = self.training_params["X_test"]
        y_train_cat = self.training_params["y_train_cat"]
        y_test_cat = self.training_params["y_test_cat"]
        epochs = self.training_params["epochs"]
        batch_size = self.training_params["batch_size"]

        if self.population.best_individual is not None:
            best_acc = self.population.best_individual.accuracy
        else:
            best_acc = 0.0

        for individual in self.population.individuals:
            print("".center(100, "="))
            # Check if our architecture is already in the saved cnns dict
            if individual.encoding in self.saved_cnns:
                print(f"Architecture {individual.encoding} already evaluated")

                # Offspring are brand-new CNN objects with zeroed metrics, so
                # the cached results must be restored; otherwise a known
                # architecture would compete with accuracy 0.
                cached = self.saved_cnns[individual.encoding]
                individual.accuracy = cached.get("accuracy", individual.accuracy)
                individual.process_time = cached.get("process time", individual.process_time)
                individual.training_time = cached.get("training time", individual.training_time)
                individual.fitness = cached.get("fitness", individual.fitness)
            else:
                model = individual.generate_model()
                model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

                print(f"Architecture: {individual.encoding}, Depth: {individual.num_genes}")
                print("Training ...")

                time1 = time.perf_counter()
                model.fit(X_train, y_train_cat, epochs = epochs, batch_size = batch_size,
                          validation_split=0.15, verbose=0)
                time2 = time.perf_counter()
                individual.training_time = (time2 - time1)/60  # minutes

                print("Evaluating the model with unseen data ...")
                time1 = time.perf_counter()
                test_loss, test_acc = model.evaluate(X_test, y_test_cat)
                time2 = time.perf_counter()
                # Average time per test sample, in milliseconds
                individual.process_time = ((time2 - time1)/X_test.shape[0]) * 1000
                print(f"Process time: {individual.process_time} milliseconds.")

                individual.accuracy = test_acc
                individual.compute_fitness()

                self.saved_cnns[individual.encoding] = individual.get_info()
                self.saved_cnns[individual.encoding]["epochs"] = epochs

                # Many models are built in this loop: release the Keras
                # global state so memory does not grow with every model.
                del model
                tf.keras.backend.clear_session()

            if individual.accuracy > best_acc:
                self.population.best_individual = individual
                best_acc = individual.accuracy

        print("\nEvery individual in the population has been evaluated! ...")

    def choose_winner(self, ind1, ind2):
        """
        Decides the winner of a tournament between two individuals.

        When both accuracies differ by less than 0.03 and the process-time
        ratio fastest/slowest is at least 0.7, the fastest individual wins.
        In every other case the most accurate individual wins (ind1 on ties),
        so a winner is always returned.

        NOTE(review): the `>= 0.7` condition is kept from the original code;
        it rewards the faster model only when both times are similar.
        Confirm whether `<=` was intended.

        Returns:
            CNN: The winning individual (never None).
        """
        if abs(ind1.accuracy - ind2.accuracy) < 0.03:
            if ind1.process_time < ind2.process_time:
                fastest, slowest = ind1, ind2
            else:
                fastest, slowest = ind2, ind1

            # Guard against unevaluated individuals (process time 0)
            if slowest.process_time > 0 and fastest.process_time / slowest.process_time >= 0.7:
                return fastest

        return ind1 if ind1.accuracy >= ind2.accuracy else ind2

    def tournament_selection(self, num_individuals):
        """
        Builds a new list of individuals by repeated two-way tournaments.

        Contestants are drawn with replacement, so the same individual may
        be picked twice in one tournament and may appear several times in
        the result.

        Args:
            num_individuals (int): Number of winners to collect

        Returns:
            list: The selected individuals
        """

        print("Tournament Selection".center(30, "="))

        offspring = []

        # Fill the offspring by tournament
        while len(offspring) < num_individuals:
            # Select two individuals
            ind1 = random.choice(self.population.individuals)
            ind2 = random.choice(self.population.individuals)

            winner = self.choose_winner(ind1, ind2)

            print(f"Selected individuals: {ind1.encoding} acc: {round(ind1.accuracy, 4)} & {ind2.encoding} acc: {round(ind2.accuracy, 4)} Winner: {winner.encoding}")

            offspring.append(winner)

        return offspring

    def cross(self, parent1, parent2, cross_point):
        """
        One-point crossover: genes up to and including index `cross_point`
        come from one parent, the remaining genes from the other.

        Example with cross_point = 0:
            parents:  "128_16-mean-max-16_8"   and "max-64_64-64_128"
            children: "128_16-64_64-64_128"    and "max-mean-max-16_8"

        Returns:
            tuple: The two new CNN individuals
        """
        genes1 = parent1.genes[:cross_point+1] + parent2.genes[cross_point+1:]
        genes2 = parent2.genes[:cross_point+1] + parent1.genes[cross_point+1:]

        genes1 = "-".join(genes1)
        genes2 = "-".join(genes2)

        son1 = CNN(genes1, self.input_shape, self.output_shape)
        son2 = CNN(genes2, self.input_shape, self.output_shape)

        return son1, son2

    def crossover(self):
        """
        Selects individuals with probability `crossover_rate`, pairs them in
        order and replaces each pair of parents with their two children.
        """
        print("Crossover".center(20, "="))
        individuals = self.population.individuals

        # Individuals at the positions whose random draw is below the
        # crossover rate are selected
        selected_indices = [
            index for index in range(len(individuals))
            if random.random() < self.crossover_rate
        ]

        # The number of selected individuals is made even
        num_selected = len(selected_indices) - len(selected_indices) % 2

        print(f"Num selected: {num_selected}")
        print(f"Selected indices: {selected_indices}")
        print(f"Individuals size: {len(individuals)}")

        for i in range(0, num_selected, 2):
            index1 = selected_indices[i]
            index2 = selected_indices[i+1]
            parent1 = individuals[index1]
            parent2 = individuals[index2]

            # The cut point is chosen inside the shortest parent
            shortest_length = min(len(parent1.genes), len(parent2.genes))
            if shortest_length < 2:
                # A single-gene parent has no place to cut
                print(f"Skipping crossover of {parent1.encoding} and {parent2.encoding}: parent too short")
                continue

            # `cross` keeps genes[:cross_point+1], so valid cut points are
            # 0 .. shortest_length-2; shortest_length-1 would copy the whole
            # shortest parent and exchange nothing.
            cross_point = random.randint(0, shortest_length - 2)

            # We create two individuals based on their parents
            son1, son2 = self.cross(parent1, parent2, cross_point)

            # New individuals replace their parents
            print(f"Parent {parent1.encoding} was replaced by {son1.encoding}")
            print(f"Parent {parent2.encoding} was replaced by {son2.encoding}")

            # Write at the parents' own positions, not at the loop counter
            individuals[index1] = son1
            individuals[index2] = son2

    def mutation(self):
        """
        Mutates at most one gene per individual.

        Each gene mutates with probability `mutation_rate`; the first gene
        that does gets one random mutation type and the scan of that
        individual stops. A mutated individual is replaced in the population
        by a new CNN built from the mutated genes, so its encoding, depth and
        metrics stay consistent and other references to the original object
        (best individual, duplicates picked by the tournament) are untouched.
        """

        possible_mutations = ("increment_depth", "reduce_depth", "change_layer_type", "recreate_layer")
        individuals = self.population.individuals

        # We loop over the genes of every individual in population
        for position, individual in enumerate(individuals):

            # We work on a copy so the list being iterated is not modified
            mutated_genes = individual.genes.copy()

            for n_gene, gene in enumerate(individual.genes):
                if random.random() >= self.mutation_rate:
                    continue

                # Mutate
                print("Mutation".center(20, "="))
                print(f"Individual to mutate: {individual.encoding}")

                mutation_type = random.choice(possible_mutations)
                is_pooling = gene in ("mean", "max")

                if mutation_type == "increment_depth":
                    # Put a layer after this layer
                    mutated_genes.insert(n_gene + 1, random_layer())

                elif mutation_type == "reduce_depth":
                    # Delete the current layer, but never the last one left:
                    # an empty gene list cannot be turned into a model
                    if len(mutated_genes) > 1:
                        removed = mutated_genes.pop(n_gene)
                        print(f"{removed} Removed from layers")

                elif mutation_type == "change_layer_type":
                    # Pooling layer becomes a skip layer and vice versa
                    mutated_genes[n_gene] = skip_layer_encode() if is_pooling else pooling_layer_encode()

                elif mutation_type == "recreate_layer":
                    # Same kind of layer, freshly generated
                    mutated_genes[n_gene] = pooling_layer_encode() if is_pooling else skip_layer_encode()

                break

            if mutated_genes == individual.genes:
                # Nothing changed: keep the individual and its metrics
                continue

            print(f"Individual before: {individual.genes}")
            individuals[position] = CNN("-".join(mutated_genes), self.input_shape, self.output_shape)
            print(f"Individual after: {individuals[position].genes}")

    def main_loop(self):
        """
        Runs the whole evolution: random initial population, then
        `num_generations` rounds of selection, crossover, mutation,
        evaluation and elitism. The cache of evaluated architectures is
        saved after every evaluation.
        """

        # Initialize population
        self.population.initialize(self.input_shape, self.output_shape)

        # Print individuals
        print("Starting evolution loop with the next population:")
        self.population.print()

        # Evaluate population
        self.evaluate_population()

        print("Individuals Summary".center(30, " "))
        self.population.print()
        self.save_architectures()

        for generation in range(self.num_generations):
            print("".center(100, "="))
            print(f"Generation {generation + 1}")

            # ========== Selection =============
            self.population.individuals = self.tournament_selection(self.population_size)

            # ========== Crossover ==========
            self.crossover()

            # ========== Mutation ==============
            self.mutation()

            # ========== Evaluation ============
            # Done before printing/saving so the summary shows real metrics
            # and the last generation is also written to disk
            self.evaluate_population()

            # =========== Elitism ==============
            # The best individual replaces the worst one, only if it is not
            # already part of the population
            best = self.population.best_individual
            if best is not None and best not in self.population.individuals:
                self.population.individuals = sorted(self.population.individuals, key=lambda x: x.accuracy, reverse=False)
                worst = self.population.individuals[0]
                print(f"Replacing individual {worst.encoding} with accuracy {worst.accuracy} with best individual {best.encoding} with accuracy {best.accuracy}")
                self.population.individuals[0] = best

            if best is not None:
                print(f"The best individual was {best.encoding} with accuracy {round(best.accuracy, 4)}")

            print("Individuals Summary".center(30, " "))
            self.population.print()
            self.save_architectures()
        

# Running our Genetic Algorithm

## Preparing our parameters

### Loading the architectures that have already been evaluated

In [49]:
# Dictionary of saved architectures with their parameters
# with open("evaluated_architectures.json") as file:
#     saved_cnns = json.load(file)

### Loading the dataset with which the individuals will be evaluated

### Our dataset: CIFAR-10
It has 10 classes, which are:

| Label | Description |
|-------|-------------|
|   0   |   airplane  |
|   1   |  automobile |
|   2   |     bird    |
|   3   |     cat     |
|   4   |     deer    |
|   5   |     dog     |
|   6   |     frog    |
|   7   |    horse    |
|   8   |     ship    |
|   9   |    truck    |

In [50]:
# Load CIFAR-10 already split into train and test sets
# NOTE(review): the images are used exactly as loaded; no rescaling or
# normalization is applied anywhere in this notebook.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
# One-hot encode the labels: the models are compiled with
# categorical_crossentropy and end in a softmax layer
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

### Setting the training parameters ready

In [51]:
# Everything GeneticAlgorithm needs to train and evaluate an individual.
# The constructor reads "X_train" and "y_train_cat" to derive the input and
# output shapes; evaluate_population reads all six keys.
training_params = {
    "epochs" : 10,       # Training epochs per evaluated architecture
    "batch_size" : 32,   # Batch size passed to model.fit
    "X_train" : X_train,
    "X_test" : X_test,
    "y_train_cat" : y_train_cat,
    "y_test_cat" : y_test_cat
}

## Creating a GeneticAlgorithm object

In [52]:
# Build the genetic algorithm; nothing is trained until main_loop() is called
ga = GeneticAlgorithm(
    population_size=5,  # How many CNNs will be in the population
    min_genes=6,        # Minimum depth of the randomly generated CNNs
    max_genes=8,       # Maximum depth of the randomly generated CNNs
    fitness_func="acc", # TODO: this should be changed to a function that takes response time into account (currently stored but not read by GeneticAlgorithm)
    num_generations=6,  # Number of generations run by main_loop
    mutation_rate=0.05, # Mutation rate (value by convention)
    crossover_rate=0.4, # Crossover rate (value by convention)
    saved_cnns={}, # Cache of the individuals that have already been evaluated, thus saving resources
    training_params=training_params # Parameters needed for training
)

In [None]:
# Run the whole evolution and report the elapsed wall-clock time
t1 = time.perf_counter()
ga.main_loop()
t2 = time.perf_counter()
print(f"Time taken: {t2-t1} seconds.")

Starting evolution loop with the next population:
 CNN Depth   Encoding                                               Accuracy    Process time    Fitness
-----------  ----------------------------------------------------  ----------  --------------  ---------
     8       256_256-32_256-max-mean-512_256-32_512-64_128-mean        0             0             0
     6       max-32_512-256_512-256_128-mean-mean                      0             0             0
     8       mean-128_64-128_64-128_256-max-256_256-128_256-32_64      0             0             0
     7       mean-mean-max-512_32-mean-max-256_512                     0             0             0
     6       32_64-128_128-mean-256_32-32_32-512_128                   0             0             0
     6       32_32-128_32-64_32-max-64_128-128_512                     0             0             0
     8       mean-mean-256_128-mean-64_32-128_64-128_128-256_256       0             0             0
     8       32_32-max-max-mean-32

In [None]:
# ga.saved_cnns = {}

In [None]:
# from concurrent.futures import ProcessPoolExecutor
# t1 = time.perf_counter()
# with ProcessPoolExecutor() as executor:
#     executor.map(ga.main_loop())
# t2 = time.perf_counter()
# print(f"Time taken: {t2-t1} seconds.")

In [None]:
# Show the number of CPUs reported for this machine
import multiprocessing
multiprocessing.cpu_count()

In [None]:
# CNN defines __str__ but not __repr__, so a bare expression would only show
# the default object repr; print() displays encoding, accuracy and process time
print(ga.population.best_individual)