# Neural Architecture Search (NAS) for CNNs

## Structural elements

- Branches
- Connections
- Layers
    - Input
        - Input shape (first input is standard, all others are calculated)
        - Batch size 
    - 2D convolutional
        - Filters
        - Kernel size
        - Padding
        - Activation
    - Pooling
        - Max
            - Size
            - Strides
            - Padding
        - Average
            - Size
            - Strides
            - Padding
    - Dropout
        - Rate
    - Batch normalisation
    - Concatenate
        - Axis
    - Flatten
    - Dense
        - Units (1, 2)
        - Activation (sigmoid, softmax)
    - Global pooling
        - Max
        - Average

## Genetic elements

- GA
    - Type
    - Number of generations
- Population
    - Size
    - Encoding type
    - Individual size
        - Variable or fixed
- Fitness
- Selection
- Reproduction
- Mutation

In [198]:
import numpy as np
import tensorflow as tf
from scipy.spatial import distance


In [44]:
# Search space of the architecture search: for every layer kind, the
# candidate values of each hyper-parameter. Genes of an individual are
# indices into these lists, so the order of keys and of values matters.
params = {
    # Input layer
    'input': {
        'batch': [8, 16, 32, 64, 128, 256],
    },
    # 2D convolutional layer
    'conv': {
        'kernel': ['1x1', '3x3', '5x5', '7x7'],
        'filter': [2, 4, 8, 16, 32, 64, 128],
        'padding': ['valid', 'same'],
        'activation': ['tanh', 'relu', 'selu', 'elu'],
    },
    # Pooling layer
    'pool': {
        'type': ['max', 'average'],
        'size': ['2x2', '3x3', '4x4', '5x5'],
        'padding': ['valid', 'same'],
    },
    # Dropout layer
    'dropout': {
        'type': ['dropout', 'spatial2D'],
        'rate': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    },
    # Output layer
    'output': {
        'type': ['global', 'dense'],
    },
}
    

In [35]:
# Symbolic layout of one individual: a fixed-size head of index genes (one
# per hyper-parameter) followed by a variable-length tail of "add layer"
# records. Three records are spelled out here (Layer/Branch 1, 2, ..., n).
individual = (
    # Input layer
    ['batch_size_index']
    # Convolutional layer
    + ['kernel_index', 'filter_index', 'padding_index', 'activation_index']
    # Pooling layer
    + ['type_index', 'size_index', 'padding_index']
    # Dropout layer
    + ['type_index', 'rate_index']
    # Output layer
    + ['type_index']
    # Add layer: one five-field record per added layer/branch
    + ['indicator', 'branch_indicator', 'type', 'from', 'to'] * 3
)
    


[8, 16, 32, 64, 128, 256]

In [177]:
def get_state_space(parameters):

    """
    Get the encoding length of an individual based on the input parameters dictionary

    parameters: dict mapping a layer name to a dict that maps each
        hyper-parameter name to the list of values it may take.

    Returns a tuple (individual_length, gene_length, len_lists):
        individual_length: total number of bits in one individual
        gene_length: number of bits per gene, i.e. the width needed to
            write the largest index of the longest value list in binary
        len_lists: numpy array with the number of values of every
            hyper-parameter, in dictionary order

    Raises ValueError when the dictionary holds no hyper-parameters or when
    a hyper-parameter has an empty value list.
    """

    # Read from the argument (not a module-level dictionary) so that any
    # search space can be measured.
    option_counts = [
        len(value_list)
        for layer in parameters.values()
        for value_list in layer.values()
    ]

    if not option_counts:
        raise ValueError("parameters does not define any hyper-parameters")

    if min(option_counts) < 1:
        raise ValueError("every hyper-parameter needs at least one value")

    # Bits needed to write the largest index (count - 1); index 0 alone
    # still occupies one bit.
    gene_length = max(max(1, (count - 1).bit_length()) for count in option_counts)

    # Every gene is stored with the same width
    individual_length = len(option_counts) * gene_length

    return individual_length, gene_length, np.array(option_counts)

In [178]:
# Measure the search space: total chromosome length in bits, bits per gene,
# and the number of options of every hyper-parameter.
ind_len, gene_len, len_list = get_state_space(params)
print(ind_len, gene_len, len_list)

44 4 [6 4 7 2 4 2 4 2 2 9 2]


In [180]:
def generate_population(length, parameter_len, n = 10, seed = None):
    """
    Generate population given the number of individuals in a population and the required binary length

    length: number of bits of one individual; must be a multiple of the
        number of genes
    parameter_len: sequence with the number of options of every gene; gene
        values are drawn uniformly from 0 .. options - 1
    n: number of individuals to generate
    seed: optional seed passed to np.random.seed to make the draw repeatable

    Returns an integer numpy array of shape (n, length) holding 0/1 bits,
    every gene written most-significant bit first and left-padded with
    zeros to the common gene width.

    Raises ValueError when the genes cannot be laid out in `length` bits.
    """

    length = int(length)

    gene_count = len(parameter_len)

    if gene_count == 0:
        raise ValueError("parameter_len must contain at least one gene")

    if length % gene_count != 0:
        raise ValueError("length must be a multiple of the number of genes")

    bit_length = length // gene_count

    # Check the layout up front: otherwise a gene that is too wide only
    # fails when its largest indices happen to be drawn.
    for options in parameter_len:

        if int(options) < 1:
            raise ValueError("every gene needs at least one option")

        if (int(options) - 1).bit_length() > bit_length:
            raise ValueError("a gene with %d options does not fit in %d bits" % (int(options), bit_length))

    if seed is not None:

        np.random.seed(seed)

    population = np.zeros(shape=(n, length), dtype=int)

    for i in range(n):

        # One uniform draw per gene, in gene order
        genes = [np.random.choice(np.arange(0, options)) for options in parameter_len]

        bits = ''.join(format(int(gene), '0%db' % bit_length) for gene in genes)

        population[i] = [int(bit) for bit in bits]

    return population


In [291]:
# Build a seeded population of 100 individuals and show the first chromosome
pop = generate_population(ind_len, len_list, n = 100, seed = 43)
print(pop[0])

[0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0
 0 1 0 0 0 0 0]


In [193]:
def decoder(chromosome, gene_length, parameter_lengths):

    """
    Converts the binary bitstring vector to integer phenotype and performs check to see if it is valid with respect to the constraints

    chromosome: sequence of 0/1 bits, genes stored back to back with the
        most significant bit first
    gene_length: number of bits per gene
    parameter_lengths: number of options of every gene, in gene order

    Returns a tuple (phenotype, is_valid):
        phenotype: integer numpy array with the decoded value of each gene
        is_valid: True only when every decoded value is a usable index,
            i.e. strictly smaller than the number of options of its gene
    """

    bit_string = ''.join(str(int(bit)) for bit in chromosome)

    gene_length = int(gene_length)

    phenotype = np.array(
        [int(bit_string[start:start + gene_length], 2) for start in range(0, len(bit_string), gene_length)],
        dtype=int,
    )

    # Valid indices are 0 .. options - 1, and all genes have to comply
    is_valid = np.all(phenotype < np.asarray(parameter_lengths))

    return phenotype, is_valid


In [195]:
# Decode the first chromosome into per-gene integers plus its validity flag
pheno, valid = decoder(pop[0], gene_len, len_list)
print(pheno, valid)

[3 0 6 0 3 0 0 0 1 2 0] True


In [371]:
def hamming_distance(population, selection_probability = 0.1, seed = None):

    """
    Estimate the diversity of a population as a mean normalised Hamming distance

    A sample of individuals is drawn without replacement; for each of them
    the fraction of differing bits against every individual of the
    population (itself included) is averaged, and the result is the mean
    over the sample.

    population: 2D array-like of bits, one individual per row
    selection_probability: fraction of the population to sample; the sample
        size is rounded up and kept between 1 and the population size
    seed: optional seed passed to np.random.seed to make the sample repeatable

    Returns the estimate as a float between 0 and 1.

    Raises ValueError for an empty population.
    """

    population = np.asarray(population)

    n = population.shape[0]

    if n == 0:
        raise ValueError("population must contain at least one individual")

    n_select = int(np.ceil(n*selection_probability))

    # Keep the sample size usable: at least one, at most everybody
    n_select = min(max(n_select, 1), n)

    if seed is not None:
        np.random.seed(seed)

    # Any individual may be sampled, and none of them twice
    sampled = np.random.choice(n, size = n_select, replace = False)

    mean_hamming = 0.0

    for individual in sampled:

        # Rows have equal length, so the mean over all compared bits equals
        # the mean of the per-individual Hamming distances.
        mean_hamming += np.mean(population != population[individual])

    return mean_hamming/n_select

In [379]:
# Diversity estimate of the population with the default sampling fraction (unseeded)
hamming_distance(pop)

0.23049999999999998

In [370]:
def fitness(fitness_input, population = None, model_params = None):

    """
    Combine a model score with population diversity and model size into one fitness value

    fitness_input: score of the evaluated model; must be non-zero
    population: optional population array; when given, its diversity is
        measured with hamming_distance, otherwise a diversity of 1 is used
    model_params: optional model parameter information; currently only a
        placeholder, the size factor stays 1

    Returns 1/fitness_input * 1/diversity * n_params.

    Raises ZeroDivisionError when fitness_input or the measured diversity is 0.
    """

    diversity = 1

    n_params = 1

    # Identity test: comparing a numpy array with `!=` is element-wise and
    # cannot be used as a condition.
    if population is not None:
        diversity = hamming_distance(population)

    if model_params is not None:
        n_params = 1 # need to determine the maximum number of possible parameters so that we can scale

    return 1/fitness_input * 1/diversity * n_params


In [465]:
def selection(population, k = 2, selection_probability = 0.9, seed = None):

    """
    Tournament selection: pick one individual out of k randomly drawn competitors

    The competitors are ranked by ascending fitness score (lowest first).
    The competitor at rank i (0-based) wins with probability
    selection_probability * (1 - selection_probability)**i, and the last
    rank takes whatever probability is left so that the total is 1.

    population: array with one individual per row
    k: tournament size, between 1 and the population size
    selection_probability: probability that the top-ranked competitor wins,
        between 0 and 1
    seed: optional seed passed to np.random.seed to make the draw repeatable

    Returns the population index of the selected individual.

    Raises ValueError when k is smaller than 1 (np.random.choice raises it
    when k exceeds the population size).
    """

    if k < 1:
        raise ValueError("k must be at least 1")

    n = population.shape[0]

    if seed is not None:
        np.random.seed(seed)

    sub_population = np.random.choice(np.arange(0, n, 1), k, replace = False)

    # train model
    # Placeholder: until models are trained, each competitor is scored from
    # a random number.
    scores = [fitness(np.random.random()) for _ in sub_population]

    ranked = sub_population[np.argsort(scores)]

    # Geometric probabilities over the rank positions (not over the score
    # values, which are unbounded and would distort the distribution)
    p_array = selection_probability * (1 - selection_probability) ** np.arange(len(ranked))

    # The last rank absorbs the remainder; clip guards against a tiny
    # negative value from floating point rounding.
    p_array[-1] = np.clip(1 - np.sum(p_array[:-1]), 0, 1)

    return np.random.choice(ranked, p = p_array)


In [525]:
# Run one selection on the population with the default settings (k = 2, unseeded)
selection(pop)

[96 22]


96