In [120]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers, models, Model
from tensorflow.keras.utils import to_categorical
#from vis.utils.utils import apply_modifications
import matplotlib.pyplot as plt
import numpy as np
import random
import time

In [121]:
# Lookup table from activation name to the TensorFlow callable that implements it.
# The names are the strings passed as `activation1` / `activation2` to
# MultiLayerPerceptron, and the pool that mutation draws a new activation from.
# Gotcha: code in this notebook that recovers a name from `layer.activation.__name__`
# maps 'softmax_v2' back to the 'softmax' key used here.
activation_functions = dict(
    tanh=tf.tanh,
    relu=tf.nn.relu,
    sigmoid=tf.nn.sigmoid,
    linear=tf.keras.activations.linear,
    softmax=tf.nn.softmax,
)

**Loading Data**

In [125]:
%%time
t0 = time.time()    # reference point for the total execution time printed after training

# Only the MNIST test split is used in this notebook; the training split is discarded.
_, (test_images, test_labels) = mnist.load_data()

# numpy: flatten each image to a 784-vector and scale pixel values by 1/255
flat_pixels = test_images.reshape(10000, 784).astype(np.float32) / 255.0
one_hot_labels = to_categorical(test_labels)  # one-hot encoding

# tensorflow: the rest of the notebook evaluates models directly on these tensors
X_test = tf.convert_to_tensor(flat_pixels)
y_test = tf.convert_to_tensor(one_hot_labels)

CPU times: total: 250 ms
Wall time: 236 ms


**Model Definition**

In [126]:
# Hyper-parameters read by MultiLayerPerceptron.mutate().

# Probability, per kernel entry, that Gaussian noise is added to that entry.
MUTATE_RATE_MATRIX = 0.3
# Probability, per bias entry, that Gaussian noise is added to that entry.
MUTATE_RATE_BIAS = 0.1
# Probability, per layer, that the activation is redrawn at random from
# `activation_functions` (the redraw may pick the current activation again).
MUTATE_RATE_ACTIVATION_FUNCTION = 0.1
# Standard deviation of the zero-mean normal noise used for kernel and bias mutation.
GAUSSIAN_NOISE_STDDEV = 1

In [None]:
class MultiLayerPerceptron(Model):
    """Two-layer feed-forward network whose parameters are supplied by the caller.

    Both Dense layers are created with constant initializers, so the kernels and
    biases start at exactly the values passed in. `mutate` then perturbs them in
    place of gradient-based training.
    """

    def __init__(self, matrix1, bias1, activation1, matrix2, bias2, activation2):
        """
        Build the two Dense layers (32 and 10 units) from explicit parameters.

        :param matrix1: initial kernel values of the first layer
        :param bias1: initial bias values of the first layer
        :param activation1: key of `activation_functions` for the first layer
        :param matrix2: initial kernel values of the second layer
        :param bias2: initial bias values of the second layer
        :param activation2: key of `activation_functions` for the second layer
        :raises KeyError: if an activation name is not a key of `activation_functions`
        """
        super().__init__()

        self.linear1 = self._dense(32, matrix1, bias1, activation_functions[activation1])
        self.linear2 = self._dense(10, matrix2, bias2, activation_functions[activation2])

    @staticmethod
    def _dense(units, kernel, bias, activation):
        """Create a Dense layer whose kernel and bias are initialized to the given values."""
        return tf.keras.layers.Dense(units,
                                     activation=activation,
                                     kernel_initializer=tf.keras.initializers.Constant(kernel),
                                     bias_initializer=tf.keras.initializers.Constant(bias))

    @staticmethod
    def _perturb(values, rate):
        """Return `values` with zero-mean Gaussian noise added to a random subset of entries.

        Each entry is selected independently with probability `rate`; the noise has
        standard deviation GAUSSIAN_NOISE_STDDEV.  # TODO: tune stddev
        """
        # Bernoulli(rate) mask: 1.0 where the entry is mutated, 0.0 elsewhere.
        mask = tf.cast(tf.random.uniform(values.shape) < rate, tf.float32)
        noise = tf.random.normal(mean=0.0, stddev=GAUSSIAN_NOISE_STDDEV, shape=values.shape)
        return values + tf.multiply(mask, noise)

    def call(self, inputs):
        """Forward pass: first Dense layer followed by the second."""
        x = self.linear1(inputs)
        x = self.linear2(x)
        return x

    def mutate(self):
        """Randomly perturb kernels, biases and activations of both layers.

        Each layer is replaced by a fresh Dense layer initialized with the mutated
        parameters, and the model is re-compiled afterwards with the accuracy metric.
        """
        # Build first so that `layer.kernel` / `layer.bias` exist to be read below.
        self.build(X_test.shape)   # TODO: required here?
        for layer_name in ('linear1', 'linear2'):
            layer = getattr(self, layer_name)

            matrix = self._perturb(layer.kernel, MUTATE_RATE_MATRIX)
            bias = self._perturb(layer.bias, MUTATE_RATE_BIAS)

            # Carry the activation callable forward directly. Looking it up again by
            # `__name__` is fragile: the name can differ from the dictionary key, and a
            # failed lookup used to discard the whole layer mutation.
            activation = layer.activation
            if random.uniform(0, 1) < MUTATE_RATE_ACTIVATION_FUNCTION:
                activation = activation_functions[random.choice(list(activation_functions.keys()))]

            setattr(self, layer_name, self._dense(layer.units, matrix, bias, activation))

        self.compile(metrics=['accuracy'])

In [None]:
class Population:
    """Population of MultiLayerPerceptron organisms evolved with a genetic algorithm.

    One generation step (`breed`) is: selection of survivors (with one elite),
    uniform crossover to create children, mutation of the children, and finally
    replacing the population with the children plus the unchanged elite.
    Fitness is the accuracy of each organism on (X_test, y_test).
    """

    def __init__(self, size=10, n_survivors=5):
        """
        Create `size` randomly initialized organisms and record the initial fitness.

        :param size: number of organisms in every generation
        :param n_survivors: number of parents per generation, including the elite
        """
        self.generation = 0
        self.size = size
        self.n_survivors = n_survivors
        self.elite = None
        self.fitness = None
        self.fitness_generation = -1  # generation when fitness was evaluated

        # initialization (gaussian)
        # TODO: max, min for now 7-bit integers
        self.organisms = []
        for _ in range(size):
            # TODO: for now fixed architecture
            # A Dense kernel has shape (inputs, units) and its bias has shape (units,).
            bias1 = tf.random.normal(mean=0.0, stddev=1.0, shape=[32])
            matrix1 = tf.random.normal(mean=0.0, stddev=1.0, shape=[784, 32])
            activation1 = 'sigmoid'

            bias2 = tf.random.normal(mean=0.0, stddev=1.0, shape=[10])
            matrix2 = tf.random.normal(mean=0.0, stddev=1.0, shape=[32, 10])
            activation2 = 'softmax'

            model = MultiLayerPerceptron(matrix1, bias1, activation1, matrix2, bias2, activation2)
            model.compile(metrics=['accuracy'])

            self.organisms.append(model)

        # (max fitness, average fitness) of the population, one entry per generation
        self.history = [(self.max_fitness(), self.average_fitness())]

    def organism_fitness(self):
        """Return the accuracy of every organism, evaluated at most once per generation."""
        if self.generation != self.fitness_generation:
            self.fitness = [organism.evaluate(X_test, y_test, verbose=0)[1] for organism in self.organisms]
            self.fitness_generation = self.generation

        return self.fitness

    def average_fitness(self):
        """Return the mean accuracy over the current organisms."""
        organism_fitness = self.organism_fitness()
        return sum(organism_fitness) / len(organism_fitness)

    def max_fitness(self):
        """Return the best accuracy among the current organisms."""
        return max(self.organism_fitness())

    def selection(self):
        """Pick the parents of the next generation.

        The fittest organism is stored in `self.elite`. Of the remaining organisms,
        `n_survivors - 1` are drawn without replacement with probability proportional
        to their fitness. Neither `self.organisms` nor the cached fitness list is
        modified, so the method can be called repeatedly within one generation.

        :return: list of the sampled survivors (the elite is not included)
        """
        organism_fitness = list(self.organism_fitness())  # copy: never edit the cache

        # elitism (n=1)
        elite_index = int(np.argmax(organism_fitness))
        self.elite = self.organisms[elite_index]

        candidates = [organism for i, organism in enumerate(self.organisms) if i != elite_index]
        candidate_fitness = [fitness for i, fitness in enumerate(organism_fitness) if i != elite_index]

        n_draws = self.n_survivors - 1
        total = sum(candidate_fitness)
        n_positive = sum(1 for fitness in candidate_fitness if fitness > 0)
        if total > 0 and n_positive >= n_draws:
            probabilities = [fitness / total for fitness in candidate_fitness]  # normalized
        else:
            # Fitness-proportional sampling is undefined when all fitness is zero, and
            # impossible without replacement when too few candidates have non-zero
            # probability; fall back to uniform sampling.
            probabilities = None

        # Sample indices rather than the model objects themselves, so numpy never
        # has to convert Keras models into an array.
        chosen = np.random.choice(len(candidates),
                                  size=n_draws,
                                  p=probabilities,
                                  replace=False)
        return [candidates[i] for i in chosen]

    @staticmethod
    def _uniform_crossover(father_values, mother_values):
        """Return a tensor taking each entry from either parent with probability 0.5."""
        take_father = tf.random.uniform(father_values.shape) < 0.5
        return tf.where(take_father, father_values, mother_values)

    @staticmethod
    def _activation_name(layer):
        """Return the `activation_functions` key for the activation of `layer`."""
        name = layer.activation.__name__
        # The function's `__name__` may be 'softmax_v2' while the dictionary key is 'softmax'.
        return 'softmax' if name == 'softmax_v2' else name

    def crossover(self, parents):
        """Create `size - 1` children by uniform crossover of random parent pairs.

        Both parents of a child are drawn without replacement from `parents` plus
        the elite. Kernels and biases are mixed entry-wise; each layer's activation
        is inherited from one of the two parents with equal probability.

        :param parents: survivors returned by `selection`
        :return: list of new, compiled and built MultiLayerPerceptron models
        """
        # TODO: for different type of networks
        pool = list(parents) + [self.elite]
        children = []
        while len(children) < (self.size - 1):
            father, mother = random.sample(pool, k=2)  # sample without replacement

            # TODO: for now assume same no of layers
            # TODO: create new model - efficient?
            layer_params = []
            for layer_name in ('linear1', 'linear2'):
                father_layer = getattr(father, layer_name)
                mother_layer = getattr(mother, layer_name)

                child_matrix = self._uniform_crossover(father_layer.kernel, mother_layer.kernel)
                child_bias = self._uniform_crossover(father_layer.bias, mother_layer.bias)

                donor_layer = father_layer if (random.uniform(0, 1) < 0.5) else mother_layer
                child_activation = self._activation_name(donor_layer)

                layer_params.append((child_matrix, child_bias, child_activation))

            (matrix1, bias1, activation1), (matrix2, bias2, activation2) = layer_params
            model = MultiLayerPerceptron(matrix1=matrix1,
                                         bias1=bias1,
                                         activation1=activation1,
                                         matrix2=matrix2,
                                         bias2=bias2,
                                         activation2=activation2)
            model.compile(metrics=['accuracy']) # TODO: necessary??
            model.build(X_test.shape)   # necessary to build params
            children.append(model)

        return children

    def mutate(self, organisms):
        """Mutate every organism of `organisms` in place."""
        for organism in organisms:
            organism.mutate()

    def breed(self):
        """Advance the population by one generation and print per-phase timings.

        The elite is carried over unmutated. After the step, the new population is
        evaluated and (max fitness, average fitness) is appended to `self.history`.
        """
        t_start = time.time()
        parents = self.selection()
        t_selected = time.time()
        children = self.crossover(parents)
        t_crossed = time.time()
        self.mutate(children)  # TODO: mGA or GA?
        t_mutated = time.time()

        print('selection time: {}s - crossover time: {}s - mutation time: {}s - '.format(
            round(t_selected - t_start, 4),
            round(t_crossed - t_selected, 4),
            round(t_mutated - t_crossed, 4)))

        self.organisms = children + [self.elite]
        self.generation += 1
        # Reading the fitness here evaluates the new generation and fills the cache.
        self.history.append((self.max_fitness(), self.average_fitness()))

    def plot(self):
        """Plot maximum and average fitness over all recorded generations."""
        generations = np.arange(len(self.history))  # one x value per history entry
        plt.figure()
        plt.plot(generations, [score[0] for score in self.history],
                 label='max fitness')
        plt.plot(generations, [score[1] for score in self.history],
                 label='avg fitness', alpha=0.6)
        plt.title('Population fitness' + ' (n=' + str(self.size) + ')')
        plt.xlabel('Generations')
        plt.ylabel('Fitness score (accuracy)')
        plt.legend()
        plt.show()

**Training**

In [None]:
# initialization
# Exclusive upper bound of the training loop's generation counter: generation 0 is
# the random initial population and the loop runs range(1, GENERATIONS).
GENERATIONS = 100
# Number of organisms per generation (passed to Population as `size`).
POPULATION_SIZE = 10
# Number of parents per generation, elite included (passed to Population as `n_survivors`).
SURVIVORS = 5

In [None]:
# initial population
print('Starting training')
t_training = time.time()
population = Population(size=POPULATION_SIZE, n_survivors=SURVIVORS)
population_fitness = population.organism_fitness()
max_fitness = population.max_fitness()
t2 = time.time()
print('Gen', 0, ':',
      population_fitness, '- max:',
      max_fitness,
      '({}s)'.format(round(t2 - t_training, 2)))

# future populations
for generation in range(1, GENERATIONS):
    # breed new population
    t1 = time.time()
    population.breed()

    # evaluate new population
    # NOTE: breed() already evaluates the new generation when it records its history
    # entry, so the calls below normally read the cached fitness and this timing is
    # close to zero.
    ta = time.time()
    population_fitness = population.organism_fitness()
    max_fitness = population.max_fitness()
    t2 = time.time()
    print('evaluation time: {}s - '.format(round(t2 - ta, 4)))

    print('Gen', generation, ':',
          population_fitness, '- max:',
          max_fitness,
          '({}s)'.format(round(t2 - t1, 2)))

print('Finished training ({})'.format(round(time.time() - t_training, 2)))
print('\nTotal computation time: ({}s)'.format(round(time.time() - t0, 2)))

# performance of population
population.plot()

Gen 31 : [0.1662999987602234, 0.17170000076293945, 0.14069999754428864, 0.1923999935388565, 0.14910000562667847, 0.18320000171661377, 0.16670000553131104, 0.12020000070333481, 0.14079999923706055, 0.20430000126361847] - max: 0.20430000126361847 (13.31s)
selection time: 2.521s - crossover time: 0.1235s - mutation time: 0.1245s - 
Gen 32 : [0.13030000030994415, 0.15809999406337738, 0.15240000188350677, 0.2037000060081482, 0.1590999960899353, 0.14720000326633453, 0.14249999821186066, 0.16009999811649323, 0.17910000681877136, 0.20430000126361847] - max: 0.20430000126361847 (12.94s)
selection time: 2.514s - crossover time: 0.203s - mutation time: 0.1475s - 
Gen 33 : [0.19509999454021454, 0.11640000343322754, 0.18809999525547028, 0.10949999839067459, 0.1664000004529953, 0.1518000066280365, 0.14139999449253082, 0.16760000586509705, 0.18970000743865967, 0.20430000126361847] - max: 0.20430000126361847 (12.73s)
selection time: 2.478s - crossover time: 0.1225s - mutation time: 0.168s - 
Gen 34 : 

KeyboardInterrupt: 

In [None]:
# Scratch cell: random parameters for timing a single model evaluation.
# Kernels are drawn as (inputs, units) and biases as (units,).
bias1 = tf.random.normal(mean=0.0, stddev=1.0, shape=(32,))
matrix1 = tf.random.normal(mean=0.0, stddev=1.0, shape=(784, 32))
# NOTE(review): bias2 and matrix2 are not used anywhere in this cell.
bias2 = tf.random.normal(mean=0.0, stddev=1.0, shape=(10, ))
matrix2 = tf.random.normal(mean=0.0, stddev=1.0, shape=(32, 10))

# NOTE(review): `LinModel` is not defined in any visible cell of this notebook, so
# this line raises NameError on a fresh kernel unless it is defined elsewhere.
# TODO confirm where it comes from, or remove this cell.
lin = LinModel(matrix1=matrix1, bias1=bias1)
lin.compile(metrics=['accuracy'])

In [None]:
%%timeit
# Times one evaluation of `lin` on the test tensors; index 1 of the result is the
# value this notebook reads as accuracy (same indexing as Population.organism_fitness).
lin.evaluate(X_test, y_test, verbose=0)[1]