In [1]:
# Loading libraries
import numpy as np

In [2]:
# Defining our activation function
def relu(x):
    return np.where(x>0,x,0)

In [3]:
# Defining the softmax function
def softmax(x):
    x = np.exp(x - np.max(x))
    return np.array(x / x.sum())

In [4]:
# Defining our random network generation function
def create_network(n_units=(128,64),input_shape=784,output_shape=10):
    # First we need to randomly initialize our weight and bias matrices
    weights = []
    biases = []
    # Creating the weights for the hidden layers
    for i in range(len(n_units)):
        if i==0:
            weights.append(np.random.uniform(-0.15,0.15,size=(input_shape,n_units[0])).astype('float32'))
            biases.append(np.zeros(n_units[0]).astype('float32'))
        else:
            weights.append(np.random.uniform(-0.15,0.15,size=(n_units[i-1],n_units[i])).astype('float32'))
            biases.append(np.zeros(n_units[i]).astype('float32'))
    # Creating weights and biases for output layer
    weights.append(np.random.uniform(-0.15,0.15,size=(n_units[-1],output_shape)).astype('float32'))
    biases.append(np.zeros(output_shape))
    return weights+biases

In [5]:
# Now we need to create our feed forward function
def feed_forward(inputs, network):
    # Dividing into the weights and biases
    weights = network[0:len(network)//2]
    biases = network[len(network)//2:]
    # First we need to propogate inputs
    a = relu((inputs@weights[0])+biases[0])
    # Now we need to iterate through all of the remaining elements
    for i in range(1,len(weights)):
        a = relu((a@weights[i])+biases[i])
    # Now we need to run softmax over the result
    probs = np.apply_along_axis(softmax, axis=1, arr=a)
    # Finally, return the max
    return np.argmax(probs,axis=1)

In [6]:
# We need to create a better offspring generation function that selects neurons rather than random weights
def offspring_network(weight_set1, weight_set2):
    # First we need to create the child
    child = [np.copy(i) for i in weight_set1]
    # We need to select random neurons from each layer
    # Axis 1 of W represents neurons
    biases = [np.copy(i) for i in weight_set2[len(weight_set2)//2:]]
    selected_biases = np.array([np.random.choice(range(i.shape[0]),size=i.shape[0]//2) for i in biases])
    # Now to need to crossover the neurons in the child network
    for i in range(int(np.floor(len(child)/2))):
        for j in selected_biases[i]:
            child[i][:,j] = weight_set2[i][:,j]
    for i in range((len(child)//2)+1, len(child)):
        for j in selected_biases[i-(len(child)//2)]:
            child[i][j] = weight_set2[i][j]
    return child

In [31]:
# Defining the function to mutate a single network
def mutate_layer(x, mutation_prob=0.1, type_probs=(0.8,0.1,0.1),shift_max=2.0,swap_max=2):
    # Flattening our array
    wts = x.flatten()
    # Selecting which neurons get mutated
    to_mutate = np.random.choice((0,1),size=wts.shape[0],p=(1-mutation_prob,mutation_prob))
    to_mutate = np.array(np.where(to_mutate==1))
    # Selecting which type of mutation to apply to each neuron
    if len(np.where(to_mutate==1))>0:
        mutation_type=np.random.choice(('shift','sign','swap'),size=to_mutate.shape[0],replace=True,p=type_probs)
        # Performing shift mutations
        to_shift = np.where(mutation_type=='shift')
        wts[to_shift] = np.multiply(wts[to_shift],np.random.uniform(low=0.0,high=shift_max,size=len(to_shift)))
        # Performing sign mutations
        to_sign = np.where(mutation_type=='sign')
        wts[to_sign] = wts[to_sign]*-1
        # Performing swap mutations
        to_swap = np.where(mutation_type=='swap')
        wts[to_swap] = np.random.uniform(low=-1*swap_max,high=swap_max,size=len(to_swap))
    return wts.reshape(x.shape)

In [8]:
# Defining the function that mutates an entire network
def mutate_network(network, mutation_prob=0.1, type_probs=(0.8,0.1,0.1),shift_max=2.0,swap_max=2):
    network = [mutate_layer(i,mutation_prob, type_probs,shift_max,swap_max) for i in network]
    return network

In [9]:
# Creating our die-off function
def die_off(pops, scores, rate=0.5):
    # Sorting our populations first
    sorted_inds = scores.argsort()
    sorted_scores = -np.sort(-scores)
    sorted_pops = pops[sorted_inds[::-1]]
    # Killing off the weak
    surviving_pop = sorted_pops[0:int(np.ceil(sorted_pops.shape[0]*(rate-1.)))]
    surviving_scores = sorted_scores[0:int(np.ceil(sorted_pops.shape[0]*(rate-1.)))]
    return surviving_pop, surviving_scores

In [10]:
# Creating a function for creating a child population based on fitness of parents
def mate(pops, scores, num_children, fit_preference=2, mutation_prob=0.1, type_probs=(0.8,0.1,0.1),shift_max=2.0,swap_max=2):
    # Creating standardized scores to use as mating probabilities
    fitness = np.power(scores, fit_preference)
    probs = fitness/fitness.sum()
    # Selecting two sets of parents
    parent1 = np.random.choice(a=range(pops.shape[0]), size=num_children, replace=True, p=probs)
    parent2 = np.random.choice(a=range(pops.shape[0]), size=num_children, replace=True, p=probs)
    parents = [(parent1[i], parent2[i]) for i in range(parent1.shape[0])]
    # Next we need to create the list of the children
    children = [offspring_network(pops[parents[i][0]], pops[parents[i][1]]) for i in range(len(parents))]
    # Time to mutate the children
    children = [mutate_network(i, mutation_prob, type_probs,shift_max,swap_max) for i in children]
    children = np.array(children)
    return np.concatenate([pops, children])

In [11]:
# Creating our accuracy measure
def accuracy(actual, preds):
    return np.mean(np.where(actual==preds,1,0))

In [12]:
# Defining a function for evaluating the fitness of our models
def evaluate_fitness(networks, X, y):
    # Creating a list comprehension of score evaluation
    scores = np.array([accuracy(feed_forward(X,i), y) for i in networks])
    return scores

In [13]:
# Loading the MNIST data
X_train = np.genfromtxt('X_train.csv', delimiter=',')
X_test = np.genfromtxt('X_test.csv', delimiter=',')
y_train = np.genfromtxt('y_train.csv', delimiter=',')
y_test = np.genfromtxt('y_test.csv', delimiter=',')

In [46]:
# Lets create a function now to perform our genetic modeling
# Setting the population size
pop_size = 50

# Setting the die-off rate
die_off_rate = 0.25

# Setting our fitness preference rate
fitness_pref = 1.0

# Setting the max number of iterations
max_iter = 500

# Setting mutation rates
mutation_rate = 0.2
type_probs = (0.8,0.1,0.1)
shift_max = 3.0
swap_max = 3

# Setting caps for mutation
mutation_rate_cap = 0.5

# How often do we want to display progress?
show_every = 1

# How much should we increase mutation by when not improving?
rate_increase = 0.0125
current_mutation_rate = mutation_rate

In [47]:
# Creating our initial set of models
models = np.array([create_network(n_units=(32,),input_shape=784,output_shape=10) for _ in range(pop_size)])

# Starting our loop to train models
for i in range(max_iter):
    
    # Evaluate fitness
    scores = evaluate_fitness(models, X_train, y_train)
    
    # Initializing the previous score variable
    if i==0:
        previous_score = scores.max().round(4)
    
    # Cause die-off
    models, scores = die_off(models, scores, die_off_rate)
    
    # Increasing mutation rate if necessary
    if scores.max().round(4) == previous_score and i != 0:
        current_mutation_rate += rate_increase
    else:
        current_mutation_rate = mutation_rate
    # Checking if we have hit mutation caps
    if current_mutation_rate > mutation_rate_cap:
        current_mutation_rate = mutation_rate_cap
    previous_score = scores.max().round(4)
    
    # Report best score
    if (i+1)%show_every==0:
        print('Generation',i+1,'| Best Score:',scores.max().round(4),'| Mutation Rate:',round(current_mutation_rate,3))
    
    # Create children
    models = mate(pops=models,
                  scores=scores,
                  num_children=pop_size-models.shape[0],
                  fit_preference=fitness_pref,
                  mutation_prob=current_mutation_rate, 
                  type_probs=type_probs,
                  shift_max=shift_max,
                  swap_max=swap_max)

# Done with training!
print('Done with training!')

Generation 1 | Best Score: 0.2064 | Mutation Rate: 0.2
Generation 2 | Best Score: 0.2064 | Mutation Rate: 0.213


KeyboardInterrupt: 

In [None]:
# Creating our GeneticMLP class
class GeneticMLP():
    def __init__(self, pop_size=100, die_off_rate=0.5,fitness_pref=1.0,generations=500,mutation_rate=0.2,
                 type_probs=(0.8,0.1,0.1),shift_max=2.0, swap_max=2.0,mutation_rate_cap=0.4,verbose=False,print_every=1,
                mutation_rate_increase=0.025):
        self.pop_size = pop_size
        self.die_off_rate = die_off_rate
        self.fitness_pref = fitness_pref
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.type_probs = type_probs
        self.shift_max = shift_max
        self.swap_max = swap_max
        self.mutation_rate_cap = mutation_rate_cap
        self.verbose = verbose
        self.print_every = print_every
        self.mutation_rate_increase = mutation_rate_increase
        return self

    def fit(X, y):
        
    def predict(X):
    

In [None]:
# Lets test to make sure the feed forward command is working using keras
import tensorflow as tf

# Creating our network
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, input_shape=(784,),activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train,epochs=5)
model.evaluate(X_test, y_test)

In [None]:
# Grabbing model weights
def convert_to_numpy(x):
    weights = x[::2]
    biases = x[1::2]
    return np.array(weights+biases)
keras_model = convert_to_numpy(model.get_weights())
print(keras_model)

In [None]:
# Running it though our propgation function
print(feed_forward(X_train, keras_model))
print(y_train)

In [None]:
# Testing the network offspring function
net1 = create_network(n_units=(32,),input_shape=784,output_shape=10)
net2 = create_network(n_units=(32,),input_shape=784,output_shape=10)
print(net1[0][0,0:6])
#print(net2)
child = offspring_network(net1,net2)
print(net1[0][0,0:6])
#print(net2)
#print(child)