In [1]:
# Loading libraries
import numpy as np

In [2]:
# Defining our activation function
def relu(x):
    """Apply the rectified linear unit element-wise.

    Entries of ``x`` that are strictly greater than zero are kept as they
    are; every other entry is replaced by 0.
    """
    is_positive = x > 0
    return np.where(is_positive, x, 0)

In [3]:
# Defining the softmax function
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow; the exponentials are then divided by their
    total so the result sums to one.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    total = exponentials.sum()
    return np.array(exponentials / total)

In [21]:
# Defining our random network generation function
def create_network(n_units=(128,64),input_shape=784,output_shape=10):
    """Create a randomly initialised fully connected network.

    Parameters
    ----------
    n_units : sequence of int
        Width of each hidden layer, in order. May be empty, in which case
        the network is a single input -> output layer.
    input_shape : int
        Number of input features.
    output_shape : int
        Number of output units.

    Returns
    -------
    list of np.ndarray
        All weight matrices (one per layer, shape ``(fan_in, fan_out)``)
        followed by all bias vectors (shape ``(fan_out,)``), so the first
        half of the list holds weights and the second half holds biases.
        Every array is float32.
    """
    # Sizes at every layer boundary: input -> hidden... -> output
    layer_sizes = [input_shape, *n_units, output_shape]
    weights = []
    biases = []
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        # Weights are drawn uniformly from [-0.15, 0.15); biases start at zero
        weights.append(np.random.uniform(-0.15, 0.15, size=(fan_in, fan_out)).astype('float32'))
        # Keep every bias (including the output one) in float32 so a forward
        # pass is not silently promoted to float64
        biases.append(np.zeros(fan_out, dtype='float32'))
    return weights + biases

In [22]:
# Now we need to create our feed forward function
def feed_forward(inputs, network):
    """Run a forward pass and return the predicted class for every row.

    Parameters
    ----------
    inputs : np.ndarray
        2-D array with one sample per row.
    network : sequence of np.ndarray
        Weight matrices followed by bias vectors, i.e. the first half of
        the sequence holds the weights and the second half the biases.

    Returns
    -------
    np.ndarray
        1-D array holding, for each row of ``inputs``, the index of the
        largest output unit.
    """
    # Dividing into the weights and biases
    half = len(network) // 2
    weights = network[:half]
    biases = network[half:]
    # Propagate the inputs through the hidden layers only; ReLU is applied
    # to hidden activations
    a = inputs
    for w, b in zip(weights[:-1], biases[:-1]):
        a = relu((a @ w) + b)
    # The output layer stays linear: clipping the logits with ReLU would turn
    # every all-negative row into a tie at zero and always predict class 0
    logits = (a @ weights[-1]) + biases[-1]
    # Softmax is monotonic, so the most probable class is simply the largest
    # logit; taking the argmax directly avoids a per-row Python loop
    return np.argmax(logits, axis=1)

In [6]:
# Creating a function to create an offspring array
def offspring_array(x1, x2):
    """Cross two parent arrays into one child array of the same shape.

    Half of the positions (rounded up) are chosen at random, without
    repetition, and take their value from ``x2``; all remaining positions
    keep the value from ``x1``. Neither parent is modified.

    Parameters
    ----------
    x1, x2 : np.ndarray
        Parent arrays holding the same number of elements.

    Returns
    -------
    np.ndarray
        Child array with the shape (and dtype) of ``x1``.

    Raises
    ------
    ValueError
        If the parents do not hold the same number of elements.
    """
    # We need to save the final shape
    shape = x1.shape
    # flatten() copies, so the parents are left untouched
    child = x1.flatten()
    donor = x2.flatten()
    if donor.shape[0] != child.shape[0]:
        raise ValueError('parents must have the same number of elements')
    n_genes = child.shape[0]
    if n_genes == 0:
        return child.reshape(shape)
    # Pick exactly half of the positions; sampling without replacement
    # guarantees the indices are distinct
    n_swap = int(np.ceil(n_genes / 2))
    swap = np.random.choice(n_genes, size=n_swap, replace=False)
    # Copy by index instead of marking positions with 0, so genes of x1 that
    # are genuinely zero are not overwritten by accident
    child[swap] = donor[swap]
    return child.reshape(shape)

In [7]:
# Lets try writing a simple offspring generator
def create_offspring(weight_set1, weight_set2):
    """Cross two networks layer by layer.

    Parameters
    ----------
    weight_set1, weight_set2 : sequence of np.ndarray
        Parameter arrays of the two parents, in the same order. Any
        indexable sequence works (a list or a 1-D object array).

    Returns
    -------
    list of np.ndarray
        One child array per parent array pair, produced by
        ``offspring_array``.

    Raises
    ------
    ValueError
        If the parents do not hold the same number of arrays.
    """
    # len() works for both plain lists and numpy arrays
    n_layers = len(weight_set1)
    if len(weight_set2) != n_layers:
        raise ValueError('parents must have the same number of layers')
    # Pair up matching layers of both parents and cross each pair
    return [offspring_array(layer1, layer2) for layer1, layer2 in zip(weight_set1, weight_set2)]

In [8]:
# Now lets define some of our base mutation functions
def weight_shift(x, prob=0.1, var=0.1):
    """Add Gaussian noise to a random subset of the entries of ``x``.

    Parameters
    ----------
    x : np.ndarray
        Array to mutate; it is not modified, a mutated copy is returned.
    prob : float
        Fraction of entries to mutate; ``ceil(prob * size)`` distinct
        entries are selected (capped at the number of entries).
    var : float
        Passed to ``np.random.normal`` as its scale, i.e. it is the
        standard deviation of the noise despite its name.

    Returns
    -------
    np.ndarray
        Mutated copy with the same shape as ``x``.
    """
    # Flattening out our array (flatten() returns a copy)
    wts = x.flatten()
    n_weights = wts.shape[0]
    n_mutate = min(int(np.ceil(prob * n_weights)), n_weights)
    if n_mutate <= 0:
        return wts.reshape(x.shape)
    # Selecting distinct neurons to mutate so exactly n_mutate entries change
    to_mutate = np.random.choice(n_weights, size=n_mutate, replace=False)
    # Applying mutation noise: an independent draw for every selected entry,
    # rather than one shared offset for all of them
    wts[to_mutate] = wts[to_mutate] + np.random.normal(0, var, size=n_mutate)
    return wts.reshape(x.shape)

def weight_random(x, prob=0.1, min_val = -4, max_val=4):
    """Replace a random subset of the entries of ``x`` with uniform values.

    Parameters
    ----------
    x : np.ndarray
        Array to mutate; it is not modified, a mutated copy is returned.
    prob : float
        Fraction of entries to replace; ``ceil(prob * size)`` distinct
        entries are selected (capped at the number of entries).
    min_val, max_val : float
        Bounds of the uniform distribution the new values are drawn from.

    Returns
    -------
    np.ndarray
        Mutated copy with the same shape as ``x``.
    """
    # Flattening out our array (flatten() returns a copy)
    wts = x.flatten()
    n_weights = wts.shape[0]
    n_mutate = min(int(np.ceil(prob * n_weights)), n_weights)
    if n_mutate <= 0:
        return wts.reshape(x.shape)
    # Selecting distinct neurons to mutate so exactly n_mutate entries change
    to_mutate = np.random.choice(n_weights, size=n_mutate, replace=False)
    # Overwriting the selected entries with fresh uniform draws
    wts[to_mutate] = np.random.uniform(low=min_val, high=max_val, size=n_mutate)
    return wts.reshape(x.shape)

# Mutation functions for networks
def mutate_weight_shift(x, prob=0.1, var=0.5):
    """Apply ``weight_shift`` to every array of a network.

    ``x`` is a sequence of parameter arrays; each one is passed to
    ``weight_shift`` together with ``prob`` and ``var`` and the results are
    returned as a new list in the same order.
    """
    mutated = []
    for layer in x:
        mutated.append(weight_shift(layer, prob, var))
    return mutated

def mutate_weight_random(x, prob=0.1, min_val=-4, max_val=4):
    """Apply ``weight_random`` to every array of a network.

    ``x`` is a sequence of parameter arrays; each one is passed to
    ``weight_random`` together with ``prob``, ``min_val`` and ``max_val``
    and the results are returned as a new list in the same order.
    """
    mutated = []
    for layer in x:
        mutated.append(weight_random(layer, prob, min_val, max_val))
    return mutated

In [9]:
# Creating our die-off function
def die_off(pops, scores, rate=0.5):
    """Keep only the best-scoring part of a population.

    Parameters
    ----------
    pops : np.ndarray
        Population, indexed along its first axis (one individual per row).
    scores : np.ndarray
        1-D array with one score per individual; higher is better.
    rate : float
        Fraction of the population that survives. ``floor(n * (1 - rate))``
        individuals are removed, so a fractional survivor is kept.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The surviving individuals and their scores, best first.
    """
    # Sorting our populations first, from best to worst score
    order = scores.argsort()[::-1]
    sorted_pops = pops[order]
    sorted_scores = scores[order]
    # Killing off the weak. The survivor count is computed explicitly: a
    # negative slice bound would become 0 when nobody dies and wipe out the
    # whole population instead of keeping it
    n_total = sorted_pops.shape[0]
    n_dead = int(np.floor(n_total * (1.0 - rate)))
    n_keep = min(max(n_total - n_dead, 0), n_total)
    surviving_pop = sorted_pops[:n_keep]
    surviving_scores = sorted_scores[:n_keep]
    return surviving_pop, surviving_scores

In [10]:
# Creating a function for creating a child population based on fitness of parents
def mate(pops, scores, num_children, shift_prob=0.1, random_prob=0.1, var=0.1, min_val=-4, max_val=4, fit_preference=2):
    """Extend a population with mutated children of fitness-selected parents.

    Parameters
    ----------
    pops : np.ndarray
        Parent population; assumed to be a 2-D object array with one
        network per row and one parameter array per column (this is how the
        notebook builds it - confirm for other callers).
    scores : np.ndarray
        One non-negative score per parent.
    num_children : int
        Number of children to create.
    shift_prob, var : float
        Forwarded to ``mutate_weight_shift``.
    random_prob, min_val, max_val : float
        Forwarded to ``mutate_weight_random``.
    fit_preference : float
        Exponent applied to the scores before they are normalised into
        selection probabilities; larger values favour fitter parents.

    Returns
    -------
    np.ndarray
        The parents followed by the new children.
    """
    # Nothing to add: hand back a copy of the parents
    if num_children <= 0:
        return pops.copy()
    n_parents = pops.shape[0]
    # Creating standardized scores to use as mating probabilities
    fitness = np.power(scores, fit_preference)
    total = fitness.sum()
    if total > 0:
        probs = fitness / total
    else:
        # Every parent scored zero: fall back to uniform selection instead
        # of dividing by zero
        probs = np.full(n_parents, 1.0 / n_parents)
    # Selecting two sets of parents (both drawn with replacement, so a
    # parent can be paired with itself)
    parent1 = np.random.choice(n_parents, size=num_children, replace=True, p=probs)
    parent2 = np.random.choice(n_parents, size=num_children, replace=True, p=probs)
    # Next we need to create the list of the children
    children = [create_offspring(pops[first], pops[second]) for first, second in zip(parent1, parent2)]
    # Time to mutate the children
    children = [mutate_weight_shift(child, shift_prob, var) for child in children]
    children = [mutate_weight_random(child, random_prob, min_val, max_val) for child in children]
    # The layers of a network have different shapes, so the children are
    # stored cell by cell in an object array; np.array() on such ragged
    # nested lists raises ValueError on recent NumPy releases
    child_array = np.empty((len(children), len(children[0])), dtype=object)
    for row, child in enumerate(children):
        for col, layer in enumerate(child):
            child_array[row, col] = layer
    return np.concatenate([pops, child_array])

In [11]:
# Creating our accuracy measure
def accuracy(actual, preds):
    """Return the fraction of positions where ``actual`` equals ``preds``."""
    matches = actual == preds
    return np.mean(matches)

In [12]:
# Defining a function for evaluating the fitness of our models
def evaluate_fitness(networks, X, y):
    """Score every network in ``networks`` on the data ``X`` / ``y``.

    Each network is run through ``feed_forward`` on ``X`` and its
    predictions are compared with ``y`` by ``accuracy``. The scores are
    returned as an array in the same order as ``networks``.
    """
    fitness = []
    for candidate in networks:
        predictions = feed_forward(X, candidate)
        fitness.append(accuracy(predictions, y))
    return np.array(fitness)

In [13]:
# Loading the MNIST data
# Every split is stored as a comma-separated file and read in the same way
X_train, X_test, y_train, y_test = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('X_train.csv', 'X_test.csv', 'y_train.csv', 'y_test.csv')
)

In [28]:
# Running the genetic search: evolve a population of networks on the training data
# Setting the population size
pop_size = 500

# Setting the die-off rate (die_off keeps roughly this fraction of the population)
die_off_rate=0.5

# Setting our fitness preference rate
fitness_pref = 1.5

# Creating our initial set of models
# Each network is a list of differently shaped arrays, so the population is
# stored in an explicit object array (one row per network, one column per
# parameter array); np.array() on such ragged nested lists raises ValueError
# on recent NumPy releases
initial_networks = [create_network(n_units=(64,),input_shape=784,output_shape=10) for _ in range(pop_size)]
models = np.empty((pop_size, len(initial_networks[0])), dtype=object)
for row, network in enumerate(initial_networks):
    for col, layer in enumerate(network):
        models[row, col] = layer

# Setting the max number of iterations
max_iter = 100

# Setting mutation rates
shift_mutate = 0.5
shift_var = 0.3
random_mutate=0.2

# Starting our loop to train models
for i in range(max_iter):
    # Evaluate fitness
    scores = evaluate_fitness(models, X_train, y_train)

    # Report best score
    print('Generation',i+1,'| Best Score:',scores.max().round(4))

    # Cause die-off
    models, scores = die_off(models, scores, die_off_rate)

    # Create children to refill the population back up to pop_size
    models = mate(pops=models,
                  scores=scores,
                  num_children=pop_size-models.shape[0],
                  shift_prob=shift_mutate,
                  random_prob=random_mutate,
                  var=shift_var,
                  fit_preference=fitness_pref)

# Done with training!
print('Done with training!')

Generation 1 | Best Score: 0.1709


KeyboardInterrupt: 