In [None]:
#Setup
import math
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from sklearn.linear_model import ElasticNet
from matplotlib import pyplot as plt

In [None]:
def create_population(population_size, vector_length):
    """Return a random starting population.

    The result is a 2-D array with ``population_size`` rows (one per
    individual) and ``vector_length`` columns (one per gene), filled with
    uniform samples from NumPy's global random state.
    """
    shape = (population_size, vector_length)
    return np.random.random_sample(shape)
# Settings for the toy problem: 25 individuals, each a vector of 2 genes.
population_size = 25
vector_length = 2
# Draw the initial random population used by the cells below.
current_population = create_population(population_size, vector_length)

## Hyperparameters Ranges

IMPORTANT: All hyperparameters have specific domains; for example, they could be real numbers with values in prescribed intervals, integers, booleans, etc. Therefore, it is essential to understand all the aspects of the hyperparameters you need to find the best choices.

In [None]:
# you can check what current_population means:
# one row per individual, one column per gene
current_population

array([[0.25913828, 0.66615332],
       [0.97418968, 0.2873934 ],
       [0.32216297, 0.89052917],
       [0.07425726, 0.21574708],
       [0.28093637, 0.45826428],
       [0.34284181, 0.53354961],
       [0.52226578, 0.83274805],
       [0.05526837, 0.45293902],
       [0.55548276, 0.66593278],
       [0.72725387, 0.59706635],
       [0.08884335, 0.00709534],
       [0.01710591, 0.41944678],
       [0.99917283, 0.11118312],
       [0.36739105, 0.25682277],
       [0.94044864, 0.27913131],
       [0.21331705, 0.03552819],
       [0.25188695, 0.94081855],
       [0.51744978, 0.33086871],
       [0.49648683, 0.82406514],
       [0.05881906, 0.0055828 ],
       [0.10507888, 0.84632696],
       [0.1102769 , 0.10050991],
       [0.69640731, 0.79765709],
       [0.05350993, 0.22028968],
       [0.38914943, 0.63171894]])

## <font color='navy' size = 6pt> Survival of the fittest</font>

We're going to need to *evolve* individuals from our population, so we need some way to check which of the population is the fittest.

### Closer (smaller distance) is better

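For the sake of this visualisation, we're going to place a target on the plot, and the "fitness" of an individual is how close they are to the target. We measure closeness with the mean squared error between the individual and the target; this is the squared Euclidean distance divided by the number of coordinates, so it ranks individuals exactly as the Euclidean distance would.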



In [None]:
#collapse-show
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences, averaged along axis 0.

    For 1-D input this is a single number; for 2-D input the average is
    taken down the rows, giving one value per column.
    """
    squared_error = (y_true - y_pred) ** 2
    return squared_error.mean(axis=0)

target_x, target_y = 0.2, 0.35
def problem(soln):
    """Score ``soln`` by its mean squared error against the current target.

    The target is read from the module-level ``target_x`` / ``target_y`` on
    every call, so reassigning those globals (e.g. from a click event) moves
    the target. Lower scores are better.
    """
    target = [target_x, target_y]
    return mean_squared_error(soln, target)

Then we need a way to check who the fittest member of our community is.

In [None]:
#collapse-show
def assess_fitness(individual, problem):
    """Return the fitness of ``individual``, i.e. whatever ``problem`` returns for it."""
    fitness = problem(individual)
    return fitness

def find_current_best(population, problem):
    """Return the fittest individual of ``population`` (lowest fitness wins).

    Every individual is scored with ``assess_fitness``; the first individual
    that attains the minimum score is returned. This could be done in a
    single pass over the population, but the two-step version is easier to
    read in a tutorial.
    """
    scores = []
    for candidate in population:
        scores.append(assess_fitness(candidate, problem))
    winner = scores.index(min(scores))  # lowest is best
    return population[winner]

## <font color='navy' size=6pt> Selection</font>

Now, we're going to let these potential solutions fight it out and only let a certain few have offspring. For this we will use "Tournament Selection" which is just grabbing a few individuals and having them compete to the death (the fittest survives!).

What's nice about this is that you can keep a bit of diversity within the population and it's not just **the best** that survive, some lucky unfit individuals might be matched up with worse folk, and so they'll survive.

In [None]:
#collapse-show
def selection(population, selection_size, problem):
    """Run one tournament and return its winner.

    ``selection_size`` row indexes are drawn at random with replacement
    (so the same individual may enter twice), and the fittest of the
    drawn individuals is returned.
    """
    entrants = np.random.choice(population.shape[0], selection_size, replace=True)
    return find_current_best(population[entrants], problem)

Then once we have done this twice, those two individuals can "mate" and have children... to keep the population the same across generations for simplicity, they'll have two children. We'll use [Two point Crossover](https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)), which is just splitting both parents in three parts and swapping the middle part to form two children.

In [None]:
#collapse-show
def crossover(parent_a, parent_b):
    """Perform two-point crossover on two parents.

    Two cut points ``c <= d`` are drawn from ``0..len(parent_a)`` and the
    genes in positions ``[c, d)`` are exchanged between the parents. The
    exchanged segment always contains at least one gene.

    Parameters
    ----------
    parent_a, parent_b : 1-D numpy arrays of equal length.

    Returns
    -------
    (child_a, child_b) : two new arrays; the parents are not modified.
    """
    l = parent_a.shape[0]
    if l == 0:
        # No genes to exchange.
        return np.copy(parent_a), np.copy(parent_b)

    # sorted() keeps c <= d (replaces the manual flip).
    c, d = sorted((random.randint(0, l), random.randint(0, l)))
    if c == d:
        # Widen an empty segment to one gene. When both cuts land on the
        # very end, widening to the right would step past the array and
        # silently produce clones of the parents, so widen to the left.
        if d < l:
            d += 1
        else:
            c -= 1

    # np.concatenate builds fresh arrays, so no defensive copy is needed.
    child_a = np.concatenate([parent_a[:c], parent_b[c:d], parent_a[d:]])
    child_b = np.concatenate([parent_b[:c], parent_a[c:d], parent_b[d:]])
    return child_a, child_b

## <font color='navy' size=6pt> Mutation</font>

For extra variety across generations, we want to introduce a bit of chaos to the system to produce the Marie Curie of each generation (but also probably our _least_ capable individuals too). This helps find _new_ solutions outside our current population's capability. So for each individual, there's a chance that their offspring will mutate (determined by `mutation_rate`).

In [None]:
def mutate(child, mutation_rate, mutation_scale):
    """Possibly perturb ``child`` with Gaussian noise.

    A uniform draw from [0, 1] decides whether mutation happens: it does when
    ``mutation_rate`` is at least as large as the draw. A mutated child is a
    new array equal to ``child`` plus zero-mean normal noise with standard
    deviation ``mutation_scale``; otherwise ``child`` itself is returned.
    """
    roll = random.uniform(0, 1)
    if not (mutation_rate >= roll):
        return child
    noise = np.random.normal(0, mutation_scale, child.shape[0])
    return child + noise

Here's the entirety of what happens to the population between generations.
To recap: a bunch from the current population are selected at random to compete to reproduce. Two parents then produce two children using a mix of the two parents for both children. Finally, each child has a chance that they will mutate. Once we've created a new population of the same size as the original population, we have completed one "generation".

## <font color='navy' size=6pt> Update the population</font>

In [None]:
#collapse-show
def update_population(current_population, problem, should_mutate, mutation_rate, mutation_scale, selection_size=2):
    """Perform one generational update of the genetic algorithm.

    Pairs of parents are picked by tournament selection, recombined with
    crossover and (optionally) mutated until a new population of the same
    shape as ``current_population`` has been produced.

    Parameters
    ----------
    current_population : 2-D array, one individual per row.
    problem : callable scoring an individual (lower is better).
    should_mutate : if falsy, children are used exactly as crossover made them.
    mutation_rate, mutation_scale : forwarded to ``mutate``.
    selection_size : number of challengers per tournament (default 2).

    Returns
    -------
    A new array with the same shape as ``current_population``.
    """
    current_population = np.asarray(current_population)
    pop_size = len(current_population)
    half = pop_size // 2
    # Match the incoming population's shape rather than assuming 2 genes.
    next_population = np.empty(current_population.shape)

    def breed():
        """Select two parents and return their two (possibly mutated) children."""
        parent_a = selection(current_population, selection_size, problem)
        parent_b = selection(current_population, selection_size, problem)
        child_a, child_b = crossover(parent_a, parent_b)
        if should_mutate:
            child_a = mutate(child_a, mutation_rate, mutation_scale)
            child_b = mutate(child_b, mutation_rate, mutation_scale)
        return child_a, child_b

    # First children fill the first half, second children the next half.
    for i in range(half):
        child_a, child_b = breed()
        next_population[i] = child_a
        next_population[i + half] = child_b

    if pop_size % 2:
        # An odd-sized population has one slot left after pairing; without
        # this the last row would keep np.empty's uninitialised contents.
        extra_child, _ = breed()
        next_population[-1] = extra_child

    return next_population

In [None]:
# Start again from a fresh random population for the selection/crossover demo below.
current_population = create_population(population_size, vector_length)

In [None]:
# Two independent tournaments of size 2, each returning its fittest challenger.
parent_a = selection(current_population, 2, problem)
parent_b = selection(current_population, 2, problem)

In [None]:
# Winner of the first tournament.
parent_a

array([0.00644091, 0.848283  ])

In [None]:
# Winner of the second tournament.
parent_b

array([0.08895848, 0.91374314])

In [None]:
# Returns a (child_a, child_b) tuple; the cut points are random, so the result varies between runs.
crossover(parent_a, parent_b)

(array([0.00644091, 0.91374314]), array([0.08895848, 0.848283  ]))

## <font color='navy' size=6pt> Keeping track of the evolution</font>

In [None]:
#collapse-show
class GeneticAlgorithm(object):
    """Keeps a population and its fittest member in sync across generations.

    Attributes:
        problem: callable used to score individuals (lower is better).
        current_population: array holding the individuals of the latest generation.
        current_best: fittest individual of ``current_population``.
    """

    def __init__(self, population_size, vector_length, problem):
        """Create a random initial population and record its fittest member."""
        self.problem = problem
        self.current_population = create_population(population_size, vector_length)
        self._refresh_best()

    def _refresh_best(self):
        """Recompute ``current_best`` from the population currently held."""
        self.current_best = find_current_best(self.current_population, self.problem)

    def next_generation(self, mrate, mscale, should_mutate):
        """Replace the population with its offspring and update ``current_best``.

        ``mrate`` and ``mscale`` are the mutation rate and mutation scale;
        ``should_mutate`` switches mutation on or off.
        """
        self.current_population = update_population(
            self.current_population,
            self.problem,
            should_mutate,
            mrate,
            mscale,
        )
        self._refresh_best()

# Instantiate the tracker with the population settings and the problem defined earlier.
ga = GeneticAlgorithm(population_size, vector_length, problem)

## Simulation Study

In [None]:
# Passing a 2-D array (one candidate per row). With the toy problem defined above,
# the squared errors are averaged down the rows (axis=0), so the result holds one
# value per coordinate, not one fitness per candidate.
problem(np.array([[.3,1.3],[2.3,0.5]]))

array([2.21  , 0.4625])

In [None]:
# Advance one generation: mutation rate 0.3, mutation scale 1, mutation enabled.
ga.next_generation(0.3,1,True)

In [None]:
# Fittest individual of the latest generation (not necessarily the best seen so far).
ga.current_best

array([0.10045145, 0.37381357])

## <font color='navy' size=6pt> Real Data Application</font>

You need to code your own functions.

In [None]:
# Load the concrete data set; the relative path assumes Google Drive is mounted
# under ./drive (presumably a Colab session — TODO confirm).
# NOTE(review): later cells use `x` and `y`, which are not defined in the cells
# visible here; they presumably need to be derived from `data`.
data = pd.read_csv('drive/MyDrive/Data Sets/concrete.csv')

In [None]:
def validate(h):
  """Cross-validated test MSE of an ElasticNet with hyperparameters ``h``.

  ``h[0]`` is used as ``alpha`` and ``h[1]`` as ``l1_ratio``. For every fold of
  the module-level splitter ``kf`` the features are min-max scaled (scaler fitted
  on the training part only), the model is fitted and its test MSE is recorded.

  Returns the mean test MSE over all folds.

  NOTE(review): relies on the globals ``kf``, ``x`` and ``y`` existing when the
  function is called; assumes ``x`` and ``y`` are numpy arrays indexable by the
  fold index arrays — TODO confirm.
  """
  alpha = h[0]
  l1_ratio = h[1]
  scale = MinMaxScaler()
  model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=10000)
  fold_errors = []
  for idxtrain,idxtest in kf.split(x):
    # Fit the scaler on the training fold only to avoid leaking test data.
    xtrain = scale.fit_transform(x[idxtrain])
    xtest = scale.transform(x[idxtest])
    model.fit(xtrain, y[idxtrain])
    fold_errors.append(mse(y[idxtest], model.predict(xtest)))
  return np.mean(fold_errors)

In [None]:
def metric(y_true, y_pred):
    """Root-mean-squared error between ``y_true`` and ``y_pred``, taken along axis 0.

    Both arguments may be arrays or plain sequences; they are converted with
    ``np.asarray`` before subtracting.
    """
    diff = np.asarray(y_true) - np.asarray(y_pred)
    return ((diff ** 2).mean(axis=0)) ** (1 / 2)

target_x, target_y = 20.0, 35.0
def problem(soln):
    """Score ``soln`` by its RMSE against the module-level target (lower is better).

    The target is read from ``target_x`` / ``target_y`` on every call, so
    reassigning those globals moves the target.
    Note: this rebinds ``problem`` and the target globals used by the toy
    example earlier in the notebook.
    """
    return metric(soln, [target_x, target_y])

In [None]:
#@title
# define the function "problem"
# in this example we use ElasticNet and with a set of hyperparameters and output the prediction error
# 5-fold splitter with shuffling; the fixed random_state keeps the folds identical across calls.
kf = KFold(n_splits=5,shuffle=True,random_state=123)
# Shared scaler instance; problem() refits it on the training part of every fold.
scale = StandardScaler()
def problem(soln):
  """Cross-validated ElasticNet test MSE for one or more hyperparameter candidates.

  ``soln`` is either a single candidate ``[alpha, l1_ratio]`` or a 2-D array
  with one candidate per row. For each candidate the model is evaluated on the
  folds of the module-level ``kf`` (features standardised with the module-level
  ``scale``, fitted on the training part only) and the mean test MSE is stored.

  Candidates outside ElasticNet's domain (non-finite values, ``alpha < 0`` or
  ``l1_ratio`` outside [0, 1]) are not fitted and keep the penalty value 1e5,
  so the genetic algorithm can discard them instead of crashing.

  Returns a 1-D array with one score per candidate (lower is better).

  NOTE(review): relies on the globals ``x`` and ``y``; assumes they are numpy
  arrays indexable by the fold index arrays — TODO confirm.
  """
  if len(soln.shape) ==1:
    soln = soln.reshape(1,-1)
  # Every candidate starts at a large penalty; only valid ones get overwritten.
  MSE = np.zeros(len(soln)) + 1e5
  for i in range(len(soln)):
    alpha = soln[i,0]
    l1_ratio = soln[i,1]
    in_domain = (np.isfinite(alpha) and np.isfinite(l1_ratio)
                 and alpha >= 0 and 0 <= l1_ratio <= 1)
    if not in_domain:
      # Mutation can push hyperparameters out of range; leave the penalty in place.
      continue
    model = ElasticNet(alpha=alpha,l1_ratio=l1_ratio,max_iter=10000)
    MSE_test = []
    for idxtrain, idxtest in kf.split(x):
      xtrain = scale.fit_transform(x[idxtrain])
      xtest = scale.transform(x[idxtest])
      ytrain = y[idxtrain]
      ytest = y[idxtest]
      model.fit(xtrain,ytrain)
      MSE_test.append(mse(ytest,model.predict(xtest)))
    MSE[i] = np.mean(MSE_test)
  return MSE

In [None]:
# Evaluate a single candidate: alpha=2.3, l1_ratio=0.5.
problem(np.array([2.3,0.5]))

array([39.72751579])