# Goal - function regression
Evolve a symbolic representation of a function from sampled data points, using genetic programming (DEAP).

Function: $$f(x)=x^4 + x^3 + x^2 + x$$

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import random
import numpy as np

from deap import base
from deap import creator
from deap import gp

from deap import tools
from deap import algorithms

from deap import gp
import operator
import math



In [2]:
def safeDiv(left, right):
    """Protected division used as a two-argument GP primitive.

    Args:
        left: numerator.
        right: denominator.

    Returns:
        The true quotient ``left / right``, or ``0`` when the division
        raises ``ZeroDivisionError``, so that an evolved expression can
        always be evaluated.
    """
    try:
        # operator.truediv never floors: a bare ``/`` between two ints under
        # Python 2 would turn e.g. 1 / 2 into 0 and silently distort any
        # sub-expression built only from integer constants.
        return operator.truediv(left, right)
    except ZeroDivisionError:
        return 0

# Primitive set named "MAIN" for programs taking one input argument.
pset = gp.PrimitiveSet("MAIN", 1)

# Function primitives as (callable, arity) pairs, added in this exact order.
for primitive, arity in (
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (safeDiv, 2),
    (operator.neg, 1),
    (math.cos, 1),
    (math.sin, 1),
):
    pset.addPrimitive(primitive, arity)

# Ephemeral constant "rand101": its value comes from random.randint(-1, 1),
# i.e. one of the integers -1, 0 or 1.
pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))

In [3]:
# Rename the primitive set's argument ARG0 to x, so expressions are written in terms of x.
pset.renameArguments(ARG0='x')


In [4]:
# Fitness class derived from base.Fitness with a single objective weighted -1.0
# (hence the name FitnessMin: the evaluated error is meant to be minimised).
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# Individual class derived from gp.PrimitiveTree, given a `fitness` attribute
# based on creator.FitnessMin.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [5]:
# Toolbox collecting the named building blocks used in the rest of the notebook.
toolbox = base.Toolbox()
# "expr": random expression generator over pset (gp.genHalfAndHalf, bounds min_=1 / max_=2).
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# "individual": builds a creator.Individual from whatever toolbox.expr generates.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# "population": a plain list filled by repeated toolbox.individual calls
# (the count is supplied at call time, e.g. toolbox.population(n=...)).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# "compile": turns an expression into something callable, using the same pset.
toolbox.register("compile", gp.compile, pset=pset)

def evalSymbReg(individual, points):
    """Score an evolved expression against x**4 + x**3 + x**2 + x.

    Args:
        individual: expression tree, compiled through ``toolbox.compile``.
        points: sequence of sample x values; must be non-empty, since the
            summed error is divided by ``len(points)``.

    Returns:
        A one-element tuple holding the mean squared error between the
        compiled expression and the target polynomial over ``points``.
    """
    # Compile the tree into a callable of one argument.
    candidate = toolbox.compile(expr=individual)
    # Sum of squared residuals; fsum keeps the float accumulation accurate.
    total_sq_error = math.fsum(
        (candidate(p) - p**4 - p**3 - p**2 - p) ** 2 for p in points
    )
    return (total_sq_error / len(points),)

# "evaluate": evalSymbReg with its sample points fixed to the 20 values
# -1.0, -0.9, ..., 0.9 (x/10. for x in range(-10, 10)).
toolbox.register("evaluate", evalSymbReg, points=[x/10. for x in range(-10,10)])
# "select": tournament selection with tournaments of size 3.
toolbox.register("select", tools.selTournament, tournsize=3)
# "mate": one-point crossover on trees.
toolbox.register("mate", gp.cxOnePoint)
# "expr_mut": generator (gp.genFull, bounds min_=0 / max_=2) for the expressions used by mutation.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
# "mutate": gp.mutUniform, drawing its new expressions from toolbox.expr_mut over pset.
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

In [6]:
# Two statistics sources per individual: its fitness values and its length
# (len() of the tree).
stats_fit = tools.Statistics(key=lambda individual: individual.fitness.values)
stats_size = tools.Statistics(key=len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)

# Reducers registered on both sources, in this order.
for label, reducer in (
    ("avg", np.mean),
    ("std", np.std),
    ("min", np.min),
    ("max", np.max),
):
    mstats.register(label, reducer)

In [7]:
# Seed Python's global RNG before anything random happens in this cell, so a
# fresh "Restart & Run All" reproduces the same run instead of a different
# one every time.
random.seed(318)

# Initial population of 300 random individuals.
pop = toolbox.population(n=300)
# Hall of fame of size 1, passed to the algorithm below.
hof = tools.HallOfFame(1)
# Run eaSimple with crossover probability 0.5, mutation probability 0.1 and
# 40 generations; statistics are gathered through mstats and printed
# (verbose=True).
pop, log = algorithms.eaSimple(
    pop, toolbox, cxpb=0.5, mutpb=0.1, ngen=40,
    stats=mstats, halloffame=hof, verbose=True
)

   	      	                fitness                	             size             
   	      	---------------------------------------	------------------------------
gen	nevals	avg    	max    	min     	std    	avg    	max	min	std   
0  	300   	1.85978	18.9239	0.450825	2.11032	3.70333	7  	2  	1.6027
1  	172   	1.14284	14.1839	0.356827	1.31213	3.57667	10 	1  	1.54622
2  	183   	0.987048	14.1839	0.183711	1.24932	3.73333	12 	1  	1.68391
3  	186   	0.883343	14.1573	0.165572	1.21826	3.96   	10 	1  	1.82713
4  	172   	0.774399	15.5423	0.165572	0.955211	4.26   	12 	1  	2.05566
5  	168   	0.705869	14.2181	0.165572	0.886744	4.16667	11 	1  	1.91282
6  	187   	0.729145	13.6089	0.165572	1.05674 	4.66667	10 	1  	1.84632
7  	175   	0.676455	7.25482	0.13008 	0.698917	5.13333	12 	1  	1.76509
8  	151   	0.560943	4.24016	0.13008 	0.568443	5.64333	12 	1  	1.92599
9  	149   	0.442069	2.37244	0.104588	0.385957	6.12   	14 	1  	2.00971
10 	177   	0.413042	3.35292	0.104588	0.429922	6.32333	14 	1  	2.20578
11 	16

In [8]:
# First entry of the hall of fame filled during the run.
tree = hof[0]
# Call form of print: parses on Python 3 and prints the same single value on
# Python 2, unlike the statement form `print tree`.
print(tree)

add(x, mul(x, add(mul(x, add(x, mul(x, x))), x)))
