# Multi-objective Symbolic Regression

#### 1. Importing libraries

In [1]:
import math
import random
import csv
import numpy
import operator
from deap import algorithms, base, creator , tools, gp

#### 2. Defining the primitive set

In [2]:
# Define new functions
def protectedDiv(left, right):
    """Divide ``left`` by ``right`` without ever raising on a zero divisor.

    Evolved expressions routinely divide by zero, so the GP division
    primitive substitutes a constant instead of aborting the evaluation.

    Args:
        left: The dividend.
        right: The divisor.

    Returns:
        ``left / right``, or ``1`` when the division raises
        ``ZeroDivisionError``.
    """
    try:
        quotient = left / right
    except ZeroDivisionError:
        # Undefined result: fall back to the neutral constant 1.
        return 1
    else:
        return quotient

# Cell-local import: needed only for the ephemeral-constant generator below.
import functools

# The second argument of PrimitiveSet is the number of input variables of the
# problem (a single one here, renamed to 'x' at the end of this cell).
pset = gp.PrimitiveSet("MAIN", 1)
# The second argument of addPrimitive is the arity of the primitive.
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)
# Ephemeral constant drawn from {-1, 0, 1}. The generator is a
# functools.partial rather than a lambda: it makes the same random.randint
# call, but recent DEAP releases warn that lambda-based ephemerals cannot be
# pickled and recommend functools.partial instead.
pset.addEphemeralConstant("rand101", functools.partial(random.randint, -1, 1))
pset.renameArguments(ARG0='x')

#### 3. Defining parameters

In [3]:
# Two-objective fitness. The objectives are the tuple returned by `evaluate`
# (mean squared error, tree height). Both weights are negative, which in
# DEAP's base.Fitness convention marks an objective as minimised; the height
# weight is 100 times smaller in magnitude than the error weight.
creator.create("FitnessMin", base.Fitness, weights=(-1,-.01))
# An individual is a GP primitive tree that carries the fitness defined above.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# Expression generator used for the initial trees (min_=1, max_=2).
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# Build one individual by wrapping a generated expression in creator.Individual.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# Build a population as a list of individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Turn a tree expression into a callable function over the primitive set.
toolbox.register("compile", gp.compile, pset=pset)

def evalSymbReg(individual, points):
    """Return the mean squared error of an individual against the target.

    The target function is ``x**4 + x**3 + x**2 + x``.

    Args:
        individual: Tree expression, compiled through ``toolbox.compile``.
        points: Sequence of sample values at which both the candidate and
            the target are evaluated.

    Returns:
        The sum of squared residuals over ``points`` divided by
        ``len(points)``.
    """
    # Compile the tree expression into a callable of one argument.
    candidate = toolbox.compile(expr=individual)

    def squared_residual(x):
        # Subtract the target term by term, then square the difference.
        return (candidate(x) - x**4 - x**3 - x**2 - x) ** 2

    total = math.fsum(map(squared_residual, points))
    return total / len(points)
def evaluate(individual, points):
    """Compute the two fitness objectives of an individual.

    Args:
        individual: Tree expression to score; must expose ``height``.
        points: Sample values forwarded to ``evalSymbReg``.

    Returns:
        A ``(mse, height)`` tuple: the mean squared error from
        ``evalSymbReg`` followed by the tree height.
    """
    mse = evalSymbReg(individual, points)
    tree_height = individual.height
    return mse, tree_height
    
# Fitness evaluation over 20 sample points: -1.0, -0.9, ..., 0.9.
toolbox.register("evaluate", evaluate, points=[x/10. for x in range(-10,10)])
# Selection by tournament with tournsize=3.
toolbox.register("select", tools.selTournament, tournsize=3)
# Crossover operator.
toolbox.register("mate", gp.cxOnePoint)
# Generator for the expressions that mutation inserts (min_=0, max_=2).
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

# Apply a static limit of 17 on the "height" attribute to both variation
# operators, presumably to keep crossover and mutation from growing trees
# without bound.
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

#### 4. Algorithm initialization

In [4]:
def main():
    """Run the evolutionary search with a fixed seed.

    Builds a population of 300 individuals, tracks the single best one in a
    hall of fame, and records avg/std/min/max of the MSE objective, the
    height objective and the tree size (``len``) while the algorithm runs
    with crossover probability 0.8, mutation probability 0.1 and 40
    generations.

    Returns:
        The ``(population, logbook, hall_of_fame)`` triple returned by the
        evolutionary algorithm.
    """
    random.seed(318)

    population = toolbox.population(n=300)
    best = tools.HallOfFame(1)

    # Note that the fitness is not the MSE alone: it is the combination of
    # MSE and height, so each objective gets its own statistics object.
    mstats = tools.MultiStatistics(
        mse=tools.Statistics(lambda ind: ind.fitness.values[0]),
        height=tools.Statistics(lambda ind: ind.fitness.values[1]),
        size=tools.Statistics(len),
    )
    reducers = (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    )
    for label, reducer in reducers:
        mstats.register(label, reducer)

    # NOTE(review): `eaSimpleOr` and its three-value return do not appear to
    # be part of the stock deap.algorithms module (stock `eaSimple` returns
    # population and logbook only) -- confirm a patched DEAP is installed.
    population, logbook, best = algorithms.eaSimpleOr(
        population, toolbox, .8, .1, 40,
        stats=mstats, halloffame=best, verbose=True,
    )
    return population, logbook, best

# Run the experiment only when this file is executed directly; the final
# population, the logbook and the hall of fame stay available at module level.
if __name__ == "__main__":
    pop, log, hof = main()

Creating pandas dataframe


Unnamed: 0,mse_avg,mse_std,mse_min,mse_max,mse_gen,mse_nevals,height_avg,height_std,height_min,height_max,height_gen,height_nevals,size_avg,size_std,size_min,size_max,size_gen,size_nevals
0,1.464966,2.171681,0.328818,20.70058,1.0,271.0,1.49,0.62442,0.0,4.0,1.0,271.0,3.426667,1.554978,1.0,9.0,1.0,271.0
1,1.021861,0.732816,0.165572,6.859563,2.0,270.0,1.413333,0.665198,0.0,4.0,2.0,270.0,3.266667,1.508494,1.0,9.0,2.0,270.0
2,0.933856,0.65377,0.165572,6.709563,3.0,262.0,1.333333,0.674949,0.0,4.0,3.0,262.0,3.233333,1.584999,1.0,9.0,3.0,262.0
3,0.884201,0.594831,0.165572,3.352923,4.0,269.0,1.433333,0.773879,0.0,5.0,4.0,269.0,3.3,1.601041,1.0,9.0,4.0,269.0
4,0.839922,0.899475,0.165572,14.20104,5.0,261.0,1.443333,0.816571,0.0,5.0,5.0,261.0,3.346667,1.762523,1.0,13.0,5.0,261.0
5,0.782882,0.510445,0.165572,2.982903,6.0,270.0,1.45,0.942072,0.0,4.0,6.0,270.0,3.433333,1.893556,1.0,11.0,6.0,270.0
6,0.780256,1.160347,0.165572,18.97392,7.0,274.0,1.43,0.985782,0.0,4.0,7.0,274.0,3.546667,2.091528,1.0,11.0,7.0,274.0
7,0.690213,1.14769,0.165572,18.97392,8.0,270.0,1.456667,1.062131,0.0,4.0,8.0,270.0,3.73,2.128481,1.0,11.0,8.0,270.0
8,0.562558,0.925585,0.165572,15.59226,9.0,273.0,1.423333,1.069636,0.0,4.0,9.0,273.0,3.753333,2.208881,1.0,11.0,9.0,273.0
9,0.516643,0.995819,0.165572,16.296495,10.0,271.0,1.8,1.029563,0.0,6.0,10.0,271.0,4.626667,2.132437,1.0,12.0,10.0,271.0


#### 5. Simplifying the resulting equation

In [5]:
from sympy import sympify

# Mapping from the primitive names that appear in the printed tree to
# symbolic operations, passed to sympify so it can expand them.
# Named `sympy_locals` so the builtin `locals` is not shadowed.
# NOTE(review): 'protectedDiv' is mapped to plain division here, whereas the
# protectedDiv primitive returns 1 on division by zero, so the simplified
# form may differ from the evolved program wherever a divisor is zero.
sympy_locals = {
    'sub': lambda x, y : x - y,
    'protectedDiv': lambda x, y : x/y,
    'mul': lambda x, y : x*y,
    'add': lambda x, y : x + y,
    'neg': lambda x    : -x,
    'pow': lambda x, y : x**y
}

# String form of the best individual stored in the hall of fame.
ind = str(hof[0])
print(f'original: {ind}')
expr = sympify(ind, locals=sympy_locals)
print(f'simplified: {expr}')


original: add(x, mul(add(x, mul(x, mul(x, sub(x, -1)))), sin(x)))
simplified: x + (x**2*(x + 1) + x)*sin(x)
