# Symbolic Regression mono-objective

#### 1. Importing libraries

In [1]:
import math
import random
import csv
import numpy
import operator
from deap import algorithms, base, creator , tools, gp

#### 2. Defining primitive set

In [2]:
# Define new functions
def protectedDiv(left, right):
    """Return ``left / right``, or 1 when the division raises ZeroDivisionError.

    Used as a GP primitive so that evolved expressions which divide by zero
    still evaluate to a number instead of aborting the fitness evaluation.
    """
    try:
        quotient = left / right
    except ZeroDivisionError:
        # Fallback value for a zero denominator.
        return 1
    else:
        return quotient

# Build the primitive set (function and terminal vocabulary) for the GP trees.
# Second argument of PrimitiveSet = number of input variables of the problem
# (one here, later renamed to 'x').
pset = gp.PrimitiveSet("MAIN", 1)
# Second argument of addPrimitive = arity (number of operands) of the function.
# NOTE: keep the registration order stable; presumably it influences which
# primitive a seeded random draw selects — confirm before reordering.
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)
# Ephemeral constant: each occurrence is produced by random.randint(-1, 1),
# i.e. an integer in {-1, 0, 1}.
pset.addEphemeralConstant("rand101", lambda: random.randint(-1,1))
# Give the single input argument the readable name 'x' instead of ARG0.
pset.renameArguments(ARG0='x')

#### 3. Parameters definition

In [3]:
# Define the fitness class: a single objective with weight -1
# (a negative weight means minimization in DEAP's convention).
creator.create("FitnessMin", base.Fitness, weights=(-1,))
# Define the individual type (a GP primitive tree) and associate the fitness
# attribute with it.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

# Create the toolbox used to register population creation, the evaluation
# function, the selection mechanism and the genetic operators.
toolbox = base.Toolbox()
# "expr": random expression generator built on gp.genHalfAndHalf with
# min_=1 and max_=2 (presumably the tree height bounds — see DEAP docs).
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# "individual": wrap a generated expression into a creator.Individual.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# "population": a list of individuals; the size is supplied at call time (n=...).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# "compile": turn a tree into a Python callable using the primitive set.
toolbox.register("compile", gp.compile, pset=pset)

def evalSymbReg(individual, points):
    """Score ``individual`` by its mean squared error against the target.

    The target function is x**4 + x**3 + x**2 + x. The tree is compiled into
    a callable through ``toolbox.compile`` and evaluated at every sample in
    ``points``.

    Returns a one-element tuple holding the mean squared error.
    """
    # Compile the tree expression into a callable of one argument.
    candidate = toolbox.compile(expr=individual)
    # Squared residual between the candidate and the target at each sample.
    squared_residuals = [
        (candidate(x) - x**4 - x**3 - x**2 - x )**2
        for x in points
    ]
    mean_squared_error = math.fsum(squared_residuals) / len(points)
    return (mean_squared_error,)

# Evaluation: evalSymbReg over 20 fixed sample points -1.0, -0.9, ..., 0.9.
toolbox.register("evaluate", evalSymbReg, points=[x/10. for x in range(-10,10)])
# Selection: tournament selection with tournament size 3.
toolbox.register("select", tools.selTournament, tournsize=3)
# Crossover: one-point crossover between two trees.
toolbox.register("mate", gp.cxOnePoint)
# Mutation: gp.mutUniform, using sub-expressions generated by gp.genFull
# with min_=0 and max_=2.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

# Bloat control: limit the tree "height" attribute to 17 for the results of
# both crossover and mutation (see gp.staticLimit in the DEAP docs for how
# over-limit offspring are handled).
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

#### 4. Algorithm initialization

In [4]:
def main():
    """Run one seeded evolutionary search and return its results.

    Seeds ``random`` with 318, creates 300 individuals and a hall of fame of
    size 1, collects avg/std/min/max statistics of both the fitness values
    ("mse") and the individual length ("size"), then hands everything to
    ``algorithms.eaSimpleOr``.

    Returns the ``(pop, log, hof)`` triple produced by ``eaSimpleOr``.
    """
    random.seed(318)

    initial_population = toolbox.population(n=300)
    best_individuals = tools.HallOfFame(1)

    # One statistics object per tracked quantity, merged under named chapters.
    fitness_stats = tools.Statistics(lambda ind: ind.fitness.values)
    length_stats = tools.Statistics(len)
    mstats = tools.MultiStatistics(mse=fitness_stats, size=length_stats)
    reducers = (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    )
    for label, reducer in reducers:
        mstats.register(label, reducer)

    # NOTE(review): stock DEAP documents `algorithms.eaSimple`, which returns
    # (population, logbook); `eaSimpleOr` looks like a local variant that also
    # returns the hall of fame — confirm it exists in the installed package.
    # The positional .8, .1, 40 are presumably crossover probability, mutation
    # probability and number of generations, as in eaSimple — TODO confirm.
    outcome = algorithms.eaSimpleOr(initial_population, toolbox, .8, .1, 40,
                                    stats=mstats, halloffame=best_individuals,
                                    verbose=True)
    pop, log, hof = outcome
    return pop, log, hof

# Run the search when executed as a script/notebook cell; the results are
# bound at module level so that later cells can use them (e.g. `hof`).
if __name__ == "__main__":
    pop, log, hof = main()

Creating pandas dataframe


Unnamed: 0,mse_avg,mse_std,mse_min,mse_max,mse_gen,mse_nevals,size_avg,size_std,size_min,size_max,size_gen,size_nevals
0,1.34919,1.474642,0.3288178,20.70058,1.0,268.0,3.49,1.54593,1.0,8.0,1.0,268.0
1,1.09871,1.293501,0.1655718,18.97392,2.0,273.0,3.696667,1.75442,1.0,10.0,2.0,273.0
2,0.924523,0.556332,0.2008685,3.352923,3.0,274.0,3.54,1.679802,1.0,10.0,3.0,274.0
3,0.820574,1.017974,0.1655718,16.53226,4.0,272.0,3.39,1.795708,1.0,12.0,4.0,272.0
4,0.77146,1.002144,0.1655718,14.23392,5.0,269.0,3.2,1.90263,1.0,11.0,5.0,269.0
5,0.637212,0.353597,0.1655718,2.552,6.0,277.0,3.566667,1.976248,1.0,10.0,6.0,277.0
6,0.598153,0.389153,0.1435014,2.982903,7.0,270.0,4.21,2.103782,1.0,11.0,7.0,270.0
7,0.625496,1.320259,0.09835429,18.97392,8.0,268.0,4.846667,2.160977,1.0,12.0,8.0,268.0
8,0.426568,0.35026,0.09835429,2.541243,9.0,262.0,5.346667,2.267118,1.0,13.0,9.0,262.0
9,0.421652,0.376265,0.1015608,2.982903,10.0,266.0,5.786667,2.067484,1.0,14.0,10.0,266.0


#### 5. Simplifying the resulting equation

In [5]:
from sympy import sympify

# Map the primitive names that appear in the printed GP tree onto plain
# arithmetic so that sympify can rebuild the expression symbolically.
# The mapping is deliberately not called `locals`, so the builtin of that
# name stays usable in the rest of the session.
# cos and sin are not listed; presumably sympify resolves them itself.
sympy_locals = {
    'sub': lambda x, y : x - y,
    # Plain division: the evolved `protectedDiv` returns 1 on division by
    # zero, which this symbolic stand-in does not reproduce.
    'protectedDiv': lambda x, y : x/y,
    'mul': lambda x, y : x*y,
    'add': lambda x, y : x + y,
    'neg': lambda x    : -x,
    'pow': lambda x, y : x**y
}

# String form of the best individual kept in the hall of fame.
ind = str(hof[0])
print(f'original: {ind}')
# Parse the string with the mapping above; `expr` is the sympy expression.
expr = sympify(ind, locals=sympy_locals)
print(f'simplified: {expr}')


original: add(mul(add(mul(x, add(x, mul(x, add(add(-1, x), cos(0))))), x), x), x)
simplified: x*(x*(x**2 + x) + x) + x
