In [2]:
import operator
import math
import random

import pandas as pd
import numpy
from deap import algorithms, base, creator, tools, gp

# Load the regression samples; the 'x' column supplies the inputs and the
# 'y' column the target values used when scoring candidate expressions.
data = pd.read_excel("regression.xlsx")
x, y = data['x'], data['y']

# Plain-list copy of the input column.
X = list(x)

# Primitive set for single-variable expressions: three binary arithmetic
# operators, two unary trigonometric functions, and an ephemeral integer
# constant drawn from {-1, 0, 1}.
pset = gp.PrimitiveSet("MAIN", 1)

# Registration order is kept fixed so that seeded runs stay reproducible.
for primitive, arity in (
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (math.cos, 1),
    (math.sin, 1),
):
    pset.addPrimitive(primitive, arity)

pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))

# Show the single input as 'x' instead of the default 'ARG0'.
pset.renameArguments(ARG0='x')

In [19]:
# Read the regression samples for this cell; x and y are the columns that
# evalSymbReg iterates over.
data = pd.read_excel("regression.xlsx")
x, y = data['x'], data['y']

# Single-objective fitness with a negative weight (lower error is better),
# attached to tree-shaped individuals.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Random expression generator used to seed the initial population.
toolbox.register(
    "expr",
    gp.genHalfAndHalf,
    pset=pset,
    min_=1,
    max_=10,
)
# An individual wraps one generated expression; a population is a list of them.
toolbox.register(
    "individual",
    tools.initIterate,
    creator.Individual,
    toolbox.expr,
)
toolbox.register(
    "population",
    tools.initRepeat,
    list,
    toolbox.individual,
)
# Turns an expression tree into a callable of one argument.
toolbox.register("compile", gp.compile, pset=pset)

def evalSymbReg(individual):
    """Return the mean squared error of ``individual`` over the dataset.

    The expression tree is compiled with ``toolbox.compile`` into a callable
    of one argument, evaluated at every entry of the module-level ``x`` and
    compared with the matching entry of the module-level ``y``.

    Args:
        individual: Expression tree to score; passed to ``toolbox.compile``
            as ``expr``.

    Returns:
        A one-element tuple ``(mse,)``.
    """
    # Transform the tree expression into a callable function.
    func = toolbox.compile(expr=individual)

    # Normalise by the number of points actually summed. The previous code
    # divided by the length of a separate list built in another cell, which
    # could be stale or undefined when this cell is run on its own.
    n_points = len(x)

    # Accumulate the sum of squared differences point by point.
    ssd = 0
    for i in range(n_points):
        ssd += (y[i] - func(x[i])) ** 2
    return ssd / n_points,

# Fitness evaluation and parent selection.
toolbox.register("evaluate", evalSymbReg)
toolbox.register("select", tools.selTournament, tournsize=3)

# Variation: one-point crossover, and uniform mutation that grafts a freshly
# generated full subtree (depth 1 to 6).
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=1, max_=6)
toolbox.register(
    "mutate",
    gp.mutUniform,
    expr=toolbox.expr_mut,
    pset=pset,
)

# Apply the same height limit of 17 to both variation operators.
for variation_name in ("mate", "mutate"):
    toolbox.decorate(
        variation_name,
        gp.staticLimit(key=operator.attrgetter("height"), max_value=17),
    )

def main(seed=128, pop_size=300, cxpb=0.5, mutpb=0.1, ngen=100):
    """Run one symbolic-regression evolution and return its results.

    All parameters default to the values that were previously hard-coded,
    so ``main()`` behaves exactly as before.

    Args:
        seed: Value passed to ``random.seed`` before the population is built.
        pop_size: Number of individuals in the initial population.
        cxpb: Crossover probability handed to ``algorithms.eaSimple``.
        mutpb: Mutation probability handed to ``algorithms.eaSimple``.
        ngen: Number of generations handed to ``algorithms.eaSimple``.

    Returns:
        A tuple ``(pop, log, hof)``: the population and logbook returned by
        ``algorithms.eaSimple`` and the one-slot hall of fame that was
        passed to it.
    """
    random.seed(seed)

    pop = toolbox.population(n=pop_size)
    hof = tools.HallOfFame(1)

    # Track fitness values and tree size (node count via len) side by side.
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop, toolbox, cxpb, mutpb, ngen,
                                   stats=mstats, halloffame=hof, verbose=True)
    return pop, log, hof

if __name__ == "__main__":
    # Keep the results instead of discarding them: without these bindings the
    # final population, the logbook and the hall of fame are unreachable once
    # the run finishes.
    pop, log, hof = main()



   	      	                    fitness                    	                      size                     
   	      	-----------------------------------------------	-----------------------------------------------
gen	nevals	avg    	gen	max   	min    	nevals	std    	avg  	gen	max	min	nevals	std    
0  	300   	183.622	0  	4961.8	25.2466	300   	289.777	39.41	0  	457	2  	300   	63.5189
1  	169   	150.882	1  	1392.11	25.2466	169   	74.8596	50.04	1  	456	2  	169   	67.3398
2  	163   	135.935	2  	228.824	25.2466	163   	25.7886	65.95	2  	343	2  	163   	72.3879
3  	165   	118.484	3  	483.153	15.3436	165   	41.6159	76.2833	3  	343	2  	165   	76.1308
4  	164   	104.432	4  	2091.53	15.3436	164   	149.677	64.25  	4  	270	4  	164   	60.2405
5  	187   	85.6806	5  	2481.61	14.8316	187   	154.664	49.5033	5  	266	14 	187   	39.2014
6  	156   	77.5654	6  	2291.16	12.6332	156   	171.743	50.8333	6  	282	8  	156   	41.2119
7  	183   	78.4763	7  	2279.51	9.36144	183   	181.327	63.6067	7  	273	12 	183   	57.

88 	184   	147.362	88 	36469.3	0.0107541	184   	2112.19	315.55 	88 	394	173	184   	22.8592
89 	163   	13.9856	89 	1059.21	0.0107541	163   	74.3692	312.95 	89 	380	116	163   	27.2904
90 	165   	15.7246	90 	2097.88	0.0107541	165   	136.844	314.16 	90 	370	109	165   	25.7773
91 	162   	33.7855	91 	5508.21	0.0106209	162   	331.563	316.857	91 	402	113	162   	21.8905
92 	148   	19.6323	92 	2805.39	0.0106209	148   	193.885	316.487	92 	401	207	148   	20.3684
93 	158   	84.3481	93 	15742.8	0.0106209	158   	946.694	317.377	93 	387	176	158   	19.6347
94 	162   	50.8609	94 	10912.6	0.0106209	162   	639.846	316.797	94 	399	2  	162   	31.0646
95 	142   	45.4504	95 	4662.76	0.0106209	142   	372.097	319.79 	95 	398	3  	142   	26.5417
96 	155   	9.78808	96 	1217.46	0.00985749	155   	82.1695	320.8  	96 	391	179	155   	20.8194
97 	170   	122.134	97 	34003.4	0.00985749	170   	1960.16	319.95 	97 	391	177	170   	26.8467
98 	168   	39.3694	98 	6893.74	0.0099691 	168   	406.158	321.68 	98 	389	178	168   	25.1

In [None]:
# NOTE(review): unexecuted scratch cell of bare integer literals; it has no
# effect on the analysis. 128 matches the value passed to random.seed() in
# main(), so these are presumably seeds that were tried -- TODO confirm, then
# move them into a comment or markdown note, or delete the cell.
128
122
222