https://deap.readthedocs.io/en/master/examples/gp_spambase.html
https://github.com/DEAP/deap/blob/4db155fb3c4fe1678f8d7cd03a638248a1a2f447/examples/gp/spambase.py

In [1]:
import random
import operator
import csv
import itertools

import numpy

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

In [2]:
# Load the Spambase data set as one list of floats per CSV row.
# newline="" is what the csv module asks for when it is handed a file object.
# Blank lines are skipped: csv.reader yields them as empty rows, which would
# otherwise turn into empty lists that cannot be indexed later on.
with open("spambase.csv", newline="") as spambase:
    spamReader = csv.reader(spambase)
    spam = [[float(elem) for elem in row] for row in spamReader if row]
spam

[[0.0,
  0.64,
  0.64,
  0.0,
  0.32,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.64,
  0.0,
  0.0,
  0.0,
  0.32,
  0.0,
  1.29,
  1.93,
  0.0,
  0.96,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.778,
  0.0,
  0.0,
  3.756,
  61.0,
  278.0,
  1.0],
 [0.21,
  0.28,
  0.5,
  0.0,
  0.14,
  0.28,
  0.21,
  0.07,
  0.0,
  0.94,
  0.21,
  0.79,
  0.65,
  0.21,
  0.14,
  0.14,
  0.07,
  0.28,
  3.47,
  0.0,
  1.59,
  0.0,
  0.43,
  0.43,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.07,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.132,
  0.0,
  0.372,
  0.18,
  0.048,
  5.114,
  101.0,
  1028.0,
  1.0],
 [0.06,
  0.0,
  0.71,
  0.0,
  1.23,
  0.19,
  0.19,
  0.12,
  0.64,
  0.25,
  0.38,
  0.45,
  0.12,
  0.0,
  1.75,
  0.06,
  0.06,
  1.

In [3]:
# Define a new primitive set for strongly typed GP: 57 float inputs and a
# bool return type ("IN" is presumably the argument-name prefix -- confirm
# against the DEAP PrimitiveSetTyped documentation)
pset = gp.PrimitiveSetTyped("MAIN", itertools.repeat(float, 57), bool, "IN")

# boolean operators: and/or take (bool, bool), not takes (bool); all return bool
pset.addPrimitive(operator.and_, [bool, bool], bool)
pset.addPrimitive(operator.or_, [bool, bool], bool)
pset.addPrimitive(operator.not_, [bool], bool)


In [4]:
# floating point operators
# Define a protected division function
def protectedDiv(left, right):
    """Return ``left / right``, or 1 if the division raises ZeroDivisionError."""
    try:
        quotient = left / right
    except ZeroDivisionError:
        # Fall back to a constant so that an evolved expression does not
        # abort on a zero denominator.
        return 1
    return quotient

# Arithmetic primitives, each (float, float) -> float, registered in the
# order add, sub, mul, protectedDiv.
for _arith_op in (operator.add, operator.sub, operator.mul, protectedDiv):
    pset.addPrimitive(_arith_op, [float, float], float)

# logic operators
# Define a new if-then-else function
def if_then_else(input, output1, output2):
    """Return ``output1`` when ``input`` is truthy, otherwise ``output2``.

    Both alternatives arrive as ordinary, already evaluated arguments; this
    function only selects between them.
    """
    return output1 if input else output2

# comparison primitives: (float, float) -> bool
pset.addPrimitive(operator.lt, [float, float], bool)
pset.addPrimitive(operator.eq, [float, float], bool)
# conditional primitive: (bool, float, float) -> float
pset.addPrimitive(if_then_else, [bool, float, float], float)

# terminals
# "rand100" is a float constant produced by random.random() * 100
pset.addEphemeralConstant("rand100", lambda: random.random() * 100, float)
# the two boolean constants
pset.addTerminal(False, bool)
pset.addTerminal(True, bool)

In [5]:
# Single-objective fitness with weight +1.0 (as the name FitnessMax suggests,
# the evaluation score is meant to be maximised)
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# Individuals are GP primitive trees that carry a FitnessMax fitness
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# "expr": expression generator, gp.genHalfAndHalf over pset with min_=1, max_=2
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# "individual": build an Individual from whatever toolbox.expr produces
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# "population": a list of individuals; the size is supplied at call time
# (main() calls toolbox.population(n=100))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# "compile": gp.compile bound to this primitive set
toolbox.register("compile", gp.compile, pset=pset)

def evalSpambase(individual):
    """Score an individual by how many sampled mails it classifies correctly.

    The tree is compiled with ``toolbox.compile`` and called with the first
    57 values of each sampled row. The truthiness of its result is compared
    with the truthiness of the value at index 57 of that row (the label).

    Args:
        individual: Expression tree accepted by ``toolbox.compile``.

    Returns:
        A one-element tuple holding the number of sampled rows for which the
        prediction agrees with the label.
    """
    # Transform the tree expression into a callable function
    func = toolbox.compile(expr=individual)
    # Randomly sample up to 400 mails from the spam database. random.sample
    # raises ValueError when asked for more items than the population holds,
    # so the request is capped at the data set size.
    spam_samp = random.sample(spam, min(400, len(spam)))
    # Count the mails whose predicted class matches the labelled class.
    # Both sides are bools, so they are compared by value, not by identity.
    result = sum(bool(func(*mail[:57])) == bool(mail[57]) for mail in spam_samp)
    return result,
    
# Fitness evaluation is done by evalSpambase
toolbox.register("evaluate", evalSpambase)
# Selection: tools.selTournament with tournsize=3
toolbox.register("select", tools.selTournament, tournsize=3)
# Crossover: gp.cxOnePoint
toolbox.register("mate", gp.cxOnePoint)
# Generator for the expressions used by mutation: gp.genFull with min_=0, max_=2
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
# Mutation: gp.mutUniform, drawing new expressions from toolbox.expr_mut
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

In [6]:
def main():
    """Evolve a population of 100 individuals for 40 generations.

    Seeds ``random`` with 10, then runs ``algorithms.eaSimple`` with a
    crossover probability of 0.5 and a mutation probability of 0.2. Fitness
    statistics (avg, std, min, max) and a hall of fame of size 1 are
    handed to the algorithm.

    Returns:
        The tuple ``(population, statistics, hall_of_fame)``.
    """
    random.seed(10)

    population = toolbox.population(n=100)
    best = tools.HallOfFame(1)

    # Statistics are computed over each individual's fitness values.
    fitness_stats = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    ):
        fitness_stats.register(label, reducer)

    algorithms.eaSimple(
        population,
        toolbox,
        cxpb=0.5,
        mutpb=0.2,
        ngen=40,
        stats=fitness_stats,
        halloffame=best,
    )

    return population, fitness_stats, best

In [7]:
# Run the evolution; main() returns the tuple (pop, stats, hof).
main()

gen	nevals	avg  	std    	min	max
0  	100   	204.9	48.1299	109	318
1  	49    	235.1	42.0301	82 	318
2  	69    	254.54	34.3582	151	336
3  	57    	257.46	45.3196	112	336
4  	70    	267.03	42.8344	88 	324
5  	64    	272.46	39.2162	160	333
6  	64    	270.68	45.7762	137	333
7  	55    	286.88	34.5555	170	324
8  	55    	292.21	41.3902	85 	331
9  	63    	291.03	43.0494	131	331
10 	60    	294.15	39.9323	152	335
11 	64    	293.27	47.4438	131	335
12 	68    	296.29	39.0708	150	335
13 	71    	301.81	33.582 	166	335
14 	61    	298.59	41.8622	140	341
15 	54    	295.96	45.6585	147	341
16 	68    	295.23	47.5621	113	341
17 	55    	305.31	35.4423	166	341
18 	62    	306.26	38.3277	145	343
19 	62    	300.1 	43.5409	152	341
20 	60    	294.71	49.148 	152	341
21 	55    	296.76	46.6438	143	341
22 	66    	296.44	43.856 	152	341
23 	52    	297.76	46.6288	159	341
24 	59    	299.69	44.8251	138	341
25 	59    	296.48	47.9486	143	341
26 	72    	293.06	47.1877	151	341
27 	60    	289.86	52.412 	101	341
28 	65    	298.97

([[<deap.gp.Primitive at 0x7df2f48>,
   <deap.gp.Primitive at 0x7e017c8>,
   <deap.gp.Primitive at 0x7df20e8>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Terminal at 0x7e05990>,
   <deap.gp.Primitive at 0x7e017c8>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Primitive at 0x7e017c8>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Terminal at 0x7dfbfc0>,
   <deap.gp.Primitive at 0x7df2e08>,
   <deap.gp.Terminal at 0x7e05090>,
   <deap.gp.Primitive at 0x7e017c8>,
   <deap.gp.Primitive at 0x7df20e8>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Primitive at 0x7e017c8>,
   <deap.gp.Terminal at 0x7e05900>,
   <deap.gp.Terminal at 0x7e05708>,
   <deap.gp.Primitive at 0x7df2db8>,
   <deap.gp.Primitive at 0x7df2db8>,
   <deap.gp.Terminal at 0x7e051f8>,
   <deap.gp.Terminal at 0x7dfba20>,
   <deap.gp.Primitive at 0x7df2e08>,
   <deap.gp.Terminal at 0x7dfbea0>,
   <deap.gp.Terminal at 0x7dfbca8>,
   <deap.gp.Terminal at 0x7e051f8>,
   <deap.gp.Term