In [1]:
from deap import base, creator, gp, tools

In [2]:
# Build a standalone GP primitive descriptor named "re_range". The tuple and
# the last argument are presumably the argument types and the return type
# (two str in, str out) -- TODO confirm against the installed DEAP version.
re_range = gp.Primitive("re_range", (str, str), str)
# Render the primitive applied to the argument strings "a" and "b"; the
# recorded output is 're_range(a, b)'.
re_range.format("a", "b")

're_range(a, b)'

In [3]:
# Descriptor for a one-argument "re_star" primitive. Both the argument type
# and the return type are given as the gp.Primitive class itself, whereas
# re_range uses str -- NOTE(review): confirm this mix of types is intended.
re_star = gp.Primitive("re_star", (gp.Primitive, ), gp.Primitive)
# Nest the two formatters; the recorded output is 're_star(re_range(a, b))'.
re_star.format(re_range.format("a", "b"))

're_star(re_range(a, b))'

In [32]:
# NOTE(review): the recorded output of this cell is
# "TypeError: __init__() takes at least 4 arguments (3 given)". Only the
# constructor call passes two explicit arguments, so it is presumably the
# line that failed; the "Strongly typed GP" section of this notebook calls
# gp.PrimitiveSetTyped with a name, a list of input types and a return type.
pset = gp.PrimitiveSetTyped("MAIN", 1)
# NOTE(review): re_range / re_star are gp.Primitive objects rather than plain
# callables, and the 2 / 1 look like arities. The typed set built later in
# this notebook is given (callable, input types, return type, name) instead --
# TODO confirm these calls against the DEAP API once the constructor is fixed.
pset.addPrimitive(re_range, 2, "re_range")
pset.addPrimitive(re_star, 1, "re_star")

TypeError: __init__() takes at least 4 arguments (3 given)

In [30]:
# Rename the primitive set's ARG0 argument to x (ARG0 is presumably the
# default name of the first input -- confirm in the DEAP docs).
pset.renameArguments(ARG0='x')

# Register an "Individual" class derived from gp.PrimitiveTree in deap.creator.
creator.create("Individual", gp.PrimitiveTree)

toolbox = base.Toolbox()
# "expr" generates a random expression from pset with gp.genHalfAndHalf;
# min_/max_ are presumably tree-depth bounds -- TODO confirm.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=3, max_=5)
# "individual" wraps whatever toolbox.expr produces in creator.Individual.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)

# Sample one individual and extract the nodes, edges and labels used to plot it.
expr = toolbox.individual()
nodes, edges, labels = gp.graph(expr)

In [31]:
import pygraphviz as pgv

# Build the graph structure from the tree's nodes and edges, then let the
# "dot" engine compute the layout.
g = pgv.AGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
g.layout(prog="dot")

# Attach the human-readable label to every node (done after layout, as before).
for node_id in nodes:
    g.get_node(node_id).attr["label"] = labels[node_id]

# Render the tree to a PDF in the working directory.
g.draw("tree.pdf")

## Strongly typed GP

In [418]:
class RegexRange(object):
    """Accumulates character-class fragments and renders them as ``[...]*``.

    Fragments such as ``"A-Z"`` are kept in ``values`` in insertion order.
    ``create`` and ``add_value`` are static methods so they can be passed
    around as plain callables.
    """

    def __init__(self):
        # Fragments are concatenated verbatim by get_value().
        self.values = []

    @staticmethod
    def create():
        """Return a fresh, empty RegexRange."""
        fresh = RegexRange()
        return fresh

    @staticmethod
    def add_value(rr, value):
        """Append ``value`` to ``rr`` in place and return that same ``rr``."""
        rr.values += [value]
        return rr

    def get_value(self):
        """Return the stored fragments joined inside a starred character class."""
        body = "".join(self.values)
        return "[{0}]*".format(body)
        

In [408]:
# Typed primitive set with no input arguments; its return type is RegexRange.
pset = gp.PrimitiveSetTyped("main", [], RegexRange)

# Zero-argument primitives, each yielding one character-class fragment.
# Registered in the same order as before: Letters, Numbers, Punctations.
for fragment_name, fragment in (
        ("Letters", "A-Z"),
        ("Numbers", "0-9"),
        ("Punctations", ",.!?"),
):
    # Bind the fragment as a default so each lambda keeps its own value.
    pset.addPrimitive(lambda fragment=fragment: fragment, [], str, fragment_name)

# Constructor for an empty range, and the combinator that adds one fragment to it.
pset.addPrimitive(RegexRange.create, [], RegexRange, "Range")
pset.addPrimitive(RegexRange.add_value, [RegexRange, str], RegexRange, "Range_AddValue")


In [462]:

# Two-objective fitness. Per DEAP's weights convention the positive weight
# maximises the first objective and the negative weight minimises the second.
creator.create("FitnessMax", base.Fitness, weights=(1.0, -1.0))
# NOTE(review): an "Individual" class is also created in an earlier cell of
# this notebook; creating it again here presumably replaces that one.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# "expr" draws a random expression from pset; "individual" wraps it in
# creator.Individual.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=10, max_=15)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)

# Sample one individual and extract the data needed to plot it.
expr = toolbox.individual()
nodes, edges, labels = gp.graph(expr)
# Parenthesised single-argument print gives the same output on Python 2 and 3.
print(edges)

[]


In [463]:
import pygraphviz as pgv

# Build the graph structure from the tree's nodes and edges, then let the
# "dot" engine compute the layout.
g = pgv.AGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
g.layout(prog="dot")

# Attach the human-readable label to every node (done after layout, as before).
for node_id in nodes:
    g.get_node(node_id).attr["label"] = labels[node_id]

# Render the tree to a PDF in the working directory.
g.draw("tree.pdf")

In [464]:
# Re-wrap the sampled expression as a PrimitiveTree and compile it against
# pset; the result is kept in r for inspection.
tree = gp.PrimitiveTree(expr)
r = gp.compile(tree, pset)

In [465]:
# Display the regex fragment of the compiled result; the recorded output
# '[]*' shows that this particular tree contributed no character fragments.
r.get_value()

'[]*'

In [466]:
import re
items = ["ALA", "Ala01", "ALA?", "1?", "ALA1!"]
def evaluate(individual):
    """Score an individual by how many sample strings its regex matches.

    The individual is wrapped in a ``gp.PrimitiveTree`` and compiled against
    the module-level ``pset``; the compiled object's ``get_value()`` fragment
    is then anchored with ``^`` and ``$``.

    Args:
        individual: the GP expression to score.

    Returns:
        A ``(match_count, regex_length)`` tuple. ``match_count`` is the number
        of entries in the module-level ``items`` that the anchored regex
        matches, or 0 when the regex is not valid syntax. ``regex_length`` is
        the length of the anchored regex string.
    """
    tree = gp.PrimitiveTree(individual)
    regex = "^" + gp.compile(tree, pset).get_value() + "$"
    try:
        # Compile once so syntax errors surface here and the pattern is
        # reused across every sample string.
        pattern = re.compile(regex)
    except re.error:
        # Only invalid patterns (e.g. the empty class "[]*") score zero;
        # any other exception is a real bug and is allowed to propagate.
        return 0, len(regex)
    match_count = sum(1 for item in items if pattern.search(item) is not None)
    return match_count, len(regex)

In [470]:
# Sample a fresh random individual and show the tuple evaluate() returns for it.
ind1 = toolbox.individual()
# Parenthesised single-argument print gives the same output on Python 2 and 3.
print(evaluate(ind1))

(2, 12)


In [None]:
# Draw the two candidates explicitly: the original referenced child1/child2
# without ever defining them, which raises NameError.
child1 = toolbox.individual()
child2 = toolbox.individual()
children = [child1, child2]

# selBest ranks by fitness, so every candidate must be scored first.
for child in children:
    child.fitness.values = evaluate(child)

selected = tools.selBest(children, 2)