In [1]:
from deap import gp
import numpy as np
import os
import sys
sys.path.append(os.path.abspath('../..'))  # or the full path to the "project" directory. This hack should be really fixed
from gpbr.gp.funcs import pow2, sqrtabs, expplusone


# Untyped primitive set with one input argument (renamed to "s" below).
pset = gp.PrimitiveSet("main", 1)

# Binary operators.
pset.addPrimitive(np.add, 2)
# pset.addPrimitive(np.subtract, 2)
pset.addPrimitive(np.multiply, 2)

# Unary functions.
pset.addPrimitive(np.cos, 1)
pset.addPrimitive(np.sin, 1)
pset.addPrimitive(sqrtabs, 1)
# pset.addPrimitive(pow2, 1)
pset.addPrimitive(expplusone, 1)

# Ephemeral constant generated by calling np.random.rand() with no arguments.
# This was previously spelled `(np.random.rand, 1)[0]`, which builds a tuple and
# immediately picks its first element, i.e. the very same callable.
pset.addEphemeralConstant('rand', np.random.rand)
# pset.addTerminal(np.pi, 'pi')

pset.renameArguments(ARG0="s")

In [2]:
from deap import creator, base, tools, algorithms
# creator.create() registers the new class on the deap.creator module itself.
# Running it a second time in the same kernel (e.g. re-executing this cell)
# makes DEAP warn and overwrite the class, so create each class only once.
if not hasattr(creator, "FitnessMin"):
    # Single objective with weight -1.0 (minimisation).
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
if not hasattr(creator, "Individual"):
    # Individuals are GP expression trees carrying a FitnessMin attribute.
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [3]:
# Toolbox wiring: expression generator -> individual -> population, plus compiler.
toolbox = base.Toolbox()
# Random expressions from gp.genHalfAndHalf over `pset`, with min_=2 and max_=12.
toolbox.register('expr', gp.genHalfAndHalf, pset=pset, min_=2, max_=12)
# Must follow the 'expr' registration: toolbox.expr is read right here.
toolbox.register('individual', tools.initIterate, creator.Individual, toolbox.expr)
# A population is a plain list of individuals; its size is given at call time (n=...).
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
# Compiles an expression tree against `pset`; the result is called as f(s) below.
toolbox.register('compile', gp.compile, pset=pset)

In [4]:
# Uniform grid of N sample points covering [0, 2*pi] (both ends included).
N = 100
test_set = np.linspace(0.0, 2.0 * np.pi, num=N)
test_set

array([0.        , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
       0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
       0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
       0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
       1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
       1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
       1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
       2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
       2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
       2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
       3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
       3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
       3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
       4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
       4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652

In [5]:
# Initial population of 1000 random expression trees.
# NOTE(review): no RNG seed is set in the visible cells, so this population (and
# every output derived from it) will differ from run to run.
pop = toolbox.population(n=1000)

In [6]:
from lshashpy3 import LSHash
from lshashpy3.storage import RedisStorage
# Optional Redis-backed storage (currently disabled):
# # redis_config = {"host": 'localhost', "port":6379, "db": 0}
# redis_config = {"redis": {"host": "localhost", "port": 6379}}
# # create 32-bit hashes for N-dimensional input data, stored in Redis:
# lsh = LSHash(32, N, num_hashtables=8, storage_config=redis_config)
# Active index: 32-bit hashes over N-dimensional vectors, 8 hash tables,
# default storage (no storage_config is passed).
lsh = LSHash(32, N, num_hashtables=8)

  import pkg_resources


In [7]:
# Index every individual by its output vector on `test_set`; the expression
# string is stored as extra data so query hits can be read back.
for ind in pop:
    ind_compiled = toolbox.compile(expr=ind)
    # Random trees can overflow or produce invalid values. Those results are
    # discarded by the isfinite() filter below, so the matching NumPy warnings
    # are silenced for the evaluation only.
    with np.errstate(over='ignore', invalid='ignore'):
        values = ind_compiled(test_set)
    # A tree that never uses `s` evaluates to a scalar. np.ndim() recognises
    # np.float64, plain Python floats and 0-d arrays alike; broadcast so every
    # indexed vector has length N.
    if np.ndim(values) == 0:
        values = np.full(N, values, dtype=float)
    if np.isfinite(values).all():
        lsh.index(values, extra_data=str(ind))


  return np.exp(np.ones_like(x, dtype=np.float64) + x)


In [16]:
# Draw one fresh random individual and evaluate it on the shared test grid;
# `values_new` is used as the query vector further down.
ind_new = toolbox.individual()
ind_new_compiled = toolbox.compile(expr=ind_new)
values_new = ind_new_compiled(test_set)

In [17]:
str(ind_new)

'add(expplusone(add(s, 0.5807639173933024)), expplusone(cos(0.3975398184092156)))'

In [18]:
# Broadcast a scalar result (a tree without `s`) to a length-N vector so it can
# be compared with the indexed vectors. np.ndim() also catches Python floats
# and 0-d arrays, which an isinstance check against np.float64 alone misses.
if np.ndim(values_new) == 0:
    values_new = np.full(N, values_new, dtype=float)
values_new

array([  11.69339108,   12.01174939,   12.35096775,   12.71241298,
         13.09754148,   13.50790507,   13.94515725,   14.41105986,
         14.90749017,   15.4364485 ,   16.00006619,   16.60061425,
         17.24051251,   17.92233935,   18.64884208,   19.42294803,
         20.24777636,   21.12665057,   22.06311198,   23.0609339 ,
         24.12413692,   25.25700505,   26.46410302,   27.75029464,
         29.12076244,   30.58102851,   32.13697678,   33.7948767 ,
         35.56140854,   37.44369028,   39.44930629,   41.5863379 ,
         43.86339596,   46.28965554,   48.8748929 ,   51.62952485,
         54.56465078,   57.69209734,   61.02446612,   64.5751844 ,
         68.35855926,   72.38983526,   76.68525582,   81.2621287 ,
         86.13889572,   91.3352071 ,   96.8720006 ,  102.77158589,
        109.0577345 ,  115.75577551,  122.89269768,  130.49725818,
        138.60009845,  147.23386769,  156.43335438,  166.23562648,
        176.6801808 ,  187.8091021 ,  199.66723273,  212.30235

In [19]:
# Look up the five closest indexed vectors using the "true_euclidean" distance.
results = lsh.query(values_new, num_results=5, distance_func="true_euclidean")
# Each hit has the shape ((vector, extra_data), distance); extra_data holds the
# expression string stored at indexing time.
for (_vector, expression), distance in results:
    print("Distance:", distance, "Data:", expression)

Distance: 406.62957682172924 Data: expplusone(add(s, 0.531203465630083))
Distance: 2003.250542279497 Data: expplusone(multiply(add(s, 0.8639575847111708), cos(0.11420531764886177)))
Distance: 2660.867444688165 Data: expplusone(add(s, 0.15314043833759483))
Distance: 3080.464675622483 Data: add(multiply(s, sqrtabs(cos(s))), add(add(expplusone(s), sqrtabs(expplusone(add(sqrtabs(0.9526555652464828), s)))), multiply(0.5617752127194987, s)))
Distance: 3327.3682502866254 Data: add(sin(sin(sin(sqrtabs(multiply(add(sqrtabs(s), expplusone(sin(sin(expplusone(0.27035782330848424))))), expplusone(multiply(add(multiply(s, multiply(0.48691162981558844, 0.7097701960562691)), sqrtabs(0.8676269866953497)), sqrtabs(sin(cos(s)))))))))), add(expplusone(s), s))


# Simplification of trivial (constant) expressions

In [20]:
def simplify_constant_subtrees(ind, s_vals, tol=1e-8):
    """
    Collapse constant subtrees of `ind` into single constant terminals.

    The tree is scanned top-down in prefix order. At every non-leaf node the
    subtree rooted there is compiled and evaluated on `s_vals`. If the standard
    deviation of the result is below `tol`, the whole subtree is replaced by
    one terminal holding the mean value and the scan jumps past the subtree.
    Otherwise the node is kept and the scan continues with its children.

    :param ind: Tree to simplify. It is NOT modified.
    :param s_vals: Sample points passed to each compiled subtree.
    :param tol: Exclusive upper bound on the standard deviation for a subtree
                to count as constant.
    :returns: A new ``gp.PrimitiveTree`` with the constant subtrees collapsed.
    """
    simplified = []
    i = 0
    while i < len(ind):
        node = ind[i]

        # Leaves cannot be simplified further. Keeping them untouched also
        # preserves the original node objects (e.g. ephemeral constants)
        # instead of re-wrapping them in a new Terminal.
        if node.arity == 0:
            simplified.append(node)
            i += 1
            continue

        slice_ = ind.searchSubtree(i)
        subtree_compiled = toolbox.compile(expr=gp.PrimitiveTree(ind[slice_]))
        subtree_values = subtree_compiled(s_vals)

        # A NaN standard deviation (from NaN/inf outputs) fails this comparison,
        # so subtrees with non-finite results are left as they are.
        if np.std(subtree_values) < tol:
            # Store a plain Python float: its repr is a bare numeric literal,
            # so the tree keeps printing and compiling no matter how the
            # installed NumPy formats its scalar types.
            const_value = float(np.mean(subtree_values))
            # The new terminal takes over the return type of the subtree it
            # replaces, so lookups keyed on `node.ret` still find it.
            simplified.append(gp.Terminal(const_value, False, node.ret))
            i = slice_.stop
        else:
            simplified.append(node)
            i += 1
    return gp.PrimitiveTree(simplified)

In [21]:
ind = toolbox.individual()
str(ind)

'add(sin(sin(cos(sqrtabs(multiply(cos(s), add(0.20576337050993088, s)))))), add(add(multiply(add(cos(add(s, 0.3290118945514454)), sin(add(s, s))), expplusone(sqrtabs(cos(s)))), add(sin(add(add(s, s), sqrtabs(s))), cos(cos(cos(0.06869235478450741))))), sin(expplusone(multiply(sqrtabs(add(0.9328245965131038, s)), sin(expplusone(0.7005761195143507)))))))'

In [22]:
ind_simplified = simplify_constant_subtrees(ind, test_set)
str(ind_simplified)

'add(sin(sin(cos(sqrtabs(multiply(cos(s), add(0.20576337050993088, s)))))), add(add(multiply(add(cos(add(s, 0.3290118945514454)), sin(add(s, s))), expplusone(sqrtabs(cos(s)))), add(sin(add(add(s, s), sqrtabs(s))), 0.8565314773166892)), sin(expplusone(multiply(sqrtabs(add(0.9328245965131038, s)), -0.7215811080879834)))))'

In [25]:
pset.ret

object

In [None]:

def mutInsert(individual, pset):
    """Inserts a new branch at a random position in *individual*. The subtree
    at the chosen position is used as child node of the created subtree, in
    that way, it is really an insertion rather than a replacement. Note that
    the original subtree will become one of the children of the new primitive
    inserted, but not perforce the first (its position is randomly selected if
    the new primitive has more than one child).

    If no primitive both returns and accepts the type of the chosen node, the
    individual is returned unchanged.

    :param individual: The normal or typed tree to be mutated (modified in place).
    :param pset: Primitive set whose ``primitives`` and ``terminals`` mappings
                 (keyed by type) supply the inserted primitive and the
                 terminals used to fill its remaining arguments.
    :returns: A tuple of one tree.
    """
    # Imported locally because the visible notebook cells never import these
    # names; without them the function fails with NameError on first use.
    # NOTE(review): this looks like a copy of deap.gp.mutInsert - confirm
    # whether the library version can be used directly instead.
    import random
    from inspect import isclass

    index = random.randrange(len(individual))
    node = individual[index]
    slice_ = individual.searchSubtree(index)
    choice = random.choice

    # As we want to keep the current node as children of the new one,
    # it must accept the return value of the current node
    primitives = [p for p in pset.primitives[node.ret] if node.ret in p.args]

    if len(primitives) == 0:
        return individual,

    new_node = choice(primitives)
    new_subtree = [None] * len(new_node.args)
    # Argument slot of the new primitive that will receive the old subtree.
    position = choice([i for i, a in enumerate(new_node.args) if a == node.ret])

    # Fill every other argument slot with a randomly chosen terminal.
    for i, arg_type in enumerate(new_node.args):
        if i != position:
            term = choice(pset.terminals[arg_type])
            # Terminals stored as classes must be instantiated before use.
            if isclass(term):
                term = term()
            new_subtree[i] = term

    # Splice the old subtree (possibly several nodes) into its slot, then
    # put the new primitive in front to keep the prefix ordering.
    new_subtree[position:position + 1] = individual[slice_]
    new_subtree.insert(0, new_node)
    individual[slice_] = new_subtree
    return individual,

In [None]:
        # string = ""
        # stack = []
        # for node in self:
        #     stack.append((node, []))
        #     while len(stack[-1][1]) == stack[-1][0].arity:
        #         prim, args = stack.pop()
        #         string = prim.format(*args)
        #         if len(stack) == 0:
        #             break  # If stack is empty, all nodes should have been seen
        #         stack[-1][1].append(string)

        # return string

    # for i in range(len(ind)):
    #     slice_ = ind.searchSubtree(i)
       


    # stack = [(ind.searchSubtree(0), 0)]
    # while stack:
    #     slice_, idx = stack.pop()
    #     subtree_complied = toolbox.compile(expr=gp.PrimitiveTree(ind[slice_]))
    #     subtree_values = subtree_complied(s_vals)
    #     if np.std(subtree_values) < tol:
    #         const_val = np.mean(subtree_values)
    #         ind[slice_] = gp.Terminal(const_val, False, np.float32)
    #         continue

    #     curr_idx = idx
    #     for i in ind.arity:
    #         slice_ = ind.searchSubtree(curr_idx + 1)
    #         stack.append((ind[slice_], curr_idx + 1))
    #         curr_idx = slice_.stop

        
    
    


    # NOTE(review): scratch fragment. These statements are indented and end in
    # `return ind`, but this cell contains no enclosing `def`, so it cannot run
    # as written. The same statements form the body of simplify_constants()
    # defined later in the notebook; presumably this is a leftover draft.
    # Bottom-up: process children first
    i = len(ind) - 1
    while i >= 0:
        # Extract subtree rooted at index i
        slice_ = ind.searchSubtree(i)
        subtree = ind[slice_]

        # Skip single-node terminals (already minimal)
        if len(subtree) == 1 and subtree[0].arity == 0:
            i -= 1
            continue

        # Evaluate subtree
        subtree_complied = toolbox.compile(expr=gp.PrimitiveTree(subtree))
        subtree_values = subtree_complied(s_vals)
        # Broadcast a scalar (np.float64) result to one value per sample point
        if isinstance(subtree_values, (np.float64)):
            subtree_values = np.ones(len(s_vals)) * subtree_values
        is_const, const_val = is_constant(subtree_values, tol)

        if is_const:
            # Create a constant terminal
            const_node = gp.Terminal(const_val, False, float)
            # Replace entire subtree
            ind[slice_] = gp.PrimitiveTree([const_node])

            # Stay on the start index of the replaced region; it now holds the
            # new terminal, which the terminal check above skips next iteration
            i = slice_.start
        else:
            # Move to previous node
            i -= 1

    return ind

In [32]:
def is_constant(pred, tol=1e-8):
    """Check whether a prediction is constant within a tolerance.

    :param pred: Scalar or array-like of numeric predictions, or ``None``.
    :param tol: Exclusive upper bound on the variance of `pred` for it to
                count as constant.
    :returns: ``(True, mean)``, with the mean as a Python float, when `pred`
              is non-empty, entirely finite and has variance below `tol`;
              ``(False, None)`` otherwise.
    """
    if pred is None:
        return False, None

    # atleast_1d lets bare scalars and 0-d arrays through; they have no len().
    values = np.atleast_1d(np.asarray(pred))
    if values.size == 0:
        return False, None

    # isfinite rejects NaN as well as +/-inf.
    if not np.isfinite(values).all():
        return False, None

    if np.var(values) < tol:
        return True, float(np.mean(values))
    return False, None

In [33]:
def simplify_constants(ind, s_vals, tol=1e-8):
    """
    Collapse constant subtrees of `ind` into single constant terminals, in place.

    Nodes are visited from the last index down to the first. In the prefix
    layout of the tree this means every subtree is examined only after all of
    its descendants, so children are collapsed before their parents.

    :param ind: Tree to simplify. Anything that is not a ``gp.PrimitiveTree``
                is returned untouched.
    :param s_vals: Sample points passed to each compiled subtree; its length
                   is used to broadcast scalar results.
    :param tol: Tolerance forwarded to ``is_constant``.
    :returns: The same `ind` object, after modification.
    """
    if not isinstance(ind, gp.PrimitiveTree):
        return ind

    n_samples = len(s_vals)
    i = len(ind) - 1
    while i >= 0:
        root = ind[i]

        # Leaves are already minimal.
        if root.arity == 0:
            i -= 1
            continue

        slice_ = ind.searchSubtree(i)
        subtree_compiled = toolbox.compile(expr=gp.PrimitiveTree(ind[slice_]))
        subtree_values = subtree_compiled(s_vals)

        # A subtree without `s` evaluates to a scalar. np.ndim() recognises
        # np.float64, Python floats and 0-d arrays alike; broadcast so that
        # is_constant() always receives one value per sample point.
        if np.ndim(subtree_values) == 0:
            subtree_values = np.full(n_samples, subtree_values, dtype=float)

        is_const, const_val = is_constant(subtree_values, tol)
        if is_const:
            # The new terminal takes over the return type of the subtree it
            # replaces, so lookups keyed on `node.ret` still find it.
            ind[slice_] = [gp.Terminal(const_val, False, root.ret)]

        # Replacing [i, stop) by one node leaves every index below i
        # untouched, so the scan simply continues with the previous node.
        i -= 1

    return ind

In [50]:
ind = toolbox.individual()
str(ind)

'multiply(expplusone(sin(sqrtabs(multiply(sqrtabs(0.7913988959132174), add(0.8853519067253816, 0.9769080183829577))))), cos(expplusone(sqrtabs(expplusone(expplusone(0.14336847005244946))))))'

In [51]:
simplify_constants(ind, test_set, tol=1e-8)

[<deap.gp.Terminal at 0x1f4e4ec2b40>]

In [52]:
str(ind)

'-5.340517239637064'

In [3]:
from lshashpy3 import LSHash

In [None]:
# create 32-bit hashes for input data of 8 dimensions:
# NOTE(review): this rebinds `lsh`, replacing the N-dimensional index built
# earlier in the notebook.
lsh = LSHash(32, 8)

# index vector
lsh.index([2,3,4,5,6,7,8,9])

# index vector and extra data
lsh.index([10,12,99,1,5,31,2,3], extra_data="vec1")
lsh.index([10,12,99,1,5,31,2,3.1], extra_data="vec1.1")
lsh.index([10,11,94,1,4,31,2,3], extra_data="vec2")

# query a data point
top_n = 1
nn = lsh.query([10,12,93,1,5,36,7,7], num_results=top_n, distance_func="euclidean")
print(nn)

# unpack vector, extra data and vectorial distance
# NOTE(review): the unpacking below assumes every hit was indexed with extra_data;
# the first vector above was indexed without it - TODO confirm how such hits look.
top_n = 3
nn = lsh.query([10,12,99,1,5,30,1,1], num_results=top_n, distance_func="euclidean")
for ((vec,extra_data),distance) in nn:
    print(vec, extra_data, distance)

[(((10, 11, 94, 1, 4, 31, 2, 3), 'vec2'), 69)]
(10, 12, 99, 1, 5, 31, 2, 3) vec1 6
(10, 12, 99, 1, 5, 31, 2, 3.1) vec1.1 6.41
(10, 11, 94, 1, 4, 31, 2, 3) vec2 33


In [19]:
# create 32-bit hashes for input data of 8 dimensions:
lsh = LSHash(32, 8)

In [20]:
# index vector and extra data
lsh.index([10,12,99,1,5,31,2,3], extra_data="vec1")
lsh.index([10,12,99,1,5,31,2,3.1], extra_data="vec2")
lsh.index([10,11,94,1,4,31,2,3], extra_data="vec3")


['10111101101000011110111000101100']

In [37]:
lsh.query([10,11,94,1,4,31,2,3.1], num_results=10, distance_func="euclidean")

[(((10, 11, 94, 1, 4, 31, 2, 3), 'vec3'), 0.010000000000000018),
 (((10, 12, 99, 1, 5, 31, 2, 3.1), 'vec2'), 27.0),
 (((10, 12, 99, 1, 5, 31, 2, 3), 'vec1'), 27.01)]