In [1]:
import numpy as np
import random
import torch
import copy
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
class PrimitiveSet:
    def __init__(self):
        self.functions = []
        self.terminals = []
    
    def add_function(self, function, arity):
        self.functions.append(Function(function, arity))
    
    def add_terminal(self, variable):
        self.terminals.append(Terminal(variable))
    
    def choose_function(self):
        return np.random.choice(self.functions)
    
    def choose_terminal(self):
        return np.random.choice(self.terminals)
#        if np.random.random() < 0.5 or not self.terminals:
#            return Terminal(np.random.random())
#        else:
#            return np.random.choice(self.terminals)

class Function:
    def __init__(self, func, arity):
        self.func = func
        self.arity = arity
    
    def format(self, *args):
        args = args[:self.arity]
        return (self.func.__name__ + '(' + ', '.join(map(str, args)) + ')')

    def __str__(self):
        return self.func.__name__

    def __repr__(self):
        return self.func.__name__

class Terminal:
    def __init__(self, value):
        self.value = value
        self.arity = 0
    
    def format(self):
        return str(self.value)
    
    def __str__(self):
        if isinstance(self.value, str):
            return self.value
        else:
            return str(round(self.value, 3))

    def __repr__(self):
        if isinstance(self.value, str):
            return self.value
        else:
            return str(round(self.value, 3))


In [None]:
def add(x, y):
    return np.add(x, y)

def sub(x, y):
    return np.subtract(x, y)

def mul(x, y):
    return np.multiply(x, y)

In [3]:
def add(x, y):
    return torch.add(x, y)

def mul(x, y):
    return torch.multiply(x, y)

In [4]:
arity = 2
head_length = 2

pset = PrimitiveSet()
pset.add_function(add, arity)
pset.add_function(mul, arity)
pset.add_terminal('a')
pset.add_terminal('b')
pset.add_terminal('c')

In [5]:
def generate_genome(pset, head_length):
    functions = pset.functions
    tail_length = head_length * (arity - 1) + 1
    #genome = [None] * (head_length + tail_length) # init genome
    genome = [pset.choose_function()] * (head_length + tail_length) # init genome

    # gen head part (functions and terminals)
    for i in range(1, head_length):
        if np.random.random() < 0.5:
            genome[i] = pset.choose_function()
        else:
            genome[i] = pset.choose_terminal()
    # gen tail part (only terminals)
    for i in range(head_length, head_length + tail_length):
        genome[i] = pset.choose_terminal()
    return genome

In [6]:
genome = generate_genome(pset, 3)
genome

[mul, c, mul, c, c, a, c]

In [7]:
def get_kexpression(genome):
    expr = [genome[0]]
    i = 0
    j = 1
    while i < len(expr):
        for _ in range(genome[i].arity):
            expr.append(genome[j])
            j += 1
        i += 1
    return expr

In [8]:
kexpr = get_kexpression(genome)
kexpr

[mul, c, mul, c, c]

In [9]:
arity = 2
kexpr = get_kexpression(genome)
i = len(kexpr) - 1 
while i >= 0:
    if kexpr[i].arity > 0:
        args = []

        for _ in range(kexpr[i].arity):
            element = kexpr.pop()
            if isinstance(element, str):
                args.append(element)
            else:
                args.append(element.format())

        kexpr[i] = kexpr[i].format(*reversed(args))
    i -= 1
kexpr[0] if isinstance(kexpr[0], str) else kexpr[0].format()

'mul(c, mul(c, c))'

In [10]:
import pandas as pd

df_size = 1000
df = {'a': np.random.rand(df_size),
        'b': np.random.rand(df_size),
        'c': np.random.rand(df_size)}
df['y']= 2*df['a'] + 3*df['b'] + 4*df['c'] + np.random.rand(df_size)

df = pd.DataFrame(df)

In [11]:
a = torch.from_numpy(df.a.values).float().to(device)
b = torch.from_numpy(df.b.values).float().to(device)
c = torch.from_numpy(df.c.values).float().to(device)
target = torch.from_numpy(df.y.values).float().to(device)

In [12]:
eval(kexpr[0])

tensor([1.6027e-02, 6.4661e-07, 6.5229e-01, 1.6837e-01, 5.7881e-01, 4.6790e-04,
        6.1263e-04, 2.2417e-01, 3.4069e-01, 1.0351e-01], device='cuda:0')

In [None]:
guess = eval(kexpr[0])

error = np.sum(np.power(guess - df.y / df.y, 2))
mape = 1000 * (1 / (1 + (error) / df.size))

In [13]:
guess = eval(kexpr[0])
error = torch.square((guess - target) / target)
error = error.sum()
torch_mape = 1000 * (1 / (1 + (error / df.size)))
torch_mape.item()

810.1773681640625

In [14]:
pop = [generate_genome(pset, 4) for _ in range(5)]
pop

[[add, add, a, a, c, b, a, a, c],
 [mul, add, add, b, b, b, c, b, b],
 [add, add, add, c, a, b, a, a, b],
 [add, mul, mul, add, c, b, a, a, b],
 [add, add, add, b, c, b, c, a, a]]

In [15]:
import random
def random_select(population): return random.sample(pop, np.random.randint(1, len(pop)))

In [16]:
selected_pop = random_select(pop)
selected_pop

[[add, add, a, a, c, b, a, a, c], [add, mul, mul, add, c, b, a, a, b]]

In [17]:
def mutate(population, mutations_per_individual=2):
    tail_length = head_length * (arity - 1) + 1
    for individual in population:
        for _ in range(mutations_per_individual):
            mutation_index = random.randint(0, head_length + tail_length - 1)
            if mutation_index < head_length:
                # mutate function in head
                if random.random() < 0.5:
                    individual[mutation_index] = random.choice(pset.functions)
                else:
                    individual[mutation_index] = Terminal(random.random())
            else:
                # mutate terminals in tail
                individual[mutation_index] = Terminal(random.random())

In [18]:
mutate(selected_pop)
genome = selected_pop
genome

[[add, add, 0.027, a, c, b, a, a, c], [add, mul, mul, 0.958, c, b, a, a, b]]

In [38]:
class Gene:
    def __init__(self, pset, head_length):
        self._head_length = head_length
        self.genome = generate_genome(pset, head_length)

    def get_kexpression(self):
        genome = self.genome
        expr = [genome[0]]
        i = 0
        j = 1
        while i < len(expr):
            for _ in range(genome[i].arity):
                expr.append(genome[j])
                j += 1
            i += 1
        return expr

    def __str__(self):
        kexpr = self.get_kexpression()
        i = len(kexpr) - 1 # start by the last item in tree
        while i >= 0:
            # if the item is a function
            if kexpr[i].arity > 0:
                args = []

                for _ in range(kexpr[i].arity):
                    element = kexpr.pop() # remove last element
                    if isinstance(element, str):
                        # append to the args of the current function item
                        args.append(element)
                    else:
                        # append terminal to the args of the current function item
                        args.append(element.format())
                # when all args for this function has been acquired, format
                kexpr[i] = kexpr[i].format(*reversed(args))
            i -= 1
        # return formatted root
        return kexpr[0] if isinstance(kexpr[0], str) else kexpr[0].format()

class Population:
    def __init__(self):
        self.individuals = None
        self.n = None
        self.pset = None
        self.head_length = None

    def generate(self, n, pset, head_length):
        self.individuals = [Gene(pset, head_length) for _ in range(n)]
        self.n = n
        self.pset = pset
        self.head_length = head_length

def random_select(population):
    # random selection of individuals
    selected_individuals = random.sample(population.individuals, population.n)
    selected_individuals = [copy.deepcopy(individual) for individual in selected_individuals]

    selected_population = Population()
    selected_population.individuals = selected_individuals
    selected_population.n = population.n
    selected_population.pset = population.pset
    selected_population.head_length = population.head_length
    return(selected_population)

def mutate(population, mutations_per_individual=2):
    # Basic mutation of primitives
    pset = population.pset
    head_length = population.head_length
    arity = 2
    tail_length = head_length * (arity - 1) + 1
    for individual in population.individuals:
        for _ in range(mutations_per_individual):
            mutation_index = random.randint(0, head_length + tail_length - 1)
            if mutation_index < head_length:
                # mutate function or terminals in head
                if random.random() < 0.5:
                    individual.genome[mutation_index] = random.choice(pset.functions)
                else:
                    #individual.genome[mutation_index] = Terminal(random.random())
                    individual.genome[mutation_index] = random.choice(pset.terminals)
            else:
                # mutate terminals in tail
                #individual.genome[mutation_index] = Terminal(random.random())
                individual.genome[mutation_index] = random.choice(pset.terminals)

In [24]:
population = Population()
population.generate(5, pset, 5)

In [25]:
def get_next_generation(population):
    # user evaluates fitness
    selected_population = random_select(population)
    mutate(selected_population)
    return selected_population

In [26]:
for i, individual in enumerate(population.individuals):
    print(i, individual.genome)

0 [mul, a, add, c, mul, c, b, b, b, a, a]
1 [mul, b, a, c, add, a, b, c, c, b, a]
2 [add, b, mul, a, b, c, a, c, c, c, b]
3 [add, mul, add, c, c, c, b, c, b, a, a]
4 [add, c, mul, mul, a, a, a, a, a, c, b]


In [23]:
population = get_next_generation(population)
for i, individual in enumerate(population.individuals):
    print(i, individual.genome)

0 [0.341, 0.854, a, add, a, a, b, b, a, c, b]
1 [add, b, c, a, mul, c, 0.533, 0.992, c, b, b]
2 [add, mul, add, b, a, a, 0.982, 0.123, b, c, a]
3 [add, b, mul, 0.005, c, c, b, b, 0.422, c, a]
4 [add, b, mul, 0.901, b, a, a, a, a, a, b]


In [27]:
evaluations = [eval(str(individual.genome)) for individual in population.individuals]
evaluations

[[<function __main__.mul(x, y)>,
  tensor([0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
          0.4133], device='cuda:0'),
  <function __main__.add(x, y)>,
  tensor([0.2521, 0.0086, 0.8673, 0.5522, 0.8334, 0.0776, 0.0849, 0.6075, 0.6984,
          0.4695], device='cuda:0'),
  <function __main__.mul(x, y)>,
  tensor([0.2521, 0.0086, 0.8673, 0.5522, 0.8334, 0.0776, 0.0849, 0.6075, 0.6984,
          0.4695], device='cuda:0'),
  tensor([0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
          0.8214], device='cuda:0'),
  tensor([0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
          0.8214], device='cuda:0'),
  tensor([0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
          0.8214], device='cuda:0'),
  tensor([0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
          0.4133], device='cuda:0'),
  tensor([0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.

In [28]:
tensor_list = [t for sublist in evaluations for t in sublist if isinstance(t, torch.Tensor)]

In [29]:
stacked_tensor = torch.stack(tensor_list)

In [30]:
stacked_tensor

tensor([[0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
         0.4133],
        [0.2521, 0.0086, 0.8673, 0.5522, 0.8334, 0.0776, 0.0849, 0.6075, 0.6984,
         0.4695],
        [0.2521, 0.0086, 0.8673, 0.5522, 0.8334, 0.0776, 0.0849, 0.6075, 0.6984,
         0.4695],
        [0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
         0.8214],
        [0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
         0.8214],
        [0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
         0.8214],
        [0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
         0.4133],
        [0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
         0.4133],
        [0.2480, 0.3000, 0.9896, 0.2821, 0.7421, 0.3497, 0.2687, 0.0339, 0.7433,
         0.8214],
        [0.6171, 0.8551, 0.8125, 0.7154, 0.6965, 0.6765, 0.5041, 0.3204, 0.9392,
         0.4133],
        [0

In [31]:
error = torch.square((stacked_tensor - target) / target)
error = error.sum(axis=0)
error

tensor([34.3803, 33.4460, 33.6748, 34.1432, 33.7156, 32.5470, 34.0831, 34.4612,
        32.8696, 34.0243], device='cuda:0')

In [36]:
torch_mape = 1000 * (1 / (1 + (error / df.size)))
torch_mape

tensor([537.7770, 544.6176, 542.9268, 539.4962, 542.6257, 551.3670, 539.9343,
        537.1927, 548.9257, 540.3631], device='cuda:0')

In [37]:
torch_mape.max().item()

551.3670043945312

In [56]:
import pandas as pd

df_size = 5000
df = {'a': np.random.rand(df_size),
        'b': np.random.rand(df_size),
        'c': np.random.rand(df_size)}
df['y']= 2*df['a'] + 3*df['b'] + 4*df['c'] + np.random.rand(df_size)

df = pd.DataFrame(df)

a = torch.from_numpy(df.a.values).float().to(device)
b = torch.from_numpy(df.b.values).float().to(device)
c = torch.from_numpy(df.c.values).float().to(device)
target = torch.from_numpy(df.y.values).float().to(device)

In [57]:
population = Population()
population.generate(100, pset, 200)

def get_next_generation(population):
    # user evaluates fitness
    selected_population = random_select(population)
    mutate(selected_population)
    return selected_population

for i in range(10000000):
    population = get_next_generation(population)
    evaluations = [eval(str(individual.genome)) for individual in population.individuals]
    tensor_list = torch.stack([t for sublist in evaluations for t in sublist if isinstance(t, torch.Tensor)])
    error = (torch.square((tensor_list - target) / target)).sum(axis=0)
    torch_mape = (1000 * (1 / (1 + (error / df.size)))).max().item()
    print(' ### START GLORIOUS EVOLUTION ###')
    print(f'eval {i} :: ', torch_mape)
    if torch_mape >= 1000: break

OutOfMemoryError: CUDA out of memory. Tried to allocate 574.00 MiB. GPU 0 has a total capacity of 1.94 GiB of which 403.94 MiB is free. Including non-PyTorch memory, this process has 1.16 GiB memory in use. Of the allocated memory 1.12 GiB is allocated by PyTorch, and 1.91 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [49]:
population = get_next_generation(population)
for i, individual in enumerate(population.individuals):
    print(i, individual.genome)

0 [a, add, b, c, a, b, c, b, c, c, b]
1 [add, c, a, c, c, c, a, c, a, a, a]
2 [a, mul, mul, b, add, c, b, c, c, c, b]
3 [a, add, mul, add, b, a, b, b, c, a, b]
4 [add, mul, mul, add, add, b, a, b, b, c, b]
