In [1]:
import numpy as np

In [3]:
class Node:

    def __init__(self, value, left = None, right = None):
        self.value = value
        self.left  = left
        self.right = right

    def evaluate(self, x: np.ndarray):
        if self.value in ['+', '-', '*', '/', '^']:
            left = self.left.evaluate(x) 
            right = self.right.evaluate(x) if self.right else 1

            try:
                if self.value == '+':
                    return left + right
                elif self.value == '-':
                    return left - right
                elif self.value == '*':
                    return left * right
                elif self.value == '/':
                    if np.abs(right) < 1e-10:
                        return float('inf')
                    return left / right
                elif self.value == '^':
                    if np.any(left < 0) and np.any(np.mod(right, 1) != 0):  #(-5^0.2, -5 undefined)
                        return float('inf') 
                    return np.power(left, right)
            except:
                return float('inf')

        elif self.value in ['sin', 'cos', 'tan', 'sigmoid', 'exp', 'log', 'sqrt', 'inv']:
            left = self.left.evaluate(x) 

            try:
                if self.value == 'sin':
                    return np.sin(left)
                elif self.value == 'cos':
                    return np.cos(left)
                elif self.value == 'tan':
                    return np.tan(left)
                elif self.value == 'sigmoid':
                    return 1/(1 + np.exp(-left))
                elif self.value == 'exp':
                    return np.exp(left) if np.all(left < 1000) else float('inf')
                elif self.value == 'log':
                    if np.any(right <= 0):
                        return -float('inf')
                    return np.log(left) 
                elif self.value == 'sqrt':
                    if np.any(left < 0):
                        return -float('inf')
                    return np.sqrt(left) 
                elif self.value == 'inv':
                    if np.any(left == 0):
                        return float('inf')
                    return 1/left
            except:
                return float('inf') 
        
        else:
            try:
                if isinstance(self.value, str) and self.value.startswith('x['):
                    index = int(self.value[2:-1])
                    return x[index]
            except:
                return float('inf')

    
    def count_nodes(self):
        count = 1
        if self.left:
            count += self.left.count_nodes()
        elif self.right:
            count += self.right.count_nodes()
        return count
        
    def __str__(self) -> str:
        if self.left and self.right:
            return f"({str(self.left)} {self.value} {str(self.right)})"
        elif self.left:
            return f"{self.value}({str(self.left)})"
        else:
            return str(self.value)

In [4]:
class Genetic_Programming:
    
    def __init__(self, pop_size: int, num_gen: int):
        self.pop_size = pop_size
        self.num_gen = num_gen
        self.population = []
        self.age = []

    def generate_random_tree(self, depth: int, shape: int, required_vars: set) -> Node:
        if depth == 0:
            if required_vars:
                choice = required_vars.pop()  
                return Node(choice)
            else:
                if np.random.random() < 0.05:
                    choice = 'x[' + str(np.random.randint(0, shape)) + ']'
                    return Node(choice)
                else:
                    choice = np.random.random_sample()
                    return Node(float(choice))

        else:
            operator_choices = np.array(['+', '-', '*', '/', 'sin', 'cos', 'tan', 'sigmoid', 'exp', 'inv']) # 'log', 'sqrt', '^' neglegted because i do not see any improvement for these operators for our datasets. 
            operator = np.random.choice(operator_choices)
            if operator in ['+', '-', '*', '/']:
                left = self.generate_random_tree(depth - 1, shape, required_vars)
                right = self.generate_random_tree(depth - 1, shape, required_vars)

                if required_vars:
                    missing_var = required_vars.pop()
                    right = Node('+', left=right, right=Node(missing_var))
            
                return Node(operator, left=left, right=right)
            
            else:
                return Node(operator, left=self.generate_random_tree(depth - 1, shape, required_vars))
     
    def initialize_population(self, depth: int, shape: int, required_vars: set):
        while len(self.population) < self.pop_size:
            
            tree_required_vars = required_vars.copy()
            tree = self.generate_random_tree(depth, shape, tree_required_vars)

            while tree_required_vars:  
                missing_var = tree_required_vars.pop()
                tree = Node('+', left=tree, right=Node(missing_var))

            self.population.append(tree)
            self.age.append(0)

    def fitness(self, individual: Node, x_train: np.ndarray, y_train: np.ndarray, alpha =1.0, beta=0.05) -> float:
        n_samples = x_train.shape[1]
        index = int(0.8 * n_samples)
        
        x_train_splitted = x_train[:, :index]
        y_train_splitted = y_train[:index]
        x_val = x_train[:, index:]
        y_val = y_train[index:]

        y_predictions_train = np.array([individual.evaluate(x) for x in x_train_splitted.T])
        y_predictions_val = np.array([individual.evaluate(x) for x in x_val.T])

        mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
        mse_val = np.mean((y_val - y_predictions_val) ** 2)
        mse = 0.8 * mse_train + 0.2 * mse_val
        
        complexity = individual.count_nodes()
        
        fitness = alpha * mse + beta * complexity
        return fitness
    
    def parent1_and_parent2_selection(self, x_train: np.ndarray, y_train: np.ndarray) -> tuple[Node, Node]:
        indices = np.random.choice(self.pop_size, 20, replace=False)
        fitness_values = np.array([self.fitness(self.population[i], x_train, y_train) for i in indices])

        valid_mask = np.isfinite(fitness_values) 
        valid_indices = indices[valid_mask]
        valid_fitness_values = fitness_values[valid_mask]
        
        fitness_values_shifted = valid_fitness_values - valid_fitness_values.min()  #reducing selective pressure
        selection_probabilities = np.exp(-fitness_values_shifted)
        selection_probabilities += 1e-10 #non-zero probabilities
        selection_probabilities /= selection_probabilities.sum()
        non_zero_probs = np.count_nonzero(selection_probabilities)

        if non_zero_probs < 2:
            selected_indices = np.random.choice(valid_indices, size=2)
        else:
            selected_indices = np.random.choice(valid_indices, size=2, p=selection_probabilities)  #roulette wheel selection
                
        parent1 = self.population[selected_indices[0]]
        parent2 = self.population[selected_indices[1]]

        return parent1, parent2

    # def mutation(self, individual: Node, depth: int) -> Node:
    #     a = np.random.rand()
    #     if individual.value in ['+', '-', '*', '/', '^']:
    #         if a < 0.33:
    #             left = self.mutation(individual.left, depth - 1) if individual.left else None
    #             right = self.mutation(individual.right, depth - 1) if individual.right else None
    #             return Node(individual.value, left=left, right=right) 
    #         elif a >= 0.33 and a < 0.66:
    #             left = self.mutation(individual.left, depth - 1) if individual.left else None
    #             return Node(individual.value, left=left, right=individual.right) 
    #         else:
    #             right = self.mutation(individual.right, depth - 1) if individual.right else None
    #             return Node(individual.value, left=individual.left, right=right)
    #     elif individual.value in ['sin', 'cos', 'tan', 'sigmoid', 'exp', 'log', 'sqrt', 'inv']:
    #         left = self.mutation(individual.left, depth - 1) if individual.left else None
    #         return Node(individual.value, left=left)
    #     else:
    #         return individual
            
    def xover(self, parent1: Node, parent2: Node) -> Node:
        a = np.random.rand()
        if a < 0.25:
            return Node(parent1.value, left=parent1.left, right=parent2.right)
        elif a >= 0.25 and a < 0.5:
            return Node(parent1.value, left=parent2.left, right=parent1.right)
        elif a >= 0.5 and a < 0.75:
            return Node(parent2.value, left=parent1.left, right=parent2.right)
        else:
            return Node(parent2.value, left=parent2.left, right=parent1.right)
    
    def evolve(self, shape: int, x_train: np.ndarray, y_train: np.ndarray) -> Node:

        best_individual: Node = None
        depths = [np.random.randint(1, 4) for _ in range(self.pop_size)]
        required_vars = set(f'x[{i}]' for i in range(shape))
        self.initialize_population(max(depths), shape, required_vars)

        fitness_values = [self.fitness(individual, x_train, y_train) for individual in self.population]
        fitness_values = [fit if np.isfinite(fit) else float('inf') for fit in fitness_values] #avoid nan values
        best_fitness_value = min(fitness_values)
        best_individual = self.population[np.argmin(fitness_values)]

        temperature = 2000
        cooling_rate = 0.9

        for generation in range(self.num_gen):
            print(f"Generation_ {generation} Best fitness: {best_fitness_value} Best tree: {best_individual}")

            for _ in range(self.pop_size // 2):
                parent1, parent2 = self.parent1_and_parent2_selection(x_train, y_train)
                # a =  np.random.rand()
                # if a < 0.95:
                #     offspring = self.xover(parent1, parent2)
                # elif a >= 0.95 and a < 0.97:
                #     mutation_depth1 = np.random.randint(1, 4)
                #     offspring = self.mutation(parent1, mutation_depth1)
                # else:
                #     mutation_depth2 = np.random.randint(1, 4)
                #     offspring = self.mutation(parent2, mutation_depth2)

                offspring = self.xover(parent1, parent2)

                
                offspring_fitness = self.fitness(offspring, x_train, y_train)
                if not np.isfinite(offspring_fitness):
                    continue

                delta_fitness = best_fitness_value - offspring_fitness

                if delta_fitness > 0 or np.random.rand() < np.exp(delta_fitness / temperature):
                    oldest_index = np.argmax(self.age)  
                    self.population[oldest_index] = offspring
                    self.age[oldest_index] = 0

                    if offspring_fitness < best_fitness_value:
                        best_fitness_value = offspring_fitness
                        best_individual = offspring

            temperature *= cooling_rate  
            self.age = [age + 1 for age in self.age]

            for _ in range(50): #add new random individuals for each generation
                tree_required_vars = required_vars.copy()
                new_tree = self.generate_random_tree(np.random.randint(1, 4), shape, tree_required_vars)

                while tree_required_vars: 
                    missing_var = tree_required_vars.pop()
                    new_tree = Node('+', left=new_tree, right=Node(missing_var)) 

                self.population.append(new_tree)
                self.age.append(0)
                self.pop_size += 1

            fitness_values = [self.fitness(individual, x_train, y_train) for individual in self.population]
            fitness_values = [fit if np.isfinite(fit) else float('inf') for fit in fitness_values]

        return best_individual, best_fitness_value

In [None]:
# problem = np.load(f'../data/problem_0.npz')
# x_train = problem['x']
# y_train = problem['y']
# ga = Genetic_Programming(300, 20)
# shape = x_train.shape[0]
# print('shape:', shape)
# best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
# print(f"Best tree_0: {best_tree} Best fitness_0: {best_fitness}")

In [7]:
problem = np.load(f'../data/problem_1.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_1: {best_tree} Best fitness_1: {best_fitness}")

shape: 1


  return np.sin(left)
  return left - right
  return np.tan(left)
  return np.cos(left)
  return left / right


Generation_ 0 Best fitness: 0.2 Best tree: inv(inv(sin(x[0])))
Generation_ 1 Best fitness: 0.2 Best tree: inv(inv(sin(x[0])))


  return left * right


Generation_ 2 Best fitness: 0.10328154522389471 Best tree: /(x[0])
Generation_ 3 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 4 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 5 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 6 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 7 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 8 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 9 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 10 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 11 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 12 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 13 Best fitness: 0.1 Best tree: sin(x[0])


  return left + right


Generation_ 14 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 15 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 16 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 17 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 18 Best fitness: 0.1 Best tree: sin(x[0])
Generation_ 19 Best fitness: 0.1 Best tree: sin(x[0])
Best tree_1: sin(x[0]) Best fitness_1: 0.1


In [6]:
problem = np.load(f'../data/problem_2.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_2: {best_tree} Best fitness_2: {best_fitness}")

shape: 3


  return left * right
  return np.sin(left)
  return left + right
  return 1/(1 + np.exp(-left))
  return np.tan(left)
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  return np.cos(left)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  mse_val = np.mean((y_val - y_predictions_val) ** 2)
  return left - right


Generation_ 0 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 1 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 2 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 3 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 4 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 5 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 6 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 7 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 8 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))


  return left / right


Generation_ 9 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 10 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 11 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 12 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 13 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 14 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 15 Best fitness: 29575308534880.043 Best tree: inv(exp((x[0] + (x[1] + x[2]))))
Generation_ 16 Best fitness: 29575308429996.047 Best tree: inv(sigmoid((x[0] + (x[1] + x[2]))))
Generation_ 17 Best fitness: 29575308429996.047 Best tree: inv(sigmoid((x[0] + (x[1] + x[2]))))
Generation_ 18 Best fitness: 29575308429996.047 Best tree: inv(sigmoid((x[0] + (x[1] + x[2]))))
Generation_ 19 Best fitness: 29575308429996.047 Best tree: inv(sigmoi

In [5]:
problem = np.load(f'../data/problem_3.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_3: {best_tree} Best fitness_3: {best_fitness}")

shape: 3


  return 1/(1 + np.exp(-left))
  return np.sin(left)
  return np.tan(left)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  mse_val = np.mean((y_val - y_predictions_val) ** 2)
  return np.cos(left)
  return left / right
  return 1/left
  return left - right


Generation_ 0 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))


  return left + right


Generation_ 1 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 2 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 3 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 4 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 5 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 6 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 7 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 8 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 9 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation

  return left * right


Generation_ 18 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Generation_ 19 Best fitness: 2469.3256870293417 Best tree: ((exp(x[0]) + (sin(x[1]) + x[2])) / exp(exp(x[1])))
Best tree_3: (cos(tan(x[0])) + (inv(sigmoid(x[1])) + x[2])) Best fitness_3: 1323.0501087233956


In [11]:
problem = np.load(f'../data/problem_4.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_4: {best_tree} Best fitness_4: {best_fitness}")

shape: 2


  return np.sin(left)
  return np.tan(left)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  mse_val = np.mean((y_val - y_predictions_val) ** 2)
  return left * right
  return 1/(1 + np.exp(-left))
  return np.cos(left)
  return left / right
  return left - right


Generation_ 0 Best fitness: 22.60552490836873 Best tree: exp(sigmoid((x[0] + x[1])))
Generation_ 1 Best fitness: 21.801655622275085 Best tree: exp(sigmoid((x[0] / x[1])))
Generation_ 2 Best fitness: 21.801655622275085 Best tree: exp(sigmoid((x[0] / x[1])))


  return left + right


Generation_ 3 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 4 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 5 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 6 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 7 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 8 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 9 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 10 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 11 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 12 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 13 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_ 14 Best fitness: 21.581534117988824 Best tree: exp(cos((x[0] * x[1])))
Generation_

In [12]:
problem = np.load(f'../data/problem_5.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_5: {best_tree} Best fitness_5: {best_fitness}")

shape: 2


  return 1/(1 + np.exp(-left))
  return np.tan(left)
  return left / right
  return np.cos(left)
  return np.sin(left)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  return left - right
  return left + right


Generation_ 0 Best fitness: 0.2 Best tree: (cos((x[0] * x[1])) / (inv(0.9927373019055301) * exp(0.5594164038978211)))


  return left * right


Generation_ 1 Best fitness: 0.2 Best tree: (cos((x[0] * x[1])) / (inv(0.9927373019055301) * exp(0.5594164038978211)))
Generation_ 2 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))


  mse_val = np.mean((y_val - y_predictions_val) ** 2)


Generation_ 3 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 4 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 5 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 6 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 7 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 8 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 9 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 10 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 11 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] / 0.02871653301747079))
Generation_ 12 Best fitness: 0.15000000000000002 Best tree: (cos(x[0]) / (x[1] /

In [13]:
problem = np.load(f'../data/problem_6.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_6: {best_tree} Best fitness_6: {best_fitness}")

shape: 2


  return np.tan(left)
  return np.cos(left)
  return left - right
  return 1/(1 + np.exp(-left))
  return np.sin(left)
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  mse_val = np.mean((y_val - y_predictions_val) ** 2)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  return left * right


Generation_ 0 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])


  return left / right


Generation_ 1 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 2 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 3 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 4 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 5 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 6 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 7 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 8 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 9 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 10 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1])
Generation_ 11 Best fitness: 5.8683541086138105 Best tree: (cos(exp(sigmoid(x[0]))) + x[1

In [14]:
problem = np.load(f'../data/problem_7.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_7: {best_tree} Best fitness_7: {best_fitness}")

shape: 2


  return 1/(1 + np.exp(-left))
  return np.sin(left)
  return np.tan(left)
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  return np.exp(left) if np.all(left < 1000) else float('inf')
  return np.cos(left)
  return left / right
  return left * right
  mse_val = np.mean((y_val - y_predictions_val) ** 2)


Generation_ 0 Best fitness: 761.3717313874813 Best tree: (inv(sin(sigmoid(x[0]))) + x[1])
Generation_ 1 Best fitness: 757.8892236499787 Best tree: exp(exp(cos(sin(x[0]))))


  return left + right


Generation_ 2 Best fitness: 757.8892236499787 Best tree: exp(exp(cos(sin(x[0]))))
Generation_ 3 Best fitness: 757.8892236499787 Best tree: exp(exp(cos(sin(x[0]))))


  return left - right


Generation_ 4 Best fitness: 757.8892236499787 Best tree: exp(exp(cos(sin(x[0]))))
Generation_ 5 Best fitness: 663.2022925081708 Best tree: inv(sigmoid((x[0] + x[1])))
Generation_ 6 Best fitness: 663.2022925081708 Best tree: inv(sigmoid((x[0] + x[1])))
Generation_ 7 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 8 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 9 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 10 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 11 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 12 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 13 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 14 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 15 Best fitness: 626.076951791434 Best tree: exp((x[0] * x[1]))
Generation_ 16 Best fitness: 626.076951791434 Best tree: exp((x[0] 

In [9]:
problem = np.load(f'../data/problem_8.npz')
x_train = problem['x']
y_train = problem['y']
ga = Genetic_Programming(300, 20)
shape = x_train.shape[0]
print('shape:', shape)
best_tree, best_fitness = ga.evolve(shape, x_train, y_train)
print(f"Best tree_8: {best_tree} Best fitness_8: {best_fitness}")

shape: 6


  return np.exp(left) if np.all(left < 1000) else float('inf')
  mse_train = np.mean((y_train_splitted - y_predictions_train) ** 2)
  mse_val = np.mean((y_val - y_predictions_val) ** 2)
  return 1/(1 + np.exp(-left))
  return np.tan(left)
  return np.sin(left)


Generation_ 0 Best fitness: 22808361.13566367 Best tree: (((exp(x[5]) - (sigmoid(x[0]) + x[3])) + (tan(sigmoid(x[1])) + x[2])) + x[4])
Generation_ 1 Best fitness: 22808361.13566367 Best tree: (((exp(x[5]) - (sigmoid(x[0]) + x[3])) + (tan(sigmoid(x[1])) + x[2])) + x[4])
Generation_ 2 Best fitness: 22806813.515682444 Best tree: (((((exp(x[5]) + x[0]) + x[3]) + x[1]) + x[2]) + x[4])
Generation_ 3 Best fitness: 22806813.515682444 Best tree: (((((exp(x[5]) + x[0]) + x[3]) + x[1]) + x[2]) + x[4])
Generation_ 4 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 5 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 6 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 7 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 8 Best fitness: 2279

  return np.cos(left)


Generation_ 14 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 15 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 16 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 17 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 18 Best fitness: 22792042.371347327 Best tree: ((((exp(x[5]) - (exp(x[0]) + x[3])) + x[1]) + x[2]) + x[4])
Generation_ 19 Best fitness: 21321683.329181872 Best tree: (((exp(x[5]) * (exp(x[0]) + x[3])) - (tan(inv(x[1])) + x[2])) + x[4])
Best tree_8: (((exp(x[5]) * (exp(x[0]) + x[3])) - (tan(inv(x[1])) + x[2])) + x[4]) Best fitness_8: 21321683.329181872
