In [3]:
import numpy as np
from src.utils import *
from tqdm import tqdm

unary_operators=[np.sin, np.cos, np.exp, np.abs, np.log, np.tan]
binary_operators=[np.add, np.subtract, np.multiply, np.divide]


operators = unary_operators + binary_operators

In [4]:
problem = np.load("data/problem_0.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)

x.shape: (2, 1000)
y.shape: (1000,)


In [5]:
x_train,y_train,x_val,y_val = split_dataset(x,y,0.8)
print(x_train.shape)
print(x_val.shape)
print(y_train.shape)
print(y_val.shape)

(2, 800)
(2, 200)
(800,)
(200,)


In [6]:
# my_node = Node(value=np.add, left=Node(value=None, feature_index=0), right=Node(value=np.multiply, left=Node(value=0.2), right=Node(value=None, feature_index=1)))
# my_inv = Individual(genome=my_node)
# assign_population_fitness_train([my_inv], train_x, train_y)
# print(my_inv.fitness)
# assign_population_fitness_val([my_inv], val_x, val_y)
# print(my_inv.fitness_val)

In [7]:
def evolve(train_x,train_y,val_x,val_y):

    NUM_POPULATION = 100
    NUM_GENERATIONS = 600
    MAX_DEPTH_INITIAL = 5
    DEDUPE_INTERVAL = 15
    ELITISM = False
    ELITE_COUNT = 3
    TOURNAMENT_SUBSET_SIZE = 3
    TOURNAMENT_WINNER_SIZE = 20
    XOVER = 0.8
    MUTATION = 0.8
    BREED_NEW = 15
    KILL_AGE = 16
    MAX_COMPLEXITY = 100
    CONSTANT_FIT_INTERVAL = 30
    CONSTANT_FIT_ITERATION = 6

    print("Population is creating with a size: ", NUM_POPULATION)
    my_population = create_population(NUM_POPULATION,MAX_DEPTH_INITIAL,train_x.shape[0])
    print("Population has created.")
    print("Individual fitness values are assigning")
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    print(f" Population size: {len(my_population)}")
    print(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
    print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")

    # Sort the population by fitness (ascending), then take the top 5
    top_5_individuals = top_n_individuals(my_population, 5)

    # Print the top 5 individuals with the minimum fitness
    for i, individual in enumerate(top_5_individuals, 1):
        print(f"Top {i}: Fitness = {individual.fitness:.4f} Fitness_val = {individual.fitness_val:.4f} Genome = {individual.genome}")
    #----------------------------------------------------------------------------
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    best_result = top_n_individuals(my_population, 1)[0]
    for generation in tqdm(range(NUM_GENERATIONS)):
        num_xover=0
        num_mut=0
        num_succesfull_mut=0
        age_population(my_population)
        

        # Kill eldest individuals
        kill_eldest(my_population, KILL_AGE)
        # print("Population size before killing constants: ", len(my_population))
        
        # print("Population size after killing constants: ", len(my_population))


        best_ind = tournament_selection(my_population,TOURNAMENT_SUBSET_SIZE,TOURNAMENT_WINNER_SIZE,ELITISM=ELITISM, elite_count=ELITE_COUNT)

        
        for _ in range(BREED_NEW): # CHECK
            if random.random() < XOVER:
                # Xover the best individual and replace worst individuals
                parent1 = my_population[best_ind[np.random.choice(len(best_ind))]]
                parent2 = my_population[best_ind[np.random.choice(len(best_ind))]]
                child1, child2 = crossover(parent1,parent2)
                my_population.append(child1)
                my_population.append(child2)
                num_xover+=1

            if random.random() < MUTATION:
                # Mutate best individuals and replace worst individuals
                parent = my_population[best_ind[np.random.choice(len(best_ind))]]
                
                child, success = mutation_w_sa(parent, x.shape[0], train_x, train_y)
                num_mut+=1
                if success:                
                    my_population.append(child)
                    num_succesfull_mut+=1

        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)

        # print(f"xover: {num_xover}, mut: {num_mut}, succesfull mut: {num_succesfull_mut}")
        simplify_constant_population(my_population)
        # Update elites age as 0
        elites = top_n_individuals(my_population, ELITE_COUNT)
        if best_result.fitness_val > elites[0].fitness_val:
            best_result = elites[0]
            print(f"New best result found: {best_result}")
        
        for elite in elites:
            my_population.remove(elite)
            elite.age = 0
            my_population.append(elite)

        # Deduplicate every few iterations
        if generation % DEDUPE_INTERVAL == DEDUPE_INTERVAL-1:
            init_population = len(my_population)
            my_population = deduplicate_population(my_population)
            dedup_population = len(my_population)
            simplify_operation_population(my_population)
            simplified_operations = len(my_population)
            kill_constant(my_population)
            no_constant = len(my_population)
            kill_complex(my_population, MAX_COMPLEXITY)
            no_complex = len(my_population)

            print(f"Initial: {init_population}, Deduplicated: {init_population-dedup_population}, Operation simplified: {dedup_population-simplified_operations}, Constant: {simplified_operations-no_constant}, Complex: {no_constant - no_complex}, Final: {no_complex}")
            print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
            print(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
            print()

            elites = top_n_individuals(my_population, ELITE_COUNT)
            for elite in elites:
                my_population.remove(elite)
                elite.age = 0
                my_population.append(elite)
                
            best_5 = top_n_individuals(my_population, 5)
            for ind in range(len(best_5)):
                print(f"{ind}. Fitness: {best_5[ind].fitness:.4f}, Fitness_val = {best_5[ind].fitness_val:.4f}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}, Complexity: {best_5[ind].genome.complexity}")
                
        #     # Fit constants of every individual
        if generation % CONSTANT_FIT_INTERVAL == CONSTANT_FIT_INTERVAL-1:
            print(f"Generation {generation}: Mean fitness {calculate_mean_fitness(my_population)}")
            for ind in range(len(my_population)):
                stronger_inv = fit_constants(my_population[ind],CONSTANT_FIT_ITERATION, train_x, train_y)
                my_population[ind] = stronger_inv
            assign_population_fitness_val(my_population,val_x,val_y)
            print(f"Population mean fitness after constant fit: {calculate_mean_fitness(my_population)}")
            best_5 = top_n_individuals(my_population, 5)
            for ind in range(len(best_5)):
                print(f"{ind}. Fitness: {best_5[ind].fitness:.4f}, Fitness_val = {best_5[ind].fitness_val:.4f}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}, Complexity: {best_5[ind].genome.complexity}")
        

    my_population = deduplicate_population(my_population)
    #-------------------------------------------------------------------------------------------------------
    assign_population_fitness_train(my_population,train_x,train_y)
    for i in my_population:
        if i.fitness == None:
            print('Cannot assign fitness')
            return my_population
    assign_population_fitness_val(my_population,val_x,val_y)
    for i in my_population:
        if i.fitness_val == None:
            print('Cannot assign fitness_val')
            return my_population
    # Sort the population by fitness (ascending), then take the top 5
    top_5_individuals = top_n_individuals(my_population, 15)

    # Print the best individual
    print(f"Best individual: Fitness = {best_result.fitness}: Genome = {best_result.genome}")
    # Print the top 5 individuals with the minimum fitness in population
    for i, individual in enumerate(top_5_individuals, 1):
        print(f"Top {i}: Fitness = {individual.fitness} Fitness_val = {individual.fitness_val} Genome = {individual.genome}")

    # Print the mean fitness of the population
    print("Mean fitness of the population: ", calculate_mean_fitness(my_population))
    print("Population size: ", len(my_population))
    print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
    
    return my_population

In [None]:

problem = np.load("data/problem_0.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
my_pop_0 = evolve(x_train,y_train,x_val,y_val)


In [8]:

problem = np.load("data/problem_1.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
my_pop_1 = evolve(x_train,y_train,x_val,y_val)


x.shape: (1, 500)
y.shape: (500,)
x_train.shape:  (2, 800)
x_val.shape:  (2, 200)
y_train.shape:  (800,)
y_val.shape:  (200,)
Population is creating with a size:  100
Population has created.
Individual fitness values are assigning
 Population size: 69
Population mean fitness: 9.757507581658523e+256
Population mean complexity: 244.85507246376812
Top 1: Fitness = 0.0000 Fitness_val = 0.0000 Genome = sin(x[0])
Top 2: Fitness = 0.0548 Fitness_val = 0.0502 Genome = tan(x[0])
Top 3: Fitness = 0.2550 Fitness_val = 0.2518 Genome = (x[0] - x[0])
Top 4: Fitness = 0.2550 Fitness_val = 0.2523 Genome = ((sin(abs((x[0] - [0.90868873]))) * tan((([-1.93671622] / x[0]) - cos([1.66923625])))) * abs(log(cos(log([0.97654505])))))
Top 5: Fitness = 0.2708 Fitness_val = 0.2729 Genome = abs([0.1043224])


  2%|▎         | 15/600 [00:08<03:59,  2.44it/s]

Initial: 517, Deduplicated: 328, Operation simplified: 0, Constant: 103, Complex: 24, Final: 62
Population mean complexity: 26.677419354838708
Population mean fitness: 253.869982974211

0. Fitness: 0.0000, Fitness_val = 0.0000, Genome: sin(x[0]), Age: 0, Complexity: 8
1. Fitness: 0.0016, Fitness_val = 0.0015, Genome: sin(sin(x[0])), Age: 0, Complexity: 36
2. Fitness: 0.0033, Fitness_val = 0.0031, Genome: x[0], Age: 0, Complexity: 1
3. Fitness: 0.0548, Fitness_val = 0.0502, Genome: tan(x[0]), Age: 15, Complexity: 8
4. Fitness: 0.0605, Fitness_val = 0.0573, Genome: (sin(x[0]) + log(cos(x[0]))), Age: 11, Complexity: 54


  5%|▍         | 29/600 [00:15<03:12,  2.96it/s]

Initial: 441, Deduplicated: 303, Operation simplified: 0, Constant: 71, Complex: 5, Final: 62
Population mean complexity: 28.5
Population mean fitness: 30.7245468536767

0. Fitness: 0.0000, Fitness_val = 0.0000, Genome: sin(x[0]), Age: 0, Complexity: 8
1. Fitness: 0.0016, Fitness_val = 0.0015, Genome: sin(sin(x[0])), Age: 0, Complexity: 36
2. Fitness: 0.0033, Fitness_val = 0.0031, Genome: x[0], Age: 0, Complexity: 1
3. Fitness: 0.0699, Fitness_val = 0.0715, Genome: sin((sin(x[0]) + x[0])), Age: 13, Complexity: 44
4. Fitness: 0.0718, Fitness_val = 0.0737, Genome: sin((x[0] + x[0])), Age: 8, Complexity: 16
Generation 29: Mean fitness 30.724546853676703


  5%|▍         | 29/600 [00:27<08:55,  1.07it/s]


KeyboardInterrupt: 

In [None]:
problem = np.load("data/problem_2.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
print("x_train.shape: ", x_train.shape)
print("x_val.shape: ", x_val.shape)
print("y_train.shape: ", y_train.shape)
print("y_val.shape: ", y_val.shape)
x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
my_pop_2 = evolve(x_train,y_train,x_val,y_val)

x.shape: (3, 5000)
y.shape: (5000,)
x_train.shape:  (1, 400)
x_val.shape:  (1, 100)
y_train.shape:  (400,)
y_val.shape:  (100,)
Population is creating with a size:  100
Population has created.
Individual fitness values are assigning
 Population size: 66
Population mean fitness: 29524810111923.305
Population mean complexity: 162.83333333333334
Top 1: Fitness = 29524701472459.5430 Fitness_val = 29985551587121.1367 Genome = exp(x[0])
Top 2: Fitness = 29524783331679.5039 Fitness_val = 29985631766254.0078 Genome = exp(x[1])
Top 3: Fitness = 29524780499453.8164 Fitness_val = 29985634520632.1758 Genome = exp(x[2])
Top 4: Fitness = 29524823299209.3594 Fitness_val = 29985648160356.5430 Genome = tan((exp(cos(cos(x[1]))) / (sin(cos(x[0])) + ((x[1] / x[2]) * cos(x[2])))))
Top 5: Fitness = 29524790345413.5977 Fitness_val = 29985654002376.6484 Genome = (x[0] + x[2])


  0%|          | 3/600 [00:43<2:22:07, 14.28s/it]

New best result found: Individual(genome=(([-1.61456221] + exp((log([0.38027449]) - ([1.0602722] * x[2])))) / exp(x[2])), fitness=np.float64(29524789830885.04), fitness_val=np.float64(29985488721454.3), age=0, T=1)


  2%|▏         | 12/600 [01:46<57:36,  5.88s/it] 

New best result found: Individual(genome=(([-1.61456221] + exp(([0.61712626] - ([1.0602722] * x[2])))) / exp(x[2])), fitness=np.float64(29524754803028.957), fitness_val=np.float64(29984779310023.16), age=0, T=1)


  2%|▏         | 13/600 [01:52<57:26,  5.87s/it]

New best result found: Individual(genome=(x[2] + exp((x[0] + x[0]))), fitness=np.float64(29516861425659.258), fitness_val=np.float64(29975970146091.664), age=0, T=1)


  2%|▎         | 15/600 [01:56<39:34,  4.06s/it]

New best result found: Individual(genome=(x[0] + exp((x[0] + x[0]))), fitness=np.float64(29516847225458.496), fitness_val=np.float64(29975955661453.027), age=0, T=1)
Initial: 588, Deduplicated: 330, Operation simplified: 0, Constant: 116, Complex: 40, Final: 102
Population mean complexity: 18.441176470588236
Population mean fitness: 7.355822100420346e+127

0. Fitness: 29516847225458.4961, Fitness_val = 29975955661453.0273, Genome: (x[0] + exp((x[0] + x[0]))), Age: 0, Complexity: 22
1. Fitness: 29516861425659.2578, Fitness_val = 29975970146091.6641, Genome: (x[2] + exp((x[0] + x[0]))), Age: 0, Complexity: 22
2. Fitness: 29516865959254.4883, Fitness_val = 29975974610291.0469, Genome: exp((x[0] + x[0])), Age: 0, Complexity: 20
3. Fitness: 29524754803028.9570, Fitness_val = 29984779310023.1602, Genome: (([-1.61456221] + exp(([0.61712626] - ([1.0602722] * x[2])))) / exp(x[2])), Age: 1, Complexity: 43
4. Fitness: 29524416932985.1523, Fitness_val = 29984911673668.9844, Genome: (x[2] + exp((x[

  3%|▎         | 16/600 [02:02<45:22,  4.66s/it]

New best result found: Individual(genome=exp((x[0] + (x[0] + x[0]))), fitness=np.float64(28917327312487.477), fitness_val=np.float64(29286225920890.188), age=0, T=1)


  3%|▎         | 18/600 [02:19<1:04:47,  6.68s/it]

New best result found: Individual(genome=(exp((x[0] + (x[0] + x[0]))) + x[0]), fitness=np.float64(28917309464735.152), fitness_val=np.float64(29286208382535.098), age=0, T=1)


  4%|▍         | 26/600 [03:09<58:28,  6.11s/it]  

New best result found: Individual(genome=(exp((x[0] + x[0])) + exp((x[0] + (x[0] + x[0])))), fitness=np.float64(28911820829185.07), fitness_val=np.float64(29280966805946.332), age=0, T=1)


  4%|▍         | 27/600 [03:16<1:00:14,  6.31s/it]

In [None]:
try:
    problem = np.load("data/problem_3.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_3 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_4.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_4 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_5.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_5 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_6.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_6 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_7.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_7 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_8.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_8 = evolve(x,y)
except: 
    print("ERROR")