In [1]:
import numpy as np
from src.utils import *
from tqdm import tqdm

unary_operators=[np.sin, np.cos, np.exp, np.abs, np.log, np.tan]
binary_operators=[np.add, np.subtract, np.multiply, np.divide]


operators = unary_operators + binary_operators

In [2]:
problem = np.load("data/problem_0.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)

x.shape: (2, 1000)
y.shape: (1000,)


In [3]:
x.shape[1]

1000

In [4]:
train_split_ratio = 0.8
train_len = int(x.shape[1]*train_split_ratio)
train_x = x[:,:train_len]
val_x = x[:,train_len:]
train_y = y[:train_len]
val_y = y[train_len:]
print(train_x.shape)
print(val_x.shape)
print(train_y.shape)
print(val_y.shape)

(2, 800)
(2, 200)
(800,)
(200,)


In [5]:
my_node = Node(value=np.add, left=Node(value=None, feature_index=0), right=Node(value=np.multiply, left=Node(value=0.2), right=Node(value=None, feature_index=1)))
my_inv = Individual(genome=my_node)
assign_population_fitness_train([my_inv], train_x, train_y)
print(my_inv.fitness)
assign_population_fitness_val([my_inv], val_x, val_y)
print(my_inv.fitness_val)

0.00014421615402757763
0.00013792776500560924


In [6]:
def evolve(train_x,train_y,val_x,val_y):
    try:
        NUM_POPULATION = 100
        NUM_GENERATIONS = 600
        MAX_DEPTH_INITIAL = 5
        DEDUPE_INTERVAL = 15
        ELITISM = False
        ELITE_COUNT = 3
        TOURNAMENT_SUBSET_SIZE = 3
        TOURNAMENT_WINNER_SIZE = 20
        XOVER = 0.8
        MUTATION = 0.8
        BREED_NEW = 15
        KILL_AGE = 16
        CONSTANT_FIT_INTERVAL = 30
        CONSTANT_FIT_ITERATION = 6

        print("Population is creating with a size: ", NUM_POPULATION)
        my_population = create_population(NUM_POPULATION,MAX_DEPTH_INITIAL,train_x.shape[0])
        print("Population has created.")
        print("Individual fitness values are assigning")
        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)
        print(f" Population size: {len(my_population)}")
        print(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
        print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")

        # Sort the population by fitness (ascending), then take the top 5
        top_5_individuals = top_n_individuals(my_population, 5)

        # Print the top 5 individuals with the minimum fitness
        for i, individual in enumerate(top_5_individuals, 1):
            print(f"Top {i}: Fitness = {individual.fitness} Fitness_val = {individual.fitness_val} Genome = {individual.genome}")
        #----------------------------------------------------------------------------
        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)
        best_result = top_n_individuals(my_population, 1)[0]
        for generation in tqdm(range(NUM_GENERATIONS)):
            num_xover=0
            num_mut=0
            num_succesfull_mut=0
            age_population(my_population)
            

            # Kill eldest individuals
            kill_eldest(my_population, KILL_AGE)
            # print("Population size before killing constants: ", len(my_population))
            
            # print("Population size after killing constants: ", len(my_population))


            best_ind = tournament_selection(my_population,TOURNAMENT_SUBSET_SIZE,TOURNAMENT_WINNER_SIZE,ELITISM=ELITISM, elite_count=ELITE_COUNT)

            
            for _ in range(BREED_NEW): # CHECK
                if random.random() < XOVER:
                    # Xover the best individual and replace worst individuals
                    parent1 = my_population[best_ind[np.random.choice(len(best_ind))]]
                    parent2 = my_population[best_ind[np.random.choice(len(best_ind))]]
                    child1, child2 = crossover(parent1,parent2)
                    my_population.append(child1)
                    my_population.append(child2)
                    num_xover+=1

                if random.random() < MUTATION:
                    # Mutate best individuals and replace worst individuals
                    parent = my_population[best_ind[np.random.choice(len(best_ind))]]
                    
                    child, success = mutation_w_sa(parent, x.shape[0], train_x, train_y)
                    num_mut+=1
                    if success:                
                        my_population.append(child)
                        num_succesfull_mut+=1

            assign_population_fitness_train(my_population,train_x,train_y)
            assign_population_fitness_val(my_population,val_x,val_y)

            # print(f"xover: {num_xover}, mut: {num_mut}, succesfull mut: {num_succesfull_mut}")
            simplify_constant_population(my_population)
            # Update elites age as 0
            elites = top_n_individuals(my_population, ELITE_COUNT)
            if best_result.fitness > elites[0].fitness:
                best_result = elites[0]
                print(f"New best result found: {best_result}")
            
            for elite in elites:
                my_population.remove(elite)
                elite.age = 0
                my_population.append(elite)

            # Deduplicate every few iterations
            if generation % DEDUPE_INTERVAL == DEDUPE_INTERVAL-1:
                init_population = len(my_population)
                my_population = deduplicate_population(my_population)
                dedup_population = len(my_population)
                simplify_operation_population(my_population)
                simplified_operations = len(my_population)
                kill_constant(my_population)
                no_constant = len(my_population)

                print(f"Initial: {init_population}, Deduplicated: {init_population-dedup_population}, Operation simplified: {dedup_population-simplified_operations}, Constant: {simplified_operations-no_constant}, Final: {no_constant}")
                print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
                print(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
                print()
                best_5 = top_n_individuals(my_population, 5)
                for ind in range(len(best_5)):
                    print(f"{ind}. Fitness: {best_5[ind].fitness:.3f}, Fitness_val = {best_5[ind].fitness_val}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}")
                    
            #     # Fit constants of every individual
            if generation % CONSTANT_FIT_INTERVAL == CONSTANT_FIT_INTERVAL-1:
                print(f"Generation {generation}: Mean fitness {calculate_mean_fitness(my_population)}")
                for ind in range(len(my_population)):
                    stronger_inv = fit_constants(my_population[ind],CONSTANT_FIT_ITERATION, train_x, train_y)
                    my_population[ind] = stronger_inv
                print(f"Population mean fitness after constant fit: {calculate_mean_fitness(my_population)}")
                best_5 = top_n_individuals(my_population, 5)
                for ind in range(len(best_5)):
                    print(f"{ind}. Fitness: {best_5[ind].fitness:.3f}, Fitness_val = {best_5[ind].fitness_val}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}")
            

        my_population = deduplicate_population(my_population)
        #-------------------------------------------------------------------------------------------------------
        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)
        # Sort the population by fitness (ascending), then take the top 5
        top_5_individuals = top_n_individuals(my_population, 15)

        # Print the best individual
        print(f"Best individual: Fitness = {best_result.fitness}: Genome = {best_result.genome}")
        # Print the top 5 individuals with the minimum fitness in population
        for i, individual in enumerate(top_5_individuals, 1):
            print(f"Top {i}: Fitness = {individual.fitness} Fitness_val = {individual.fitness_val} Genome = {individual.genome}")

        # Print the mean fitness of the population
        print("Mean fitness of the population: ", calculate_mean_fitness(my_population))
        print("Population size: ", len(my_population))
        print(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
        
        return my_population
    except:
        print("ERROR")
        return my_population

In [7]:

problem = np.load("data/problem_0.npz")
x = problem["x"]
y = problem["y"]
print("x.shape:", x.shape)
print("y.shape:", y.shape)
my_pop_0 = evolve(train_x,train_y,val_x,val_y)


x.shape: (2, 1000)
y.shape: (1000,)
Population is creating with a size:  100
Population has created.
Individual fitness values are assigning
 Population size: 73
Population mean fitness: 1667.2640722921383
Population mean complexity: 166.34246575342465
Top 1: Fitness = 1.8754871708160181 Fitness_val = 1.9533305325663053 Genome = sin(x[0])
Top 2: Fitness = 1.8754871708160181 Fitness_val = 1.9533305325663053 Genome = sin(x[0])
Top 3: Fitness = 1.8754871708160181 Fitness_val = 1.9533305325663053 Genome = sin(x[0])
Top 4: Fitness = 3.3940767455582725 Fitness_val = 3.4620375559556074 Genome = abs([0.19357373])
Top 5: Fitness = 3.391886585581424 Fitness_val = 3.4626623716471405 Genome = log([1.20509943])


  0%|          | 1/600 [00:01<15:42,  1.57s/it]

New best result found: Individual(genome=x[0], fitness=np.float64(0.010763171848597946), fitness_val=np.float64(0.010325632633118373), age=0, T=1)


  2%|▎         | 15/600 [00:14<05:43,  1.70it/s]

Initial: 601, Deduplicated: 430, Operation simplified: 0, Constant: 100, Final: 71
Population mean complexity: 179.43661971830986
Population mean fitness: 39687.806029948246

0. Fitness: 0.011, Genome: x[0], Age: 0
1. Fitness: 0.172, Genome: (x[0] + sin(x[1])), Age: 12
2. Fitness: 0.512, Genome: (x[0] + sin(x[0])), Age: 10
3. Fitness: 1.875, Genome: sin(x[0]), Age: 0
4. Fitness: 2.026, Genome: (x[1] + sin(x[0])), Age: 12


  5%|▍         | 29/600 [00:28<06:37,  1.43it/s]

Initial: 531, Deduplicated: 389, Operation simplified: 0, Constant: 79, Final: 63
Population mean complexity: 98.4920634920635
Population mean fitness: 3.8098861168745377e+64

0. Fitness: 0.011, Genome: x[0], Age: 0
1. Fitness: 0.172, Genome: (x[0] + sin(x[1])), Age: 11
2. Fitness: 0.221, Genome: (x[0] + x[1]), Age: 10
3. Fitness: 0.512, Genome: (x[0] + sin(x[0])), Age: 1
4. Fitness: 0.646, Genome: ((x[0] + sin(x[1])) + sin(x[0])), Age: 11
Generation 29: Mean fitness 3.8098861168745377e+64


  5%|▍         | 29/600 [00:53<17:24,  1.83s/it]

ERROR





In [8]:
broken_inv = []
for i in my_pop_0:
    if i.fitness_val == None:
        broken_inv.append(i)
        print(i)

Individual(genome=([0.41403854] + tan(sin(([0.44292473] + x[1])))), fitness=np.float64(4.509277060551148), fitness_val=None, age=3, T=0.9025)
Individual(genome=([-0.71868489] * x[0]), fitness=np.float64(5.10356660636412), fitness_val=None, age=3, T=0.9025)
Individual(genome=([-1.20236102] * x[0]), fitness=np.float64(8.220756233402055), fitness_val=None, age=7, T=0.95)
Individual(genome=((tan((exp(x[0]) * [-1.18644836])) - sin(sin(exp(x[0])))) - [0.618507]), fitness=np.float64(369.639651358653), fitness_val=None, age=7, T=0.95)
Individual(genome=(x[0] * ([0.02163507] + ((exp(sin(x[0])) / [1.52493302]) + ([0.67748022] - (([1.02319925] / x[0]) + tan(x[1])))))), fitness=np.float64(13.25879418259408), fitness_val=None, age=7, T=0.95)
Individual(genome=([-1.35484562] * x[0]), fitness=np.float64(9.528804412833379), fitness_val=None, age=1, T=0.95)
Individual(genome=((tan((exp(x[0]) * cos(x[1]))) - [-0.66312093]) - [1.10564705]), fitness=np.float64(2523.258301909403), fitness_val=None, age=7, 

In [None]:
try:
    problem = np.load("data/problem_1.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_1 = evolve(x,y)
except:
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_2.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_2 = evolve(x,y)
except:
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_3.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_3 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_4.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_4 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_5.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_5 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_6.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_6 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_7.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_7 = evolve(x,y)
except: 
    print("ERROR")

In [None]:
try:
    problem = np.load("data/problem_8.npz")
    x = problem["x"]
    y = problem["y"]
    print("x.shape:", x.shape)
    print("y.shape:", y.shape)
    my_pop_8 = evolve(x,y)
except: 
    print("ERROR")